Basic regression in pytorch

import pandas as pd
import torch
import statsmodels.formula.api as smf
import statsmodels as sm
import seaborn as sns
import matplotlib.pyplot as plt

## Read in the data and display a few rows
dat = pd.read_csv("https://raw.githubusercontent.com/bcaffo/ds4bme_intro/master/data/oasis.csv")
dat.head(4)
sns.scatterplot(x = dat['T2'], y = dat['PD'])
[Scatterplot of PD versus T2]
fit = smf.ols('PD ~ T2', data = dat).fit()
fit.summary()
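For later comparison with the pytorch fit, the estimated intercept and slope can be read off the fitted statsmodels object; a small added sketch using fit.params (not part of the original notebook):

## The fitted OLS intercept and slope
print(fit.params['Intercept'], fit.params['T2'])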
## The in-sample predictions from the OLS fit
yhat = fit.predict(dat['T2'])

## Sanity check that the prediction includes the intercept
#test = 0.3138 + dat['T2'] * 0.7832
#sns.scatterplot(x = yhat, y = test)

## A plot of the in sample predicted values
## versus the actual outcomes
sns.scatterplot(x = yhat, y = dat['PD'])
plt.plot([-1, 3], [-1, 3], linewidth=2)
[Scatterplot of OLS in-sample predictions versus observed PD, with the identity line]
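As a baseline for the pytorch fit below, the in-sample mean squared error of the OLS fit can be computed directly (an added check, not in the original notebook):

## In-sample mean squared error of the OLS fit
ols_mse = ((dat['PD'] - yhat) ** 2).mean()
print(ols_mse)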
n = dat.shape[0]

## Get the y and x vectors from the pandas dataframe as torch tensors
xtraining = torch.from_numpy(dat['T2'].values)
ytraining = torch.from_numpy(dat['PD'].values)

## pytorch models default to 32 bit floats, so convert
xtraining = xtraining.float()
ytraining = ytraining.float()

## The tensors have shape (n,), but pytorch wants n x 1,
## so unsqueeze adds the second dimension
xtraining = xtraining.unsqueeze(1)
ytraining = ytraining.unsqueeze(1)

## Show that everything is the right size
[xtraining.shape, 
 ytraining.shape,
 [n, 1]
 ]

[torch.Size([100, 1]), torch.Size([100, 1]), [100, 1]]
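The conversion above can also be done in one step; a minimal sketch of an equivalent alternative (not part of the original notebook):

## One-step alternative: build float tensors of shape (n, 1) directly
xtraining_alt = torch.tensor(dat['T2'].values, dtype=torch.float32).reshape(-1, 1)
ytraining_alt = torch.tensor(dat['PD'].values, dtype=torch.float32).reshape(-1, 1)
print(torch.equal(xtraining_alt, xtraining), torch.equal(ytraining_alt, ytraining))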
## Show that linear regression is a pytorch neural network:
## a single Linear(1, 1) layer computing weight * x + bias
model = torch.nn.Sequential(
    torch.nn.Linear(1, 1)
)

## MSE is the loss function; reduction='sum' adds up the squared
## errors over observations rather than averaging them
loss_fn = torch.nn.MSELoss(reduction='sum')

## Set the optimizer
## AdamW is a widely used general purpose optimizer and a reasonable default here
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-2)

## Loop over iterations
for t in range(10000):

    ## Forward propagation
    y_pred = model(xtraining)

    ## The loss for this iteration
    loss = loss_fn(y_pred, ytraining)

    #print(t, loss.item() / n)

    ## Zero out the gradients before accumulating new ones
    optimizer.zero_grad()

    ## Backpropagation
    loss.backward()

    ## Optimization step
    optimizer.step()
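Since the loss sums the squared errors, dividing by n gives the in-sample mean squared error after training; it should be close to the OLS value computed earlier (an added check, not in the original notebook):

## Average squared error after training
print(loss.item() / n)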

## In-sample predictions from the pytorch model plotted
## against the OLS predictions
yhat_torch = model(xtraining).detach().numpy().reshape(-1)
sns.scatterplot(x = yhat_torch, y = yhat)
plt.plot([-1, 3], [-1, 3], linewidth=2);
[Scatterplot of pytorch predictions versus OLS predictions, with the identity line]
## Print the fitted weight (slope) and bias (intercept)
for param in model.parameters():
    print(param.data)
tensor([[0.7831]])
tensor([0.3138])
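The learned weight and bias match the OLS slope and intercept from the statsmodels fit (roughly 0.7832 and 0.3138). A short added sketch of the side by side comparison (not part of the original notebook):

## Compare the pytorch parameters to the OLS coefficients
print(model[0].weight.item(), fit.params['T2'])
print(model[0].bias.item(), fit.params['Intercept'])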