Classifier for the MNIST dataset using PyTorch

This is a "Hello world!" example with PyTorch.
It trains a convolutional neural network on the MNIST dataset.

Credits:
MNIST dataset, see http://yann.lecun.com/exdb/mnist/
Code adapted from the documentation of the PyTorch project

In [1]:

# Display the version of the installed PyTorch package
import torch
torch.__version__

Out[1]:

'2.0.0a0'

In [2]:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

In [3]:

# Define the model: a convolutional neural network (CNN) made of two
# convolutional layers followed by two fully connected layers

class Net(nn.Module):
    """Convolutional classifier that returns log-probabilities over 10 classes.

    Architecture: two 3x3 convolutions (1 -> 32 -> 64 channels, stride 1),
    a 2x2 max-pool, dropout, then two fully connected layers
    (9216 -> 128 -> 10) with dropout in between.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32,
                               kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64,
                               kernel_size=3, stride=1)
        # Dropout layers; only active while the module is in training mode.
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)
        # Classifier head. 9216 = 64 channels * 12 * 12, which is the
        # flattened feature size obtained from a 28x28 input image.
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Map a batch of single-channel images to per-class log-probabilities."""
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))
        features = self.dropout1(F.max_pool2d(features, 2))
        # Keep the batch dimension, flatten everything else for the linear layers.
        flat = torch.flatten(features, 1)
        hidden = self.dropout2(F.relu(self.fc1(flat)))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [4]:

# The code to run the training step

def train(model, device, train_loader, optimizer, epoch, log_interval=100):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Module whose output is passed to ``F.nll_loss`` (i.e. it is
            expected to return per-class log-probabilities).
        device: Device the input and target batches are moved to.
        train_loader: Iterable of ``(data, target)`` batches that also
            exposes ``len()`` and a ``dataset`` attribute.
        optimizer: Optimizer updating the parameters of ``model``.
        epoch: Epoch number; only used in the progress message.
        log_interval: Print a progress line every ``log_interval`` batches
            (the first batch is always reported).
    """
    model.train()
    num_samples = len(train_loader.dataset)
    num_batches = len(train_loader)
    # Running count of samples consumed before the current batch. Using a
    # running total (rather than batch_idx * len(data)) keeps the progress
    # figure correct when the last batch is smaller than the others.
    samples_seen = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, samples_seen, num_samples,
                100. * batch_idx / num_batches, loss.item()))
        samples_seen += len(data)

In [5]:

# Code for evaluating the model on the test dataset

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [6]:

# Code for reading the data, training and evaluating the model on test data
def main():
    """Train ``Net`` on MNIST and evaluate it on the test split after each epoch.

    Seeds the PyTorch random number generator, downloads the MNIST training
    data to ``../data`` if needed, builds the data loaders, and runs
    ``epochs`` rounds of ``train`` followed by ``test``, decaying the
    learning rate by ``gamma`` after every epoch.
    """
    torch.manual_seed(1)

    # Use the GPU for training and testing when one is available; otherwise
    # fall back to the CPU instead of failing on machines without CUDA.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    train_kwargs = {'batch_size': 64}
    test_kwargs = {'batch_size': 1000}
    loader_kwargs = {'num_workers': 1,
                     # Pinned host memory is only requested when batches
                     # will actually be copied to a CUDA device.
                     'pin_memory': use_cuda,
                     'shuffle': True}
    train_kwargs.update(loader_kwargs)
    test_kwargs.update(loader_kwargs)

    # Convert images to tensors and normalise them.
    # NOTE(review): 0.1307 / 0.3081 are presumably the mean and standard
    # deviation of the MNIST training pixels - confirm.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])
    dataset1 = datasets.MNIST('../data', train=True, download=True,
                              transform=transform)
    dataset2 = datasets.MNIST('../data', train=False,
                              transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    gamma = 0.7
    epochs = 5

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters())

    # step_size=1 with one scheduler.step() per epoch multiplies the
    # learning rate by gamma after every epoch.
    scheduler = StepLR(optimizer, step_size=1, gamma=gamma)
    for epoch in range(1, epochs + 1):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()

In [7]:

# Run the training and evaluation of the model; progress and test
# metrics are printed by train() and test() for each epoch
main()

Train Epoch: 1 [0/60000 (0%)]  Loss: 2.282550
Train Epoch: 1 [6400/60000 (11%)]   Loss: 0.382226
Train Epoch: 1 [12800/60000 (21%)]  Loss: 0.247412
Train Epoch: 1 [19200/60000 (32%)]  Loss: 0.183478
Train Epoch: 1 [25600/60000 (43%)]  Loss: 0.143933
Train Epoch: 1 [32000/60000 (53%)]  Loss: 0.131150
Train Epoch: 1 [38400/60000 (64%)]  Loss: 0.092372
Train Epoch: 1 [44800/60000 (75%)]  Loss: 0.045216
Train Epoch: 1 [51200/60000 (85%)]  Loss: 0.113212
Train Epoch: 1 [57600/60000 (96%)]  Loss: 0.033559

Test set: Average loss: 0.0477, Accuracy: 9839/10000 (98%)

Train Epoch: 2 [0/60000 (0%)]   Loss: 0.066562
Train Epoch: 2 [6400/60000 (11%)]   Loss: 0.078290
Train Epoch: 2 [12800/60000 (21%)]  Loss: 0.063742
Train Epoch: 2 [19200/60000 (32%)]  Loss: 0.044201
Train Epoch: 2 [25600/60000 (43%)]  Loss: 0.087734
Train Epoch: 2 [32000/60000 (53%)]  Loss: 0.038111
Train Epoch: 2 [38400/60000 (64%)]  Loss: 0.037815
Train Epoch: 2 [44800/60000 (75%)]  Loss: 0.119937
Train Epoch: 2 [51200/60000 (85%)]  Loss: 0.008435
Train Epoch: 2 [57600/60000 (96%)]  Loss: 0.154045

Test set: Average loss: 0.0320, Accuracy: 9893/10000 (99%)

Train Epoch: 3 [0/60000 (0%)]   Loss: 0.055466
Train Epoch: 3 [6400/60000 (11%)]   Loss: 0.018367
Train Epoch: 3 [12800/60000 (21%)]  Loss: 0.036571
Train Epoch: 3 [19200/60000 (32%)]  Loss: 0.026443
Train Epoch: 3 [25600/60000 (43%)]  Loss: 0.024813
Train Epoch: 3 [32000/60000 (53%)]  Loss: 0.021004
Train Epoch: 3 [38400/60000 (64%)]  Loss: 0.025752
Train Epoch: 3 [44800/60000 (75%)]  Loss: 0.016083
Train Epoch: 3 [51200/60000 (85%)]  Loss: 0.061993
Train Epoch: 3 [57600/60000 (96%)]  Loss: 0.015852

Test set: Average loss: 0.0316, Accuracy: 9899/10000 (99%)

Train Epoch: 4 [0/60000 (0%)]   Loss: 0.029524
Train Epoch: 4 [6400/60000 (11%)]   Loss: 0.021736
Train Epoch: 4 [12800/60000 (21%)]  Loss: 0.058835
Train Epoch: 4 [19200/60000 (32%)]  Loss: 0.005212
Train Epoch: 4 [25600/60000 (43%)]  Loss: 0.097908
Train Epoch: 4 [32000/60000 (53%)]  Loss: 0.002269
Train Epoch: 4 [38400/60000 (64%)]  Loss: 0.010071
Train Epoch: 4 [44800/60000 (75%)]  Loss: 0.003673
Train Epoch: 4 [51200/60000 (85%)]  Loss: 0.004167
Train Epoch: 4 [57600/60000 (96%)]  Loss: 0.038350

Test set: Average loss: 0.0291, Accuracy: 9906/10000 (99%)

Train Epoch: 5 [0/60000 (0%)]   Loss: 0.047914
Train Epoch: 5 [6400/60000 (11%)]   Loss: 0.009362
Train Epoch: 5 [12800/60000 (21%)]  Loss: 0.002667
Train Epoch: 5 [19200/60000 (32%)]  Loss: 0.009794
Train Epoch: 5 [25600/60000 (43%)]  Loss: 0.005070
Train Epoch: 5 [32000/60000 (53%)]  Loss: 0.079600
Train Epoch: 5 [38400/60000 (64%)]  Loss: 0.029702
Train Epoch: 5 [44800/60000 (75%)]  Loss: 0.012784
Train Epoch: 5 [51200/60000 (85%)]  Loss: 0.063581
Train Epoch: 5 [57600/60000 (96%)]  Loss: 0.004085

Test set: Average loss: 0.0259, Accuracy: 9925/10000 (99%)

In [ ]: