{ "cells": [ { "cell_type": "markdown", "id": "f3c939e6", "metadata": {}, "source": [ "# Classifier for the MNIST dataset using PyTorch\n", "\n", "This is a \"Hello world!\" example with PyTorch. \n", "It trains a convolutional neural network on the MNIST dataset. \n", "\n", "Credits: \n", "MNIST dataset, see http://yann.lecun.com/exdb/mnist/ \n", "Code adapted from the documentation of the PyTorch project" ] }, { "cell_type": "code", "execution_count": 1, "id": "5302d63d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2.0.0a0'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import torch\n", "torch.__version__" ] }, { "cell_type": "code", "execution_count": 2, "id": "23dce87e", "metadata": {}, "outputs": [], "source": [ "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "import torch.optim as optim\n", "from torchvision import datasets, transforms\n", "from torch.optim.lr_scheduler import StepLR" ] }, { "cell_type": "code", "execution_count": 3, "id": "d623966a", "metadata": {}, "outputs": [], "source": [ "# Define the model, this uses convolutional neural networks\n", "# The model uses convolutional neural network layers\n", "\n", "class Net(nn.Module):\n", " def __init__(self):\n", " super(Net, self).__init__()\n", " self.conv1 = nn.Conv2d(1, 32, 3, 1)\n", " self.conv2 = nn.Conv2d(32, 64, 3, 1)\n", " self.dropout1 = nn.Dropout(0.25)\n", " self.dropout2 = nn.Dropout(0.5)\n", " self.fc1 = nn.Linear(9216, 128)\n", " self.fc2 = nn.Linear(128, 10)\n", "\n", " def forward(self, x):\n", " x = self.conv1(x)\n", " x = F.relu(x)\n", " x = self.conv2(x)\n", " x = F.relu(x)\n", " x = F.max_pool2d(x, 2)\n", " x = self.dropout1(x)\n", " x = torch.flatten(x, 1)\n", " x = self.fc1(x)\n", " x = F.relu(x)\n", " x = self.dropout2(x)\n", " x = self.fc2(x)\n", " output = F.log_softmax(x, dim=1)\n", " return output\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "d58c11fd", "metadata": {}, "outputs": [], "source": [ "# The code to run the training step\n", "\n", "def train(model, device, train_loader, optimizer, epoch):\n", " log_interval = 100\n", " model.train()\n", " for batch_idx, (data, target) in enumerate(train_loader):\n", " data, target = data.to(device), target.to(device)\n", " optimizer.zero_grad()\n", " output = model(data)\n", " loss = F.nll_loss(output, target)\n", " loss.backward()\n", " optimizer.step()\n", " if batch_idx % log_interval == 0:\n", " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", " epoch, batch_idx * len(data), len(train_loader.dataset),\n", " 100. * batch_idx / len(train_loader), loss.item()))\n" ] }, { "cell_type": "code", "execution_count": 5, "id": "e4ed05b8", "metadata": {}, "outputs": [], "source": [ "# The code for the evaluating the model using the test dataset\n", "\n", "def test(model, device, test_loader):\n", " model.eval()\n", " test_loss = 0\n", " correct = 0\n", " with torch.no_grad():\n", " for data, target in test_loader:\n", " data, target = data.to(device), target.to(device)\n", " output = model(data)\n", " test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss\n", " pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability\n", " correct += pred.eq(target.view_as(pred)).sum().item()\n", "\n", " test_loss /= len(test_loader.dataset)\n", "\n", " print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n", " test_loss, correct, len(test_loader.dataset),\n", " 100. * correct / len(test_loader.dataset)))\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "05dff25d", "metadata": {}, "outputs": [], "source": [ "# Code for reading the data, training and evaluating the model on test data\n", "def main():\n", "\n", " torch.manual_seed(1)\n", "\n", " # Use GPUs for training and testing\n", " device = torch.device(\"cuda\")\n", " # device = torch.device(\"cpu\")\n", "\n", " train_kwargs = {'batch_size': 64}\n", " test_kwargs = {'batch_size': 1000}\n", " cuda_kwargs = {'num_workers': 1,\n", " 'pin_memory': True,\n", " 'shuffle': True}\n", " train_kwargs.update(cuda_kwargs)\n", " test_kwargs.update(cuda_kwargs)\n", "\n", " transform=transforms.Compose([\n", " transforms.ToTensor(),\n", " transforms.Normalize((0.1307,), (0.3081,))\n", " ])\n", " dataset1 = datasets.MNIST('../data', train=True, download=True,\n", " transform=transform)\n", " dataset2 = datasets.MNIST('../data', train=False,\n", " transform=transform)\n", " train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)\n", " test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)\n", "\n", " gamma = 0.7\n", " epochs = 5\n", " \n", " model = Net().to(device)\n", " optimizer = optim.Adam(model.parameters())\n", "\n", " scheduler = StepLR(optimizer, step_size=1, gamma=gamma)\n", " for epoch in range(1, epochs + 1):\n", " train(model, device, train_loader, optimizer, epoch)\n", " test(model, device, test_loader)\n", " scheduler.step()\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "f9c43ca3", "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train Epoch: 1 [0/60000 (0%)]\tLoss: 2.282550\n", "Train Epoch: 1 [6400/60000 (11%)]\tLoss: 0.382226\n", "Train Epoch: 1 [12800/60000 (21%)]\tLoss: 0.247412\n", "Train Epoch: 1 [19200/60000 (32%)]\tLoss: 0.183478\n", "Train Epoch: 1 [25600/60000 (43%)]\tLoss: 0.143933\n", "Train Epoch: 1 [32000/60000 (53%)]\tLoss: 0.131150\n", "Train Epoch: 1 [38400/60000 (64%)]\tLoss: 0.092372\n", "Train Epoch: 1 [44800/60000 (75%)]\tLoss: 0.045216\n", "Train Epoch: 1 [51200/60000 (85%)]\tLoss: 0.113212\n", "Train Epoch: 1 [57600/60000 (96%)]\tLoss: 0.033559\n", "\n", "Test set: Average loss: 0.0477, Accuracy: 9839/10000 (98%)\n", "\n", "Train Epoch: 2 [0/60000 (0%)]\tLoss: 0.066562\n", "Train Epoch: 2 [6400/60000 (11%)]\tLoss: 0.078290\n", "Train Epoch: 2 [12800/60000 (21%)]\tLoss: 0.063742\n", "Train Epoch: 2 [19200/60000 (32%)]\tLoss: 0.044201\n", "Train Epoch: 2 [25600/60000 (43%)]\tLoss: 0.087734\n", "Train Epoch: 2 [32000/60000 (53%)]\tLoss: 0.038111\n", "Train Epoch: 2 [38400/60000 (64%)]\tLoss: 0.037815\n", "Train Epoch: 2 [44800/60000 (75%)]\tLoss: 0.119937\n", "Train Epoch: 2 [51200/60000 (85%)]\tLoss: 0.008435\n", "Train Epoch: 2 [57600/60000 (96%)]\tLoss: 0.154045\n", "\n", "Test set: Average loss: 0.0320, Accuracy: 9893/10000 (99%)\n", "\n", "Train Epoch: 3 [0/60000 (0%)]\tLoss: 0.055466\n", "Train Epoch: 3 [6400/60000 (11%)]\tLoss: 0.018367\n", "Train Epoch: 3 [12800/60000 (21%)]\tLoss: 0.036571\n", "Train Epoch: 3 [19200/60000 (32%)]\tLoss: 0.026443\n", "Train Epoch: 3 [25600/60000 (43%)]\tLoss: 0.024813\n", "Train Epoch: 3 [32000/60000 (53%)]\tLoss: 0.021004\n", "Train Epoch: 3 [38400/60000 (64%)]\tLoss: 0.025752\n", "Train Epoch: 3 [44800/60000 (75%)]\tLoss: 0.016083\n", "Train Epoch: 3 [51200/60000 (85%)]\tLoss: 0.061993\n", "Train Epoch: 3 [57600/60000 (96%)]\tLoss: 0.015852\n", "\n", "Test set: Average loss: 0.0316, Accuracy: 9899/10000 (99%)\n", "\n", "Train Epoch: 4 [0/60000 (0%)]\tLoss: 0.029524\n", "Train Epoch: 4 [6400/60000 (11%)]\tLoss: 0.021736\n", "Train Epoch: 4 [12800/60000 (21%)]\tLoss: 0.058835\n", "Train Epoch: 4 [19200/60000 (32%)]\tLoss: 0.005212\n", "Train Epoch: 4 [25600/60000 (43%)]\tLoss: 0.097908\n", "Train Epoch: 4 [32000/60000 (53%)]\tLoss: 0.002269\n", "Train Epoch: 4 [38400/60000 (64%)]\tLoss: 0.010071\n", "Train Epoch: 4 [44800/60000 (75%)]\tLoss: 0.003673\n", "Train Epoch: 4 [51200/60000 (85%)]\tLoss: 0.004167\n", "Train Epoch: 4 [57600/60000 (96%)]\tLoss: 0.038350\n", "\n", "Test set: Average loss: 0.0291, Accuracy: 9906/10000 (99%)\n", "\n", "Train Epoch: 5 [0/60000 (0%)]\tLoss: 0.047914\n", "Train Epoch: 5 [6400/60000 (11%)]\tLoss: 0.009362\n", "Train Epoch: 5 [12800/60000 (21%)]\tLoss: 0.002667\n", "Train Epoch: 5 [19200/60000 (32%)]\tLoss: 0.009794\n", "Train Epoch: 5 [25600/60000 (43%)]\tLoss: 0.005070\n", "Train Epoch: 5 [32000/60000 (53%)]\tLoss: 0.079600\n", "Train Epoch: 5 [38400/60000 (64%)]\tLoss: 0.029702\n", "Train Epoch: 5 [44800/60000 (75%)]\tLoss: 0.012784\n", "Train Epoch: 5 [51200/60000 (85%)]\tLoss: 0.063581\n", "Train Epoch: 5 [57600/60000 (96%)]\tLoss: 0.004085\n", "\n", "Test set: Average loss: 0.0259, Accuracy: 9925/10000 (99%)\n", "\n" ] } ], "source": [ "# Run the training and evaluation of the model\n", "main()" ] }, { "cell_type": "code", "execution_count": null, "id": "4159c154", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" } }, "nbformat": 4, "nbformat_minor": 5 }