{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f3c939e6",
   "metadata": {},
   "source": [
    "# Classifier for the MNIST dataset using PyTorch\n",
    "\n",
    "This is a \"Hello world!\" example with PyTorch.  \n",
    "It trains a convolutional neural network on the MNIST dataset.  \n",
    "\n",
    "Credits:   \n",
    "MNIST dataset, see http://yann.lecun.com/exdb/mnist/  \n",
    "Code adapted from the documentation of the PyTorch project"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5302d63d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2.0.0a0'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "torch.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "23dce87e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import torch.optim as optim\n",
    "from torchvision import datasets, transforms\n",
    "from torch.optim.lr_scheduler import StepLR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d623966a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the model, this uses convolutional neural networks\n",
    "# The model uses convolutional neural network layers\n",
    "\n",
    "class Net(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(Net, self).__init__()\n",
    "        self.conv1 = nn.Conv2d(1, 32, 3, 1)\n",
    "        self.conv2 = nn.Conv2d(32, 64, 3, 1)\n",
    "        self.dropout1 = nn.Dropout(0.25)\n",
    "        self.dropout2 = nn.Dropout(0.5)\n",
    "        self.fc1 = nn.Linear(9216, 128)\n",
    "        self.fc2 = nn.Linear(128, 10)\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = self.conv1(x)\n",
    "        x = F.relu(x)\n",
    "        x = self.conv2(x)\n",
    "        x = F.relu(x)\n",
    "        x = F.max_pool2d(x, 2)\n",
    "        x = self.dropout1(x)\n",
    "        x = torch.flatten(x, 1)\n",
    "        x = self.fc1(x)\n",
    "        x = F.relu(x)\n",
    "        x = self.dropout2(x)\n",
    "        x = self.fc2(x)\n",
    "        output = F.log_softmax(x, dim=1)\n",
    "        return output\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "d58c11fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The code to run the training step\n",
    "\n",
    "def train(model, device, train_loader, optimizer, epoch):\n",
    "    log_interval = 100\n",
    "    model.train()\n",
    "    for batch_idx, (data, target) in enumerate(train_loader):\n",
    "        data, target = data.to(device), target.to(device)\n",
    "        optimizer.zero_grad()\n",
    "        output = model(data)\n",
    "        loss = F.nll_loss(output, target)\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        if batch_idx % log_interval == 0:\n",
    "            print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n",
    "                epoch, batch_idx * len(data), len(train_loader.dataset),\n",
    "                100. * batch_idx / len(train_loader), loss.item()))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e4ed05b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The code for the evaluating the model using the test dataset\n",
    "\n",
    "def test(model, device, test_loader):\n",
    "    model.eval()\n",
    "    test_loss = 0\n",
    "    correct = 0\n",
    "    with torch.no_grad():\n",
    "        for data, target in test_loader:\n",
    "            data, target = data.to(device), target.to(device)\n",
    "            output = model(data)\n",
    "            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss\n",
    "            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability\n",
    "            correct += pred.eq(target.view_as(pred)).sum().item()\n",
    "\n",
    "    test_loss /= len(test_loader.dataset)\n",
    "\n",
    "    print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n",
    "        test_loss, correct, len(test_loader.dataset),\n",
    "        100. * correct / len(test_loader.dataset)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "05dff25d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Code for reading the data, training and evaluating the model on test data\n",
    "def main():\n",
    "\n",
    "    torch.manual_seed(1)\n",
    "\n",
    "    # Use GPUs for training and testing\n",
    "    device = torch.device(\"cuda\")\n",
    "    # device = torch.device(\"cpu\")\n",
    "\n",
    "    train_kwargs = {'batch_size': 64}\n",
    "    test_kwargs = {'batch_size': 1000}\n",
    "    cuda_kwargs = {'num_workers': 1,\n",
    "                   'pin_memory': True,\n",
    "                   'shuffle': True}\n",
    "    train_kwargs.update(cuda_kwargs)\n",
    "    test_kwargs.update(cuda_kwargs)\n",
    "\n",
    "    transform=transforms.Compose([\n",
    "        transforms.ToTensor(),\n",
    "        transforms.Normalize((0.1307,), (0.3081,))\n",
    "        ])\n",
    "    dataset1 = datasets.MNIST('../data', train=True, download=True,\n",
    "                       transform=transform)\n",
    "    dataset2 = datasets.MNIST('../data', train=False,\n",
    "                       transform=transform)\n",
    "    train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)\n",
    "    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)\n",
    "\n",
    "    gamma = 0.7\n",
    "    epochs = 5\n",
    "    \n",
    "    model = Net().to(device)\n",
    "    optimizer = optim.Adam(model.parameters())\n",
    "\n",
    "    scheduler = StepLR(optimizer, step_size=1, gamma=gamma)\n",
    "    for epoch in range(1, epochs + 1):\n",
    "        train(model, device, train_loader, optimizer, epoch)\n",
    "        test(model, device, test_loader)\n",
    "        scheduler.step()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f9c43ca3",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train Epoch: 1 [0/60000 (0%)]\tLoss: 2.282550\n",
      "Train Epoch: 1 [6400/60000 (11%)]\tLoss: 0.382226\n",
      "Train Epoch: 1 [12800/60000 (21%)]\tLoss: 0.247412\n",
      "Train Epoch: 1 [19200/60000 (32%)]\tLoss: 0.183478\n",
      "Train Epoch: 1 [25600/60000 (43%)]\tLoss: 0.143933\n",
      "Train Epoch: 1 [32000/60000 (53%)]\tLoss: 0.131150\n",
      "Train Epoch: 1 [38400/60000 (64%)]\tLoss: 0.092372\n",
      "Train Epoch: 1 [44800/60000 (75%)]\tLoss: 0.045216\n",
      "Train Epoch: 1 [51200/60000 (85%)]\tLoss: 0.113212\n",
      "Train Epoch: 1 [57600/60000 (96%)]\tLoss: 0.033559\n",
      "\n",
      "Test set: Average loss: 0.0477, Accuracy: 9839/10000 (98%)\n",
      "\n",
      "Train Epoch: 2 [0/60000 (0%)]\tLoss: 0.066562\n",
      "Train Epoch: 2 [6400/60000 (11%)]\tLoss: 0.078290\n",
      "Train Epoch: 2 [12800/60000 (21%)]\tLoss: 0.063742\n",
      "Train Epoch: 2 [19200/60000 (32%)]\tLoss: 0.044201\n",
      "Train Epoch: 2 [25600/60000 (43%)]\tLoss: 0.087734\n",
      "Train Epoch: 2 [32000/60000 (53%)]\tLoss: 0.038111\n",
      "Train Epoch: 2 [38400/60000 (64%)]\tLoss: 0.037815\n",
      "Train Epoch: 2 [44800/60000 (75%)]\tLoss: 0.119937\n",
      "Train Epoch: 2 [51200/60000 (85%)]\tLoss: 0.008435\n",
      "Train Epoch: 2 [57600/60000 (96%)]\tLoss: 0.154045\n",
      "\n",
      "Test set: Average loss: 0.0320, Accuracy: 9893/10000 (99%)\n",
      "\n",
      "Train Epoch: 3 [0/60000 (0%)]\tLoss: 0.055466\n",
      "Train Epoch: 3 [6400/60000 (11%)]\tLoss: 0.018367\n",
      "Train Epoch: 3 [12800/60000 (21%)]\tLoss: 0.036571\n",
      "Train Epoch: 3 [19200/60000 (32%)]\tLoss: 0.026443\n",
      "Train Epoch: 3 [25600/60000 (43%)]\tLoss: 0.024813\n",
      "Train Epoch: 3 [32000/60000 (53%)]\tLoss: 0.021004\n",
      "Train Epoch: 3 [38400/60000 (64%)]\tLoss: 0.025752\n",
      "Train Epoch: 3 [44800/60000 (75%)]\tLoss: 0.016083\n",
      "Train Epoch: 3 [51200/60000 (85%)]\tLoss: 0.061993\n",
      "Train Epoch: 3 [57600/60000 (96%)]\tLoss: 0.015852\n",
      "\n",
      "Test set: Average loss: 0.0316, Accuracy: 9899/10000 (99%)\n",
      "\n",
      "Train Epoch: 4 [0/60000 (0%)]\tLoss: 0.029524\n",
      "Train Epoch: 4 [6400/60000 (11%)]\tLoss: 0.021736\n",
      "Train Epoch: 4 [12800/60000 (21%)]\tLoss: 0.058835\n",
      "Train Epoch: 4 [19200/60000 (32%)]\tLoss: 0.005212\n",
      "Train Epoch: 4 [25600/60000 (43%)]\tLoss: 0.097908\n",
      "Train Epoch: 4 [32000/60000 (53%)]\tLoss: 0.002269\n",
      "Train Epoch: 4 [38400/60000 (64%)]\tLoss: 0.010071\n",
      "Train Epoch: 4 [44800/60000 (75%)]\tLoss: 0.003673\n",
      "Train Epoch: 4 [51200/60000 (85%)]\tLoss: 0.004167\n",
      "Train Epoch: 4 [57600/60000 (96%)]\tLoss: 0.038350\n",
      "\n",
      "Test set: Average loss: 0.0291, Accuracy: 9906/10000 (99%)\n",
      "\n",
      "Train Epoch: 5 [0/60000 (0%)]\tLoss: 0.047914\n",
      "Train Epoch: 5 [6400/60000 (11%)]\tLoss: 0.009362\n",
      "Train Epoch: 5 [12800/60000 (21%)]\tLoss: 0.002667\n",
      "Train Epoch: 5 [19200/60000 (32%)]\tLoss: 0.009794\n",
      "Train Epoch: 5 [25600/60000 (43%)]\tLoss: 0.005070\n",
      "Train Epoch: 5 [32000/60000 (53%)]\tLoss: 0.079600\n",
      "Train Epoch: 5 [38400/60000 (64%)]\tLoss: 0.029702\n",
      "Train Epoch: 5 [44800/60000 (75%)]\tLoss: 0.012784\n",
      "Train Epoch: 5 [51200/60000 (85%)]\tLoss: 0.063581\n",
      "Train Epoch: 5 [57600/60000 (96%)]\tLoss: 0.004085\n",
      "\n",
      "Test set: Average loss: 0.0259, Accuracy: 9925/10000 (99%)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Run the training and evaluation of the model\n",
    "main()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4159c154",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}