EMNIST Baseline#

Carrying over the setup from the beginner workshop, we introduce baseline model training for the EMNIST dataset. We will build on this approach throughout the series.

import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
from torchvision.io import ImageReadMode
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from pathlib import Path
import matplotlib.pyplot as plt
data_dir = f"/scratch/{os.environ['USER']}/data"
model_path = f"/scratch/{os.environ['USER']}/model.pt"

# Model and Training
batch_size = 128        # input batch size for training (default: 64)
test_batch_size = 1000  # input batch size for testing (default: 1000)
num_workers = 10        # parallel data loading to speed things up
lr = 1.0                # learning rate (default: 1.0)
gamma = 0.7             # learning rate step gamma (default: 0.7)
no_cuda = False         # disables CUDA training (default: False)
seed = 42               # random seed (default: 42)
log_interval = 10       # how many batches to wait before logging training status (default: 10)
save_model = False      # save the trained model (default: False)

# additional derived settings
use_cuda = not no_cuda and torch.cuda.is_available()
torch.manual_seed(seed)
device = torch.device("cuda" if use_cuda else "cpu")

print("Device:", device)
Device: cuda

Dataset#

In the beginner series, we saw how to set up a custom PyTorch Dataset class for the ImageNet dataset. For EMNIST we don't need one: torchvision provides a ready-made dataset class, and we reuse that loading code here.

# Loading the data

# PyTorch provides a number of pre-defined dataset classes, and
# EMNIST is one of them. torchvision will download the data automatically,
# but only if it is not already present in data_dir.
data_train = datasets.EMNIST(data_dir, split='balanced', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]))

data_test = datasets.EMNIST(data_dir, split='balanced', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]))

# define pytorch dataloaders for training and testing
train_loader = DataLoader(data_train, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
test_loader = DataLoader(data_test, batch_size=test_batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

# set aside one test batch for later spot checks
image_gen = iter(test_loader)
test_img, test_trg = next(image_gen)
print("Training dataset:", train_loader.dataset)
print("Testing dataset:", test_loader.dataset)
Training dataset: Dataset EMNIST
    Number of datapoints: 112800
    Root location: /scratch/dane2/data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.1307,), std=(0.3081,))
           )
Testing dataset: Dataset EMNIST
    Number of datapoints: 18800
    Root location: /scratch/dane2/data
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.1307,), std=(0.3081,))
           )
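
A quick sanity check that is not part of the original workshop code: plot a few characters from the test batch we just set aside. It only uses the test_img/test_trg batch and the matplotlib.pyplot import from above; note that EMNIST stores characters transposed relative to the usual reading orientation, hence the .T.

# visualize a few characters from the saved test batch (sketch)
fig, axes = plt.subplots(1, 8, figsize=(12, 2))
for i, ax in enumerate(axes):
    ax.imshow(test_img[i, 0].T, cmap="gray")  # transpose so the character reads upright
    ax.set_title(int(test_trg[i]))            # integer class label (0-46)
    ax.axis("off")
plt.show()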

Model definition#

In the EMNIST notebook, we developed CNN architectures for hand-written characters. We copy our best architecture here: it expects single-channel (grayscale) 28x28 inputs and outputs scores for the 47 classes of the EMNIST balanced split.

class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()

        # feature encoder
        self.feature_extractor = nn.Sequential(
            # block 1
            nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, stride=1, bias=False),
            nn.LazyBatchNorm2d(),
            nn.ReLU(),
            # 2
            nn.Conv2d(in_channels=4, out_channels=4, kernel_size=3, stride=1, bias=False),
            nn.LazyBatchNorm2d(),
            nn.ReLU(),
            # 3
            nn.Conv2d(in_channels=4, out_channels=4, kernel_size=3, stride=1, bias=False),
            nn.LazyBatchNorm2d(),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # flatten just as with the linear classifier
            nn.Flatten()
        )
        
        # linear classification head -- the EMNIST balanced split has 47 classes
        self.classifier = nn.LazyLinear(47)

    def forward(self, x):
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return x
    
    def num_params(self):
        return sum(p.numel() for p in self.parameters())

# Create the model
model = Classifier()
# let's make sure we can run a batch of data through the model
with torch.no_grad():
    x, y = next(iter(train_loader))
    y_hat = model(x)
    
y_hat.shape, y_hat, y_hat.sum(axis=-1)
(torch.Size([128, 47]),
 tensor([[-0.2897, -0.5617,  0.0628,  ..., -1.2602,  0.8728,  0.3598],
         [ 0.8654, -0.0719,  0.6095,  ..., -0.0504, -0.4102, -0.1677],
         [ 0.2731, -0.2552,  0.0274,  ...,  0.0960,  0.7097,  0.4219],
         ...,
         [-0.2629,  0.0098,  0.0789,  ..., -0.7038,  0.5512, -1.0838],
         [ 0.2294,  0.5040, -0.4386,  ..., -0.5136,  0.5362,  0.4131],
         [-0.1829, -0.6301,  1.2044,  ..., -1.4666, -0.5272, -0.3675]]),
 tensor([-1.1857,  7.7833,  3.6772, -1.0103,  7.2764,  3.0098,  6.7484, -2.4809,
         16.5917,  7.4026,  1.1977,  7.7264,  1.4142,  8.6851,  5.8889,  0.8193,
         11.8244,  9.6126,  3.6529,  3.7360,  6.5334,  5.4083,  2.3121,  4.2810,
          3.9888,  0.9701,  6.4130,  3.1773,  7.5547,  9.3368,  4.6726,  8.8772,
         10.0957,  4.4073,  5.9341, -1.3580,  1.0337,  4.1729,  7.2586,  5.7577,
          0.5533,  7.2799,  0.6950,  6.7011, -4.1594,  4.3148,  7.4138,  1.8631,
         -1.8655,  6.9863, -0.7021,  8.2847,  4.9730, -1.9363,  6.9793,  1.7842,
         13.1547,  7.6535,  6.8881,  9.3821,  8.7600,  5.9675,  3.2702, -0.2760,
          2.1410,  5.2800,  4.0835,  0.3117,  8.4077,  4.7187,  1.8049,  0.7824,
          7.1940, 10.2863,  7.8565,  6.0385, -1.9799,  6.3945,  0.3884,  0.3304,
          5.6073,  9.4075, 10.4626, -0.7341,  3.5893,  2.9042,  6.0140,  4.1800,
          7.2520, -0.9992,  8.0851,  7.3866, 10.2711,  5.5463,  2.7001,  7.8807,
          7.6535,  7.2546,  7.2469,  4.9917,  0.8115, 12.6958,  3.4918,  4.0801,
          8.3012,  2.5887,  1.9310,  4.5293,  4.0950,  4.1541,  6.9728,  7.7243,
          3.2597,  8.9427,  3.4462, -2.8556,  7.1138,  4.0573,  1.0241,  6.8103,
          2.1410, 11.8644,  2.2207,  1.7766,  4.6555,  4.4057,  7.7114,  6.6785]))
model
Classifier(
  (feature_extractor): Sequential(
    (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (4): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (7): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Flatten(start_dim=1, end_dim=-1)
  )
  (classifier): Linear(in_features=484, out_features=47, bias=True)
)
print("Number of parameters:", model.num_params())
Number of parameters: 23143
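
Where does in_features=484 come from? Each 3x3 convolution without padding trims one pixel from each side, so the 28x28 input shrinks to 26, 24, and then 22 pixels per side; the 2x2 max-pooling halves that to 11, and flattening 4 channels of 11x11 feature maps gives 4 * 11 * 11 = 484 features. We can confirm this with a dummy forward pass (a sketch using the model we just built):

# confirm the flattened feature size: 28 -> 26 -> 24 -> 22, then 11 after pooling
with torch.no_grad():
    dummy = torch.zeros(1, 1, 28, 28)
    print(model.feature_extractor(dummy).shape)  # expected: torch.Size([1, 484])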

Training and testing loops#

def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('\r\tTrain epoch {}: [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()), end='')
            
def test(model, device, test_loader, epoch):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # index of the highest logit
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\rTest epoch {}: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
        epoch,
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
def train_and_test(model, dl_train, dl_test, save_name=model_path, epochs=5):
    # set up the optimizer and learning-rate schedule
    optimizer = optim.Adadelta(model.parameters(), lr=lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

    for epoch in range(1, epochs + 1):
        train(model, device, dl_train, optimizer, epoch)
        test(model, device, dl_test, epoch)
        scheduler.step()

    if save_model:
        torch.save(model.state_dict(), save_name)
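
If save_model is set to True, the trained weights end up at model_path and can be restored into a fresh Classifier later. A minimal sketch (it assumes a checkpoint exists at model_path; running a dummy batch through the new model first materializes the lazy layers so load_state_dict sees concrete shapes):

# restore a saved checkpoint into a fresh model (assumes save_model was True)
restored = Classifier().to(device)
with torch.no_grad():
    restored(torch.zeros(1, 1, 28, 28, device=device))  # materialize lazy layers
restored.load_state_dict(torch.load(model_path, map_location=device))
restored.eval()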

Run Training#

model = Classifier().to(device)
model
Classifier(
  (feature_extractor): Sequential(
    (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): LazyBatchNorm2d(0, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (4): LazyBatchNorm2d(0, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (7): LazyBatchNorm2d(0, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Flatten(start_dim=1, end_dim=-1)
  )
  (classifier): LazyLinear(in_features=0, out_features=47, bias=True)
)
train_and_test(model, train_loader, test_loader)
Test epoch 1: Average loss: 0.6545, Accuracy: 15048/18800 (80.04%)
Test epoch 2: Average loss: 0.5639, Accuracy: 15416/18800 (82.00%)
Test epoch 3: Average loss: 0.5422, Accuracy: 15651/18800 (83.25%)
Test epoch 4: Average loss: 0.5266, Accuracy: 15735/18800 (83.70%)
Test epoch 5: Average loss: 0.5081, Accuracy: 15831/18800 (84.21%)
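
As a final spot check, we can run the trained model on the test batch we set aside at the start (test_img, test_trg); the batch accuracy should be roughly in line with the ~84% reported above. A minimal sketch:

# evaluate the trained model on the batch set aside earlier
model.eval()
with torch.no_grad():
    logits = model(test_img.to(device))
preds = logits.argmax(dim=1).cpu()
print(f"Accuracy on the saved test batch: {(preds == test_trg).float().mean().item():.2%}")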