Introduction

In this assignment, we built a deep learning system to help a fictional kennel owner, Ken, identify and classify lost dogs from 10 different breeds using image recognition.

The dataset includes 12,775 color images (64×64×3) with breed labels. We built and evaluated convolutional neural networks (CNNs) that classify an input image of a dog into one of the first 5 breeds.


Python Code

challenge_model.py

'''
Challenge - Model
    Constructs a pytorch model for a convolutional neural network
'''
import torch
import torch.nn as nn

class Challenge(nn.Module):
    """
    AlexNet-style convolutional network for the dog-breed challenge.

    The input batch is first resized to 128x128, passed through five
    convolutional layers with three max-pooling stages, flattened to 2304
    features and classified by a three-layer fully connected head.

    Args:
        num_classes: number of output logits (defaults to 5, the number of
            breeds used in the challenge).
    """
    def __init__(self, num_classes=5):
        super(Challenge, self).__init__()
        # Upsample: HxWx3 -> 128x128x3. The target size is fixed, so the
        # layers below see the same spatial size for any input resolution.
        self.upsample = nn.Upsample(size=(128, 128))

        # NOTE: the order of the layers is kept stable so that the
        # state_dict keys (features.0, classifier.1, ...) do not change.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),  # Conv1: 128x128x3 -> 31x31x64
            nn.LeakyReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),  # MaxPool1: 31x31x64 -> 15x15x64

            nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2),  # Conv2: 15x15x64 -> 15x15x192
            nn.LeakyReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),  # MaxPool2: 15x15x192 -> 7x7x192

            nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1),  # Conv3: 7x7x192 -> 7x7x384
            nn.LeakyReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),  # Conv4: 7x7x384 -> 7x7x256
            nn.LeakyReLU(inplace=True),

            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),  # Conv5: 7x7x256 -> 7x7x256
            nn.LeakyReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)  # MaxPool3: 7x7x256 -> 3x3x256
        )  # Flatten: 3x3x256 = 2304 features per sample.

        # Classifier: 2304 -> 512 -> 1024 -> num_classes
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(2304, 512),   # 2304 -> 512
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 1024),   # 512 -> 1024
            nn.ReLU(inplace=True),
            nn.Linear(1024, num_classes)  # 1024 -> num_classes
        )

    def forward(self, x):
        """
        Compute class logits for a batch of images.

        Args:
            x: tensor of shape (batch, 3, H, W).

        Returns:
            Tensor of shape (batch, num_classes) with unnormalized logits.
        """
        x = self.upsample(x)                    # HxWx3 -> 128x128x3
        x = self.features(x)                    # -> 3x3x256
        x = torch.flatten(x, start_dim=1)       # -> 2304
        x = self.classifier(x)                  # -> num_classes
        return x

challenge_train.py

'''
Challenge - Train
    Trains a neural network to classify images
    Periodically outputs training information, and saves model checkpoints
    Usage: python challenge_train.py
'''
import torch
import numpy as np
import utils
from data import get_train_val_test_loaders
from challenge_model import Challenge
from utils import *

def _train_epoch(data_loader, model, criterion, optimizer):
    """
    Train the `model` for one epoch of data from `data_loader`
    Use `optimizer` to optimize the specified `criterion`
    """
    for i, (X, y) in enumerate(data_loader):
        # clear parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        output = model(X)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

def _get_metrics(loader, model, criterion):
    """
    Return `(accuracy, mean per-batch loss)` of `model` over `loader`.

    Batches are moved to the device that holds the model's parameters.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    correct, total = 0, 0
    running_loss = []
    for X, y in loader:
        if device is not None:
            X, y = X.to(device), y.to(device)
        output = model(X)
        predicted = predictions(output.data)
        total += y.size(0)
        correct += (predicted == y).sum().item()
        running_loss.append(criterion(output, y).item())
    return correct / total, np.mean(running_loss)


def _evaluate_epoch(axes, tr_loader, val_loader, model, criterion, epoch, stats):
    """
    Evaluate the `model` on the training and validation sets.

    Appends `[val_acc, val_loss, train_acc, train_loss]` to `stats`, then
    logs the new entry and refreshes the training plot on `axes`.
    """
    # Evaluate with Dropout disabled, then put the model back into whatever
    # mode it was in so that subsequent training is unaffected.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            train_acc, train_loss = _get_metrics(tr_loader, model, criterion)
            val_acc, val_loss = _get_metrics(val_loader, model, criterion)
    finally:
        model.train(was_training)

    stats.append([val_acc, val_loss, train_acc, train_loss])
    utils.log_cnn_training(epoch, stats)
    utils.update_cnn_training_plot(axes, epoch, stats)

def main():
    """
    Train the challenge model.

    Builds the data loaders, model, loss and optimizer, optionally resumes
    from a saved checkpoint, then alternates training and evaluation until
    `challenge.num_epochs` epochs are done, saving a checkpoint after each.
    """
    device = utils.get_device()

    # Only the training and validation loaders are used here.
    tr_loader, va_loader, _, _ = get_train_val_test_loaders(
        num_classes=config('challenge.num_classes'))

    model = Challenge().to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=config('cnn.learning_rate'))

    # Resume from a previously saved checkpoint when one is available
    print('Loading challenge...')
    checkpoint_dir = config('challenge.checkpoint')
    model, start_epoch, stats = restore_checkpoint(model, checkpoint_dir)

    axes = utils.make_cnn_training_plot()

    # Metrics of the starting point (fresh or restored model)
    _evaluate_epoch(axes, tr_loader, va_loader, model, criterion,
                    start_epoch, stats)

    # `finished` counts the epochs completed once the iteration is over
    num_epochs = config('challenge.num_epochs')
    for finished in range(start_epoch + 1, num_epochs + 1):
        _train_epoch(tr_loader, model, criterion, optimizer)
        _evaluate_epoch(axes, tr_loader, va_loader, model, criterion,
                        finished, stats)
        save_checkpoint(model, finished, checkpoint_dir, stats)

    print('Finished Training')

    # Keep plot open
    utils.hold_training_plot()

if __name__ == '__main__':
    # Create the checkpoint directory up front so that restoring and saving
    # checkpoints can list / write into it.
    # NOTE(review): this path is hard-coded while main() reads the directory
    # from config('challenge.checkpoint') -- confirm that both agree.
    utils.make_checkpoint_dir('./checkpoints/challenge/')
    main()

challenge_predict.py

'''
Challenge - Predict
    Runs the challenge model inference on the test dataset and saves the
    predictions to disk
    Usage: python challenge_predict.py
'''
import argparse
import numpy as np
import pandas as pd
from data import get_train_val_test_loaders
from challenge_model import Challenge
from utils import *
import utils

def predict_challenge(data_loader, model):
    """
    Runs the model inference on the test set and outputs the predictions

    Args:
        data_loader: iterable of `(X, y)` batches; the labels are ignored.
        model: the network used for inference.

    Returns:
        1-D float64 numpy array with one predicted class index per sample,
        in the order the samples were produced by `data_loader`.
    """
    # Local import: the visible imports of this script do not include torch
    # directly (it is only reachable through `from utils import *`).
    import torch

    # Inputs have to live on the same device as the model's weights.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    # The empty float64 seed keeps the result dtype the same as before and
    # makes the function return an empty array for an empty loader.
    batches = [np.array([])]
    with torch.no_grad():  # inference only, no autograd graph needed
        for X, _ in data_loader:
            if device is not None:
                X = X.to(device)
            output = model(X)
            predicted = predictions(output.data)
            batches.append(predicted.cpu().numpy())

    # Concatenate once instead of re-copying the array for every batch.
    return np.concatenate(batches)

def main():
    """
    Run the trained challenge model on the test set and save its output.

    Restores a checkpoint from `challenge.checkpoint`, predicts a class for
    every test sample, maps each prediction through `get_semantic_label`
    and writes the results to `predictions.csv` (one value per row, without
    header or index).
    """
    device = utils.get_device()

    # data loaders
    _, _, te_loader, get_semantic_label = get_train_val_test_loaders(num_classes=config('challenge.num_classes'))

    # Attempts to restore a saved checkpoint if one exists
    model = Challenge()
    model, _, _ = restore_checkpoint(model, config('challenge.checkpoint'))
    model = model.to(device)
    model.eval()  # disable Dropout for inference

    # Evaluate model
    model_pred = predict_challenge(te_loader, model)

    # A real newline here; the doubled backslash printed a literal "\n".
    print('saving challenge predictions...\n')
    model_pred = [get_semantic_label(p) for p in model_pred]
    pd_writer = pd.DataFrame(model_pred, columns=['predictions'])
    pd_writer.to_csv('predictions.csv', index=False, header=False)

if __name__ == '__main__':
    # Run inference only when executed as a script, not when imported.
    main()

utils.py

"""
Utility functions
"""
import os
import numpy as np
import matplotlib.pyplot as plt
import itertools
import os
import torch

def config(attr):
    """
    Look up a (possibly nested) setting from `config.json`.

    `attr` is a dot-separated path such as `'challenge.num_epochs'`; each
    component is used as a key into the next level of the configuration.
    The file is read once and cached as an attribute of this function.
    """
    if 'config' not in config.__dict__:
        # NOTE(security): the file content is passed to eval(), so anything
        # written in config.json is executed as Python. Only use trusted
        # config files.
        with open('config.json') as handle:
            config.config = eval(handle.read())

    value = config.config
    for key in attr.split('.'):
        value = value[key]
    return value

def get_device():
    """
    Return the PyTorch device to run on.

    CUDA is preferred, then MPS, then the CPU. The choice is made (and
    announced on stdout) on the first call only; later calls return the
    cached device stored on the function object.
    """
    if hasattr(get_device, 'device'):
        return get_device.device

    if torch.cuda.is_available():
        kind, message = "cuda", "Using GPU with CUDA"
    elif torch.backends.mps.is_available():
        kind, message = "mps", "Using GPU with MPS"
    else:
        kind, message = "cpu", "Using CPU"

    chosen = torch.device(kind)
    print(message)
    get_device.device = chosen
    return chosen

def denormalize_image(image):
    """
    Rescale the image's color space from (min, max) to (0, 1)

    The minimum and maximum are taken over the first two axes, i.e.
    separately for every channel of an (H, W, C) array. A channel whose
    values are all equal is mapped to 0 instead of dividing by zero.
    """
    lowest = np.min(image, axis=(0, 1))
    span = np.max(image, axis=(0, 1)) - lowest
    # A constant channel has zero range; divide by 1 so it becomes all zeros
    # rather than NaN.
    span = np.where(span == 0, 1, span)
    return (image - lowest) / span

def hold_training_plot():
    """
    Keep the program alive to display the training plot

    Turns matplotlib's interactive mode off and then shows the figure, so
    that the window stays open once training has finished.
    """
    plt.ioff()
    plt.show()

def log_cnn_training(epoch, stats):
    """
    Logs the latest validation and training loss/accuracy to the terminal

    Args:
        epoch: epoch number printed in the header line.
        stats: list of `[valid_acc, valid_loss, train_acc, train_loss]`
            entries; only the last entry is printed.
    """
    valid_acc, valid_loss, train_acc, train_loss = stats[-1]
    print('Epoch {}'.format(epoch))
    # Indent the metrics with a real tab; the doubled backslash used to
    # print a literal "\t" in front of every line.
    print('\tValidation Loss: {}'.format(valid_loss))
    print('\tValidation Accuracy: {}'.format(valid_acc))
    print('\tTrain Loss: {}'.format(train_loss))
    print('\tTrain Accuracy: {}'.format(train_acc))

def make_cnn_training_plot():
    """
    Prepare the interactive figure used to follow training progress.

    Creates a 1x2 grid of axes titled 'CNN Training': the left panel is
    labelled for accuracy and the right panel for loss, both against the
    epoch number. Returns the array of the two axes.
    """
    plt.ion()
    _, axes = plt.subplots(1, 2, figsize=(10, 5))
    plt.suptitle('CNN Training')

    for panel, quantity in zip(axes, ('Accuracy', 'Loss')):
        panel.set_xlabel('Epoch')
        panel.set_ylabel(quantity)

    return axes

def update_cnn_training_plot(axes, epoch, stats):
    """
    Redraw the accuracy and loss curves with the statistics collected so far.

    Each entry of `stats` is `[valid_acc, valid_loss, train_acc, train_loss]`.
    The entries are plotted against the `len(stats)` consecutive epoch
    numbers that end at `epoch`: accuracies on `axes[0]`, losses on
    `axes[1]`, validation in blue and training in red.
    """
    epochs = range(epoch - len(stats) + 1, epoch + 1)
    dashed = dict(linestyle='--', marker='o')

    # (panel index, column of the validation value, column of the train value)
    layout = ((0, 0, 2), (1, 1, 3))
    for panel_idx, valid_col, train_col in layout:
        panel = axes[panel_idx]
        panel.plot(epochs, [entry[valid_col] for entry in stats],
                   color='b', **dashed)
        panel.plot(epochs, [entry[train_col] for entry in stats],
                   color='r', **dashed)
        panel.legend(['Validation', 'Train'])

    # Give the GUI event loop a moment to draw the update.
    plt.pause(0.00001)

def save_cnn_training_plot():
    """
    Saves the training plot to a file

    Writes matplotlib's current figure to `cnn_training_plot.png` in the
    working directory at 300 dpi.
    """
    plt.savefig('cnn_training_plot.png', dpi=300)

def count_parameters(model):
    """Return the total number of trainable scalar parameters in `model`."""
    total = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad == False) are not counted.
        if param.requires_grad:
            total += param.numel()
    return total

def make_checkpoint_dir(checkpoint_dir):
    """
    Create `checkpoint_dir` (including missing parent directories).

    Does nothing when the directory already exists.
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists
    # first, e.g. when two runs start at the same time.
    os.makedirs(checkpoint_dir, exist_ok=True)

def save_checkpoint(model, epoch, checkpoint_dir, stats):
    """
    Write a checkpoint for `epoch` into `checkpoint_dir`.

    The file is named `epoch=<epoch>.checkpoint.pth.tar` and stores the
    epoch number, the model's state dict and the collected `stats`.
    """
    target = os.path.join(
        checkpoint_dir, 'epoch={}.checkpoint.pth.tar'.format(epoch))
    payload = dict(epoch=epoch, state_dict=model.state_dict(), stats=stats)
    torch.save(payload, target)

def restore_checkpoint(model, checkpoint_dir, cuda=False, force=False,
    pretrain=False):
    """
    If a checkpoint exists, restores the PyTorch model from the checkpoint.

    The user is asked on stdin which epoch to load. Unless `force` is set,
    answering 0 deletes the existing checkpoints and starts from scratch.

    Args:
        model: the model whose parameters are restored in place.
        checkpoint_dir: directory holding `epoch=<n>.checkpoint.pth.tar`
            files.
        cuda: when False, tensors are mapped to the CPU while loading.
        force: when True a checkpoint must be loaded; a missing checkpoint
            raises and epoch 0 cannot be chosen.
        pretrain: when True the state dict is loaded with `strict=False`.

    Returns:
        `(model, epoch, stats)`: the model, the epoch that was loaded (0 if
        none) and the statistics stored in the checkpoint (`[]` if none).

    Raises:
        Exception: if `force` is set and no checkpoint exists, or if the
            chosen epoch is out of range or has no checkpoint file.
        ValueError: if the answer typed by the user is not an integer.
    """
    prefix, suffix = 'epoch=', '.checkpoint.pth.tar'

    # Collect the epoch numbers that actually have a checkpoint file. Parsing
    # the names (instead of counting up from 1) also works when early
    # checkpoints were deleted or the numbering has gaps.
    available = set()
    for file_ in os.listdir(checkpoint_dir):
        if file_.startswith(prefix) and file_.endswith(suffix):
            tag = file_[len(prefix):-len(suffix)]
            if tag.isdecimal() and str(int(tag)) == tag and int(tag) > 0:
                available.add(int(tag))

    if not available:
        print('No saved model parameters found')
        if force:
            raise Exception("Checkpoint not found")
        else:
            return model, 0, []

    # Find latest epoch
    epoch = max(available)

    if not force:
        print("Which epoch to load from? Choose in range [0, {}]."
            .format(epoch), "Enter 0 to train from scratch.")
        print(">> ", end='')
        inp_epoch = int(input())
        if inp_epoch not in range(epoch+1):
            raise Exception("Invalid epoch number")
        if inp_epoch == 0:
            print("Checkpoint not loaded")
            clear_checkpoint(checkpoint_dir)
            return model, 0, []
    else:
        print("Which epoch to load from? Choose in range [1, {}].".format(epoch))
        inp_epoch = int(input())
        if inp_epoch not in range(1, epoch+1):
            raise Exception("Invalid epoch number")

    # The number may be inside the range but missing on disk.
    if inp_epoch not in available:
        raise Exception("Invalid epoch number")

    filename = os.path.join(checkpoint_dir,
        'epoch={}.checkpoint.pth.tar'.format(inp_epoch))

    print("Loading from checkpoint {}?".format(filename))

    # NOTE(security): weights_only=False unpickles arbitrary objects; only
    # load checkpoint files from a trusted source.
    if cuda:
        checkpoint = torch.load(filename, weights_only=False)
    else:
        # Load GPU model on CPU
        checkpoint = torch.load(filename, weights_only=False,
            map_location=lambda storage, loc: storage)

    try:
        start_epoch = checkpoint['epoch']
        stats = checkpoint['stats']
        if pretrain:
            model.load_state_dict(checkpoint['state_dict'], strict=False)
        else:
            model.load_state_dict(checkpoint['state_dict'])
        print("=> Successfully restored checkpoint (trained for {} epochs)"
            .format(start_epoch))
    except Exception:
        print("=> Checkpoint not successfully restored")
        raise

    return model, inp_epoch, stats

def clear_checkpoint(checkpoint_dir):
    """
    Delete every file in `checkpoint_dir` whose name ends in `.pth.tar`.

    Other files are left untouched. A confirmation is printed afterwards.
    """
    for entry in os.listdir(checkpoint_dir):
        if not entry.endswith(".pth.tar"):
            continue
        os.remove(os.path.join(checkpoint_dir, entry))

    print("Checkpoint successfully removed")

def predictions(logits):
    """
    Pick the predicted class for every row of the network output

    Args:
        logits: 2-D tensor with one row of class scores per sample.

    Returns:
        PyTorch Tensor holding, for each row, the index of its largest
        score
    """
    best = torch.max(logits, 1)
    # torch.max along a dimension yields (values, indices); the indices are
    # the predicted classes.
    return best[1]