In this assignment, we built a deep learning system to help a fictional kennel owner, Ken, identify and classify lost dogs from 10 different breeds using image recognition.
The dataset contains 12,775 color images (64×64×3) with breed labels. For the challenge, we had to build and evaluate convolutional neural networks (CNNs) that classify an input image into one of the first 5 dog breeds.
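For orientation, each training example pairs an image tensor with an integer breed label. The sketch below is illustrative only and assumes the pipeline in data.py (not shown here) yields channels-first float tensors; the actual loaders come from get_train_val_test_loaders.

# Illustrative only: what one (image, label) pair is assumed to look like.
import torch

image = torch.rand(3, 64, 64)   # channels-first RGB image, values in [0, 1]
label = torch.tensor(2)         # integer breed index in {0, ..., 4} for the challenge
batch = image.unsqueeze(0)      # loaders yield batches of shape (N, 3, 64, 64)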
challenge_model.py
'''
Challenge - Model
Constructs a PyTorch model for a convolutional neural network
'''
import torch
import torch.nn as nn


class Challenge(nn.Module):
    def __init__(self):
        super(Challenge, self).__init__()
        self.upsample = nn.Upsample(size=(128, 128))  # Upsample: 32x32x3 -> 128x128x3
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),   # Conv1: 128x128x3 -> 31x31x64
            nn.LeakyReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # MaxPool1: 31x31x64 -> 15x15x64
            nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2),  # Conv2: 15x15x64 -> 15x15x192
            nn.LeakyReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # MaxPool2: 15x15x192 -> 7x7x192
            nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1), # Conv3: 7x7x192 -> 7x7x384
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1), # Conv4: 7x7x384 -> 7x7x256
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1), # Conv5: 7x7x256 -> 7x7x256
            nn.LeakyReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)                    # MaxPool3: 7x7x256 -> 3x3x256
        )  # Flatten: 3x3x256 = 2304 features per sample
        # Classifier: 2304 -> 512 -> 1024 -> 5
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(2304, 512),   # 2304 -> 512
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 1024),   # 512 -> 1024
            nn.ReLU(inplace=True),
            nn.Linear(1024, 5)      # 1024 -> num_classes
        )

    def forward(self, x):
        x = self.upsample(x)               # 32x32x3 -> 128x128x3
        x = self.features(x)               # -> 3x3x256
        x = torch.flatten(x, start_dim=1)  # -> 2304
        x = self.classifier(x)             # -> 5
        return x
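As a quick sanity check (a sketch, not part of the submitted code), a dummy batch can be pushed through the network. Because nn.Upsample resizes any input to 128×128, the flattened feature size of 2304 and the 5-way output hold whether the inputs are 32×32 or 64×64.

# Sanity check (sketch): verify the flattened size and the 5-class output.
import torch
from challenge_model import Challenge

model = Challenge()
dummy = torch.randn(2, 3, 64, 64)              # batch of 2 RGB images
feats = model.features(model.upsample(dummy))  # -> torch.Size([2, 256, 3, 3])
print(feats.shape, model(dummy).shape)         # 256*3*3 = 2304 per sample; output is [2, 5]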
challenge_train.py
'''
Challenge - Train
Trains a neural network to classify images
Periodically outputs training information, and saves model checkpoints
Usage: python challenge_train.py
'''
import torch
import numpy as np
import utils
from data import get_train_val_test_loaders
from challenge_model import Challenge
from utils import *

def _train_epoch(data_loader, model, criterion, optimizer):
    """
    Train the `model` for one epoch of data from `data_loader`
    Use `optimizer` to optimize the specified `criterion`
    """
    device = utils.get_device()
    for i, (X, y) in enumerate(data_loader):
        # move the batch to the same device as the model
        X, y = X.to(device), y.to(device)
        # clear parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        output = model(X)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()


def _evaluate_epoch(axes, tr_loader, val_loader, model, criterion, epoch, stats):
    device = utils.get_device()
    with torch.no_grad():
        y_true, y_pred = [], []
        correct, total = 0, 0
        running_loss = []
        for X, y in tr_loader:
            X, y = X.to(device), y.to(device)
            output = model(X)
            predicted = predictions(output.data)
            y_true.append(y)
            y_pred.append(predicted)
            total += y.size(0)
            correct += (predicted == y).sum().item()
            running_loss.append(criterion(output, y).item())
        train_loss = np.mean(running_loss)
        train_acc = correct / total
    with torch.no_grad():
        y_true, y_pred = [], []
        correct, total = 0, 0
        running_loss = []
        for X, y in val_loader:
            X, y = X.to(device), y.to(device)
            output = model(X)
            predicted = predictions(output.data)
            y_true.append(y)
            y_pred.append(predicted)
            total += y.size(0)
            correct += (predicted == y).sum().item()
            running_loss.append(criterion(output, y).item())
        val_loss = np.mean(running_loss)
        val_acc = correct / total
    stats.append([val_acc, val_loss, train_acc, train_loss])
    utils.log_cnn_training(epoch, stats)
    utils.update_cnn_training_plot(axes, epoch, stats)

def main():
    device = utils.get_device()
    # data loaders
    tr_loader, va_loader, te_loader, _ = get_train_val_test_loaders(
        num_classes=config('challenge.num_classes'))
    model = Challenge().to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config('cnn.learning_rate'))

    # Attempts to restore the latest checkpoint if one exists
    print('Loading challenge...')
    model, start_epoch, stats = restore_checkpoint(model,
                                                   config('challenge.checkpoint'))
    axes = utils.make_cnn_training_plot()
    # Evaluate model
    _evaluate_epoch(axes, tr_loader, va_loader, model, criterion, start_epoch, stats)
    # Loop over the entire dataset multiple times
    for epoch in range(start_epoch, config('challenge.num_epochs')):
        # Train model
        _train_epoch(tr_loader, model, criterion, optimizer)
        # Evaluate model
        _evaluate_epoch(axes, tr_loader, va_loader, model, criterion, epoch + 1, stats)
        # Save model parameters
        save_checkpoint(model, epoch + 1, config('challenge.checkpoint'), stats)
    print('Finished Training')
    # Keep plot open
    utils.hold_training_plot()


if __name__ == '__main__':
    utils.make_checkpoint_dir('./checkpoints/challenge/')
    main()
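The training and prediction scripts read their hyperparameters from config.json via the config() helper in utils.py. The sketch below only illustrates the keys the code above actually looks up; the values are placeholders, not the ones used for the assignment.

# Illustrative shape of config.json for the keys used above (placeholder values).
import json

example_config = {
    "challenge": {
        "num_classes": 5,
        "num_epochs": 30,                         # placeholder, not the real value
        "checkpoint": "./checkpoints/challenge/",
    },
    "cnn": {
        "learning_rate": 1e-4,                    # placeholder, not the real value
    },
}
print(json.dumps(example_config, indent=4))       # same structure config() expects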
challenge_predict.py
'''
Challenge - Predict
Runs the challenge model inference on the test dataset and saves the
predictions to disk
Usage: python challenge_predict.py
'''
import numpy as np
import pandas as pd
from data import get_train_val_test_loaders
from challenge_model import Challenge
from utils import *
import utils


def predict_challenge(data_loader, model):
    """
    Runs the model inference on the test set and outputs the predictions
    """
    device = utils.get_device()
    model_pred = np.array([])
    for i, (X, y) in enumerate(data_loader):
        # move the batch to the same device as the model
        X = X.to(device)
        output = model(X)
        predicted = predictions(output.data)
        predicted = predicted.cpu().numpy()
        model_pred = np.concatenate([model_pred, predicted])
    return model_pred

def main():
    device = utils.get_device()
    # data loaders
    _, _, te_loader, get_semantic_label = get_train_val_test_loaders(
        num_classes=config('challenge.num_classes'))
    # Attempts to restore the latest checkpoint if one exists
    model = Challenge()
    model, _, _ = restore_checkpoint(model, config('challenge.checkpoint'))
    model = model.to(device)
    model.eval()
    # Evaluate model
    model_pred = predict_challenge(te_loader, model)
    print('Saving challenge predictions...\n')
    # np.concatenate yields a float array, so cast back to integer class indices
    model_pred = [get_semantic_label(int(p)) for p in model_pred]
    pd_writer = pd.DataFrame(model_pred, columns=['predictions'])
    pd_writer.to_csv('predictions.csv', index=False, header=False)


if __name__ == '__main__':
    main()
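The resulting predictions.csv holds one predicted breed name per test image, with no header row. A quick way to inspect it (usage sketch):

import pandas as pd

# Read back the predictions written by challenge_predict.py (no header row).
preds = pd.read_csv('predictions.csv', header=None, names=['predictions'])
print(preds.head())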
"""
Utility functions
"""
import os
import numpy as np
import matplotlib.pyplot as plt
import itertools
import os
import torch
def config(attr):
    """
    Retrieves the queried attribute value from the config file. Loads the
    config file on first call.
    """
    if not hasattr(config, 'config'):
        with open('config.json') as f:
            config.config = json.load(f)
    node = config.config
    for part in attr.split('.'):
        node = node[part]
    return node

def get_device():
    """Gets the available device for PyTorch."""
    def _get_device():
        if torch.cuda.is_available():
            device = torch.device("cuda")
            print("Using GPU with CUDA")
        elif torch.backends.mps.is_available():
            device = torch.device("mps")
            print("Using GPU with MPS")
        else:
            device = torch.device("cpu")
            print("Using CPU")
        return device

    if not hasattr(get_device, 'device'):
        get_device.device = _get_device()
    return get_device.device

def denormalize_image(image):
    """ Rescale the image's color space from (min, max) to (0, 1) """
    ptp = np.max(image, axis=(0, 1)) - np.min(image, axis=(0, 1))
    return (image - np.min(image, axis=(0, 1))) / ptp


def hold_training_plot():
    """
    Keep the program alive to display the training plot
    """
    plt.ioff()
    plt.show()


def log_cnn_training(epoch, stats):
    """
    Logs the validation accuracy and loss to the terminal
    """
    valid_acc, valid_loss, train_acc, train_loss = stats[-1]
    print('Epoch {}'.format(epoch))
    print('\tValidation Loss: {}'.format(valid_loss))
    print('\tValidation Accuracy: {}'.format(valid_acc))
    print('\tTrain Loss: {}'.format(train_loss))
    print('\tTrain Accuracy: {}'.format(train_acc))

def make_cnn_training_plot():
    """
    Runs the setup for an interactive matplotlib graph that logs the loss and
    accuracy
    """
    plt.ion()
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    plt.suptitle('CNN Training')
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Accuracy')
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Loss')
    return axes


def update_cnn_training_plot(axes, epoch, stats):
    """
    Updates the training plot with a new data point for loss and accuracy
    """
    valid_acc = [s[0] for s in stats]
    valid_loss = [s[1] for s in stats]
    train_acc = [s[2] for s in stats]
    train_loss = [s[3] for s in stats]
    axes[0].plot(range(epoch - len(stats) + 1, epoch + 1), valid_acc,
                 linestyle='--', marker='o', color='b')
    axes[0].plot(range(epoch - len(stats) + 1, epoch + 1), train_acc,
                 linestyle='--', marker='o', color='r')
    axes[0].legend(['Validation', 'Train'])
    axes[1].plot(range(epoch - len(stats) + 1, epoch + 1), valid_loss,
                 linestyle='--', marker='o', color='b')
    axes[1].plot(range(epoch - len(stats) + 1, epoch + 1), train_loss,
                 linestyle='--', marker='o', color='r')
    axes[1].legend(['Validation', 'Train'])
    plt.pause(0.00001)


def save_cnn_training_plot():
    """
    Saves the training plot to a file
    """
    plt.savefig('cnn_training_plot.png', dpi=300)

def count_parameters(model):
    """Counts the number of trainable parameters in a model."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def make_checkpoint_dir(checkpoint_dir):
    """Creates the checkpoint directory if it does not already exist."""
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)


def save_checkpoint(model, epoch, checkpoint_dir, stats):
    """Saves the model state and training stats for the given epoch."""
    state = {
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'stats': stats,
    }
    filename = os.path.join(checkpoint_dir,
                            'epoch={}.checkpoint.pth.tar'.format(epoch))
    torch.save(state, filename)

def restore_checkpoint(model, checkpoint_dir, cuda=False, force=False,
                       pretrain=False):
    """
    If a checkpoint exists, restores the PyTorch model from the checkpoint.
    Returns the model, the current epoch, and the training stats.
    """
    cp_files = [file_ for file_ in os.listdir(checkpoint_dir)
                if file_.startswith('epoch=') and file_.endswith('.checkpoint.pth.tar')]
    if not cp_files:
        print('No saved model parameters found')
        if force:
            raise Exception("Checkpoint not found")
        else:
            return model, 0, []

    # Find latest epoch
    for i in itertools.count(1):
        if 'epoch={}.checkpoint.pth.tar'.format(i) in cp_files:
            epoch = i
        else:
            break

    if not force:
        print("Which epoch to load from? Choose in range [0, {}]."
              .format(epoch), "Enter 0 to train from scratch.")
        print(">> ", end='')
        inp_epoch = int(input())
        if inp_epoch not in range(epoch + 1):
            raise Exception("Invalid epoch number")
        if inp_epoch == 0:
            print("Checkpoint not loaded")
            clear_checkpoint(checkpoint_dir)
            return model, 0, []
    else:
        print("Which epoch to load from? Choose in range [1, {}].".format(epoch))
        inp_epoch = int(input())
        if inp_epoch not in range(1, epoch + 1):
            raise Exception("Invalid epoch number")

    filename = os.path.join(checkpoint_dir,
                            'epoch={}.checkpoint.pth.tar'.format(inp_epoch))
    print("Loading from checkpoint {}".format(filename))
    if cuda:
        checkpoint = torch.load(filename, weights_only=False)
    else:
        # Load GPU model on CPU
        checkpoint = torch.load(filename, weights_only=False,
                                map_location=lambda storage, loc: storage)
    try:
        start_epoch = checkpoint['epoch']
        stats = checkpoint['stats']
        if pretrain:
            model.load_state_dict(checkpoint['state_dict'], strict=False)
        else:
            model.load_state_dict(checkpoint['state_dict'])
        print("=> Successfully restored checkpoint (trained for {} epochs)"
              .format(checkpoint['epoch']))
    except:
        print("=> Checkpoint not successfully restored")
        raise
    return model, inp_epoch, stats

def clear_checkpoint(checkpoint_dir):
    """Removes all saved checkpoints from the checkpoint directory."""
    filelist = [f for f in os.listdir(checkpoint_dir) if f.endswith(".pth.tar")]
    for f in filelist:
        os.remove(os.path.join(checkpoint_dir, f))
    print("Checkpoint successfully removed")


def predictions(logits):
    """
    Given the network output, determines the predicted class index
    Returns:
        the predicted class output as a PyTorch Tensor
    """
    _, pred = torch.max(logits, 1)
    return pred
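For completeness, a small usage example of predictions(): it simply takes the argmax over the class dimension of the logits.

import torch
from utils import predictions

# predictions() returns the index of the highest logit for each sample.
logits = torch.tensor([[0.1, 2.3, -1.0, 0.4, 0.0],
                       [1.5, 0.2,  0.3, 0.1, 0.9]])
print(predictions(logits))   # tensor([1, 0])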