# **Deep learning for image analysis with PyTorch**

#### Fernando Cervantes, Systems Analyst I, Imaging Solutions, Research IT
#### fernando.cervantes@jax.org    (slack) @fernando.cervantes

## 6 Monitoring and logging the training process

It is important to track the training process. By doing that, we can detect interesting behavior of our network, possible failures, and even *overfitting*.<br>
Logging also keeps a record of the results of different experiments run with distinct configurations, so that they can be compared later.

### 6.1 _Logging the network performance_

In [3]:
from torchvision.datasets import CIFAR100
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor

# CIFAR-100 dataset object built with train=True, read from a local copy
# (download=False, so nothing is fetched). ToTensor is applied to every image.
cifar_data = CIFAR100(
    root=r'/home/cervaf/data',  # '/mnt/data'
    train=True,
    download=False,
    transform=ToTensor(),
)

# Mini-batches of 128 samples, reshuffled at every pass over the dataset.
# pin_memory=True asks the loader for page-locked host memory for the batches.
cifar_loader = DataLoader(
    cifar_data,
    batch_size=128,
    shuffle=True,
    pin_memory=True,
)

In [4]:
import torch
import torch.nn as nn


class LeNet(nn.Module):
    """LeNet-style convolutional classifier.

    Two (5x5 convolution -> ReLU -> 2x2 max-pooling) stages followed by three
    fully connected layers. The first fully connected layer takes 5*5*16
    features, which matches 32x32 input images
    (32 -> 28 -> 14 -> 10 -> 5 pixels per side through the two stages).

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Number of scores returned per image (one per class).
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10):
        # Always call the initialization function from the nn.Module parent class.
        # This way all parameters from the operations defined as members of *this*
        # class are tracked for their optimization.
        super().__init__()

        self.conv_1 = nn.Conv2d(in_channels=in_channels, out_channels=6, kernel_size=5)
        self.sub_1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv_2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.sub_2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc_1 = nn.Linear(in_features=5*5*16, out_features=120)
        self.fc_2 = nn.Linear(in_features=120, out_features=84)
        self.fc_3 = nn.Linear(in_features=84, out_features=num_classes)

        self.act_fn = nn.ReLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the unnormalized class scores for a batch of images.

        Args:
            x: Batch of images with ``in_channels`` channels. The spatial size
                must reduce to 5x5 after the two convolution/pooling stages
                (e.g. 32x32 inputs).

        Returns:
            Tensor with one row per input image and ``num_classes`` columns.

        Raises:
            RuntimeError: If the flattened feature maps do not have the
                5*5*16 features expected by the first fully connected layer.
        """
        # Apply convolution layers to extract feature maps with image context
        fx = self.sub_1(self.act_fn(self.conv_1(x)))
        fx = self.sub_2(self.act_fn(self.conv_2(fx)))

        # Flatten the feature maps to perform linear operations.
        # Only the non-batch dimensions are flattened: unlike view(-1, 16*5*5),
        # this never regroups values across samples, so an input of the wrong
        # spatial size fails loudly in fc_1 instead of silently changing the
        # batch size.
        fx = fx.flatten(start_dim=1)

        fx = self.act_fn(self.fc_1(fx))
        fx = self.act_fn(self.fc_2(fx))
        y = self.fc_3(fx)

        return y

# Build a LeNet for 3-channel inputs with 100 output classes and move it to
# the GPU (Module.cuda() returns the module itself, so the call can be chained).
net = LeNet(in_channels=3, num_classes=100).cuda()

# Cross-entropy loss, also moved to the GPU. The call is kept as the last
# expression of the cell so the notebook still displays the criterion.
criterion = nn.CrossEntropyLoss()
criterion.cuda()

CrossEntropyLoss()

In [5]:
import torch.optim as optim

# Adam optimizer over every parameter of the network, learning rate 1e-3.
optimizer = optim.Adam(net.parameters(), lr=1e-3)

***
Now that we have set up our experiment, let's create a summary writer for our training stage.

In [6]:
from torch.utils.tensorboard import SummaryWriter

Create a summary writer using TensorBoard

In [7]:
# Event files for this run are written under this directory; the name encodes
# the configuration used (learning rate 0.001, batch size 128).
writer = SummaryWriter(log_dir='runs/LR_0_001_BATCH_128')

In [15]:
# Train the network and log its performance to TensorBoard:
#   * 'training loss' / 'training acc'         -> one point per mini-batch
#   * 'avg training loss' / 'avg training acc' -> one point per epoch
net.train()

num_epochs = 10
steps_per_epoch = len(cifar_loader)

for e in range(num_epochs):
    # Running sums over the current epoch; turned into averages after the
    # last mini-batch.
    avg_loss = 0
    avg_acc = 0

    for i, (x, t) in enumerate(cifar_loader):
        optimizer.zero_grad()

        x = x.cuda()
        t = t.cuda()

        y = net(x)

        loss = criterion(y, t)

        loss.backward()
        optimizer.step()

        # Convert to plain Python numbers once, so the running sums and the
        # logged values are not tensors living on the GPU.
        curr_loss = loss.item()
        curr_acc = torch.sum(y.argmax(dim=1) == t).item()  # correct predictions in this batch

        avg_loss += curr_loss
        avg_acc += curr_acc

        # Global iteration index, so consecutive epochs form one continuous curve.
        global_step = e * steps_per_epoch + i
        writer.add_scalar('training loss', curr_loss, global_step)
        writer.add_scalar('training acc', curr_acc / x.size(0), global_step)

    # Loss is averaged over the mini-batches, accuracy over all the samples.
    avg_loss = avg_loss / steps_per_epoch
    avg_acc = avg_acc / len(cifar_data)

    # Log the epoch averages under their own tags: reusing 'training loss'
    # here would mix epoch-indexed points into the per-iteration curve.
    writer.add_scalar('avg training loss', avg_loss, e)
    writer.add_scalar('avg training acc', avg_acc, e)

# Make sure every pending event is written to disk.
writer.flush()
    

In [14]:
# Store only the learned parameters (state_dict) of the network.
# NOTE(review): the file name says 700 epochs but the training loop shown in
# this notebook runs 10 - confirm which run this checkpoint belongs to.
torch.save(obj=net.state_dict(), f='lenet_700epochs_20220519.pth')