diff --git a/docs/capsule_networks/index.html b/docs/capsule_networks/index.html index 20c6028e..03e752f0 100644 --- a/docs/capsule_networks/index.html +++ b/docs/capsule_networks/index.html @@ -84,7 +84,7 @@ it is difficult to understand some of the concepts with just the modules. confusions I had with the paper.

Here’s a notebook for training a Capsule Network on MNIST dataset.

Open In Colab -View Run

+View Run

33import torch.nn as nn
diff --git a/docs/gan/cycle_gan.html b/docs/gan/cycle_gan.html
index 5067a351..538c6099 100644
--- a/docs/gan/cycle_gan.html
+++ b/docs/gan/cycle_gan.html
@@ -89,7 +89,7 @@ The discriminators test whether the generated images look real.

This file contains the model code as well as the training code. We also have a Google Colab notebook.

Open In Colab -View Run

+View Run

36import itertools
diff --git a/docs/hypernetworks/hyper_lstm.html b/docs/hypernetworks/hyper_lstm.html
index ebc7d1c4..2196d261 100644
--- a/docs/hypernetworks/hyper_lstm.html
+++ b/docs/hypernetworks/hyper_lstm.html
@@ -80,7 +80,7 @@ by David Ha gives a good explanation of HyperNetworks.

We have an experiment that trains a HyperLSTM to predict text on Shakespeare dataset. Here’s the link to code: experiment.py

Open In Colab -View Run

+View Run

HyperNetworks use a smaller network to generate weights of a larger network. There are two variants: static hyper-networks and dynamic hyper-networks. Static HyperNetworks have smaller networks that generate weights (kernels) diff --git a/docs/index.html b/docs/index.html index bf660b38..ed60b1fa 100644 --- a/docs/index.html +++ b/docs/index.html @@ -7,20 +7,20 @@ - + - + - + - LabML Neural Networks + labml.ai Neural Networks @@ -70,7 +70,7 @@

-

LabML Neural Networks

+

labml.ai Neural Networks

This is a collection of simple PyTorch implementations of neural networks and related algorithms. These implementations are documented with explanations, @@ -139,7 +139,7 @@ implementations.

author = {Varuna Jayasiri, Nipun Wijerathne}, title = {LabML: A library to organize machine learning experiments}, year = {2020}, - url = {https://lab-ml.com/}, + url = {https://nn.labml.ai/}, }
diff --git a/docs/normalization/batch_norm/index.html b/docs/normalization/batch_norm/index.html index 05ecbeda..5a7cdd58 100644 --- a/docs/normalization/batch_norm/index.html +++ b/docs/normalization/batch_norm/index.html @@ -138,7 +138,7 @@ mean and variance during the training phase and use that for inference.

Here’s the training code and a notebook for training a CNN classifier that uses batch normalization for MNIST dataset.

Open In Colab -View Run

+View Run

98import torch
diff --git a/docs/normalization/batch_norm/readme.html b/docs/normalization/batch_norm/readme.html
index 9aa5f800..8d490274 100644
--- a/docs/normalization/batch_norm/readme.html
+++ b/docs/normalization/batch_norm/readme.html
@@ -138,7 +138,7 @@ mean and variance during the training phase and use that for inference.

Here’s the training code and a notebook for training a CNN classifier that uses batch normalization for MNIST dataset.

Open In Colab -View Run

+View Run

diff --git a/docs/optimizers/amsgrad.html b/docs/optimizers/amsgrad.html index fb17e895..37925234 100644 --- a/docs/optimizers/amsgrad.html +++ b/docs/optimizers/amsgrad.html @@ -488,7 +488,7 @@ The optimal solution is $x = -1$.

#

Run the synthetic experiment is Adam. -Here are the results. +Here are the results. You can see that Adam converges at $x = +1$

@@ -501,7 +501,7 @@ You can see that Adam converges at $x = +1$

#

Run the synthetic experiment is AMSGrad -Here are the results. +Here are the results. You can see that AMSGrad converges to true optimal $x = -1$

diff --git a/docs/resnets/index.html b/docs/resnets/index.html new file mode 100644 index 00000000..b90ff0a6 --- /dev/null +++ b/docs/resnets/index.html @@ -0,0 +1,102 @@ + + + + + + + + + + + + + + + + + + + + + + + None + + + + + + + + +
+
+
+
+

+ home + resnets +

+

+ + + Github + + Join Slack + + Twitter +

+
+
+
+
+ + + + + \ No newline at end of file diff --git a/docs/resnets/models/index.html b/docs/resnets/models/index.html new file mode 100644 index 00000000..65187660 --- /dev/null +++ b/docs/resnets/models/index.html @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + + None + + + + + + + + +
+
+
+
+

+ home + resnets + models +

+

+ + + Github + + Join Slack + + Twitter +

+
+
+
+ + + + + + \ No newline at end of file diff --git a/docs/resnets/models/mlp.html b/docs/resnets/models/mlp.html new file mode 100644 index 00000000..c105f42d --- /dev/null +++ b/docs/resnets/models/mlp.html @@ -0,0 +1,322 @@ + + + + + + + + + + + + + + + + + + + + + + + mlp.py + + + + + + + + +
+
+
+
+

+ home + resnets + models +

+

+ + + Github + + Join Slack + + Twitter +

+
+
+
+
+ + +
+
+
3import torch
+4import torch.nn as nn
+
+
+
+
+ + +
+
+
6class MLP(nn.Module):
+
+
+
+
+ + +
+
+
7    def __init__(self
+8            , in_features
+9            , out_features
+10            , hidden_layers
+11            , actv_func
+12            , pre_module_list=None
+13            , use_dropout=False
+14            , use_batch_norm=False
+15            , use_softmax=True
+16            , device="cpu"
+17            ):
+18        super(MLP, self).__init__()
+19
+20        self.in_features = in_features
+21        self.out_features = out_features
+22        self.num_hidden_layers = len(hidden_layers)
+23        self.hidden_layers = hidden_layers
+24        self.use_dropout = use_dropout
+25        self.use_batch_norm = use_batch_norm
+26        self.actv_func = actv_func
+27        self.use_softmax = use_softmax
+28
+29        self.device = device
+
+
+
+
+ +

Add on to another model

+
+
+
32        if pre_module_list:
+33            self.module_list = pre_module_list
+34        else:
+35            self.module_list = nn.ModuleList()
+36
+37        self.build_()
+
+
+
+
+ +

Send to gpu

+
+
+
40        self.to(self.device)
+
+
+
+
+ + +
+
+
42    def build_(self):
+
+
+
+
+ +

Activation Functions for Fully connected layers # +Start with input dimensions

+
+
+
45        dim = self.in_features
+46        for i in range(self.num_hidden_layers):
+
+
+
+
+ +

Create a fully connected layer between the last layer + and the current hidden layer

+
+
+
49            self.module_list.append(nn.Linear(dim, self.hidden_layers[i]))
+
+
+
+
+ +

Update the current dimension

+
+
+
51            dim = self.hidden_layers[i]
+52
+53            if self.use_batch_norm:
+54                self.module_list.append( nn.BatchNorm1d(dim, affine=True) )
+
+
+
+
+ +

Add the Activation function

+
+
+
57            self.module_list.append( self.GetActivation(name=self.actv_func[i]) )
+58
+59            if self.use_dropout:
+60                self.module_list.append( nn.Dropout(p=0.10) )
+
+
+
+
+ +

Fully connect to output dimensions

+
+
+
63        if dim != self.out_features:
+64            self.module_list.append( nn.Linear(dim, self.out_features) )
+
+
+
+
+ + +
+
+
67    def forward(self, x):
+
+
+
+
+ +

Flatten the 2d image into 1d +Also convert into float for FC layer

+
+
+
70        x = torch.flatten(x.float(), start_dim=1)
+
+
+
+
+ +

Apply each layer in the module list

+
+
+
73        for i in range( len(self.module_list) ):
+74            x = self.module_list[i](x)
+75
+76        return x
+
+
+
+
+ + +
+
+
78    def GetActivation(self, name="relu"):
+79        if name == "relu":
+80            return nn.ReLU()
+81        elif name == "leakyrelu":
+82            return nn.LeakyReLU()
+83        elif name == "Sigmoid":
+84            return nn.Sigmoid()
+85        elif name == "Tanh":
+86            return nn.Tanh()
+87        elif name == "Identity":
+88            return nn.Identity()
+89        else:
+90            return nn.ReLU()
+
+
+
+ + + + + + \ No newline at end of file diff --git a/docs/resnets/models/resnet.html b/docs/resnets/models/resnet.html new file mode 100644 index 00000000..1c068168 --- /dev/null +++ b/docs/resnets/models/resnet.html @@ -0,0 +1,549 @@ + + + + + + + + + + + + + + + + + + + + + + + resnet.py + + + + + + + + +
+
+
+
+

+ home + resnets + models +

+

+ + + Github + + Join Slack + + Twitter +

+
+
+
+
+ + +
+
+
2import torch
+3import torch.nn as nn
+4import torchvision
+5import torchvision.transforms as transforms
+6import torch.optim as optim
+7from torchsummary import summary
+
+
+
+
+ +

custom import

+
+
+
10import numpy as np
+11import time
+12import os
+
+
+
+
+ +

ResBlock

+
+
+
16class ResBlock(nn.Module):
+
+
+
+
+ + +
+
+
17    def __init__(self, num_features, use_batch_norm=False):
+18        super(ResBlock, self).__init__()
+19        self.num_features = num_features
+20        self.conv_layer1 = nn.Conv2d(num_features, num_features,  kernel_size=3, stride=1, padding=1)
+21        self.relu_layer = nn.ReLU()
+22        self.conv_layer2 = nn.Conv2d(num_features, num_features, kernel_size=3, stride=1, padding=1)
+23
+24        self.use_batch_norm = use_batch_norm
+25        if self.use_batch_norm:
+26            self.batch_norm_layer1 = nn.BatchNorm2d(self.num_features)
+27            self.batch_norm_layer2 = nn.BatchNorm2d(self.num_features)
+28
+29        for m in self.modules():
+30            if isinstance(m, nn.Conv2d):
+31                nn.init.kaiming_normal_(m.weight)
+
+
+
+
+ +

nn.init.xavier_uniform_(m.weight)

+
+
+
+
+
+
+
+ + +
+
+
34    def forward(self, x):
+35        residual = x
+36        x = self.conv_layer1(x)
+37        if self.use_batch_norm:
+38            x = self.batch_norm_layer1(x)
+39
+40        x = self.relu_layer(x)
+41        x = self.conv_layer2(x)
+42        if self.use_batch_norm:
+43            x = self.batch_norm_layer2(x)
+44
+45        x += residual
+46        x = self.relu_layer(x)
+47        return x
+
+
+
+
+ +

ResNet

+
+
+
50class ResNet(nn.Module):
+
+
+
+
+ + +
+
+
51    def __init__(self, in_features, num_class, feature_channel_list, batch_norm= False, num_stacks=1, zero_init_residual=True):
+52        super(ResNet, self).__init__()
+53        self.in_features = in_features
+54        self.num_in_channel = in_features[2]
+55        self.num_class = num_class
+56        self.feature_channel_list = feature_channel_list
+57        self.num_residual_blocks = len(self.feature_channel_list)
+58        self.num_stacks = num_stacks
+59        self.batch_norm = batch_norm
+60        self.shape_list = []
+61        self.shape_list.append(in_features)
+62        self.module_list = nn.ModuleList()
+63        self.zero_init_residual= zero_init_residual
+64        self.build_()
+
+
+
+
+ + +
+
+
66    def build_(self):
+
+
+
+
+ +

track filter shape

+
+
+
68        cur_shape = self.GetCurShape()
+69        cur_shape = self.CalcConvOutShape(cur_shape, kernel_size=7, padding=1, stride=2, out_filters= self.feature_channel_list[0])
+70        self.shape_list.append(cur_shape)
+71
+72        if len(self.in_features) == 2:
+73            in_channels = 1
+74        else:
+75            in_channels = self.in_features[2]
+
+
+
+
+ +

First Conv layer 7x7, stride=2, padding=3

+
+
+
78        self.module_list.append(nn.Conv2d(in_channels= in_channels,
+79                                    out_channels= self.feature_channel_list[0],
+80                                    kernel_size=7,
+81                                    stride=2,
+82                                    padding=3))
+
+
+
+
+ +

batch norm

+
+
+
86        if self.batch_norm: #batch_norm
+87            self.module_list.append(nn.BatchNorm2d(self.feature_channel_list[0]))
+
+
+
+
+ +

ReLU()

+
+
+
90        self.module_list.append(nn.ReLU())
+91
+92        for i in range(self.num_residual_blocks-1):
+93            in_size = self.feature_channel_list[i]
+94            out_size = self.feature_channel_list[i+1]
+95
+96            res_block = ResBlock(in_size, use_batch_norm=True)
+
+
+
+
+ +

Stacking Residual blocks

+
+
+
99            for num in range(self.num_stacks):
+100                self.module_list.append(res_block)
+
+
+
+
+ +

Intermediate Conv and ReLU()

+
+
+
103            self.module_list.append(nn.Conv2d(in_channels=in_size,
+104                                              out_channels= out_size,
+105                                              kernel_size=3,
+106                                              padding=1,
+107                                              stride=2))
+
+
+
+
+ +

track filter shape

+
+
+
110            cur_shape = self.CalcConvOutShape(cur_shape, kernel_size=3, padding=1,
+111                                         stride=2, out_filters=out_size)
+112
+113            self.shape_list.append(cur_shape)
+
+
+
+
+ +

batch norm

+
+
+
116            if self.batch_norm:  # batch_norm
+117                self.module_list.append(nn.BatchNorm2d(out_size))
+118
+119            self.module_list.append(nn.ReLU())
+
+
+
+
+ +

print(“shape list”, self.shape_list)

+
+
+
+
+
+
+
+ +

TODO include in the main loop +Last Residual block

+
+
+
125        res_block = ResBlock(out_size, use_batch_norm=True)
+126        for num in range(self.num_stacks):
+127            self.module_list.append(res_block)
+
+
+
+
+ +

Last AvgPool layer +self.module_list.append(nn.AvgPool2d(kernel_size=2, stride=2, padding=0))

+
+
+
131        self.module_list.append(nn.MaxPool2d(kernel_size=2, stride=2, padding=0))
+
+
+
+
+ +

track filter shape

+
+
+
134        cur_shape = self.CalcConvOutShape(cur_shape, kernel_size=2, padding=0, stride=2, out_filters=out_size)
+135        self.shape_list.append(cur_shape)
+136
+137        s = self.GetCurShape()
+138        in_features = s[0] * s[1] * s[2]
+
+
+
+
+ +

Initialization

+
+
+
141        for m in self.modules():
+142            if isinstance(m, nn.Conv2d):
+143                nn.init.kaiming_normal_(m.weight)
+
+
+
+
+ +

nn.init.xavier_uniform_(m.weight)

+
+
+
+
+
+
+
+ +

if self.zero_init_residual: + for m in self.modules(): + if isinstance(m, ResBlock): + nn.init.constant_(m.batch_norm_layer1.weight, 0) + nn.init.constant_(m.batch_norm_layer2.weight, 0)

+
+
+
+
+
+
+
+ + +
+
+
152    def GetCurShape(self):
+153        return self.shape_list[-1]
+
+
+
+
+ + +
+
+
155    def CalcConvFormula(self, W, K, P, S):
+156        return int(np.floor(((W - K + 2 * P) / S) + 1))
+
+
+
+
+ +

https://stackoverflow.com/questions/53580088/calculate-the-output-size-in-convolution-layer +Calculate the output shape after applying a convolution

+
+
+
160    def CalcConvOutShape(self, in_shape, kernel_size, padding, stride, out_filters):
+
+
+
+
+ +

Multiple options for different kernel shapes

+
+
+
162        if type(kernel_size) == int:
+163            out_shape = [self.CalcConvFormula(in_shape[i], kernel_size, padding, stride) for i in range(2)]
+164        else:
+165            out_shape = [self.CalcConvFormula(in_shape[i], kernel_size[i], padding, stride) for i in range(2)]
+166
+167        return (out_shape[0], out_shape[1], out_filters)  # , batch_size... but not necessary.
+
+
+
+
+ + +
+
+
169    def AddMLP(self, MLP):
+170        if MLP:
+171            self.module_list.append(MLP)
+
+
+
+
+ +

def MLP(self, in_features, num_classes, use_batch_norm=False, use_dropout=False, use_softmax=False): + return nn.ReLU(nn.Linear(in_features, num_classes))

+
+
+
+
+
+
+
+ + +
+
+
176    def forward(self, x):
+177        for mod_name in self.module_list:
+178            x = mod_name(x)
+179        x = x.view(x.size(0), -1)  # flat #TODO check if it works
+180        return x
+
+
+
+ + + + + + \ No newline at end of file diff --git a/docs/resnets/pretrained_nets.html b/docs/resnets/pretrained_nets.html new file mode 100644 index 00000000..2c4afcf7 --- /dev/null +++ b/docs/resnets/pretrained_nets.html @@ -0,0 +1,272 @@ + + + + + + + + + + + + + + + + + + + + + + + pretrained_nets.py + + + + + + + + +
+
+
+
+

+ home + resnets +

+

+ + + Github + + Join Slack + + Twitter +

+
+
+
+
+ + +
+
+
3from utils.train import Trainer # Default custom training class
+4from models.resnet import *
+5from torchvision import models
+
+
+
+
+ +

GPU Check

+
+
+
8device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+9print("Device:  " + str(device))
+
+
+
+
+ +

Use different train/test data augmentations

+
+
+
12transform_test = transforms.Compose(
+13        [transforms.ToTensor(),
+14         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+
+
+
+
+ +

Get Cifar 10 Datasets

+
+
+
17save='./data/Cifar10'
+18transform_train = transforms.Compose([
+19        transforms.RandomHorizontalFlip(p=1.0),
+20        transforms.RandomRotation(20),
+21        transforms.RandomCrop(32, (2, 2), pad_if_needed=False, padding_mode='constant'),
+22        transforms.ToTensor(),
+23        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+
+
+
+
+ +

Get Cifar 10 Datasets

+
+
+
26trainset = torchvision.datasets.CIFAR10(root=save, train=True, download=True, transform=transform_train)
+27testset = torchvision.datasets.CIFAR10(root=save, train=False, download=True, transform=transform_test)
+
+
+
+
+ +

Get Cifar 10 Dataloaders

+
+
+
30trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
+31                                          shuffle=True, num_workers=4)
+32
+33testloader = torch.utils.data.DataLoader(testset, batch_size=64,
+34                                         shuffle=False, num_workers=4)
+
+
+
+
+ +
+

Load the pre-trained model

+
+
+
+
40model_ft = models.resnet18(pretrained=True)
+41num_ftrs = model_ft.fc.in_features
+42model_ft.fc = nn.Sequential(
+43    nn.Dropout(0.5),
+44    nn.Linear(num_ftrs, 10)
+45)
+46
+47
+48model_ft = model_ft.to(device)
+
+
+
+
+ +

Loss function

+
+
+
51cost = nn.CrossEntropyLoss()
+
+
+
+
+ +

Optimizer

+
+
+
54lr = 0.0005
+
+
+
+
+ +

opt = optim.SGD(model_ft.parameters(), lr=lr, momentum=0.9)

+
+
+
56opt = torch.optim.Adam(model_ft.parameters(), lr=lr, betas=(0.9, 0.95), weight_decay=1e-4) #0.0005 l2_factor.item()
+
+
+
+
+ +

Create a trainer

+
+
+
59trainer = Trainer(model_ft, opt, cost, name="Transfer-learning",lr=lr , use_lr_schedule=True, device=device)
+
+
+
+
+ +

Run training

+
+
+
62epochs = 25
+63trainer.Train(trainloader, epochs, testloader=testloader)
+
+
+
+
+ +

trainer.Train(trainloader, epochs) # check train error

+
+
+
66print('done')
+
+
+
+ + + + + + \ No newline at end of file diff --git a/docs/resnets/resnet_net.html b/docs/resnets/resnet_net.html new file mode 100644 index 00000000..ee261ff3 --- /dev/null +++ b/docs/resnets/resnet_net.html @@ -0,0 +1,271 @@ + + + + + + + + + + + + + + + + + + + + + + + resnet_net.py + + + + + + + + +
+
+
+
+

+ home + resnets +

+

+ + + Github + + Join Slack + + Twitter +

+
+
+
+
+ +

Custom classes

+
+
+
4from models.mlp import MLP
+5from utils.train import Trainer
+6from models.resnet import *
+
+
+
+
+ +

GPU Check

+
+
+
9device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+10print("Device:  " + str(device))
+
+
+
+
+ +

Use different train/test data augmentations

+
+
+
13transform_test = transforms.Compose(
+14        [transforms.ToTensor(),
+15         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+16
+17transform_train = transforms.Compose([
+18        transforms.RandomHorizontalFlip(p=1.0),
+19        transforms.RandomRotation(20),
+20        transforms.RandomCrop(32, (2, 2), pad_if_needed=False, padding_mode='constant'),
+21        transforms.ToTensor(),
+22        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+
+
+
+
+ +

Get Cifar 10 Datasets

+
+
+
26save='./data/Cifar10'
+27trainset = torchvision.datasets.CIFAR10(root=save, train=True, download=True, transform=transform_train)
+28testset = torchvision.datasets.CIFAR10(root=save, train=False, download=True, transform=transform_test)
+
+
+
+
+ +

Get Cifar 10 Dataloaders

+
+
+
31trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
+32                                          shuffle=True, num_workers=4)
+33
+34testloader = torch.utils.data.DataLoader(testset, batch_size=64, 
+35                                         shuffle=False, num_workers=4)
+36
+37epochs = 50
+
+
+
+
+ +
+

Create the assignment Resnet (part a)

+
+
+
+
42def MyResNet():
+43    resnet = ResNet(in_features= [32, 32, 3],
+44                    num_class=10,
+45                    feature_channel_list = [128, 256, 512],
+46                    batch_norm= True,
+47                    num_stacks=1
+48                    )
+
+
+
+
+ +

Create MLP +Calculate the input shape

+
+
+
52    s = resnet.GetCurShape()
+53    in_features = s[0]*s[1]*s[2]
+54
+55    mlp = MLP(in_features,
+56                 10,
+57                 [], #512, 1024, 512
+58                 [],
+59                 use_batch_norm=False,
+60                 use_dropout=False,
+61                 use_softmax=False,
+62                 device=device)
+63
+64    resnet.AddMLP(mlp)
+65    return resnet
+66
+67model = MyResNet()
+68model.to(device=device)
+69summary(model, (3, 32,32))
+
+
+
+
+ +

Optimizer

+
+
+
72opt = torch.optim.Adam(model.parameters(), lr=0.0005, betas=(0.9, 0.95), weight_decay=1e-8) #0.0005 l2_factor.item()
+
+
+
+
+ +

Loss function

+
+
+
75cost = nn.CrossEntropyLoss()
+
+
+
+
+ +

Create a trainer

+
+
+
78trainer = Trainer(model, opt, cost, name="MyResNet", device=device, use_lr_schedule =True)
+
+
+
+
+ +

Run training

+
+
+
81trainer.Train(trainloader, epochs, testloader=testloader)
+82
+83print('done')
+
+
+
+ + + + + + \ No newline at end of file diff --git a/docs/resnets/utils/index.html b/docs/resnets/utils/index.html new file mode 100644 index 00000000..22ebf200 --- /dev/null +++ b/docs/resnets/utils/index.html @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + + None + + + + + + + + +
+
+
+
+

+ home + resnets + utils +

+

+ + + Github + + Join Slack + + Twitter +

+
+
+
+ + + + + + \ No newline at end of file diff --git a/docs/resnets/utils/labelsmoothing.html b/docs/resnets/utils/labelsmoothing.html new file mode 100644 index 00000000..f651c6c6 --- /dev/null +++ b/docs/resnets/utils/labelsmoothing.html @@ -0,0 +1,157 @@ + + + + + + + + + + + + + + + + + + + + + + + labelsmoothing.py + + + + + + + + +
+
+
+
+

+ home + resnets + utils +

+

+ + + Github + + Join Slack + + Twitter +

+
+
+
+
+ + +
+
+
1import torch.nn.functional as F
+2from torch import nn
+
+
+
+
+ + +
+
+
4class LabelSmoothingLoss(nn.Module):
+
+
+
+
+ + +
+
+
5    def __init__(self, epsilon= 0.5, reduction='mean'):
+6        super().__init__()
+7        self.epsilon = epsilon
+8        self.reduction = reduction
+
+
+
+
+ + +
+
+
10    def forward(self, pred, target):
+11        n = pred.size()[-1]
+12        log_pred = F.log_softmax(pred, dim=-1)
+13        loss = -log_pred.sum(dim=-1).mean()
+14        nll = F.nll_loss(log_pred, target, reduction=self.reduction)
+15        out = (1-self.epsilon)*nll + self.epsilon*(loss / n)
+16        return out
+
+
+
+ + + + + + \ No newline at end of file diff --git a/docs/resnets/utils/train.html b/docs/resnets/utils/train.html new file mode 100644 index 00000000..1d01632a --- /dev/null +++ b/docs/resnets/utils/train.html @@ -0,0 +1,394 @@ + + + + + + + + + + + + + + + + + + + + + + + train.py + + + + + + + + +
+
+
+
+

+ home + resnets + utils +

+

+ + + Github + + Join Slack + + Twitter +

+
+
+
+
+ + +
+
+
3import torch
+4from torch.utils.data import DataLoader, ConcatDataset
+
+
+
+
+ +

from sklearn.model_selection import KFold +from torch.utils.data.sampler import SubsetRandomSampler

+
+
+
8import matplotlib.pyplot as plt
+9from pylab import *
+10import os
+11
+12from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR
+
+
+
+
+ + +
+
+
16class Trainer():
+
+
+
+
+ + +
+
+
17    def __init__(self, net, opt, cost, name="default", lr=0.0005, use_lr_schedule =False , device=None):
+18        self.net = net
+19        self.opt = opt
+20        self.cost = cost
+21        self.device = device
+22        self.epoch = 0
+23        self.start_epoch = 0
+24        self.name = name
+25
+26        self.lr = lr
+27        self.use_lr_schedule = use_lr_schedule
+28        if self.use_lr_schedule:
+29            self.scheduler = ReduceLROnPlateau( self.opt, 'max', factor=0.1, patience=5, threshold=0.00001, verbose=True)
+
+
+
+
+ +

self.scheduler = StepLR(self.opt, step_size=15, gamma=0.1)

+
+
+
+
+
+
+
+ +

Train loop over epochs. Optionally use testloader to return test accuracy after each epoch

+
+
+
33    def Train(self, trainloader, epochs, testloader=None):
+
+
+
+
+ +

Enable Dropout

+
+
+
+
+
+
+
+ +

Record loss/accuracies

+
+
+
37        loss = torch.zeros(epochs)
+38        self.epoch = 0
+
+
+
+
+ +

If testloader is used, loss will be the accuracy

+
+
+
41        for epoch in range(self.start_epoch, self.start_epoch+epochs):
+42            self.epoch = epoch+1
+43
+44            self.net.train()  # Enable Dropout
+45            for data in trainloader:
+
+
+
+
+ +

Get the inputs; data is a list of [inputs, labels]

+
+
+
47                if self.device:
+48                    images, labels = data[0].to(self.device), data[1].to(self.device)
+49                else:
+50                    images, labels = data
+51
+52                self.opt.zero_grad()
+
+
+
+
+ +

Forward + backward + optimize

+
+
+
54                outputs = self.net(images)
+55                epoch_loss = self.cost(outputs, labels)
+56                epoch_loss.backward()
+57                self.opt.step()
+58
+59                loss[epoch] += epoch_loss.item()
+60
+61            if testloader:
+62                loss[epoch] = self.Test(testloader)
+63            else:
+64                loss[epoch] /= len(trainloader)
+65
+66            print("Epoch %d Learning rate %.6f %s: %.3f" % (
+67            self.epoch, self.opt.param_groups[0]['lr'], "Accuracy" if testloader else "Loss", loss[epoch]))
+
+
+
+
+ +

learning rate scheduler

+
+
+
70            if self.use_lr_schedule:
+71                self.scheduler.step(loss[epoch])
+
+
+
+
+ +

self.scheduler.step()

+
+
+
+
+
+
+
+ +

Saving best model

+
+
+
75            if loss[epoch] >= torch.max(loss):
+76                self.save_best_model({
+77                    'epoch': self.epoch,
+78                    'state_dict': self.net.state_dict(),
+79                    'optimizer': self.opt.state_dict(),
+80                })
+81
+82        return loss
+
+
+
+
+ +

Testing

+
+
+
85    def Test(self, testloader, ret="accuracy"):
+
+
+
+
+ +

Disable Dropout

+
+
+
87        self.net.eval()
+
+
+
+
+ +

Track correct and total

+
+
+
90        correct = 0.0
+91        total = 0.0
+92        with torch.no_grad():
+93            for data in testloader:
+94                if self.device:
+95                    images, labels = data[0].to(self.device), data[1].to(self.device)
+96                else:
+97                    images, labels = data
+98
+99                outputs = self.net(images)
+100                _, predicted = torch.max(outputs.data, 1)
+101                total += labels.size(0)
+102                correct += (predicted == labels).sum().item()
+103
+104        return correct / total
+
+
+
+
+ + +
+
+
106    def save_best_model(self, state):
+107        directory = os.path.dirname("./save/%s-best-model/"%(self.name))
+108        if not os.path.exists(directory):
+109            os.mkdir(directory)
+110        torch.save(state, "%s/model.pt" %(directory))
+
+
+
+
+ + +
+
+
112    def save_checkpoint(self, state):
+113        directory = os.path.dirname("./save/%s-checkpoints/"%(self.name))
+114        if not os.path.exists(directory):
+115            os.mkdir(directory)
+116        torch.save(state, "%s/model_epoch_%s.pt" %(directory, self.epoch))
+
+
+
+
+ +

torch.save(state, “./save/checkpoints/model_epoch_%s.pt” % (self.epoch))

+
+
+
+
+
+
+ + + + + + \ No newline at end of file diff --git a/docs/resnets/utils/utils.html b/docs/resnets/utils/utils.html new file mode 100644 index 00000000..4de8c275 --- /dev/null +++ b/docs/resnets/utils/utils.html @@ -0,0 +1,242 @@ + + + + + + + + + + + + + + + + + + + + + + + utils.py + + + + + + + + +
+
+
+
+

+ home + resnets + utils +

+

+ + + Github + + Join Slack + + Twitter +

+
+
+
+
+ + +
+
+
3import torch
+4import torchvision
+5import torchvision.transforms as transforms
+6
+7import torch.nn as nn
+8import torch.nn.functional as F
+9
+10import matplotlib.pyplot as plt
+11import numpy as np
+12
+13from sklearn.model_selection import KFold
+14from torch.utils.data.sampler import SubsetRandomSampler
+
+
+
+
+ +

Plot the loss of multiple runs together

+
+
+
19def PlotLosses(losses, titles, save=None):
+20    fig = plt.figure()
+21    fig.set_size_inches(14, 22)
+
+
+
+
+ +

Plot results on 3 subgraphs +subplot integers: + nrows + ncols + index

+
+
+
27    sublplot_str_start = "" + str(len(losses)) + "1"
+28
+29    for i in range(len(losses)):
+30        subplot = sublplot_str_start + str(i+1)
+31        loss = losses[i]
+32        title = titles[i]
+33
+34        ax = plt.subplot(int(subplot))
+35        ax.plot(range(len(loss)), loss)
+36        ax.set_xlabel("Epoch")
+37        ax.set_title(title)
+38        ax.set_ylabel("Loss")
+
+
+
+
+ +

Save Figure

+
+
+
41    if save:
+42    	plt.savefig(save)
+43    else:
+44    	plt.show()
+
+
+
+
+ + +
+
+
48def ClassSpecificTestCifar10(net, testdata, device=None):
+49    classes = ('plane', 'car', 'bird', 'cat','deer', 'dog', 'frog', 'horse', 'ship', 'truck')
+50    class_correct = list(0. for i in range(10))
+51    class_total = list(0. for i in range(10))
+52    with torch.no_grad():
+53        for data in testdata:
+54            if device:
+55                images, labels = data[0].to(device), data[1].to(device)
+56            else:
+57                images, labels = data
+58
+59            outputs = net(images)
+60            _, predicted = torch.max(outputs, 1)
+61            c = (predicted == labels).squeeze()
+62            for i in range(4):
+63                label = labels[i]
+64                class_correct[label] += c[i].item()
+65                class_total[label] += 1
+
+
+
+
+ +

Print out

+
+
+
68    for i in range(10):
+69        print('Accuracy of %5s : %2d %%' % (
+70            classes[i], 100 * class_correct[i] / class_total[i]))
+
+
+
+
+ + +
+
+
74def GetActivation(name="relu"):
+75    if name == "relu":
+76        return nn.ReLU()
+77    elif name == "leakyrelu":
+78        return nn.LeakyReLU()
+79    elif name == "Sigmoid":
+80        return nn.Sigmoid()
+81    elif name == "Tanh":
+82        return nn.Tanh()
+83    elif name == "Identity":
+84        return nn.Identity()
+85    else:
+86        return nn.ReLU()
+
+
+
+ + + + + + \ No newline at end of file diff --git a/docs/sitemap.xml b/docs/sitemap.xml index a957e3c4..51f8abc4 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -36,7 +36,7 @@ https://nn.labml.ai/gan/cycle_gan.html - 2021-02-14T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 @@ -78,7 +78,84 @@ https://nn.labml.ai/hypernetworks/hyper_lstm.html - 2021-02-12T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 + 1.00 + + + + + https://nn.labml.ai/resnets/pretrained_nets.html + 2021-02-25T16:30:00+00:00 + 1.00 + + + + + https://nn.labml.ai/resnets/index.html + 2021-02-25T16:30:00+00:00 + 1.00 + + + + + https://nn.labml.ai/resnets/utils/index.html + 2021-02-25T16:30:00+00:00 + 1.00 + + + + + https://nn.labml.ai/resnets/utils/utils.html + 2021-02-25T16:30:00+00:00 + 1.00 + + + + + https://nn.labml.ai/resnets/utils/train.html + 2021-02-25T16:30:00+00:00 + 1.00 + + + + + https://nn.labml.ai/resnets/utils/labelsmoothing.html + 2021-02-25T16:30:00+00:00 + 1.00 + + + + + https://nn.labml.ai/resnets/models/index.html + 2021-02-25T16:30:00+00:00 + 1.00 + + + + + https://nn.labml.ai/resnets/models/mlp.html + 2021-02-25T16:30:00+00:00 + 1.00 + + + + + https://nn.labml.ai/resnets/models/resnet.html + 2021-02-25T16:30:00+00:00 + 1.00 + + + + + https://nn.labml.ai/resnets/resnet_net.html + 2021-02-25T16:30:00+00:00 + 1.00 + + + + + https://nn.labml.ai/resnets/accuracy_graph_85.html + 2021-02-25T16:30:00+00:00 1.00 @@ -113,14 +190,14 @@ https://nn.labml.ai/normalization/batch_norm/index.html - 2021-02-15T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 https://nn.labml.ai/normalization/batch_norm/readme.html - 2021-02-19T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 @@ -155,7 +232,7 @@ https://nn.labml.ai/index.html - 2021-02-19T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 @@ -202,6 +279,13 @@ + + https://nn.labml.ai/optimizers/readme.html + 2021-02-23T16:30:00+00:00 + 1.00 + + + https://nn.labml.ai/optimizers/mnist_experiment.html 2021-02-02T16:30:00+00:00 @@ -232,7 +316,7 @@ 
https://nn.labml.ai/optimizers/amsgrad.html - 2021-01-30T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 @@ -281,14 +365,14 @@ https://nn.labml.ai/transformers/gpt/experiment.html - 2021-01-25T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 https://nn.labml.ai/transformers/gpt/index.html - 2021-02-02T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 @@ -316,21 +400,21 @@ https://nn.labml.ai/transformers/feedback/index.html - 2021-02-10T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 https://nn.labml.ai/transformers/feedback/README.html - 2021-02-01T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 https://nn.labml.ai/transformers/feedback/experiment.html - 2021-02-02T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 @@ -344,7 +428,7 @@ https://nn.labml.ai/transformers/glu_variants/simple.html - 2021-01-26T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 @@ -358,7 +442,7 @@ https://nn.labml.ai/transformers/glu_variants/simple.html - 2021-02-10T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 @@ -379,14 +463,14 @@ https://nn.labml.ai/transformers/switch/index.html - 2021-02-19T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 https://nn.labml.ai/transformers/switch/readme.html - 2021-02-19T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 @@ -428,14 +512,14 @@ https://nn.labml.ai/transformers/compressive/index.html - 2021-02-19T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 https://nn.labml.ai/transformers/compressive/readme.html - 2021-02-19T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 @@ -463,14 +547,14 @@ https://nn.labml.ai/transformers/xl/index.html - 2021-02-18T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 https://nn.labml.ai/transformers/xl/readme.html - 2021-02-19T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 @@ -498,14 +582,14 @@ https://nn.labml.ai/capsule_networks/mnist.html - 2021-01-17T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 https://nn.labml.ai/capsule_networks/index.html - 2021-02-12T16:30:00+00:00 + 2021-02-27T16:30:00+00:00 1.00 diff --git 
a/docs/transformers/compressive/index.html b/docs/transformers/compressive/index.html index 3992f882..be44b9cb 100644 --- a/docs/transformers/compressive/index.html +++ b/docs/transformers/compressive/index.html @@ -105,7 +105,7 @@ This is supposed to be more stable in standard transformer setups.

Here are the training code and a notebook for training a compressive transformer model on the Tiny Shakespeare dataset.

Open In Colab -View Run

+View Run

54from typing import Optional, List
diff --git a/docs/transformers/compressive/readme.html b/docs/transformers/compressive/readme.html
index c8fb17af..66b73b03 100644
--- a/docs/transformers/compressive/readme.html
+++ b/docs/transformers/compressive/readme.html
@@ -105,7 +105,7 @@ This is supposed to be more stable in standard transformer setups.

Here are the training code and a notebook for training a compressive transformer model on the Tiny Shakespeare dataset.

Open In Colab -View Run

+View Run

diff --git a/docs/transformers/feedback/README.html b/docs/transformers/feedback/README.html index 891db7d0..d3ac481c 100644 --- a/docs/transformers/feedback/README.html +++ b/docs/transformers/feedback/README.html @@ -98,7 +98,7 @@ We implemented a custom PyTorch function to improve performance.

Here’s the training code and a notebook for training a feedback transformer on Tiny Shakespeare dataset.

Colab Notebook

Open In Colab -View Run

+View Run

diff --git a/docs/transformers/feedback/experiment.html b/docs/transformers/feedback/experiment.html index b44c76e7..d9e56278 100644 --- a/docs/transformers/feedback/experiment.html +++ b/docs/transformers/feedback/experiment.html @@ -78,7 +78,7 @@ You can pick the original feedback transformer or the new version where the keys and values are precalculated.

Here’s a Colab notebook for training a feedback transformer on Tiny Shakespeare dataset.

Open In Colab -View Run

+View Run

19import torch
diff --git a/docs/transformers/feedback/index.html b/docs/transformers/feedback/index.html
index ba8e08c9..934af315 100644
--- a/docs/transformers/feedback/index.html
+++ b/docs/transformers/feedback/index.html
@@ -97,7 +97,7 @@ The second half of this file implements this.
 We implemented a custom PyTorch function to improve performance.

Here’s the training code and a notebook for training a feedback transformer on Tiny Shakespeare dataset.

Open In Colab -View Run

+View Run

43import math
diff --git a/docs/transformers/glu_variants/simple.html b/docs/transformers/glu_variants/simple.html
index 422d979b..7ed19603 100644
--- a/docs/transformers/glu_variants/simple.html
+++ b/docs/transformers/glu_variants/simple.html
@@ -78,7 +78,7 @@ We try different variants for the position-wise feedfo
 

This is a simpler implementation that doesn’t use labml.configs module. We decided to write a simpler implementation to make it easier for readers who are not familiar.

Open In Colab -View Run

+View Run

20import dataclasses
diff --git a/docs/transformers/gpt/index.html b/docs/transformers/gpt/index.html
index e576b3e6..01a130ef 100644
--- a/docs/transformers/gpt/index.html
+++ b/docs/transformers/gpt/index.html
@@ -91,7 +91,7 @@ For the transformer we reuse the
 existing labml/nn transformer implementation.

Here’s a notebook for training a GPT model on Tiny Shakespeare dataset.

Open In Colab -View Run

+View Run

35import torch
diff --git a/docs/transformers/switch/index.html b/docs/transformers/switch/index.html
index c7b5bbad..279ab732 100644
--- a/docs/transformers/switch/index.html
+++ b/docs/transformers/switch/index.html
@@ -95,7 +95,7 @@ In a distributed setup you would have each FFN (each very large) on a different
 discusses dropping tokens when routing is not balanced.

Here’s the training code and a notebook for training a switch transformer on Tiny Shakespeare dataset.

Open In Colab -View Run

+View Run

40import torch
diff --git a/docs/transformers/switch/readme.html b/docs/transformers/switch/readme.html
index 3932b635..ea5d5a4d 100644
--- a/docs/transformers/switch/readme.html
+++ b/docs/transformers/switch/readme.html
@@ -95,7 +95,7 @@ In a distributed setup you would have each FFN (each very large) on a different
 discusses dropping tokens when routing is not balanced.

Here’s the training code and a notebook for training a switch transformer on Tiny Shakespeare dataset.

Open In Colab -View Run

+View Run

diff --git a/docs/transformers/xl/index.html b/docs/transformers/xl/index.html index 84bbdda0..a3ab5c4b 100644 --- a/docs/transformers/xl/index.html +++ b/docs/transformers/xl/index.html @@ -90,7 +90,7 @@ are introduced at the attention calculation.

Annotated implementation of relative multi-headed attention is in relative_mha.py.

Here’s the training code and a notebook for training a transformer XL model on Tiny Shakespeare dataset.

Open In Colab -View Run

+View Run

36from typing import List, Optional
diff --git a/docs/transformers/xl/readme.html b/docs/transformers/xl/readme.html
index 616b4255..5c476e39 100644
--- a/docs/transformers/xl/readme.html
+++ b/docs/transformers/xl/readme.html
@@ -90,7 +90,7 @@ are introduced at the attention calculation.

Annotated implementation of relative multi-headed attention is in relative_mha.py.

Here’s the training code and a notebook for training a transformer XL model on Tiny Shakespeare dataset.

Open In Colab -View Run

+View Run