import torch import torch.nn as nn import torchvision.datasets as dsets import torchvision.transforms as transforms from torch.autograd import Variable # Hyper Parameters sequence_length = 28 input_size = 28 hidden_size = 128 num_layers = 2 num_classes = 10 batch_size = 100 num_epochs = 2 learning_rate = 0.01 # MNIST Dataset train_dataset = dsets.MNIST(root='../data/', train=True, transform=transforms.ToTensor(), download=True) test_dataset = dsets.MNIST(root='../data/', train=False, transform=transforms.ToTensor()) # Data Loader (Input Pipeline) train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True) test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False) # RNN Model (Many-to-One) class RNN(nn.Module): def __init__(self, input_size, hidden_size, num_layers, num_classes): super(RNN, self).__init__() self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) self.fc = nn.Linear(hidden_size, num_classes) def forward(self, x): # Set initial states h0 = Variable(torch.zeros(num_layers, x.size(0), hidden_size).cuda()) c0 = Variable(torch.zeros(num_layers, x.size(0), hidden_size).cuda()) # Forward propagate RNN out, _ = self.lstm(x, (h0, c0)) # Decode hidden state of last time step out = self.fc(out[:, -1, :]) return out rnn = RNN(input_size, hidden_size, num_layers, num_classes) rnn.cuda() # Loss and Optimizer criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate) # Train the Model for epoch in range(num_epochs): for i, (images, labels) in enumerate(train_loader): images = Variable(images.view(-1, sequence_length, input_size)).cuda() labels = Variable(labels).cuda() # Forward + Backward + Optimize optimizer.zero_grad() outputs = rnn(images) loss = criterion(outputs, labels) loss.backward() optimizer.step() if (i+1) % 100 == 0: print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0])) # Test the Model correct = 0 total = 0 for images, labels in test_loader: images = Variable(images.view(-1, sequence_length, input_size)).cuda() outputs = rnn(images) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted.cpu() == labels).sum() print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total)) # Save the Model torch.save(rnn, 'rnn.pkl')