@ -1,87 +0,0 @@
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable


# Hyper Parameters
input_size = 784
hidden_size = 500
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# MNIST Dataset
train_dataset = dsets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Neural Network Model (1 hidden layer)
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

net = Net(input_size, hidden_size, num_classes)
net.cuda()

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Convert torch tensor to Variable
        images = Variable(images.view(-1, 28*28).cuda())
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()  # zero the gradient buffer
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                   %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

# Test the Model
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images.view(-1, 28*28)).cuda()
    outputs = net(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted.cpu() == labels).sum()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

# Save the Model
torch.save(net.state_dict(), 'model.pkl')

@ -1,87 +0,0 @@
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable


# Hyper Parameters
input_size = 784
hidden_size = 500
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# MNIST Dataset
train_dataset = dsets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Neural Network Model (1 hidden layer)
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

net = Net(input_size, hidden_size, num_classes)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Convert torch tensor to Variable
        images = Variable(images.view(-1, 28*28))
        labels = Variable(labels)

        # Forward + Backward + Optimize
        optimizer.zero_grad()  # zero the gradient buffer
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                   %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

# Test the Model
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images.view(-1, 28*28))
    outputs = net(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

# Save the Model
torch.save(net.state_dict(), 'model.pkl')

@ -1,64 +0,0 @@
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import Variable


# Hyper Parameters
input_size = 1
output_size = 1
num_epochs = 60
learning_rate = 0.001

# Toy Dataset
x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168],
                    [9.779], [6.182], [7.59], [2.167], [7.042],
                    [10.791], [5.313], [7.997], [3.1]], dtype=np.float32)

y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573],
                    [3.366], [2.596], [2.53], [1.221], [2.827],
                    [3.465], [1.65], [2.904], [1.3]], dtype=np.float32)

# Linear Regression Model
class LinearRegression(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinearRegression, self).__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        out = self.linear(x)
        return out

model = LinearRegression(input_size, output_size)

# Loss and Optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Train the Model
for epoch in range(num_epochs):
    # Convert numpy array to torch Variable
    inputs = Variable(torch.from_numpy(x_train))
    targets = Variable(torch.from_numpy(y_train))

    # Forward + Backward + Optimize
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 5 == 0:
        print ('Epoch [%d/%d], Loss: %.4f'
               %(epoch+1, num_epochs, loss.data[0]))

# Plot the graph
predicted = model(Variable(torch.from_numpy(x_train))).data.numpy()
plt.plot(x_train, y_train, 'ro', label='Original data')
plt.plot(x_train, predicted, label='Fitted line')
plt.legend()
plt.show()

# Save the Model
torch.save(model.state_dict(), 'model.pkl')

@ -1,82 +0,0 @@
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable


# Hyper Parameters
input_size = 784
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# MNIST Dataset (Images and Labels)
train_dataset = dsets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())

# Dataset Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Model
class LogisticRegression(nn.Module):
    def __init__(self, input_size, num_classes):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, x):
        out = self.linear(x)
        return out

model = LogisticRegression(input_size, num_classes)

# Loss and Optimizer
# Softmax is internally computed.
# Set parameters to be updated.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Training the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.view(-1, 28*28))
        labels = Variable(labels)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f'
                   % (epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

# Test the Model
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images.view(-1, 28*28))
    outputs = model(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

print('Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

# Save the Model
torch.save(model.state_dict(), 'model.pkl')

@ -1,165 +0,0 @@
import torch
import torchvision
import torch.nn as nn
import numpy as np
import torch.utils.data as data
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable


#========================== Table of Contents ==========================#
# 1. Basic autograd example 1               (Line 21 to 36)
# 2. Basic autograd example 2               (Line 39 to 77)
# 3. Loading data from numpy                (Line 80 to 83)
# 4. Implementing the input pipeline        (Line 86 to 113)
# 5. Input pipeline for custom dataset      (Line 116 to 138)
# 6. Using pretrained model                 (Line 141 to 155)
# 7. Save and load model                    (Line 158 to 165)


#======================= Basic autograd example 1 =======================#
# Create tensors.
x = Variable(torch.Tensor([1]), requires_grad=True)
w = Variable(torch.Tensor([2]), requires_grad=True)
b = Variable(torch.Tensor([3]), requires_grad=True)

# Build a computational graph.
y = w * x + b    # y = 2 * x + 3

# Compute gradients.
y.backward()

# Print out the gradients.
print(x.grad)    # x.grad = 2
print(w.grad)    # w.grad = 1
print(b.grad)    # b.grad = 1


#======================== Basic autograd example 2 =======================#
# Create tensors.
x = Variable(torch.randn(5, 3))
y = Variable(torch.randn(5, 2))

# Build a linear layer.
linear = nn.Linear(3, 2)
print ('w: ', linear.weight)
print ('b: ', linear.bias)

# Build Loss and Optimizer.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(linear.parameters(), lr=0.01)

# Forward propagation.
pred = linear(x)

# Compute loss.
loss = criterion(pred, y)
print('loss: ', loss.data[0])

# Backpropagation.
loss.backward()

# Print out the gradients.
print ('dL/dw: ', linear.weight.grad)
print ('dL/db: ', linear.bias.grad)

# 1-step Optimization (gradient descent).
optimizer.step()

# You can also do optimization at the low level as shown below.
# linear.weight.data.sub_(0.01 * linear.weight.grad.data)
# linear.bias.data.sub_(0.01 * linear.bias.grad.data)

# Print out the loss after optimization.
pred = linear(x)
loss = criterion(pred, y)
print('loss after 1 step optimization: ', loss.data[0])


#======================== Loading data from numpy ========================#
a = np.array([[1,2], [3,4]])
b = torch.from_numpy(a)    # convert numpy array to torch tensor
c = b.numpy()              # convert torch tensor to numpy array


#===================== Implementing the input pipeline =====================#
# Download and construct dataset.
train_dataset = dsets.CIFAR10(root='../data/',
                              train=True,
                              transform=transforms.ToTensor(),
                              download=True)

# Select one data pair (read data from disk).
image, label = train_dataset[0]
print (image.size())
print (label)

# Data Loader (this provides queue and thread in a very simple way).
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100,
                                           shuffle=True,
                                           num_workers=2)

# When iteration starts, queue and thread start to load dataset from files.
data_iter = iter(train_loader)

# Mini-batch images and labels.
images, labels = next(data_iter)

# Actual usage of the data loader is as below.
for images, labels in train_loader:
    # Your training code will be written here
    pass


#===================== Input pipeline for custom dataset =====================#
# You should build a custom dataset as below.
class CustomDataset(data.Dataset):
    def __init__(self):
        # TODO
        # 1. Initialize file path or list of file names.
        pass
    def __getitem__(self, index):
        # TODO
        # 1. Read one data from file (e.g. using numpy.fromfile, PIL.Image.open).
        # 2. Preprocess the data (e.g. torchvision.Transform).
        # 3. Return a data pair (e.g. image and label).
        pass
    def __len__(self):
        # You should change 0 to the total size of your dataset.
        return 0

# Then, you can just use torch's prebuilt data loader.
custom_dataset = CustomDataset()
train_loader = torch.utils.data.DataLoader(dataset=custom_dataset,
                                           batch_size=100,
                                           shuffle=True,
                                           num_workers=2)


#========================== Using pretrained model ==========================#
# Download and load pretrained resnet.
resnet = torchvision.models.resnet18(pretrained=True)

# If you want to finetune only the top layer of the model.
for param in resnet.parameters():
    param.requires_grad = False

# Replace top layer for finetuning.
resnet.fc = nn.Linear(resnet.fc.in_features, 100)  # 100 is for example.

# For test.
images = Variable(torch.randn(10, 3, 224, 224))
outputs = resnet(images)
print (outputs.size())    # (10, 100)


#============================ Save and load the model ============================#
# Save and load the entire model.
torch.save(resnet, 'model.pkl')
model = torch.load('model.pkl')

# Save and load only the model parameters (recommended).
torch.save(resnet.state_dict(), 'params.pkl')
resnet.load_state_dict(torch.load('params.pkl'))

@ -1,96 +0,0 @@
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable


# Hyper Parameters
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 2
learning_rate = 0.003

# MNIST Dataset
train_dataset = dsets.MNIST(root='./data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data/',
                           train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# BiRNN Model (Many-to-One)
class BiRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size*2, num_classes)  # 2 for bidirection

    def forward(self, x):
        # Set initial states
        h0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)).cuda()  # 2 for bidirection
        c0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)).cuda()

        # Forward propagate RNN
        out, _ = self.lstm(x, (h0, c0))

        # Decode hidden state of last time step
        out = self.fc(out[:, -1, :])
        return out

rnn = BiRNN(input_size, hidden_size, num_layers, num_classes)
rnn.cuda()

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.view(-1, sequence_length, input_size)).cuda()
        labels = Variable(labels).cuda()

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = rnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                   %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

# Test the Model
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images.view(-1, sequence_length, input_size)).cuda()
    outputs = rnn(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted.cpu() == labels).sum()

print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

# Save the Model
torch.save(rnn.state_dict(), 'rnn.pkl')

@ -1,96 +0,0 @@
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable


# Hyper Parameters
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 2
learning_rate = 0.003

# MNIST Dataset
train_dataset = dsets.MNIST(root='./data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data/',
                           train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# BiRNN Model (Many-to-One)
class BiRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size*2, num_classes)  # 2 for bidirection

    def forward(self, x):
        # Set initial states
        h0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size))  # 2 for bidirection
        c0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size))

        # Forward propagate RNN
        out, _ = self.lstm(x, (h0, c0))

        # Decode hidden state of last time step
        out = self.fc(out[:, -1, :])
        return out

rnn = BiRNN(input_size, hidden_size, num_layers, num_classes)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.view(-1, sequence_length, input_size))
        labels = Variable(labels)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = rnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                   %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

# Test the Model
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images.view(-1, sequence_length, input_size))
    outputs = rnn(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

# Save the Model
torch.save(rnn.state_dict(), 'rnn.pkl')

@ -1,93 +0,0 @@
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable


# Hyper Parameters
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# MNIST Dataset
train_dataset = dsets.MNIST(root='./data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data/',
                           train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# CNN Model (2 conv layer)
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc = nn.Linear(7*7*32, 10)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

cnn = CNN()
cnn.cuda()

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)

# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images).cuda()
        labels = Variable(labels).cuda()

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = cnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
                   %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

# Test the Model
cnn.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images).cuda()
    outputs = cnn(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted.cpu() == labels).sum()

print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

# Save the Trained Model
torch.save(cnn.state_dict(), 'cnn.pkl')

@ -1,93 +0,0 @@
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable


# Hyper Parameters
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# MNIST Dataset
train_dataset = dsets.MNIST(root='./data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data/',
                           train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# CNN Model (2 conv layer)
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc = nn.Linear(7*7*32, 10)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

cnn = CNN()

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)

# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images)
        labels = Variable(labels)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = cnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
                   %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

# Test the Model
cnn.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images)
    outputs = cnn(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

# Save the Trained Model
torch.save(cnn.state_dict(), 'cnn.pkl')

@ -1,147 +0,0 @@
# Implementation of https://arxiv.org/pdf/1512.03385.pdf
# See section 4.2 for model architecture on CIFAR-10.
# Some part of the code was referenced below.
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable

# Image Preprocessing
transform = transforms.Compose([
    transforms.Scale(40),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])

# CIFAR-10 Dataset
train_dataset = dsets.CIFAR10(root='./data/',
                              train=True,
                              transform=transform,
                              download=True)

test_dataset = dsets.CIFAR10(root='./data/',
                             train=False,
                             transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=100,
                                          shuffle=False)

# 3x3 Convolution
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                     stride=stride, padding=1, bias=False)

# Residual Block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

# ResNet Module
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

resnet = ResNet(ResidualBlock, [3, 3, 3])
resnet.cuda()

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
lr = 0.001
optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)

# Training
for epoch in range(80):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = resnet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f" %(epoch+1, 80, i+1, 500, loss.data[0]))

    # Decaying Learning Rate
    if (epoch+1) % 20 == 0:
        lr /= 3
        optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)

# Test
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images.cuda())
    outputs = resnet(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted.cpu() == labels).sum()

print('Accuracy of the model on the test images: %d %%' % (100 * correct / total))

# Save the Model
torch.save(resnet.state_dict(), 'resnet.pkl')

@ -1,147 +0,0 @@
# Implementation of https://arxiv.org/pdf/1512.03385.pdf.
# See section 4.2 for model architecture on CIFAR-10.
# Some part of the code was referenced below.
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable

# Image Preprocessing
transform = transforms.Compose([
    transforms.Scale(40),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])

# CIFAR-10 Dataset
train_dataset = dsets.CIFAR10(root='./data/',
                              train=True,
                              transform=transform,
                              download=True)

test_dataset = dsets.CIFAR10(root='./data/',
                             train=False,
                             transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=100,
                                          shuffle=False)

# 3x3 Convolution
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                     stride=stride, padding=1, bias=False)

# Residual Block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

# ResNet Module
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

resnet = ResNet(ResidualBlock, [2, 2, 2])

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
lr = 0.001
optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)

# Training
for epoch in range(80):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images)
        labels = Variable(labels)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = resnet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f" %(epoch+1, 80, i+1, 500, loss.data[0]))

    # Decaying Learning Rate
    if (epoch+1) % 20 == 0:
        lr /= 3
        optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)

# Test
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images)
    outputs = resnet(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

print('Accuracy of the model on the test images: %d %%' % (100 * correct / total))

# Save the Model
torch.save(resnet.state_dict(), 'resnet.pkl')

@ -1,126 +0,0 @@
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
from torchvision.utils import save_image
from torch.autograd import Variable


def to_var(x):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)

def denorm(x):
    out = (x + 1) / 2
    return out.clamp(0, 1)

# Image processing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5),
                         std=(0.5, 0.5, 0.5))])
# MNIST dataset
mnist = datasets.MNIST(root='./data/',
                       train=True,
                       transform=transform,
                       download=True)
# Data loader
data_loader = torch.utils.data.DataLoader(dataset=mnist,
                                          batch_size=100,
                                          shuffle=True)
# Discriminator
D = nn.Sequential(
    nn.Linear(784, 256),
    nn.LeakyReLU(0.2),
    nn.Linear(256, 256),
    nn.LeakyReLU(0.2),
    nn.Linear(256, 1),
    nn.Sigmoid())

# Generator
G = nn.Sequential(
    nn.Linear(64, 256),
    nn.LeakyReLU(0.2),
    nn.Linear(256, 256),
    nn.LeakyReLU(0.2),
    nn.Linear(256, 784),
    nn.Tanh())

if torch.cuda.is_available():
    D.cuda()
    G.cuda()

# Binary cross entropy loss and optimizer
criterion = nn.BCELoss()
d_optimizer = torch.optim.Adam(D.parameters(), lr=0.0003)
g_optimizer = torch.optim.Adam(G.parameters(), lr=0.0003)

# Start training
for epoch in range(200):
    for i, (images, _) in enumerate(data_loader):
        # Build mini-batch dataset
        batch_size = images.size(0)
        images = to_var(images.view(batch_size, -1))

        # Create the labels which are later used as input for the BCE loss
        real_labels = to_var(torch.ones(batch_size))
        fake_labels = to_var(torch.zeros(batch_size))

        #============= Train the discriminator =============#
        # Compute BCE_Loss using real images where BCE_Loss(x, y): - y * log(D(x)) - (1-y) * log(1 - D(x))
        # Second term of the loss is always zero since real_labels == 1
        outputs = D(images)
        d_loss_real = criterion(outputs, real_labels)
        real_score = outputs

        # Compute BCELoss using fake images
        # First term of the loss is always zero since fake_labels == 0
        z = to_var(torch.randn(batch_size, 64))
        fake_images = G(z)
        outputs = D(fake_images)
        d_loss_fake = criterion(outputs, fake_labels)
        fake_score = outputs

        # Backprop + Optimize
        d_loss = d_loss_real + d_loss_fake
        D.zero_grad()
        d_loss.backward()
        d_optimizer.step()

        #=============== Train the generator ===============#
        # Compute loss with fake images
        z = to_var(torch.randn(batch_size, 64))
        fake_images = G(z)
        outputs = D(fake_images)

        # We train G to maximize log(D(G(z)) instead of minimizing log(1-D(G(z)))
        # For the reason, see the last paragraph of section 3. https://arxiv.org/pdf/1406.2661.pdf
        g_loss = criterion(outputs, real_labels)

        # Backprop + Optimize
        D.zero_grad()
        G.zero_grad()
        g_loss.backward()
        g_optimizer.step()

        if (i+1) % 300 == 0:
            print('Epoch [%d/%d], Step[%d/%d], d_loss: %.4f, '
                  'g_loss: %.4f, D(x): %.2f, D(G(z)): %.2f'
                  %(epoch, 200, i+1, 600, d_loss.data[0], g_loss.data[0],
                    real_score.data.mean(), fake_score.data.mean()))

    # Save real images
    if (epoch+1) == 1:
        images = images.view(images.size(0), 1, 28, 28)
        save_image(denorm(images.data), './data/real_images.png')

    # Save sampled images
    fake_images = fake_images.view(fake_images.size(0), 1, 28, 28)
    save_image(denorm(fake_images.data), './data/fake_images-%d.png' %(epoch+1))

# Save the trained parameters
torch.save(G.state_dict(), './generator.pkl')
torch.save(D.state_dict(), './discriminator.pkl')

@ -1,44 +0,0 @@
import torch
import os

class Dictionary(object):
    def __init__(self):
        self.word2idx = {}
        self.idx2word = {}
        self.idx = 0

    def add_word(self, word):
        if word not in self.word2idx:
            self.word2idx[word] = self.idx
            self.idx2word[self.idx] = word
            self.idx += 1

    def __len__(self):
        return len(self.word2idx)

class Corpus(object):
    def __init__(self, path='./data'):
        self.dictionary = Dictionary()

    def get_data(self, path, batch_size=20):
        # Add words to the dictionary
        with open(path, 'r') as f:
            tokens = 0
            for line in f:
                words = line.split() + ['<eos>']
                tokens += len(words)
                for word in words:
                    self.dictionary.add_word(word)

        # Tokenize the file content
        ids = torch.LongTensor(tokens)
        token = 0
        with open(path, 'r') as f:
            for line in f:
                words = line.split() + ['<eos>']
                for word in words:
                    ids[token] = self.dictionary.word2idx[word]
                    token += 1
        num_batches = ids.size(0) // batch_size
        ids = ids[:num_batches*batch_size]
        return ids.view(batch_size, -1)
|
||||
# Some part of the code was referenced from below.
|
||||
# https://github.com/pytorch/examples/tree/master/word_language_model
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import numpy as np
|
||||
from torch.autograd import Variable
|
||||
from data_utils import Dictionary, Corpus
|
||||
|
||||
# Hyper Parameters
|
||||
embed_size = 128
|
||||
hidden_size = 1024
|
||||
num_layers = 1
|
||||
num_epochs = 5
|
||||
num_samples = 1000 # number of words to be sampled
|
||||
batch_size = 20
|
||||
seq_length = 30
|
||||
learning_rate = 0.002
|
||||
|
||||
# Load Penn Treebank Dataset
|
||||
train_path = './data/train.txt'
|
||||
sample_path = './sample.txt'
|
||||
corpus = Corpus()
|
||||
ids = corpus.get_data(train_path, batch_size)
|
||||
vocab_size = len(corpus.dictionary)
|
||||
num_batches = ids.size(1) // seq_length
|
||||
|
||||
# RNN Based Language Model
|
||||
class RNNLM(nn.Module):
|
||||
def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
|
||||
super(RNNLM, self).__init__()
|
||||
self.embed = nn.Embedding(vocab_size, embed_size)
|
||||
self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
|
||||
self.linear = nn.Linear(hidden_size, vocab_size)
|
||||
self.init_weights()
|
||||
|
||||
def init_weights(self):
|
||||
self.embed.weight.data.uniform_(-0.1, 0.1)
|
||||
self.linear.bias.data.fill_(0)
|
||||
self.linear.weight.data.uniform_(-0.1, 0.1)
|
||||
|
||||
def forward(self, x, h):
|
||||
# Embed word ids to vectors
|
||||
x = self.embed(x)
|
||||
|
||||
# Forward propagate RNN
|
||||
out, h = self.lstm(x, h)
|
||||
|
||||
# Reshape output to (batch_size*sequence_length, hidden_size)
|
||||
out = out.contiguous().view(out.size(0)*out.size(1), out.size(2))
|
||||
|
||||
# Decode hidden states of all time step
|
||||
out = self.linear(out)
|
||||
return out, h
|
||||
|
||||
model = RNNLM(vocab_size, embed_size, hidden_size, num_layers)
|
||||
model.cuda()
|
||||
|
||||
# Loss and Optimizer
|
||||
criterion = nn.CrossEntropyLoss()
|
||||
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
|
||||
|
||||
# Truncated Backpropagation
|
||||
def detach(states):
|
||||
return [state.detach() for state in states]
|
||||
|
||||
# Training
|
||||
for epoch in range(num_epochs):
|
||||
# Initial hidden and memory states
|
||||
states = (Variable(torch.zeros(num_layers, batch_size, hidden_size)).cuda(),
|
||||
Variable(torch.zeros(num_layers, batch_size, hidden_size)).cuda())
|
||||
|
||||
for i in range(0, ids.size(1) - seq_length, seq_length):
|
||||
# Get batch inputs and targets
|
||||
inputs = Variable(ids[:, i:i+seq_length]).cuda()
|
||||
targets = Variable(ids[:, (i+1):(i+1)+seq_length].contiguous()).cuda()
|
||||
|
||||
# Forward + Backward + Optimize
|
||||
model.zero_grad()
|
||||
states = detach(states)
|
||||
outputs, states = model(inputs, states)
|
||||
loss = criterion(outputs, targets.view(-1))
|
||||
loss.backward()
|
||||
torch.nn.utils.clip_grad_norm(model.parameters(), 0.5)
|
||||
optimizer.step()
|
||||
|
||||
step = (i+1) // seq_length
|
||||
if step % 100 == 0:
|
||||
print ('Epoch [%d/%d], Step[%d/%d], Loss: %.3f, Perplexity: %5.2f' %
|
||||
(epoch+1, num_epochs, step, num_batches, loss.data[0], np.exp(loss.data[0])))
|
||||
|
||||
# Sampling
|
||||
with open(sample_path, 'w') as f:
|
||||
# Set intial hidden ane memory states
|
||||
state = (Variable(torch.zeros(num_layers, 1, hidden_size)).cuda(),
|
||||
Variable(torch.zeros(num_layers, 1, hidden_size)).cuda())
|
||||
|
||||
# Select one word id randomly
|
||||
prob = torch.ones(vocab_size)
|
||||
input = Variable(torch.multinomial(prob, num_samples=1).unsqueeze(1),
|
||||
volatile=True).cuda()
|
||||
|
||||
for i in range(num_samples):
|
||||
# Forward propagate rnn
|
||||
output, state = model(input, state)
|
||||
|
||||
# Sample a word id
|
||||
prob = output.squeeze().data.exp().cpu()
|
||||
word_id = torch.multinomial(prob, 1)[0]
|
||||
|
||||
# Feed sampled word id to next time step
|
||||
input.data.fill_(word_id)
|
||||
|
||||
# File write
|
||||
word = corpus.dictionary.idx2word[word_id]
|
||||
word = '\n' if word == '<eos>' else word + ' '
|
||||
f.write(word)
|
||||
|
||||
if (i+1) % 100 == 0:
|
||||
print('Sampled [%d/%d] words and save to %s'%(i+1, num_samples, sample_path))
|
||||
|
||||
# Save the Trained Model
|
||||
torch.save(model.state_dict(), 'model.pkl')
|

@ -1,122 +0,0 @@
# Some part of the code was referenced from below.
# https://github.com/pytorch/examples/tree/master/word_language_model
import torch
import torch.nn as nn
import numpy as np
from torch.autograd import Variable
from data_utils import Dictionary, Corpus

# Hyper Parameters
embed_size = 128
hidden_size = 1024
num_layers = 1
num_epochs = 5
num_samples = 1000   # number of words to be sampled
batch_size = 20
seq_length = 30
learning_rate = 0.002

# Load Penn Treebank Dataset
train_path = './data/train.txt'
sample_path = './sample.txt'
corpus = Corpus()
ids = corpus.get_data(train_path, batch_size)
vocab_size = len(corpus.dictionary)
num_batches = ids.size(1) // seq_length

# RNN Based Language Model
class RNNLM(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(RNNLM, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, vocab_size)
        self.init_weights()

    def init_weights(self):
        self.embed.weight.data.uniform_(-0.1, 0.1)
        self.linear.bias.data.fill_(0)
        self.linear.weight.data.uniform_(-0.1, 0.1)

    def forward(self, x, h):
        # Embed word ids to vectors
        x = self.embed(x)

        # Forward propagate RNN
        out, h = self.lstm(x, h)

        # Reshape output to (batch_size*sequence_length, hidden_size)
        out = out.contiguous().view(out.size(0)*out.size(1), out.size(2))

        # Decode hidden states of all time steps
        out = self.linear(out)
        return out, h

model = RNNLM(vocab_size, embed_size, hidden_size, num_layers)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Truncated Backpropagation
def detach(states):
    return [state.detach() for state in states]

# Training
for epoch in range(num_epochs):
    # Initial hidden and memory states
    states = (Variable(torch.zeros(num_layers, batch_size, hidden_size)),
              Variable(torch.zeros(num_layers, batch_size, hidden_size)))

    for i in range(0, ids.size(1) - seq_length, seq_length):
        # Get batch inputs and targets
        inputs = Variable(ids[:, i:i+seq_length])
        targets = Variable(ids[:, (i+1):(i+1)+seq_length].contiguous())

        # Forward + Backward + Optimize
        model.zero_grad()
        states = detach(states)
        outputs, states = model(inputs, states)
        loss = criterion(outputs, targets.view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), 0.5)
        optimizer.step()

        step = (i+1) // seq_length
        if step % 100 == 0:
            print ('Epoch [%d/%d], Step[%d/%d], Loss: %.3f, Perplexity: %5.2f' %
                   (epoch+1, num_epochs, step, num_batches, loss.data[0], np.exp(loss.data[0])))

# Sampling
with open(sample_path, 'w') as f:
    # Set initial hidden and memory states
    state = (Variable(torch.zeros(num_layers, 1, hidden_size)),
             Variable(torch.zeros(num_layers, 1, hidden_size)))

    # Select one word id randomly
    prob = torch.ones(vocab_size)
    input = Variable(torch.multinomial(prob, num_samples=1).unsqueeze(1),
                     volatile=True)

    for i in range(num_samples):
        # Forward propagate rnn
        output, state = model(input, state)

        # Sample a word id
        prob = output.squeeze().data.exp()
        word_id = torch.multinomial(prob, 1)[0]

        # Feed sampled word id to next time step
        input.data.fill_(word_id)

        # File write
        word = corpus.dictionary.idx2word[word_id]
        word = '\n' if word == '<eos>' else word + ' '
        f.write(word)

        if (i+1) % 100 == 0:
            print('Sampled [%d/%d] words and saved to %s' % (i+1, num_samples, sample_path))

# Save the Trained Model
torch.save(model.state_dict(), 'model.pkl')

@ -1,95 +0,0 @@
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable


# Hyper Parameters
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 2
learning_rate = 0.01

# MNIST Dataset
train_dataset = dsets.MNIST(root='./data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data/',
                           train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# RNN Model (Many-to-One)
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Set initial states
        h0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size).cuda())
        c0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size).cuda())

        # Forward propagate RNN
        out, _ = self.lstm(x, (h0, c0))

        # Decode hidden state of last time step
        out = self.fc(out[:, -1, :])
        return out

rnn = RNN(input_size, hidden_size, num_layers, num_classes)
rnn.cuda()

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.view(-1, sequence_length, input_size)).cuda()
        labels = Variable(labels).cuda()

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = rnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                   %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

# Test the Model
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images.view(-1, sequence_length, input_size)).cuda()
    outputs = rnn(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted.cpu() == labels).sum()

print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

# Save the Model
torch.save(rnn.state_dict(), 'rnn.pkl')

@ -1,95 +0,0 @@
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable


# Hyper Parameters
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 2
learning_rate = 0.01

# MNIST Dataset
train_dataset = dsets.MNIST(root='./data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data/',
                           train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# RNN Model (Many-to-One)
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Set initial states
        h0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        c0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))

        # Forward propagate RNN
        out, _ = self.lstm(x, (h0, c0))

        # Decode hidden state of last time step
        out = self.fc(out[:, -1, :])
        return out

rnn = RNN(input_size, hidden_size, num_layers, num_classes)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.view(-1, sequence_length, input_size))
        labels = Variable(labels)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = rnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                   %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

# Test the Model
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images.view(-1, sequence_length, input_size))
    outputs = rnn(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

# Save the Model
torch.save(rnn.state_dict(), 'rnn.pkl')
@ -1,41 +0,0 @@
## Deep Convolutional GAN
A [Generative Adversarial Network](https://arxiv.org/abs/1406.2661) is a generative model that consists of a discriminator and a generator. The discriminator is a binary classifier trained to classify real images as real and fake images as fake: it is trained to assign 1 to real images and 0 to fake images. The generator creates an image from a latent code, and is trained to generate images that cannot be distinguished from real images in order to deceive the discriminator.

In the [Deep Convolutional GAN (DCGAN)](https://arxiv.org/abs/1511.06434), the authors introduce architecture guidelines for stable GAN training. They replace any pooling layers with strided convolutions (in the discriminator) and fractional-strided convolutions (in the generator), and use batchnorm in both the discriminator and the generator. In addition, they use ReLU activations in the generator and LeakyReLU activations in the discriminator. However, in our case, we use LeakyReLU activations in both models to avoid sparse gradients. A minimal sketch of the two adversarial objectives follows.
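This is a sketch of the least-squares variant of those objectives, which mirrors what `solver.py` later in this listing actually computes; `discriminator`, `generator`, `real_images`, and `noise` are assumed to be defined elsewhere.

```python
import torch

# Discriminator: push D(real) toward 1 and D(G(z)) toward 0.
d_loss = (torch.mean((discriminator(real_images) - 1) ** 2)
          + torch.mean(discriminator(generator(noise)) ** 2))

# Generator: push D(G(z)) toward 1 so that fakes look real to D.
g_loss = torch.mean((discriminator(generator(noise)) - 1) ** 2)
```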



## Usage

#### 1. Install the dependencies
```bash
$ pip install -r requirements.txt
```

#### 2. Download the dataset
```bash
$ chmod +x download.sh
$ ./download.sh
```

#### 3. Train the model
```bash
$ python main.py --mode='train'
```

#### 4. Sample the images
```bash
$ python main.py --mode='sample'
```



<br>

## Results

The following is the result on the CelebA dataset.

@ -1,43 +0,0 @@
import os
from torch.utils import data
from torchvision import transforms
from PIL import Image


class ImageFolder(data.Dataset):
    """Custom Dataset compatible with the prebuilt DataLoader.

    This is just for the tutorial. You can use the prebuilt torchvision.datasets.ImageFolder.
    """
    def __init__(self, root, transform=None):
        """Initializes image paths and the preprocessing module."""
        self.image_paths = list(map(lambda x: os.path.join(root, x), os.listdir(root)))
        self.transform = transform

    def __getitem__(self, index):
        """Reads an image from a file, preprocesses it and returns it."""
        image_path = self.image_paths[index]
        image = Image.open(image_path).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image

    def __len__(self):
        """Returns the total number of image files."""
        return len(self.image_paths)


def get_loader(image_path, image_size, batch_size, num_workers=2):
    """Builds and returns a DataLoader."""
    transform = transforms.Compose([
        transforms.Scale(image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    dataset = ImageFolder(image_path, transform)
    data_loader = data.DataLoader(dataset=dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=num_workers)
    return data_loader
@ -1,2 +0,0 @@
wget https://www.dropbox.com/s/e0ig4nf1v94hyj8/CelebA_128crop_FD.zip?dl=0 -P ./
unzip CelebA_128crop_FD.zip -d ./
@ -1,58 +0,0 @@
import argparse
import os
from solver import Solver
from data_loader import get_loader
from torch.backends import cudnn


def main(config):
    cudnn.benchmark = True

    data_loader = get_loader(image_path=config.image_path,
                             image_size=config.image_size,
                             batch_size=config.batch_size,
                             num_workers=config.num_workers)

    solver = Solver(config, data_loader)

    # Create the directories if they don't exist
    if not os.path.exists(config.model_path):
        os.makedirs(config.model_path)
    if not os.path.exists(config.sample_path):
        os.makedirs(config.sample_path)

    # Train the model or sample the images
    if config.mode == 'train':
        solver.train()
    elif config.mode == 'sample':
        solver.sample()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # model hyper-parameters
    parser.add_argument('--image_size', type=int, default=64)
    parser.add_argument('--z_dim', type=int, default=100)
    parser.add_argument('--g_conv_dim', type=int, default=64)
    parser.add_argument('--d_conv_dim', type=int, default=64)

    # training hyper-parameters
    parser.add_argument('--num_epochs', type=int, default=20)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--sample_size', type=int, default=100)
    parser.add_argument('--num_workers', type=int, default=2)
    parser.add_argument('--lr', type=float, default=0.0002)
    parser.add_argument('--beta1', type=float, default=0.5)    # momentum1 in Adam
    parser.add_argument('--beta2', type=float, default=0.999)  # momentum2 in Adam

    # misc
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument('--model_path', type=str, default='./models')
    parser.add_argument('--sample_path', type=str, default='./samples')
    parser.add_argument('--image_path', type=str, default='./CelebA/128_crop')
    parser.add_argument('--log_step', type=int, default=10)
    parser.add_argument('--sample_step', type=int, default=500)

    config = parser.parse_args()
    print(config)
    main(config)
@ -1,59 +0,0 @@
import torch.nn as nn
import torch.nn.functional as F


def deconv(c_in, c_out, k_size, stride=2, pad=1, bn=True):
    """Custom deconvolutional layer for simplicity."""
    layers = []
    layers.append(nn.ConvTranspose2d(c_in, c_out, k_size, stride, pad))
    if bn:
        layers.append(nn.BatchNorm2d(c_out))
    return nn.Sequential(*layers)


class Generator(nn.Module):
    """Generator containing 5 deconvolutional layers."""
    def __init__(self, z_dim=256, image_size=128, conv_dim=64):
        super(Generator, self).__init__()
        self.fc = deconv(z_dim, conv_dim*8, int(image_size/16), 1, 0, bn=False)
        self.deconv1 = deconv(conv_dim*8, conv_dim*4, 4)
        self.deconv2 = deconv(conv_dim*4, conv_dim*2, 4)
        self.deconv3 = deconv(conv_dim*2, conv_dim, 4)
        self.deconv4 = deconv(conv_dim, 3, 4, bn=False)

    def forward(self, z):
        z = z.view(z.size(0), z.size(1), 1, 1)       # If image_size is 64, output shapes are as below.
        out = self.fc(z)                             # (?, 512, 4, 4)
        out = F.leaky_relu(self.deconv1(out), 0.05)  # (?, 256, 8, 8)
        out = F.leaky_relu(self.deconv2(out), 0.05)  # (?, 128, 16, 16)
        out = F.leaky_relu(self.deconv3(out), 0.05)  # (?, 64, 32, 32)
        out = F.tanh(self.deconv4(out))              # (?, 3, 64, 64)
        return out


def conv(c_in, c_out, k_size, stride=2, pad=1, bn=True):
    """Custom convolutional layer for simplicity."""
    layers = []
    layers.append(nn.Conv2d(c_in, c_out, k_size, stride, pad))
    if bn:
        layers.append(nn.BatchNorm2d(c_out))
    return nn.Sequential(*layers)


class Discriminator(nn.Module):
    """Discriminator containing 4 convolutional layers."""
    def __init__(self, image_size=128, conv_dim=64):
        super(Discriminator, self).__init__()
        self.conv1 = conv(3, conv_dim, 4, bn=False)
        self.conv2 = conv(conv_dim, conv_dim*2, 4)
        self.conv3 = conv(conv_dim*2, conv_dim*4, 4)
        self.conv4 = conv(conv_dim*4, conv_dim*8, 4)
        self.fc = conv(conv_dim*8, 1, int(image_size/16), 1, 0, False)

    def forward(self, x):                            # If image_size is 64, output shapes are as below.
        out = F.leaky_relu(self.conv1(x), 0.05)      # (?, 64, 32, 32)
        out = F.leaky_relu(self.conv2(out), 0.05)    # (?, 128, 16, 16)
        out = F.leaky_relu(self.conv3(out), 0.05)    # (?, 256, 8, 8)
        out = F.leaky_relu(self.conv4(out), 0.05)    # (?, 512, 4, 4)
        out = self.fc(out).squeeze()
        return out
@ -1,4 +0,0 @@
torch
torchvision
Pillow
argparse
@ -1,147 +0,0 @@
import torch
import torchvision
import os
from torch import optim
from torch.autograd import Variable
from model import Discriminator
from model import Generator


class Solver(object):
    def __init__(self, config, data_loader):
        self.generator = None
        self.discriminator = None
        self.g_optimizer = None
        self.d_optimizer = None
        self.g_conv_dim = config.g_conv_dim
        self.d_conv_dim = config.d_conv_dim
        self.z_dim = config.z_dim
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.image_size = config.image_size
        self.data_loader = data_loader
        self.num_epochs = config.num_epochs
        self.batch_size = config.batch_size
        self.sample_size = config.sample_size
        self.lr = config.lr
        self.log_step = config.log_step
        self.sample_step = config.sample_step
        self.sample_path = config.sample_path
        self.model_path = config.model_path
        self.build_model()

    def build_model(self):
        """Build the generator and the discriminator."""
        self.generator = Generator(z_dim=self.z_dim,
                                   image_size=self.image_size,
                                   conv_dim=self.g_conv_dim)
        self.discriminator = Discriminator(image_size=self.image_size,
                                           conv_dim=self.d_conv_dim)
        self.g_optimizer = optim.Adam(self.generator.parameters(),
                                      self.lr, [self.beta1, self.beta2])
        self.d_optimizer = optim.Adam(self.discriminator.parameters(),
                                      self.lr, [self.beta1, self.beta2])

        if torch.cuda.is_available():
            self.generator.cuda()
            self.discriminator.cuda()

    def to_variable(self, x):
        """Convert a tensor to a variable."""
        if torch.cuda.is_available():
            x = x.cuda()
        return Variable(x)

    def to_data(self, x):
        """Convert a variable to a tensor."""
        if torch.cuda.is_available():
            x = x.cpu()
        return x.data

    def reset_grad(self):
        """Zero the gradient buffers."""
        self.discriminator.zero_grad()
        self.generator.zero_grad()

    def denorm(self, x):
        """Convert the range (-1, 1) to (0, 1)."""
        out = (x + 1) / 2
        return out.clamp(0, 1)

    def train(self):
        """Train the generator and the discriminator."""
        fixed_noise = self.to_variable(torch.randn(self.batch_size, self.z_dim))
        total_step = len(self.data_loader)
        for epoch in range(self.num_epochs):
            for i, images in enumerate(self.data_loader):

                #===================== Train D =====================#
                images = self.to_variable(images)
                batch_size = images.size(0)
                noise = self.to_variable(torch.randn(batch_size, self.z_dim))

                # Train D to recognize real images as real.
                outputs = self.discriminator(images)
                real_loss = torch.mean((outputs - 1) ** 2)  # L2 loss instead of binary cross entropy loss (optional for stable training)

                # Train D to recognize fake images as fake.
                fake_images = self.generator(noise)
                outputs = self.discriminator(fake_images)
                fake_loss = torch.mean(outputs ** 2)

                # Backprop + optimize
                d_loss = real_loss + fake_loss
                self.reset_grad()
                d_loss.backward()
                self.d_optimizer.step()

                #===================== Train G =====================#
                noise = self.to_variable(torch.randn(batch_size, self.z_dim))

                # Train G so that D recognizes G(z) as real.
                fake_images = self.generator(noise)
                outputs = self.discriminator(fake_images)
                g_loss = torch.mean((outputs - 1) ** 2)

                # Backprop + optimize
                self.reset_grad()
                g_loss.backward()
                self.g_optimizer.step()

                # Print the log info
                if (i+1) % self.log_step == 0:
                    print('Epoch [%d/%d], Step[%d/%d], d_real_loss: %.4f, '
                          'd_fake_loss: %.4f, g_loss: %.4f'
                          %(epoch+1, self.num_epochs, i+1, total_step,
                            real_loss.data[0], fake_loss.data[0], g_loss.data[0]))

                # Save the sampled images
                if (i+1) % self.sample_step == 0:
                    fake_images = self.generator(fixed_noise)
                    torchvision.utils.save_image(self.denorm(fake_images.data),
                                                 os.path.join(self.sample_path,
                                                              'fake_samples-%d-%d.png' %(epoch+1, i+1)))

            # Save the model parameters for each epoch
            g_path = os.path.join(self.model_path, 'generator-%d.pkl' %(epoch+1))
            d_path = os.path.join(self.model_path, 'discriminator-%d.pkl' %(epoch+1))
            torch.save(self.generator.state_dict(), g_path)
            torch.save(self.discriminator.state_dict(), d_path)

    def sample(self):

        # Load the trained parameters
        g_path = os.path.join(self.model_path, 'generator-%d.pkl' %(self.num_epochs))
        d_path = os.path.join(self.model_path, 'discriminator-%d.pkl' %(self.num_epochs))
        self.generator.load_state_dict(torch.load(g_path))
        self.discriminator.load_state_dict(torch.load(d_path))
        self.generator.eval()
        self.discriminator.eval()

        # Sample the images
        noise = self.to_variable(torch.randn(self.sample_size, self.z_dim))
        fake_images = self.generator(noise)
        sample_path = os.path.join(self.sample_path, 'fake_samples-final.png')
        torchvision.utils.save_image(self.denorm(fake_images.data), sample_path, nrow=12)

        print("Saved sampled images to '%s'" %sample_path)
@ -1,59 +0,0 @@
# Image Captioning
The goal of image captioning is to convert a given input image into a natural language description. The encoder-decoder framework is widely used for this task. The image encoder is a convolutional neural network (CNN). In this tutorial, we use a [resnet-152](https://arxiv.org/abs/1512.03385) model pretrained on the [ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/) image classification dataset. The decoder is a long short-term memory (LSTM) network.


#### Training phase
For the encoder part, the pretrained CNN extracts the feature vector from a given input image. The feature vector is linearly transformed to have the same dimension as the input dimension of the LSTM network. For the decoder part, source and target texts are predefined. For example, if the image description is **"Giraffes standing next to each other"**, the source sequence is a list containing **['\<start\>', 'Giraffes', 'standing', 'next', 'to', 'each', 'other']** and the target sequence is a list containing **['Giraffes', 'standing', 'next', 'to', 'each', 'other', '\<end\>']**. Using these source and target sequences and the feature vector, the LSTM decoder is trained as a language model conditioned on the feature vector; a minimal sketch of building the two sequences is shown below.
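As an illustration (not part of the original code), the two sequences can be derived from a tokenized caption like this, assuming `vocab` maps tokens to integer ids as in `build_vocab.py` later in this listing:

```python
# Hypothetical sketch: `vocab` maps a token to its integer id (see build_vocab.py).
tokens = ['Giraffes', 'standing', 'next', 'to', 'each', 'other']
caption = [vocab('<start>')] + [vocab(t) for t in tokens] + [vocab('<end>')]

source = caption[:-1]  # ids of ['<start>', 'Giraffes', ..., 'other']
target = caption[1:]   # ids of ['Giraffes', ..., 'other', '<end>']
```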

#### Test phase
In the test phase, the encoder part is almost the same as in the training phase. The only difference is that the batchnorm layer uses the moving average and variance instead of mini-batch statistics. This can be easily implemented using [encoder.eval()](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/image_captioning/sample.py#L41). For the decoder part, there is a significant difference between the training phase and the test phase. In the test phase, the LSTM decoder can't see the image description. To deal with this problem, the LSTM decoder feeds the previously generated word back in as the next input. This can be implemented using a [for-loop](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/image_captioning/model.py#L57-L68); a compact sketch of this greedy decoding loop follows.
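This sketch assumes `lstm`, `linear`, and `embed` are the decoder layers defined in `model.py` later in this listing, and `features` is the encoder output; the full version is `DecoderRNN.sample`.

```python
inputs = features.unsqueeze(1)              # (batch_size, 1, embed_size)
states = None
for _ in range(20):                         # maximum caption length
    hiddens, states = lstm(inputs, states)  # (batch_size, 1, hidden_size)
    outputs = linear(hiddens.squeeze(1))    # (batch_size, vocab_size)
    predicted = outputs.max(1)[1]           # greedy: most likely next word id
    inputs = embed(predicted).unsqueeze(1)  # feed the prediction back in
```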



## Usage


#### 1. Clone the repositories
```bash
$ git clone https://github.com/pdollar/coco.git
$ cd coco/PythonAPI/
$ make
$ python setup.py build
$ python setup.py install
$ cd ../../
$ git clone https://github.com/yunjey/pytorch-tutorial.git
$ cd pytorch-tutorial/tutorials/03-advanced/image_captioning/
```

#### 2. Download the dataset

```bash
$ pip install -r requirements.txt
$ chmod +x download.sh
$ ./download.sh
```

#### 3. Preprocessing

```bash
$ python build_vocab.py
$ python resize.py
```

#### 4. Train the model

```bash
$ python train.py
```

#### 5. Test the model

```bash
$ python sample.py --image='png/example.png'
```

<br>

## Pretrained model
If you do not want to train the model from scratch, you can use a pretrained model. You can download the pretrained model [here](https://www.dropbox.com/s/ne0ixz5d58ccbbz/pretrained_model.zip?dl=0) and the vocabulary file [here](https://www.dropbox.com/s/26adb7y9m98uisa/vocap.zip?dl=0). You should extract pretrained_model.zip to `./models/` and vocab.pkl to `./data/` using the `unzip` command.
@ -1,77 +0,0 @@
import nltk
import pickle
import argparse
from collections import Counter
from pycocotools.coco import COCO


class Vocabulary(object):
    """Simple vocabulary wrapper."""
    def __init__(self):
        self.word2idx = {}
        self.idx2word = {}
        self.idx = 0

    def add_word(self, word):
        if word not in self.word2idx:
            self.word2idx[word] = self.idx
            self.idx2word[self.idx] = word
            self.idx += 1

    def __call__(self, word):
        if word not in self.word2idx:
            return self.word2idx['<unk>']
        return self.word2idx[word]

    def __len__(self):
        return len(self.word2idx)

def build_vocab(json, threshold):
    """Build a simple vocabulary wrapper."""
    coco = COCO(json)
    counter = Counter()
    ids = coco.anns.keys()
    for i, id in enumerate(ids):
        caption = str(coco.anns[id]['caption'])
        tokens = nltk.tokenize.word_tokenize(caption.lower())
        counter.update(tokens)

        if i % 1000 == 0:
            print("[%d/%d] Tokenized the captions." %(i, len(ids)))

    # If a word's frequency is less than 'threshold', the word is discarded.
    words = [word for word, cnt in counter.items() if cnt >= threshold]

    # Create a vocab wrapper and add some special tokens.
    vocab = Vocabulary()
    vocab.add_word('<pad>')
    vocab.add_word('<start>')
    vocab.add_word('<end>')
    vocab.add_word('<unk>')

    # Add the words to the vocabulary.
    for i, word in enumerate(words):
        vocab.add_word(word)
    return vocab

def main(args):
    vocab = build_vocab(json=args.caption_path,
                        threshold=args.threshold)
    vocab_path = args.vocab_path
    with open(vocab_path, 'wb') as f:
        pickle.dump(vocab, f)
    print("Total vocabulary size: %d" %len(vocab))
    print("Saved the vocabulary wrapper to '%s'" %vocab_path)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--caption_path', type=str,
                        default='/usr/share/mscoco/annotations/captions_train2014.json',
                        help='path for train annotation file')
    parser.add_argument('--vocab_path', type=str, default='./data/vocab.pkl',
                        help='path for saving vocabulary wrapper')
    parser.add_argument('--threshold', type=int, default=4,
                        help='minimum word count threshold')
    args = parser.parse_args()
    main(args)
@ -1,106 +0,0 @@
import torch
import torchvision.transforms as transforms
import torch.utils.data as data
import os
import pickle
import numpy as np
import nltk
from PIL import Image
from build_vocab import Vocabulary
from pycocotools.coco import COCO


class CocoDataset(data.Dataset):
    """COCO Custom Dataset compatible with torch.utils.data.DataLoader."""
    def __init__(self, root, json, vocab, transform=None):
        """Set the paths for images, captions and the vocabulary wrapper.

        Args:
            root: image directory.
            json: coco annotation file path.
            vocab: vocabulary wrapper.
            transform: image transformer.
        """
        self.root = root
        self.coco = COCO(json)
        self.ids = list(self.coco.anns.keys())
        self.vocab = vocab
        self.transform = transform

    def __getitem__(self, index):
        """Returns one data pair (image and caption)."""
        coco = self.coco
        vocab = self.vocab
        ann_id = self.ids[index]
        caption = coco.anns[ann_id]['caption']
        img_id = coco.anns[ann_id]['image_id']
        path = coco.loadImgs(img_id)[0]['file_name']

        image = Image.open(os.path.join(self.root, path)).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)

        # Convert the caption (string) to word ids.
        tokens = nltk.tokenize.word_tokenize(str(caption).lower())
        caption = []
        caption.append(vocab('<start>'))
        caption.extend([vocab(token) for token in tokens])
        caption.append(vocab('<end>'))
        target = torch.Tensor(caption)
        return image, target

    def __len__(self):
        return len(self.ids)


def collate_fn(data):
    """Creates mini-batch tensors from a list of tuples (image, caption).

    We should build a custom collate_fn rather than using the default collate_fn,
    because merging captions (including padding) is not supported by default.

    Args:
        data: list of tuples (image, caption).
            - image: torch tensor of shape (3, 256, 256).
            - caption: torch tensor of shape (?); variable length.

    Returns:
        images: torch tensor of shape (batch_size, 3, 256, 256).
        targets: torch tensor of shape (batch_size, padded_length).
        lengths: list; valid length for each padded caption.
    """
    # Sort the data list by caption length (descending order).
    data.sort(key=lambda x: len(x[1]), reverse=True)
    images, captions = zip(*data)

    # Merge the images (from a tuple of 3D tensors to a 4D tensor).
    images = torch.stack(images, 0)

    # Merge the captions (from a tuple of 1D tensors to a 2D tensor).
    lengths = [len(cap) for cap in captions]
    targets = torch.zeros(len(captions), max(lengths)).long()
    for i, cap in enumerate(captions):
        end = lengths[i]
        targets[i, :end] = cap[:end]
    return images, targets, lengths


def get_loader(root, json, vocab, transform, batch_size, shuffle, num_workers):
    """Returns a torch.utils.data.DataLoader for the custom COCO dataset."""
    # COCO caption dataset
    coco = CocoDataset(root=root,
                       json=json,
                       vocab=vocab,
                       transform=transform)

    # Data loader for the COCO dataset
    # This will return (images, captions, lengths) for every iteration.
    # images: tensor of shape (batch_size, 3, 224, 224).
    # captions: tensor of shape (batch_size, padded_length).
    # lengths: list indicating the valid length of each caption; its length is batch_size.
    data_loader = torch.utils.data.DataLoader(dataset=coco,
                                              batch_size=batch_size,
                                              shuffle=shuffle,
                                              num_workers=num_workers,
                                              collate_fn=collate_fn)
    return data_loader
@ -1,11 +0,0 @@
mkdir data
wget http://msvocds.blob.core.windows.net/annotations-1-0-3/captions_train-val2014.zip -P ./data/
wget http://msvocds.blob.core.windows.net/coco2014/train2014.zip -P ./data/
wget http://msvocds.blob.core.windows.net/coco2014/val2014.zip -P ./data/

unzip ./data/captions_train-val2014.zip -d ./data/
rm ./data/captions_train-val2014.zip
unzip ./data/train2014.zip -d ./data/
rm ./data/train2014.zip
unzip ./data/val2014.zip -d ./data/
rm ./data/val2014.zip
@ -1,69 +0,0 @@
import torch
import torch.nn as nn
import torchvision.models as models
from torch.nn.utils.rnn import pack_padded_sequence
from torch.autograd import Variable


class EncoderCNN(nn.Module):
    def __init__(self, embed_size):
        """Load the pretrained ResNet-152 and replace the top fc layer."""
        super(EncoderCNN, self).__init__()
        resnet = models.resnet152(pretrained=True)
        modules = list(resnet.children())[:-1]  # delete the last fc layer.
        self.resnet = nn.Sequential(*modules)
        self.linear = nn.Linear(resnet.fc.in_features, embed_size)
        self.bn = nn.BatchNorm1d(embed_size, momentum=0.01)
        self.init_weights()

    def init_weights(self):
        """Initialize the weights."""
        self.linear.weight.data.normal_(0.0, 0.02)
        self.linear.bias.data.fill_(0)

    def forward(self, images):
        """Extract the image feature vectors."""
        features = self.resnet(images)
        features = Variable(features.data)  # detach, so the frozen ResNet gets no gradients
        features = features.view(features.size(0), -1)
        features = self.bn(self.linear(features))
        return features


class DecoderRNN(nn.Module):
    def __init__(self, embed_size, hidden_size, vocab_size, num_layers):
        """Set the hyper-parameters and build the layers."""
        super(DecoderRNN, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, vocab_size)
        self.init_weights()

    def init_weights(self):
        """Initialize the weights."""
        self.embed.weight.data.uniform_(-0.1, 0.1)
        self.linear.weight.data.uniform_(-0.1, 0.1)
        self.linear.bias.data.fill_(0)

    def forward(self, features, captions, lengths):
        """Decode image feature vectors and generate captions."""
        embeddings = self.embed(captions)
        embeddings = torch.cat((features.unsqueeze(1), embeddings), 1)
        packed = pack_padded_sequence(embeddings, lengths, batch_first=True)
        hiddens, _ = self.lstm(packed)
        outputs = self.linear(hiddens[0])
        return outputs

    def sample(self, features, states=None):
        """Samples captions for given image features (greedy search)."""
        sampled_ids = []
        inputs = features.unsqueeze(1)
        for i in range(20):                              # maximum sampling length
            hiddens, states = self.lstm(inputs, states)  # (batch_size, 1, hidden_size)
            outputs = self.linear(hiddens.squeeze(1))    # (batch_size, vocab_size)
            predicted = outputs.max(1)[1]
            sampled_ids.append(predicted)
            inputs = self.embed(predicted)
            inputs = inputs.unsqueeze(1)                 # (batch_size, 1, embed_size)
        sampled_ids = torch.cat(sampled_ids, 1)          # (batch_size, 20)
        return sampled_ids.squeeze()
@ -1,5 +0,0 @@
matplotlib
nltk
numpy
Pillow
argparse
@ -1,44 +0,0 @@
import argparse
import os
from PIL import Image


def resize_image(image, size):
    """Resize an image to the given size."""
    return image.resize(size, Image.ANTIALIAS)

def resize_images(image_dir, output_dir, size):
    """Resize the images in 'image_dir' and save them into 'output_dir'."""
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    images = os.listdir(image_dir)
    num_images = len(images)
    for i, image in enumerate(images):
        with open(os.path.join(image_dir, image), 'r+b') as f:
            with Image.open(f) as img:
                img = resize_image(img, size)
                img.save(os.path.join(output_dir, image), img.format)
        if i % 100 == 0:
            print ("[%d/%d] Resized the images and saved them into '%s'."
                   %(i, num_images, output_dir))

def main(args):
    # Point --image_dir/--output_dir at val2014 to resize the validation split as well.
    image_dir = args.image_dir
    output_dir = args.output_dir
    image_size = [args.image_size, args.image_size]
    resize_images(image_dir, output_dir, image_size)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_dir', type=str, default='./data/train2014/',
                        help='directory for train images')
    parser.add_argument('--output_dir', type=str, default='./data/resized2014/',
                        help='directory for saving resized images')
    parser.add_argument('--image_size', type=int, default=256,
                        help='size for image after processing')
    args = parser.parse_args()
    main(args)
@ -1,97 +0,0 @@
import torch
import matplotlib.pyplot as plt
import numpy as np
import argparse
import pickle
import os
from torch.autograd import Variable
from torchvision import transforms
from build_vocab import Vocabulary
from model import EncoderCNN, DecoderRNN
from PIL import Image


def to_var(x, volatile=False):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, volatile=volatile)

def load_image(image_path, transform=None):
    image = Image.open(image_path)
    image = image.resize([224, 224], Image.LANCZOS)

    if transform is not None:
        image = transform(image).unsqueeze(0)

    return image

def main(args):
    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Load the vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build the models
    encoder = EncoderCNN(args.embed_size)
    encoder.eval()  # evaluation mode (BN uses moving mean/variance)
    decoder = DecoderRNN(args.embed_size, args.hidden_size,
                         len(vocab), args.num_layers)

    # Load the trained model parameters
    encoder.load_state_dict(torch.load(args.encoder_path))
    decoder.load_state_dict(torch.load(args.decoder_path))

    # Prepare the image
    image = load_image(args.image, transform)
    image_tensor = to_var(image, volatile=True)

    # If a GPU is available
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Generate a caption from the image
    feature = encoder(image_tensor)
    sampled_ids = decoder.sample(feature)
    sampled_ids = sampled_ids.cpu().data.numpy()

    # Decode the word_ids to words
    sampled_caption = []
    for word_id in sampled_ids:
        word = vocab.idx2word[word_id]
        sampled_caption.append(word)
        if word == '<end>':
            break
    sentence = ' '.join(sampled_caption)

    # Print out the image and the generated caption.
    print (sentence)
    image = Image.open(args.image)
    plt.imshow(np.asarray(image))

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--image', type=str, required=True,
                        help='input image for generating caption')
    parser.add_argument('--encoder_path', type=str, default='./models/encoder-5-3000.pkl',
                        help='path for trained encoder')
    parser.add_argument('--decoder_path', type=str, default='./models/decoder-5-3000.pkl',
                        help='path for trained decoder')
    parser.add_argument('--vocab_path', type=str, default='./data/vocab.pkl',
                        help='path for vocabulary wrapper')

    # Model parameters (should be the same as the parameters in train.py)
    parser.add_argument('--embed_size', type=int, default=256,
                        help='dimension of word embedding vectors')
    parser.add_argument('--hidden_size', type=int, default=512,
                        help='dimension of lstm hidden states')
    parser.add_argument('--num_layers', type=int, default=1,
                        help='number of layers in lstm')
    args = parser.parse_args()
    main(args)
@ -1,122 +0,0 @@
import argparse
import torch
import torch.nn as nn
import numpy as np
import os
import pickle
from data_loader import get_loader
from build_vocab import Vocabulary
from model import EncoderCNN, DecoderRNN
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence
from torchvision import transforms

def to_var(x, volatile=False):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, volatile=volatile)

def main(args):
    # Create the model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Load the vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build the data loader
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    # Build the models
    encoder = EncoderCNN(args.embed_size)
    decoder = DecoderRNN(args.embed_size, args.hidden_size,
                         len(vocab), args.num_layers)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):

            # Set the mini-batch dataset
            # volatile=True keeps the frozen ResNet pass cheap; EncoderCNN
            # re-wraps the features so the linear/bn head still gets gradients.
            images = to_var(images, volatile=True)
            captions = to_var(captions)
            targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

            # Forward, Backward and Optimize
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                      %(epoch, args.num_epochs, i, total_step,
                        loss.data[0], np.exp(loss.data[0])))

            # Save the models
            if (i+1) % args.save_step == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args.model_path,
                                        'decoder-%d-%d.pkl' %(epoch+1, i+1)))
                torch.save(encoder.state_dict(),
                           os.path.join(args.model_path,
                                        'encoder-%d-%d.pkl' %(epoch+1, i+1)))

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', type=str, default='./models/',
                        help='path for saving trained models')
    parser.add_argument('--crop_size', type=int, default=224,
                        help='size for randomly cropping images')
    parser.add_argument('--vocab_path', type=str, default='./data/vocab.pkl',
                        help='path for vocabulary wrapper')
    parser.add_argument('--image_dir', type=str, default='./data/resized2014',
                        help='directory for resized images')
    parser.add_argument('--caption_path', type=str,
                        default='./data/annotations/captions_train2014.json',
                        help='path for train annotation json file')
    parser.add_argument('--log_step', type=int, default=10,
                        help='step size for printing log info')
    parser.add_argument('--save_step', type=int, default=1000,
                        help='step size for saving trained models')

    # Model parameters
    parser.add_argument('--embed_size', type=int, default=256,
                        help='dimension of word embedding vectors')
    parser.add_argument('--hidden_size', type=int, default=512,
                        help='dimension of lstm hidden states')
    parser.add_argument('--num_layers', type=int, default=1,
                        help='number of layers in lstm')

    parser.add_argument('--num_epochs', type=int, default=5)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--num_workers', type=int, default=2)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    args = parser.parse_args()
    print(args)
    main(args)
@ -1,33 +0,0 @@
# Neural Style Transfer

[Neural style transfer](https://arxiv.org/abs/1508.06576) is an algorithm that combines the content of one image with the style of another image using a CNN. Given a content image and a style image, the goal is to generate a target image that minimizes the content difference with the content image and the style difference with the style image.

<p align="center"><img width="100%" src="png/neural_style2.png" /></p>


#### Content loss

To minimize the content difference, we forward propagate the content image and the target image through a pretrained [VGGNet](https://arxiv.org/abs/1409.1556) and extract feature maps from multiple convolutional layers. Then, the target image is updated to minimize the [mean-squared error](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/neural_style_transfer/main.py#L92-L93) between its feature maps and those of the content image.

#### Style loss

As in computing the content loss, we forward propagate the style image and the target image through the VGGNet and extract convolutional feature maps. To generate a texture that matches the style of the style image, we update the target image by minimizing the mean-squared error between the Gram matrix of the style image and the Gram matrix of the target image (feature correlation minimization). See [here](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/neural_style_transfer/main.py#L95-L105) for how to compute the style loss; a compact sketch is shown below.
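For reference, this is a minimal sketch of the per-layer Gram-matrix style loss, matching the computation in `main.py` later in this listing; `f_target` and `f_style` are assumed to be the (1, c, h, w) feature maps of the target and style images at one selected layer.

```python
import torch

def gram_style_loss(f_target, f_style):
    # Flatten each (1, c, h, w) feature map to (c, h*w).
    _, c, h, w = f_target.size()
    f1 = f_target.view(c, h * w)
    f2 = f_style.view(c, h * w)

    # Gram matrices: (c, c) feature-correlation matrices.
    g1 = torch.mm(f1, f1.t())
    g2 = torch.mm(f2, f2.t())

    # Mean-squared error between the Gram matrices, normalized by the layer size.
    return torch.mean((g1 - g2) ** 2) / (c * h * w)
```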



<br>

## Usage

```bash
$ pip install -r requirements.txt
$ python main.py --content='png/content.png' --style='png/style.png'
```

<br>

## Results
The following is the result of applying various styles of artwork to Anne Hathaway's photograph.

@ -1,137 +0,0 @@
from __future__ import division
from torch.backends import cudnn
from torch.autograd import Variable
from torchvision import models
from torchvision import transforms
from PIL import Image
import argparse
import torch
import torchvision
import torch.nn as nn
import numpy as np


use_cuda = torch.cuda.is_available()
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

# Load an image file and convert it into a variable.
# unsqueeze makes it a 4D tensor for the conv arithmetic.
def load_image(image_path, transform=None, max_size=None, shape=None):
    image = Image.open(image_path)

    if max_size is not None:
        scale = max_size / max(image.size)
        size = np.array(image.size) * scale
        image = image.resize(size.astype(int), Image.ANTIALIAS)

    if shape is not None:
        image = image.resize(shape, Image.LANCZOS)

    if transform is not None:
        image = transform(image).unsqueeze(0)

    return image.type(dtype)

# Pretrained VGGNet
class VGGNet(nn.Module):
    def __init__(self):
        """Select conv1_1 ~ conv5_1 activation maps."""
        super(VGGNet, self).__init__()
        self.select = ['0', '5', '10', '19', '28']
        self.vgg = models.vgg19(pretrained=True).features

    def forward(self, x):
        """Extract 5 conv activation maps from an input image.

        Args:
            x: 4D tensor of shape (1, 3, height, width).

        Returns:
            features: a list containing 5 conv activation maps.
        """
        features = []
        for name, layer in self.vgg._modules.items():
            x = layer(x)
            if name in self.select:
                features.append(x)
        return features


def main(config):

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Load the content and style images
    # and make content.size() == style.size()
    content = load_image(config.content, transform, max_size=config.max_size)
    style = load_image(config.style, transform, shape=[content.size(2), content.size(3)])

    # Initialize the target image and the optimizer
    target = Variable(content.clone(), requires_grad=True)
    optimizer = torch.optim.Adam([target], lr=config.lr, betas=[0.5, 0.999])

    vgg = VGGNet()
    if use_cuda:
        vgg.cuda()

    for step in range(config.total_step):

        # Extract multiple (5) conv feature maps
        target_features = vgg(target)
        content_features = vgg(Variable(content))
        style_features = vgg(Variable(style))

        style_loss = 0
        content_loss = 0
        for f1, f2, f3 in zip(target_features, content_features, style_features):
            # Compute the content loss (target and content image)
            content_loss += torch.mean((f1 - f2)**2)

            # Reshape the conv features
            _, c, h, w = f1.size()
            f1 = f1.view(c, h * w)
            f3 = f3.view(c, h * w)

            # Compute the Gram matrices
            f1 = torch.mm(f1, f1.t())
            f3 = torch.mm(f3, f3.t())

            # Compute the style loss (target and style image)
            style_loss += torch.mean((f1 - f3)**2) / (c * h * w)

        # Compute the total loss, backprop and optimize
        loss = content_loss + config.style_weight * style_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (step+1) % config.log_step == 0:
            print ('Step [%d/%d], Content Loss: %.4f, Style Loss: %.4f'
                   %(step+1, config.total_step, content_loss.data[0], style_loss.data[0]))

        if (step+1) % config.sample_step == 0:
            # Save the generated image
            denorm = transforms.Normalize((-2.12, -2.04, -1.80), (4.37, 4.46, 4.44))
            img = target.clone().cpu().squeeze()
            img = denorm(img.data).clamp_(0, 1)
            torchvision.utils.save_image(img, 'output-%d.png' %(step+1))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--content', type=str, default='./png/content.png')
    parser.add_argument('--style', type=str, default='./png/style.png')
    parser.add_argument('--max_size', type=int, default=400)
    parser.add_argument('--total_step', type=int, default=5000)
    parser.add_argument('--log_step', type=int, default=10)
    parser.add_argument('--sample_step', type=int, default=1000)
    parser.add_argument('--style_weight', type=float, default=100)
    parser.add_argument('--lr', type=float, default=0.003)
    config = parser.parse_args()
    print(config)
    main(config)
@ -1,4 +0,0 @@
argparse
torch
torchvision
Pillow
@ -1,24 +0,0 @@
# Variational Auto-Encoder
The [Variational Auto-Encoder (VAE)](https://arxiv.org/abs/1312.6114) is a generative model. From a neural network perspective, the only difference between the VAE and the Auto-Encoder (AE) is that the latent vector z in the VAE is sampled stochastically. This solves the problem that the AE learns an identity mapping and cannot form meaningful representations in the latent space. In fact, the VAE uses the [reparameterization trick](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/variational_auto_encoder/main.py#L40-L44) to enable back propagation without sampling z directly from the mean and variance.

#### VAE loss
As in conventional auto-encoders, the VAE minimizes the reconstruction loss between the input image and the generated image. In addition, the VAE approximates the posterior of z to the standard normal distribution so that the decoder can be used for sampling in the test phase. A minimal sketch of the reparameterization trick and the resulting loss is shown below.
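This sketch mirrors `main.py` later in this listing; `mu` and `log_var` are assumed to be the encoder outputs, and `out` the decoder's reconstruction of the flattened input `images`.

```python
import torch
import torch.nn.functional as F

# Reparameterization trick: z = mu + eps * sigma with eps ~ N(0, 1),
# so gradients can flow through mu and log_var.
eps = torch.randn(mu.size(0), mu.size(1))
z = mu + eps * torch.exp(log_var / 2)  # exp(log_var / 2) converts the log variance to std

# VAE loss = reconstruction loss + KL divergence to the standard normal.
reconst_loss = F.binary_cross_entropy(out, images, size_average=False)
kl_divergence = torch.sum(0.5 * (mu**2 + torch.exp(log_var) - log_var - 1))
total_loss = reconst_loss + kl_divergence
```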

<p align="center"><img width="100%" src="png/vae.png" /></p>




## Usage

```bash
$ pip install -r requirements.txt
$ python main.py
```

<br>

## Results
Real image | Reconstructed image
:-------------------------:|:-------------------------:
 | 
@ -1,98 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import datasets
from torchvision import transforms
import torchvision

# MNIST dataset
dataset = datasets.MNIST(root='./data',
                         train=True,
                         transform=transforms.ToTensor(),
                         download=True)

# Data loader
data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                          batch_size=100,
                                          shuffle=True)

def to_var(x):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)

# VAE model
class VAE(nn.Module):
    def __init__(self, image_size=784, h_dim=400, z_dim=20):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(image_size, h_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(h_dim, z_dim*2))  # 2 for mean and variance.

        self.decoder = nn.Sequential(
            nn.Linear(z_dim, h_dim),
            nn.ReLU(),
            nn.Linear(h_dim, image_size),
            nn.Sigmoid())

    def reparameterize(self, mu, log_var):
        """z = mu + eps * sigma, where eps is sampled from N(0, 1)."""
        eps = to_var(torch.randn(mu.size(0), mu.size(1)))
        z = mu + eps * torch.exp(log_var/2)  # exp(log_var/2) converts the log variance to std
        return z

    def forward(self, x):
        h = self.encoder(x)
        mu, log_var = torch.chunk(h, 2, dim=1)  # mean and log variance.
        z = self.reparameterize(mu, log_var)
        out = self.decoder(z)
        return out, mu, log_var

    def sample(self, z):
        return self.decoder(z)

vae = VAE()

if torch.cuda.is_available():
    vae.cuda()

optimizer = torch.optim.Adam(vae.parameters(), lr=0.001)
iter_per_epoch = len(data_loader)
data_iter = iter(data_loader)

# Fixed inputs for debugging
fixed_z = to_var(torch.randn(100, 20))
fixed_x, _ = next(data_iter)
torchvision.utils.save_image(fixed_x.cpu(), './data/real_images.png')
fixed_x = to_var(fixed_x.view(fixed_x.size(0), -1))

for epoch in range(50):
    for i, (images, _) in enumerate(data_loader):

        images = to_var(images.view(images.size(0), -1))
        out, mu, log_var = vae(images)

        # Compute the reconstruction loss and the KL divergence
        # For the KL divergence, see Appendix B in the paper or http://yunjey47.tistory.com/43
        reconst_loss = F.binary_cross_entropy(out, images, size_average=False)
        kl_divergence = torch.sum(0.5 * (mu**2 + torch.exp(log_var) - log_var -1))

        # Backprop + Optimize
        total_loss = reconst_loss + kl_divergence
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if i % 100 == 0:
            print ("Epoch[%d/%d], Step [%d/%d], Total Loss: %.4f, "
                   "Reconst Loss: %.4f, KL Div: %.7f"
                   %(epoch+1, 50, i+1, iter_per_epoch, total_loss.data[0],
                     reconst_loss.data[0], kl_divergence.data[0]))

    # Save the reconstructed images
    reconst_images, _, _ = vae(fixed_x)
    reconst_images = reconst_images.view(reconst_images.size(0), 1, 28, 28)
    torchvision.utils.save_image(reconst_images.data.cpu(),
                                 './data/reconst_images_%d.png' %(epoch+1))
@ -1,2 +0,0 @@
torch
torchvision
@ -1,25 +0,0 @@
# TensorBoard in PyTorch

In this tutorial, we implement an MNIST classifier using a simple neural network and visualize the training process using [TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard). In the training phase, we plot the loss and accuracy through `scalar_summary` and visualize the training images through `image_summary`. In addition, we visualize the weight and gradient values of the parameters of the neural network using `histo_summary`. The PyTorch code that calls these summary functions can be found [here](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/04-utils/tensorboard/main.py#L83-L105); a minimal usage sketch follows.
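This sketch shows how the `Logger` defined in `logger.py` later in this listing is used inside a training loop; the tags and the `loss_value`, `image_batch`, and `weight_array` placeholders are illustrative only.

```python
from logger import Logger

logger = Logger('./logs')

# Inside the training loop, at global step `step`:
logger.scalar_summary('loss', loss_value, step)         # plot a scalar
logger.image_summary('images', image_batch, step)       # log a list of images
logger.histo_summary('fc1/weight', weight_array, step)  # histogram of values
```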


<br>

## Usage

#### 1. Install the dependencies
```bash
$ pip install -r requirements.txt
```

#### 2. Train the model
```bash
$ python main.py
```

#### 3. Open the TensorBoard
To run the TensorBoard, open a new terminal and run the command below. Then, open http://localhost:6006/ in your web browser.
```bash
$ tensorboard --logdir='./logs' --port=6006
```
@ -1 +0,0 @@

@ -1,71 +0,0 @@
# Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
import tensorflow as tf
import numpy as np
import scipy.misc
try:
    from StringIO import StringIO  # Python 2.7
except ImportError:
    from io import BytesIO         # Python 3.x


class Logger(object):

    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.FileWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable."""
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
        self.writer.add_summary(summary, step)

    def image_summary(self, tag, images, step):
        """Log a list of images."""

        img_summaries = []
        for i, img in enumerate(images):
            # Write the image to a string buffer
            try:
                s = StringIO()
            except:
                s = BytesIO()
            scipy.misc.toimage(img).save(s, format="png")

            # Create an Image object
            img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
                                       height=img.shape[0],
                                       width=img.shape[1])
            # Create a Summary value
            img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))

        # Create and write the Summary
        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)

    def histo_summary(self, tag, values, step, bins=1000):
        """Log a histogram of the tensor of values."""

        # Create a histogram using numpy
        counts, bin_edges = np.histogram(values, bins=bins)

        # Fill the fields of the histogram proto
        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(np.prod(values.shape))
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values**2))

        # Drop the start of the first bin
        bin_edges = bin_edges[1:]

        # Add the bin edges and counts
        for edge in bin_edges:
            hist.bucket_limit.append(edge)
        for c in counts:
            hist.bucket.append(c)

        # Create and write the Summary
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
        self.writer.add_summary(summary, step)
        self.writer.flush()
@ -1,105 +0,0 @@
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
from logger import Logger


# MNIST Dataset
dataset = dsets.MNIST(root='./data',
                      train=True,
                      transform=transforms.ToTensor(),
                      download=True)

# Data Loader (Input Pipeline)
data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                          batch_size=100,
                                          shuffle=True)

def to_np(x):
    return x.data.cpu().numpy()

def to_var(x):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)

# Neural Network Model (1 hidden layer)
class Net(nn.Module):
    def __init__(self, input_size=784, hidden_size=500, num_classes=10):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

net = Net()
if torch.cuda.is_available():
    net.cuda()

# Set the logger
logger = Logger('./logs')

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.00001)

data_iter = iter(data_loader)
iter_per_epoch = len(data_loader)
total_step = 50000

# Start training
for step in range(total_step):

    # Reset the data_iter
    if (step+1) % iter_per_epoch == 0:
        data_iter = iter(data_loader)

    # Fetch the images and labels and convert them to variables
    images, labels = next(data_iter)
    images, labels = to_var(images.view(images.size(0), -1)), to_var(labels)

    # Forward, backward and optimize
    optimizer.zero_grad()  # zero the gradient buffer
    outputs = net(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Compute accuracy
    _, argmax = torch.max(outputs, 1)
    accuracy = (labels == argmax.squeeze()).float().mean()

    if (step+1) % 100 == 0:
        print ('Step [%d/%d], Loss: %.4f, Acc: %.2f'
               %(step+1, total_step, loss.data[0], accuracy.data[0]))

        #============ TensorBoard logging ============#
        # (1) Log the scalar values
        info = {
            'loss': loss.data[0],
            'accuracy': accuracy.data[0]
        }

        for tag, value in info.items():
            logger.scalar_summary(tag, value, step+1)

        # (2) Log the values and gradients of the parameters (histogram)
        for tag, value in net.named_parameters():
            tag = tag.replace('.', '/')
            logger.histo_summary(tag, to_np(value), step+1)
            logger.histo_summary(tag+'/grad', to_np(value.grad), step+1)

        # (3) Log the images
        info = {
            'images': to_np(images.view(-1, 28, 28)[:10])
        }

        for tag, images in info.items():
            logger.image_summary(tag, images, step+1)
@ -1,5 +0,0 @@
tensorflow
torch
torchvision
scipy
numpy