diff --git a/README.md b/README.md
index 380d190..6e85097 100644
--- a/README.md
+++ b/README.md
@@ -2,10 +2,6 @@
--------------------------------------------------------------------------------
-
-
-
-
This repository provides tutorial code for deep learning researchers to learn [PyTorch](https://github.com/pytorch/pytorch). In this tutorial, most of the models are implemented in less than 30 lines of code. Before starting, it is recommended to finish the [official PyTorch tutorial](http://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html).
@@ -13,21 +9,29 @@ This repository provides tutorial code for deep learning researchers to learn [P
## Table of Contents
-* [PyTorch Basics](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/00%20-%20PyTorch%20Basics/main.py)
-* [Linear Regression](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01%20-%20Linear%20Regression/main.py#L24-L31)
-* [Logistic Regression](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/02%20-%20Logistic%20Regression/main.py#L35-L42)
-* [Feedforward Neural Network](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03%20-%20Feedforward%20Neural%20Network/main.py#L36-L47)
-* [Convolutional Neural Network](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/04%20-%20Convolutional%20Neural%20Network/main.py#L33-L53)
-* [Deep Residual Network](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/05%20-%20Deep%20Residual%20Network/main.py#L67-L103)
-* [Recurrent Neural Network](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/06%20-%20Recurrent%20Neural%20Network/main.py#L38-L56)
-* [Bidirectional Recurrent Neural Network](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/07%20-%20Bidirectional%20Recurrent%20Neural%20Network/main.py#L38-L57)
-* [Language Model (RNNLM)](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/08%20-%20Language%20Model/main.py#L28-L53)
-* [Image Captioning (CNN-RNN)](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/09%20-%20Image%20Captioning)
-* [Generative Adversarial Network](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/10%20-%20Generative%20Adversarial%20Network/main.py#L25-L51)
-* [Deep Convolutional GAN](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/11%20-%20Deep%20Convolutional%20Generative%20Adversarial%20Network/main.py#L32-L50)
-* Variational Auto-Encoder (will be updated soon)
-* Neural Style Transfer (will be updated soon)
-* [Deep Q-Network and Q-learning (WIP)](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/12%20-%20Deep%20Q%20Network/dqn13.py)
+#### 1. Basics
+* [PyTorch Basics](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/01-basics/pytorch_basics/main.py)
+* [Linear Regression](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/01-basics/linear_regression/main.py#L24-L31)
+* [Logistic Regression](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/01-basics/logistic_regression/main.py#L35-L42)
+* [Feedforward Neural Network](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/01-basics/feedforward_neural_network/main.py#L36-L47)
+
+#### 2. Intermediate
+* [Convolutional Neural Network](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/02-intermediate/convolutional_neural_network/main.py#L33-L53)
+* [Deep Residual Network](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/02-intermediate/deep_residual_network/main.py#L67-L103)
+* [Recurrent Neural Network](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/02-intermediate/recurrent_neural_network/main.py#L38-L56)
+* [Bidirectional Recurrent Neural Network](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/02-intermediate/bidirectional_recurrent_neural_network/main.py#L38-L57)
+* [Language Model (RNN-LM)](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/02-intermediate/language_model/main.py#L28-L53)
+* [Generative Adversarial Network](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/02-intermediate/generative_adversarial_network/main.py#L34-L50)
+
+#### 3. Advanced
+* [Image Captioning (CNN-RNN)](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/03-advanced/image_captioning)
+* [Deep Convolutional GAN (DCGAN)](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/03-advanced/deep_convolutional_gan)
+* [Variational Auto-Encoder](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/03-advanced/variational_auto_encoder)
+* [Neural Style Transfer](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/03-advanced/neural_style_transfer)
+
+#### 4. Utilities
+* [TensorBoard in PyTorch](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/04-utils/tensorboard)
+
@@ -43,10 +47,13 @@ $ python main-gpu.py # gpu version
## Dependencies
-* [pytorch](http://pytorch.org)
-* [pytorch-vision](http://pytorch.org/)
+* [Python 2.7 or 3.5](https://www.continuum.io/downloads)
+* [PyTorch 0.1.12](http://pytorch.org/)
+
+## Author
+Yunjey Choi / [@yunjey](https://github.com/yunjey)
\ No newline at end of file
diff --git a/tutorials/00 - PyTorch Basics/basics.ipynb b/tutorials/00 - PyTorch Basics/basics.ipynb
deleted file mode 100644
index 4024f11..0000000
--- a/tutorials/00 - PyTorch Basics/basics.ipynb
+++ /dev/null
@@ -1,397 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "import torch \n",
- "import torchvision\n",
- "import torch.nn as nn\n",
- "import torch.utils.data as data\n",
- "import numpy as np\n",
- "import torchvision.transforms as transforms\n",
- "import torchvision.datasets as dsets\n",
- "from torch.autograd import Variable"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Simple Example"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "-1.2532 -1.1120 0.9717\n",
- "-2.3617 0.1516 1.1280\n",
- "-2.1599 0.0828 -1.4305\n",
- " 0.5265 0.5020 -2.1852\n",
- "-0.9197 0.1772 -1.1378\n",
- "[torch.FloatTensor of size 5x3]\n",
- "\n"
- ]
- }
- ],
- "source": [
- "# random normal\n",
- "x = torch.randn(5, 3)\n",
- "print (x)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# build a layer\n",
- "linear = nn.Linear(3, 2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Parameter containing:\n",
- " 0.3884 -0.3335 -0.5146\n",
- "-0.3692 0.1977 -0.4081\n",
- "[torch.FloatTensor of size 2x3]\n",
- "\n",
- "Parameter containing:\n",
- "-0.4826\n",
- "-0.0038\n",
- "[torch.FloatTensor of size 2]\n",
- "\n"
- ]
- }
- ],
- "source": [
- "# Sess weight and bias\n",
- "print (linear.weight)\n",
- "print (linear.bias)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Variable containing:\n",
- "-1.0986 -0.1575\n",
- "-2.0311 0.4378\n",
- "-0.6131 1.3938\n",
- " 0.6790 0.7929\n",
- "-0.3134 0.8351\n",
- "[torch.FloatTensor of size 5x2]\n",
- "\n"
- ]
- }
- ],
- "source": [
- "# forward propagate\n",
- "y = linear(Variable(x))\n",
- "print (y)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Convert numpy array to torch tensor"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# convert numpy array to tensor\n",
- "a = np.array([[1,2], [3,4]])\n",
- "b = torch.from_numpy(a)\n",
- "print (b)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Input pipeline"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### (1) Preprocessing"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Image Preprocessing \n",
- "transform = transforms.Compose([\n",
- " transforms.Scale(40),\n",
- " transforms.RandomHorizontalFlip(),\n",
- " transforms.RandomCrop(32),\n",
- " transforms.ToTensor()])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### (2) Define Dataset"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Files already downloaded and verified\n",
- "torch.Size([3, 32, 32])\n",
- "6\n"
- ]
- }
- ],
- "source": [
- "# download and loading dataset f\n",
- "train_dataset = dsets.CIFAR10(root='./data/',\n",
- " train=True, \n",
- " transform=transform,\n",
- " download=True)\n",
- "\n",
- "image, label = train_dataset[0]\n",
- "print (image.size())\n",
- "print (label)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### (3) Data Loader"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# data loader provides queue and thread in a very simple way\n",
- "train_loader = data.DataLoader(dataset=train_dataset,\n",
- " batch_size=100, \n",
- " shuffle=True,\n",
- " num_workers=2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# iteration start then queue and thread start\n",
- "data_iter = iter(train_loader)\n",
- "\n",
- "# mini-batch images and labels\n",
- "images, labels = data_iter.next()\n",
- "\n",
- "for images, labels in train_loader:\n",
- " # your training code will be written here\n",
- " pass"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### (4) What about custom dataset not cifar10?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "class CustomDataset(data.Dataset):\n",
- " def __init__(self):\n",
- " pass\n",
- " def __getitem__(self, index):\n",
- " # You should build this function to return one data for given index\n",
- " pass\n",
- " def __len__(self):\n",
- " pass"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "ename": "TypeError",
- "evalue": "'NoneType' object cannot be interpreted as an integer",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mshuffle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m num_workers=2)\n\u001b[0m",
- "\u001b[0;32m/home/yunjey/anaconda3/lib/python3.5/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, dataset, batch_size, shuffle, sampler, num_workers, collate_fn, pin_memory)\u001b[0m\n\u001b[1;32m 250\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msampler\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msampler\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 251\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mshuffle\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 252\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msampler\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRandomSampler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 253\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mshuffle\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msampler\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSequentialSampler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/home/yunjey/anaconda3/lib/python3.5/site-packages/torch/utils/data/sampler.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data_source)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata_source\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 47\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_samples\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_source\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 48\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__iter__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mTypeError\u001b[0m: 'NoneType' object cannot be interpreted as an integer"
- ]
- }
- ],
- "source": [
- "custom_dataset = CustomDataset()\n",
- "data.DataLoader(dataset=custom_dataset,\n",
- " batch_size=100, \n",
- " shuffle=True,\n",
- " num_workers=2)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Using Pretrained Model"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Downloading: \"https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth\" to /home/yunjey/.torch/models/resnet18-5c106cde.pth\n",
- "100%|██████████| 46827520/46827520 [07:48<00:00, 99907.53it/s] \n"
- ]
- }
- ],
- "source": [
- "# Download and load pretrained model\n",
- "resnet = torchvision.models.resnet18(pretrained=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# delete top layer for finetuning\n",
- "sub_model = nn.Sequentialtial(*list(resnet.children()[:-1]))\n",
- "\n",
- "# for test\n",
- "images = Variable(torch.randn(10, 3, 256, 256))\n",
- "print (resnet(images).size())\n",
- "print (sub_model(images).size())"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Save and Load Model"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Save and load the trained model\n",
- "torch.save(sub_model, 'model.pkl')\n",
- "\n",
- "model = torch.load('model.pkl')"
- ]
- }
- ],
- "metadata": {
- "anaconda-cloud": {},
- "kernelspec": {
- "display_name": "Python [conda root]",
- "language": "python",
- "name": "conda-root-py"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
diff --git a/tutorials/03 - Feedforward Neural Network/main-gpu.py b/tutorials/01-basics/feedforward_neural_network/main-gpu.py
similarity index 100%
rename from tutorials/03 - Feedforward Neural Network/main-gpu.py
rename to tutorials/01-basics/feedforward_neural_network/main-gpu.py
diff --git a/tutorials/03 - Feedforward Neural Network/main.py b/tutorials/01-basics/feedforward_neural_network/main.py
similarity index 100%
rename from tutorials/03 - Feedforward Neural Network/main.py
rename to tutorials/01-basics/feedforward_neural_network/main.py
diff --git a/tutorials/01 - Linear Regression/main.py b/tutorials/01-basics/linear_regression/main.py
similarity index 100%
rename from tutorials/01 - Linear Regression/main.py
rename to tutorials/01-basics/linear_regression/main.py
diff --git a/tutorials/02 - Logistic Regression/main.py b/tutorials/01-basics/logistic_regression/main.py
similarity index 100%
rename from tutorials/02 - Logistic Regression/main.py
rename to tutorials/01-basics/logistic_regression/main.py
diff --git a/tutorials/00 - PyTorch Basics/main.py b/tutorials/01-basics/pytorch_basics/main.py
similarity index 100%
rename from tutorials/00 - PyTorch Basics/main.py
rename to tutorials/01-basics/pytorch_basics/main.py
diff --git a/tutorials/07 - Bidirectional Recurrent Neural Network/main-gpu.py b/tutorials/02-intermediate/bidirectional_recurrent_neural_network/main-gpu.py
similarity index 100%
rename from tutorials/07 - Bidirectional Recurrent Neural Network/main-gpu.py
rename to tutorials/02-intermediate/bidirectional_recurrent_neural_network/main-gpu.py
diff --git a/tutorials/07 - Bidirectional Recurrent Neural Network/main.py b/tutorials/02-intermediate/bidirectional_recurrent_neural_network/main.py
similarity index 100%
rename from tutorials/07 - Bidirectional Recurrent Neural Network/main.py
rename to tutorials/02-intermediate/bidirectional_recurrent_neural_network/main.py
diff --git a/tutorials/04 - Convolutional Neural Network/main-gpu.py b/tutorials/02-intermediate/convolutional_neural_network/main-gpu.py
similarity index 100%
rename from tutorials/04 - Convolutional Neural Network/main-gpu.py
rename to tutorials/02-intermediate/convolutional_neural_network/main-gpu.py
diff --git a/tutorials/04 - Convolutional Neural Network/main.py b/tutorials/02-intermediate/convolutional_neural_network/main.py
similarity index 100%
rename from tutorials/04 - Convolutional Neural Network/main.py
rename to tutorials/02-intermediate/convolutional_neural_network/main.py
diff --git a/tutorials/05 - Deep Residual Network/main-gpu.py b/tutorials/02-intermediate/deep_residual_network/main-gpu.py
similarity index 100%
rename from tutorials/05 - Deep Residual Network/main-gpu.py
rename to tutorials/02-intermediate/deep_residual_network/main-gpu.py
diff --git a/tutorials/05 - Deep Residual Network/main.py b/tutorials/02-intermediate/deep_residual_network/main.py
similarity index 100%
rename from tutorials/05 - Deep Residual Network/main.py
rename to tutorials/02-intermediate/deep_residual_network/main.py
diff --git a/tutorials/02-intermediate/generative_adversarial_network/main.py b/tutorials/02-intermediate/generative_adversarial_network/main.py
new file mode 100644
index 0000000..29b7f35
--- /dev/null
+++ b/tutorials/02-intermediate/generative_adversarial_network/main.py
@@ -0,0 +1,119 @@
+import torch
+import torchvision
+import torch.nn as nn
+import torch.nn.functional as F
+from torchvision import datasets
+from torchvision import transforms
+from torchvision.utils import save_image
+from torch.autograd import Variable
+
+
+def to_var(x):
+ if torch.cuda.is_available():
+ x = x.cuda()
+ return Variable(x)
+
+def denorm(x):
+ out = (x + 1) / 2
+ return out.clamp(0, 1)
+
+# Image processing
+transform = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize(mean=(0.5, 0.5, 0.5),
+ std=(0.5, 0.5, 0.5))])
+# MNIST dataset
+mnist = datasets.MNIST(root='./data/',
+ train=True,
+ transform=transform,
+ download=True)
+# Data loader
+data_loader = torch.utils.data.DataLoader(dataset=mnist,
+ batch_size=100,
+ shuffle=True)
+# Discriminator
+D = nn.Sequential(
+ nn.Linear(784, 256),
+ nn.LeakyReLU(0.2),
+ nn.Linear(256, 256),
+ nn.LeakyReLU(0.2),
+ nn.Linear(256, 1),
+ nn.Sigmoid())
+
+# Generator
+G = nn.Sequential(
+ nn.Linear(64, 256),
+ nn.LeakyReLU(0.2),
+ nn.Linear(256, 256),
+ nn.LeakyReLU(0.2),
+ nn.Linear(256, 784),
+ nn.Tanh())
+
+if torch.cuda.is_available():
+ D.cuda()
+ G.cuda()
+
+# Binary cross entropy loss and optimizer
+criterion = nn.BCELoss()
+d_optimizer = torch.optim.Adam(D.parameters(), lr=0.0003)
+g_optimizer = torch.optim.Adam(G.parameters(), lr=0.0003)
+
+# Start training
+for epoch in range(200):
+ for i, (images, _) in enumerate(data_loader):
+ # Build mini-batch dataset
+ batch_size = images.size(0)
+ images = to_var(images.view(batch_size, -1))
+ real_labels = to_var(torch.ones(batch_size))
+ fake_labels = to_var(torch.zeros(batch_size))
+
+ #============= Train the discriminator =============#
+ # Compute loss with real images
+ outputs = D(images)
+ d_loss_real = criterion(outputs, real_labels)
+ real_score = outputs
+
+ # Compute loss with fake images
+ z = to_var(torch.randn(batch_size, 64))
+ fake_images = G(z)
+ outputs = D(fake_images)
+ d_loss_fake = criterion(outputs, fake_labels)
+ fake_score = outputs
+
+ # Backprop + Optimize
+ d_loss = d_loss_real + d_loss_fake
+ D.zero_grad()
+ d_loss.backward()
+ d_optimizer.step()
+
+ #=============== Train the generator ===============#
+ # Compute loss with fake images
+ z = to_var(torch.randn(batch_size, 64))
+ fake_images = G(z)
+ outputs = D(fake_images)
+ g_loss = criterion(outputs, real_labels)
+
+ # Backprop + Optimize
+ D.zero_grad()
+ G.zero_grad()
+ g_loss.backward()
+ g_optimizer.step()
+
+ if (i+1) % 300 == 0:
+ print('Epoch [%d/%d], Step[%d/%d], d_loss: %.4f, '
+ 'g_loss: %.4f, D(x): %.2f, D(G(z)): %.2f'
+            %(epoch+1, 200, i+1, 600, d_loss.data[0], g_loss.data[0],
+ real_score.data.mean(), fake_score.data.mean()))
+
+ # Save real images
+ if (epoch+1) == 1:
+ images = images.view(images.size(0), 1, 28, 28)
+ save_image(denorm(images.data), './data/real_images.png')
+
+ # Save sampled images
+ fake_images = fake_images.view(fake_images.size(0), 1, 28, 28)
+ save_image(denorm(fake_images.data), './data/fake_images-%d.png' %(epoch+1))
+
+# Save the trained parameters
+torch.save(G.state_dict(), './generator.pkl')
+torch.save(D.state_dict(), './discriminator.pkl')
\ No newline at end of file
diff --git a/tutorials/08 - Language Model/data/train.txt b/tutorials/02-intermediate/language_model/data/train.txt
similarity index 100%
rename from tutorials/08 - Language Model/data/train.txt
rename to tutorials/02-intermediate/language_model/data/train.txt
diff --git a/tutorials/08 - Language Model/data_utils.py b/tutorials/02-intermediate/language_model/data_utils.py
similarity index 100%
rename from tutorials/08 - Language Model/data_utils.py
rename to tutorials/02-intermediate/language_model/data_utils.py
diff --git a/tutorials/08 - Language Model/main-gpu.py b/tutorials/02-intermediate/language_model/main-gpu.py
similarity index 100%
rename from tutorials/08 - Language Model/main-gpu.py
rename to tutorials/02-intermediate/language_model/main-gpu.py
diff --git a/tutorials/08 - Language Model/main.py b/tutorials/02-intermediate/language_model/main.py
similarity index 100%
rename from tutorials/08 - Language Model/main.py
rename to tutorials/02-intermediate/language_model/main.py
diff --git a/tutorials/06 - Recurrent Neural Network/main-gpu.py b/tutorials/02-intermediate/recurrent_neural_network/main-gpu.py
similarity index 100%
rename from tutorials/06 - Recurrent Neural Network/main-gpu.py
rename to tutorials/02-intermediate/recurrent_neural_network/main-gpu.py
diff --git a/tutorials/06 - Recurrent Neural Network/main.py b/tutorials/02-intermediate/recurrent_neural_network/main.py
similarity index 100%
rename from tutorials/06 - Recurrent Neural Network/main.py
rename to tutorials/02-intermediate/recurrent_neural_network/main.py
diff --git a/tutorials/03-advanced/deep_convolutional_gan/README.md b/tutorials/03-advanced/deep_convolutional_gan/README.md
new file mode 100644
index 0000000..ce0e4cc
--- /dev/null
+++ b/tutorials/03-advanced/deep_convolutional_gan/README.md
@@ -0,0 +1,41 @@
+## Deep Convolutional GAN
+[Generative Adversarial Network](https://arxiv.org/abs/1406.2661) is a generative model that consists of a discriminator and a generator. The discriminator is a binary classifier trained to assign 1 to real images and 0 to fake images. The generator maps a latent code to an image and is trained to produce images that cannot be distinguished from real ones, in order to deceive the discriminator.
+
+In the [Deep Convolutional GAN (DCGAN)](https://arxiv.org/abs/1511.06434), the authors introduce architecture guidelines for stable GAN training. They replace any pooling layers with strided convolutions (in the discriminator) and fractional-strided convolutions (in the generator), and use batchnorm in both the discriminator and the generator. In addition, the paper uses ReLU activations in the generator and LeakyReLU activations in the discriminator; in our case, however, we use LeakyReLU in both models to avoid sparse gradients.
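+
+These guidelines boil down to a repeated block pattern. A minimal sketch (mirroring the conv()/deconv() helpers in model.py of this tutorial; the channel sizes here are illustrative only):
+
+```python
+import torch.nn as nn
+
+# Discriminator block: a stride-2 convolution halves the spatial size
+# (replacing pooling), followed by batchnorm and LeakyReLU.
+d_block = nn.Sequential(
+    nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
+    nn.BatchNorm2d(128),
+    nn.LeakyReLU(0.2))
+
+# Generator block: a stride-2 transposed (fractional-strided) convolution
+# doubles the spatial size; LeakyReLU is used here too, as noted above.
+g_block = nn.Sequential(
+    nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
+    nn.BatchNorm2d(64),
+    nn.LeakyReLU(0.2))
+```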
+
+
+
+
+## Usage
+
+#### 1. Install dependencies
+```bash
+$ pip install -r requirements.txt
+```
+
+#### 2. Download the dataset
+```bash
+$ chmod +x download.sh
+$ ./download.sh
+```
+
+#### 3. Train the model
+```bash
+$ python main.py --mode='train'
+```
+
+#### 4. Sample the images
+```bash
+$ python main.py --mode='sample'
+```
+
+
+
+
+
+## Results
+
+The following is the result on the CelebA dataset.
+
+
+
diff --git a/tutorials/03-advanced/deep_convolutional_gan/data_loader.py b/tutorials/03-advanced/deep_convolutional_gan/data_loader.py
new file mode 100644
index 0000000..a472db7
--- /dev/null
+++ b/tutorials/03-advanced/deep_convolutional_gan/data_loader.py
@@ -0,0 +1,43 @@
+import os
+from torch.utils import data
+from torchvision import transforms
+from PIL import Image
+
+
+class ImageFolder(data.Dataset):
+ """Custom Dataset compatible with prebuilt DataLoader.
+
+ This is just for tutorial. You can use the prebuilt torchvision.datasets.ImageFolder.
+ """
+ def __init__(self, root, transform=None):
+ """Initializes image paths and preprocessing module."""
+ self.image_paths = list(map(lambda x: os.path.join(root, x), os.listdir(root)))
+ self.transform = transform
+
+ def __getitem__(self, index):
+ """Reads an image from a file and preprocesses it and returns."""
+ image_path = self.image_paths[index]
+ image = Image.open(image_path).convert('RGB')
+ if self.transform is not None:
+ image = self.transform(image)
+ return image
+
+ def __len__(self):
+ """Returns the total number of image files."""
+ return len(self.image_paths)
+
+
+def get_loader(image_path, image_size, batch_size, num_workers=2):
+ """Builds and returns Dataloader."""
+
+ transform = transforms.Compose([
+ transforms.Scale(image_size),
+ transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+
+ dataset = ImageFolder(image_path, transform)
+ data_loader = data.DataLoader(dataset=dataset,
+ batch_size=batch_size,
+ shuffle=True,
+ num_workers=num_workers)
+ return data_loader
\ No newline at end of file
diff --git a/tutorials/03-advanced/deep_convolutional_gan/download.sh b/tutorials/03-advanced/deep_convolutional_gan/download.sh
new file mode 100755
index 0000000..d4b92bd
--- /dev/null
+++ b/tutorials/03-advanced/deep_convolutional_gan/download.sh
@@ -0,0 +1,2 @@
+wget https://www.dropbox.com/s/e0ig4nf1v94hyj8/CelebA.zip?dl=0 -P ./
+unzip CelebA.zip -d ./
\ No newline at end of file
diff --git a/tutorials/03-advanced/deep_convolutional_gan/main.py b/tutorials/03-advanced/deep_convolutional_gan/main.py
new file mode 100644
index 0000000..b63ce1d
--- /dev/null
+++ b/tutorials/03-advanced/deep_convolutional_gan/main.py
@@ -0,0 +1,58 @@
+import argparse
+import os
+from solver import Solver
+from data_loader import get_loader
+from torch.backends import cudnn
+
+
+def main(config):
+ cudnn.benchmark = True
+
+ data_loader = get_loader(image_path=config.image_path,
+ image_size=config.image_size,
+ batch_size=config.batch_size,
+ num_workers=config.num_workers)
+
+ solver = Solver(config, data_loader)
+
+ # Create directories if not exist
+ if not os.path.exists(config.model_path):
+ os.makedirs(config.model_path)
+ if not os.path.exists(config.sample_path):
+ os.makedirs(config.sample_path)
+
+ # Train and sample the images
+ if config.mode == 'train':
+ solver.train()
+ elif config.mode == 'sample':
+ solver.sample()
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+
+ # model hyper-parameters
+ parser.add_argument('--image_size', type=int, default=64)
+ parser.add_argument('--z_dim', type=int, default=100)
+ parser.add_argument('--g_conv_dim', type=int, default=64)
+ parser.add_argument('--d_conv_dim', type=int, default=64)
+
+ # training hyper-parameters
+ parser.add_argument('--num_epochs', type=int, default=20)
+ parser.add_argument('--batch_size', type=int, default=32)
+ parser.add_argument('--sample_size', type=int, default=100)
+ parser.add_argument('--num_workers', type=int, default=2)
+ parser.add_argument('--lr', type=float, default=0.0002)
+ parser.add_argument('--beta1', type=float, default=0.5) # momentum1 in Adam
+ parser.add_argument('--beta2', type=float, default=0.999) # momentum2 in Adam
+
+ # misc
+ parser.add_argument('--mode', type=str, default='train')
+ parser.add_argument('--model_path', type=str, default='./models')
+ parser.add_argument('--sample_path', type=str, default='./samples')
+ parser.add_argument('--image_path', type=str, default='./CelebA/128_crop')
+ parser.add_argument('--log_step', type=int , default=10)
+ parser.add_argument('--sample_step', type=int , default=500)
+
+ config = parser.parse_args()
+ print(config)
+ main(config)
\ No newline at end of file
diff --git a/tutorials/03-advanced/deep_convolutional_gan/model.py b/tutorials/03-advanced/deep_convolutional_gan/model.py
new file mode 100644
index 0000000..1fbc6a9
--- /dev/null
+++ b/tutorials/03-advanced/deep_convolutional_gan/model.py
@@ -0,0 +1,59 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def deconv(c_in, c_out, k_size, stride=2, pad=1, bn=True):
+ """Custom deconvolutional layer for simplicity."""
+ layers = []
+ layers.append(nn.ConvTranspose2d(c_in, c_out, k_size, stride, pad))
+ if bn:
+ layers.append(nn.BatchNorm2d(c_out))
+ return nn.Sequential(*layers)
+
+
+class Generator(nn.Module):
+ """Generator containing 7 deconvolutional layers."""
+ def __init__(self, z_dim=256, image_size=128, conv_dim=64):
+ super(Generator, self).__init__()
+ self.fc = deconv(z_dim, conv_dim*8, int(image_size/16), 1, 0, bn=False)
+ self.deconv1 = deconv(conv_dim*8, conv_dim*4, 4)
+ self.deconv2 = deconv(conv_dim*4, conv_dim*2, 4)
+ self.deconv3 = deconv(conv_dim*2, conv_dim, 4)
+ self.deconv4 = deconv(conv_dim, 3, 4, bn=False)
+
+ def forward(self, z):
+ z = z.view(z.size(0), z.size(1), 1, 1) # If image_size is 64, output shape is as below.
+ out = self.fc(z) # (?, 512, 4, 4)
+ out = F.leaky_relu(self.deconv1(out), 0.05) # (?, 256, 8, 8)
+ out = F.leaky_relu(self.deconv2(out), 0.05) # (?, 128, 16, 16)
+ out = F.leaky_relu(self.deconv3(out), 0.05) # (?, 64, 32, 32)
+ out = F.tanh(self.deconv4(out)) # (?, 3, 64, 64)
+ return out
+
+
+def conv(c_in, c_out, k_size, stride=2, pad=1, bn=True):
+ """Custom convolutional layer for simplicity."""
+ layers = []
+ layers.append(nn.Conv2d(c_in, c_out, k_size, stride, pad))
+ if bn:
+ layers.append(nn.BatchNorm2d(c_out))
+ return nn.Sequential(*layers)
+
+
+class Discriminator(nn.Module):
+ """Discriminator containing 4 convolutional layers."""
+ def __init__(self, image_size=128, conv_dim=64):
+ super(Discriminator, self).__init__()
+ self.conv1 = conv(3, conv_dim, 4, bn=False)
+ self.conv2 = conv(conv_dim, conv_dim*2, 4)
+ self.conv3 = conv(conv_dim*2, conv_dim*4, 4)
+ self.conv4 = conv(conv_dim*4, conv_dim*8, 4)
+ self.fc = conv(conv_dim*8, 1, int(image_size/16), 1, 0, False)
+
+ def forward(self, x): # If image_size is 64, output shape is as below.
+ out = F.leaky_relu(self.conv1(x), 0.05) # (?, 64, 32, 32)
+ out = F.leaky_relu(self.conv2(out), 0.05) # (?, 128, 16, 16)
+ out = F.leaky_relu(self.conv3(out), 0.05) # (?, 256, 8, 8)
+ out = F.leaky_relu(self.conv4(out), 0.05) # (?, 512, 4, 4)
+ out = self.fc(out).squeeze()
+ return out
\ No newline at end of file
diff --git a/tutorials/03-advanced/deep_convolutional_gan/png/dcgan.png b/tutorials/03-advanced/deep_convolutional_gan/png/dcgan.png
new file mode 100644
index 0000000..db2a9b8
Binary files /dev/null and b/tutorials/03-advanced/deep_convolutional_gan/png/dcgan.png differ
diff --git a/tutorials/03-advanced/deep_convolutional_gan/png/sample1.png b/tutorials/03-advanced/deep_convolutional_gan/png/sample1.png
new file mode 100644
index 0000000..835e30a
Binary files /dev/null and b/tutorials/03-advanced/deep_convolutional_gan/png/sample1.png differ
diff --git a/tutorials/03-advanced/deep_convolutional_gan/png/sample2.png b/tutorials/03-advanced/deep_convolutional_gan/png/sample2.png
new file mode 100644
index 0000000..a086348
Binary files /dev/null and b/tutorials/03-advanced/deep_convolutional_gan/png/sample2.png differ
diff --git a/tutorials/03-advanced/deep_convolutional_gan/requirements.txt b/tutorials/03-advanced/deep_convolutional_gan/requirements.txt
new file mode 100644
index 0000000..30e4546
--- /dev/null
+++ b/tutorials/03-advanced/deep_convolutional_gan/requirements.txt
@@ -0,0 +1,4 @@
+torch
+torchvision
+Pillow
+argparse
diff --git a/tutorials/03-advanced/deep_convolutional_gan/solver.py b/tutorials/03-advanced/deep_convolutional_gan/solver.py
new file mode 100644
index 0000000..fa7b1aa
--- /dev/null
+++ b/tutorials/03-advanced/deep_convolutional_gan/solver.py
@@ -0,0 +1,147 @@
+import torch
+import torchvision
+import os
+from torch import optim
+from torch.autograd import Variable
+from model import Discriminator
+from model import Generator
+
+
+class Solver(object):
+ def __init__(self, config, data_loader):
+ self.generator = None
+ self.discriminator = None
+ self.g_optimizer = None
+ self.d_optimizer = None
+ self.g_conv_dim = config.g_conv_dim
+ self.d_conv_dim = config.d_conv_dim
+ self.z_dim = config.z_dim
+ self.beta1 = config.beta1
+ self.beta2 = config.beta2
+ self.image_size = config.image_size
+ self.data_loader = data_loader
+ self.num_epochs = config.num_epochs
+ self.batch_size = config.batch_size
+ self.sample_size = config.sample_size
+ self.lr = config.lr
+ self.log_step = config.log_step
+ self.sample_step = config.sample_step
+ self.sample_path = config.sample_path
+ self.model_path = config.model_path
+ self.build_model()
+
+ def build_model(self):
+ """Build generator and discriminator."""
+ self.generator = Generator(z_dim=self.z_dim,
+ image_size=self.image_size,
+ conv_dim=self.g_conv_dim)
+ self.discriminator = Discriminator(image_size=self.image_size,
+ conv_dim=self.d_conv_dim)
+ self.g_optimizer = optim.Adam(self.generator.parameters(),
+ self.lr, [self.beta1, self.beta2])
+ self.d_optimizer = optim.Adam(self.discriminator.parameters(),
+ self.lr, [self.beta1, self.beta2])
+
+ if torch.cuda.is_available():
+ self.generator.cuda()
+ self.discriminator.cuda()
+
+ def to_variable(self, x):
+ """Convert tensor to variable."""
+ if torch.cuda.is_available():
+ x = x.cuda()
+ return Variable(x)
+
+ def to_data(self, x):
+ """Convert variable to tensor."""
+ if torch.cuda.is_available():
+ x = x.cpu()
+ return x.data
+
+ def reset_grad(self):
+ """Zero the gradient buffers."""
+ self.discriminator.zero_grad()
+ self.generator.zero_grad()
+
+ def denorm(self, x):
+ """Convert range (-1, 1) to (0, 1)"""
+ out = (x + 1) / 2
+ return out.clamp(0, 1)
+
+ def train(self):
+ """Train generator and discriminator."""
+ fixed_noise = self.to_variable(torch.randn(self.batch_size, self.z_dim))
+ total_step = len(self.data_loader)
+ for epoch in range(self.num_epochs):
+ for i, images in enumerate(self.data_loader):
+
+ #===================== Train D =====================#
+ images = self.to_variable(images)
+ batch_size = images.size(0)
+ noise = self.to_variable(torch.randn(batch_size, self.z_dim))
+
+ # Train D to recognize real images as real.
+ outputs = self.discriminator(images)
+                real_loss = torch.mean((outputs - 1) ** 2)      # L2 (least-squares) loss instead of binary cross-entropy (optional, for more stable training)
+
+ # Train D to recognize fake images as fake.
+ fake_images = self.generator(noise)
+ outputs = self.discriminator(fake_images)
+ fake_loss = torch.mean(outputs ** 2)
+
+ # Backprop + optimize
+ d_loss = real_loss + fake_loss
+ self.reset_grad()
+ d_loss.backward()
+ self.d_optimizer.step()
+
+ #===================== Train G =====================#
+ noise = self.to_variable(torch.randn(batch_size, self.z_dim))
+
+ # Train G so that D recognizes G(z) as real.
+ fake_images = self.generator(noise)
+ outputs = self.discriminator(fake_images)
+ g_loss = torch.mean((outputs - 1) ** 2)
+
+ # Backprop + optimize
+ self.reset_grad()
+ g_loss.backward()
+ self.g_optimizer.step()
+
+ # print the log info
+ if (i+1) % self.log_step == 0:
+ print('Epoch [%d/%d], Step[%d/%d], d_real_loss: %.4f, '
+ 'd_fake_loss: %.4f, g_loss: %.4f'
+ %(epoch+1, self.num_epochs, i+1, total_step,
+ real_loss.data[0], fake_loss.data[0], g_loss.data[0]))
+
+ # save the sampled images
+ if (i+1) % self.sample_step == 0:
+ fake_images = self.generator(fixed_noise)
+ torchvision.utils.save_image(self.denorm(fake_images.data),
+ os.path.join(self.sample_path,
+ 'fake_samples-%d-%d.png' %(epoch+1, i+1)))
+
+ # save the model parameters for each epoch
+ g_path = os.path.join(self.model_path, 'generator-%d.pkl' %(epoch+1))
+ d_path = os.path.join(self.model_path, 'discriminator-%d.pkl' %(epoch+1))
+ torch.save(self.generator.state_dict(), g_path)
+ torch.save(self.discriminator.state_dict(), d_path)
+
+ def sample(self):
+
+ # Load trained parameters
+ g_path = os.path.join(self.model_path, 'generator-%d.pkl' %(self.num_epochs))
+ d_path = os.path.join(self.model_path, 'discriminator-%d.pkl' %(self.num_epochs))
+ self.generator.load_state_dict(torch.load(g_path))
+ self.discriminator.load_state_dict(torch.load(d_path))
+ self.generator.eval()
+ self.discriminator.eval()
+
+ # Sample the images
+ noise = self.to_variable(torch.randn(self.sample_size, self.z_dim))
+ fake_images = self.generator(noise)
+ sample_path = os.path.join(self.sample_path, 'fake_samples-final.png')
+ torchvision.utils.save_image(self.denorm(fake_images.data), sample_path, nrow=12)
+
+ print("Saved sampled images to '%s'" %sample_path)
diff --git a/tutorials/03-advanced/image_captioning/README.md b/tutorials/03-advanced/image_captioning/README.md
new file mode 100644
index 0000000..5346ac7
--- /dev/null
+++ b/tutorials/03-advanced/image_captioning/README.md
@@ -0,0 +1,54 @@
+# Image Captioning
+The goal of image captioning is to convert a given input image into a natural language description. The encoder-decoder framework is widely used for this task. The image encoder is a convolutional neural network (CNN). In this tutorial, we use a [ResNet-152](https://arxiv.org/abs/1512.03385) model pretrained on the [ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/) image classification dataset. The decoder is a long short-term memory (LSTM) network.
+
+
+
+#### Training phase
+For the encoder part, the pretrained CNN extracts the feature vector from a given input image. The feature vector is linearly transformed to have the same dimension as the input dimension of the LSTM network. For the decoder part, source and target texts are predefined. For example, if the image description is **"Giraffes standing next to each other"**, the source sequence is a list containing **['<start>', 'Giraffes', 'standing', 'next', 'to', 'each', 'other']** and the target sequence is a list containing **['Giraffes', 'standing', 'next', 'to', 'each', 'other', '<end>']**. Using these source and target sequences and the feature vector, the LSTM decoder is trained as a language model conditioned on the feature vector.
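+
+A schematic sketch of this framing (the actual pipeline maps words to integer ids through the vocabulary wrapper built by build_vocab.py; the plain strings here are for illustration only):
+
+```python
+# Teacher forcing setup: the decoder reads the source sequence as input
+# and is trained to predict the target sequence, shifted by one step.
+caption = ['Giraffes', 'standing', 'next', 'to', 'each', 'other']
+source = ['<start>'] + caption   # decoder input
+target = caption + ['<end>']     # decoder prediction target
+```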
+
+#### Test phase
+In the test phase, the encoder part is almost the same as in the training phase. The only difference is that the batchnorm layer uses the moving average and variance instead of mini-batch statistics. This can be easily implemented using [encoder.eval()](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/image_captioning/sample.py#L41). For the decoder part, there is a significant difference between the training phase and the test phase. In the test phase, the LSTM decoder can't see the image description, so it feeds the previously generated word back in as its next input. This can be implemented using a [for-loop](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/image_captioning/model.py#L57-L68).
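+
+A self-contained sketch of that feedback loop with toy dimensions (cf. DecoderRNN.sample in model.py; in the tutorial these modules live inside the decoder class):
+
+```python
+import torch
+import torch.nn as nn
+from torch.autograd import Variable
+
+# Toy stand-ins for the decoder's modules: vocab_size=10, embed_size=8.
+embed = nn.Embedding(10, 8)
+lstm = nn.LSTM(8, 16, batch_first=True)
+linear = nn.Linear(16, 10)
+
+features = Variable(torch.randn(1, 8))      # image feature from the encoder
+inputs, states, sampled_ids = features.unsqueeze(1), None, []
+for i in range(20):                         # maximum sampling length
+    hiddens, states = lstm(inputs, states)  # (1, 1, hidden_size)
+    outputs = linear(hiddens.squeeze(1))    # (1, vocab_size)
+    predicted = outputs.max(1)[1]           # greedy: most likely word id
+    sampled_ids.append(predicted)
+    inputs = embed(predicted)               # feed the prediction back in
+```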
+
+
+
+## Usage
+
+
+#### 1. Clone the repositories
+```bash
+$ git clone https://github.com/pdollar/coco.git
+$ cd coco/PythonAPI/
+$ make
+$ python setup.py build
+$ python setup.py install
+$ cd ../../
+$ git clone https://github.com/yunjey/pytorch-tutorial.git
+$ cd pytorch-tutorial/tutorials/03-advanced/image_captioning/
+```
+
+#### 2. Download the dataset
+
+```bash
+$ pip install -r requirements.txt
+$ chmod +x download.sh
+$ ./download.sh
+```
+
+#### 3. Preprocessing
+
+```bash
+$ python build_vocab.py
+$ python resize.py
+```
+
+#### 4. Train the model
+
+```bash
+$ python train.py
+```
+
+#### 5. Test the model
+
+```bash
+$ python sample.py --image='png/example.png'
+```
\ No newline at end of file
diff --git a/tutorials/09 - Image Captioning/build_vocab.py b/tutorials/03-advanced/image_captioning/build_vocab.py
similarity index 100%
rename from tutorials/09 - Image Captioning/build_vocab.py
rename to tutorials/03-advanced/image_captioning/build_vocab.py
diff --git a/tutorials/09 - Image Captioning/data_loader.py b/tutorials/03-advanced/image_captioning/data_loader.py
similarity index 100%
rename from tutorials/09 - Image Captioning/data_loader.py
rename to tutorials/03-advanced/image_captioning/data_loader.py
diff --git a/tutorials/09 - Image Captioning/download.sh b/tutorials/03-advanced/image_captioning/download.sh
similarity index 100%
rename from tutorials/09 - Image Captioning/download.sh
rename to tutorials/03-advanced/image_captioning/download.sh
diff --git a/tutorials/09 - Image Captioning/model.py b/tutorials/03-advanced/image_captioning/model.py
similarity index 79%
rename from tutorials/09 - Image Captioning/model.py
rename to tutorials/03-advanced/image_captioning/model.py
index e79fa02..e2d4fa6 100644
--- a/tutorials/09 - Image Captioning/model.py
+++ b/tutorials/03-advanced/image_captioning/model.py
@@ -9,22 +9,24 @@ class EncoderCNN(nn.Module):
def __init__(self, embed_size):
"""Load the pretrained ResNet-152 and replace top fc layer."""
super(EncoderCNN, self).__init__()
- self.resnet = models.resnet152(pretrained=True)
- for param in self.resnet.parameters():
- param.requires_grad = False
- self.resnet.fc = nn.Linear(self.resnet.fc.in_features, embed_size)
+ resnet = models.resnet152(pretrained=True)
+ modules = list(resnet.children())[:-1] # delete the last fc layer.
+ self.resnet = nn.Sequential(*modules)
+ self.linear = nn.Linear(resnet.fc.in_features, embed_size)
self.bn = nn.BatchNorm1d(embed_size, momentum=0.01)
self.init_weights()
def init_weights(self):
"""Initialize the weights."""
- self.resnet.fc.weight.data.normal_(0.0, 0.02)
- self.resnet.fc.bias.data.fill_(0)
+ self.linear.weight.data.normal_(0.0, 0.02)
+ self.linear.bias.data.fill_(0)
def forward(self, images):
"""Extract the image feature vectors."""
features = self.resnet(images)
- features = self.bn(features)
+ features = Variable(features.data)
+ features = features.view(features.size(0), -1)
+ features = self.bn(self.linear(features))
return features
@@ -52,12 +54,12 @@ class DecoderRNN(nn.Module):
outputs = self.linear(hiddens[0])
return outputs
- def sample(self, features, states):
+ def sample(self, features, states=None):
"""Samples captions for given image features (Greedy search)."""
sampled_ids = []
inputs = features.unsqueeze(1)
for i in range(20): # maximum sampling length
- hiddens, states = self.lstm(inputs, states) # (batch_size, 1, hidden_size)
+ hiddens, states = self.lstm(inputs, states) # (batch_size, 1, hidden_size),
outputs = self.linear(hiddens.squeeze(1)) # (batch_size, vocab_size)
predicted = outputs.max(1)[1]
sampled_ids.append(predicted)
diff --git a/tutorials/03-advanced/image_captioning/png/example.png b/tutorials/03-advanced/image_captioning/png/example.png
new file mode 100644
index 0000000..810228d
Binary files /dev/null and b/tutorials/03-advanced/image_captioning/png/example.png differ
diff --git a/tutorials/03-advanced/image_captioning/png/image_captioning.png b/tutorials/03-advanced/image_captioning/png/image_captioning.png
new file mode 100644
index 0000000..2aceadd
Binary files /dev/null and b/tutorials/03-advanced/image_captioning/png/image_captioning.png differ
diff --git a/tutorials/03-advanced/image_captioning/png/model.png b/tutorials/03-advanced/image_captioning/png/model.png
new file mode 100644
index 0000000..4fc7c7a
Binary files /dev/null and b/tutorials/03-advanced/image_captioning/png/model.png differ
diff --git a/tutorials/09 - Image Captioning/requirements.txt b/tutorials/03-advanced/image_captioning/requirements.txt
similarity index 100%
rename from tutorials/09 - Image Captioning/requirements.txt
rename to tutorials/03-advanced/image_captioning/requirements.txt
diff --git a/tutorials/09 - Image Captioning/resize.py b/tutorials/03-advanced/image_captioning/resize.py
similarity index 100%
rename from tutorials/09 - Image Captioning/resize.py
rename to tutorials/03-advanced/image_captioning/resize.py
diff --git a/tutorials/09 - Image Captioning/sample.py b/tutorials/03-advanced/image_captioning/sample.py
similarity index 78%
rename from tutorials/09 - Image Captioning/sample.py
rename to tutorials/03-advanced/image_captioning/sample.py
index be8ee71..acf6271 100644
--- a/tutorials/09 - Image Captioning/sample.py
+++ b/tutorials/03-advanced/image_captioning/sample.py
@@ -11,13 +11,26 @@ from model import EncoderCNN, DecoderRNN
from PIL import Image
+def to_var(x, volatile=False):
+ if torch.cuda.is_available():
+ x = x.cuda()
+ return Variable(x, volatile=volatile)
+
+def load_image(image_path, transform=None):
+ image = Image.open(image_path)
+ image = image.resize([224, 224], Image.LANCZOS)
+
+ if transform is not None:
+ image = transform(image).unsqueeze(0)
+
+ return image
+
def main(args):
# Image preprocessing
- transform = transforms.Compose([
- transforms.Scale(args.crop_size),
- transforms.CenterCrop(args.crop_size),
+ transform = transforms.Compose([
transforms.ToTensor(),
- transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+ transforms.Normalize((0.485, 0.456, 0.406),
+ (0.229, 0.224, 0.225))])
# Load vocabulary wrapper
with open(args.vocab_path, 'rb') as f:
@@ -35,23 +48,17 @@ def main(args):
decoder.load_state_dict(torch.load(args.decoder_path))
# Prepare Image
- image = Image.open(args.image)
- image_tensor = Variable(transform(image).unsqueeze(0))
-
- # Set initial states
- state = (Variable(torch.zeros(args.num_layers, 1, args.hidden_size)),
- Variable(torch.zeros(args.num_layers, 1, args.hidden_size)))
+ image = load_image(args.image, transform)
+ image_tensor = to_var(image, volatile=True)
# If use gpu
if torch.cuda.is_available():
encoder.cuda()
decoder.cuda()
- state = [s.cuda() for s in state]
- image_tensor = image_tensor.cuda()
# Generate caption from image
feature = encoder(image_tensor)
- sampled_ids = decoder.sample(feature, state)
+ sampled_ids = decoder.sample(feature)
sampled_ids = sampled_ids.cpu().data.numpy()
# Decode word_ids to words
@@ -77,8 +84,6 @@ if __name__ == '__main__':
help='path for trained decoder')
parser.add_argument('--vocab_path', type=str, default='./data/vocab.pkl',
help='path for vocabulary wrapper')
- parser.add_argument('--crop_size', type=int, default=224,
- help='size for center cropping images')
# Model parameters (should be same as paramters in train.py)
parser.add_argument('--embed_size', type=int , default=256,
diff --git a/tutorials/09 - Image Captioning/train.py b/tutorials/03-advanced/image_captioning/train.py
similarity index 89%
rename from tutorials/09 - Image Captioning/train.py
rename to tutorials/03-advanced/image_captioning/train.py
index e01f7b2..37c26ca 100644
--- a/tutorials/09 - Image Captioning/train.py
+++ b/tutorials/03-advanced/image_captioning/train.py
@@ -11,18 +11,24 @@ from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence
from torchvision import transforms
-
+def to_var(x, volatile=False):
+ if torch.cuda.is_available():
+ x = x.cuda()
+ return Variable(x, volatile=volatile)
+
def main(args):
# Create model directory
if not os.path.exists(args.model_path):
os.makedirs(args.model_path)
# Image preprocessing
+ # For normalization, see https://github.com/pytorch/vision#models
transform = transforms.Compose([
transforms.RandomCrop(args.crop_size),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
- transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+ transforms.Normalize((0.485, 0.456, 0.406),
+ (0.229, 0.224, 0.225))])
# Load vocabulary wrapper.
with open(args.vocab_path, 'rb') as f:
@@ -44,7 +50,7 @@ def main(args):
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
- params = list(decoder.parameters()) + list(encoder.resnet.fc.parameters())
+ params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
optimizer = torch.optim.Adam(params, lr=args.learning_rate)
# Train the Models
@@ -53,11 +59,8 @@ def main(args):
for i, (images, captions, lengths) in enumerate(data_loader):
# Set mini-batch dataset
- images = Variable(images)
- captions = Variable(captions)
- if torch.cuda.is_available():
- images = images.cuda()
- captions = captions.cuda()
+ images = to_var(images, volatile=True)
+ captions = to_var(captions)
targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]
# Forward, Backward and Optimize
@@ -116,4 +119,4 @@ if __name__ == '__main__':
parser.add_argument('--learning_rate', type=float, default=0.001)
args = parser.parse_args()
print(args)
- main(args)
+ main(args)
\ No newline at end of file
diff --git a/tutorials/03-advanced/neural_style_transfer/README.md b/tutorials/03-advanced/neural_style_transfer/README.md
new file mode 100644
index 0000000..de18de2
--- /dev/null
+++ b/tutorials/03-advanced/neural_style_transfer/README.md
@@ -0,0 +1,33 @@
+# Neural Style Transfer
+
+[Neural style transfer](https://arxiv.org/abs/1508.06576) is an algorithm that combines the content of one image with the style of another image using a convolutional neural network (CNN). Given a content image and a style image, the goal is to generate a target image that minimizes the content difference with the content image and the style difference with the style image.
+
+
+
+
+#### Content loss
+
+To minimize the content difference, we forward propagate the content image and the target image through a pretrained [VGGNet](https://arxiv.org/abs/1409.1556) and extract feature maps from multiple convolutional layers. Then, the target image is updated to minimize the [mean-squared error](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/neural_style_transfer/main.py#L92-L93) between its feature maps and those of the content image.
+
+#### Style loss
+
+As with the content loss, we forward propagate the style image and the target image through the VGGNet and extract convolutional feature maps. To generate a texture that matches the style of the style image, we update the target image by minimizing the mean-squared error between the Gram matrix of the style image and the Gram matrix of the target image (feature correlation minimization). See [here](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/neural_style_transfer/main.py#L95-L105) for how the style loss is computed.
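+
+For reference, the Gram matrix treats each channel of a feature map as a vector and collects all pairwise inner products, capturing which channels tend to activate together. A short sketch matching the computation in main.py:
+
+```python
+import torch
+
+def gram(f):
+    """Gram matrix of a conv feature map f of shape (1, c, h, w)."""
+    _, c, h, w = f.size()
+    f = f.view(c, h * w)       # one row per channel
+    return torch.mm(f, f.t())  # (c, c) channel correlations
+```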
+
+
+
+
+
+
+## Usage
+
+```bash
+$ pip install -r requirements.txt
+$ python main.py --content='png/content.png' --style='png/style.png'
+```
+
+
+
+## Results
+The following are the results of applying various artwork styles to a photograph of Anne Hathaway.
+
+
diff --git a/tutorials/03-advanced/neural_style_transfer/main.py b/tutorials/03-advanced/neural_style_transfer/main.py
new file mode 100644
index 0000000..d34ac62
--- /dev/null
+++ b/tutorials/03-advanced/neural_style_transfer/main.py
@@ -0,0 +1,137 @@
+from __future__ import division
+from torch.backends import cudnn
+from torch.autograd import Variable
+from torchvision import models
+from torchvision import transforms
+from PIL import Image
+import argparse
+import torch
+import torchvision
+import torch.nn as nn
+import numpy as np
+
+
+use_cuda = torch.cuda.is_available()
+dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
+
+# Load an image file and convert it into a tensor of the right dtype.
+# unsqueeze adds a batch dimension, making a 4D tensor for the conv layers.
+def load_image(image_path, transform=None, max_size=None, shape=None):
+ image = Image.open(image_path)
+
+ if max_size is not None:
+ scale = max_size / max(image.size)
+ size = np.array(image.size) * scale
+ image = image.resize(size.astype(int), Image.ANTIALIAS)
+
+ if shape is not None:
+ image = image.resize(shape, Image.LANCZOS)
+
+ if transform is not None:
+ image = transform(image).unsqueeze(0)
+
+ return image.type(dtype)
+
+# Pretrained VGGNet
+class VGGNet(nn.Module):
+ def __init__(self):
+ """Select conv1_1 ~ conv5_1 activation maps."""
+ super(VGGNet, self).__init__()
+ self.select = ['0', '5', '10', '19', '28']
+ self.vgg = models.vgg19(pretrained=True).features
+
+ def forward(self, x):
+ """Extract 5 conv activation maps from an input image.
+
+ Args:
+ x: 4D tensor of shape (1, 3, height, width).
+
+ Returns:
+ features: a list containing 5 conv activation maps.
+ """
+ features = []
+ for name, layer in self.vgg._modules.items():
+ x = layer(x)
+ if name in self.select:
+ features.append(x)
+ return features
+
+
+def main(config):
+
+ # Image preprocessing
+ # For normalization, see https://github.com/pytorch/vision#models
+ transform = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize((0.485, 0.456, 0.406),
+ (0.229, 0.224, 0.225))])
+
+ # Load content and style images
+ # make content.size() == style.size()
+ content = load_image(config.content, transform, max_size=config.max_size)
+ style = load_image(config.style, transform, shape=[content.size(2), content.size(3)])
+
+ # Initialization and optimizer
+ target = Variable(content.clone(), requires_grad=True)
+ optimizer = torch.optim.Adam([target], lr=config.lr, betas=[0.5, 0.999])
+
+ vgg = VGGNet()
+ if use_cuda:
+ vgg.cuda()
+
+ for step in range(config.total_step):
+
+ # Extract multiple(5) conv feature vectors
+ target_features = vgg(target)
+ content_features = vgg(Variable(content))
+ style_features = vgg(Variable(style))
+
+ style_loss = 0
+ content_loss = 0
+ for f1, f2, f3 in zip(target_features, content_features, style_features):
+ # Compute content loss (target and content image)
+ content_loss += torch.mean((f1 - f2)**2)
+
+ # Reshape conv features
+ _, c, h, w = f1.size()
+ f1 = f1.view(c, h * w)
+ f3 = f3.view(c, h * w)
+
+ # Compute gram matrix
+ f1 = torch.mm(f1, f1.t())
+ f3 = torch.mm(f3, f3.t())
+
+ # Compute style loss (target and style image)
+ style_loss += torch.mean((f1 - f3)**2) / (c * h * w)
+
+ # Compute total loss, backprop and optimize
+ loss = content_loss + config.style_weight * style_loss
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+
+ if (step+1) % config.log_step == 0:
+ print ('Step [%d/%d], Content Loss: %.4f, Style Loss: %.4f'
+ %(step+1, config.total_step, content_loss.data[0], style_loss.data[0]))
+
+ if (step+1) % config.sample_step == 0:
+ # Save the generated image
+ denorm = transforms.Normalize((-2.12, -2.04, -1.80), (4.37, 4.46, 4.44))
+ img = target.clone().cpu().squeeze()
+ img = denorm(img.data).clamp_(0, 1)
+ torchvision.utils.save_image(img, 'output-%d.png' %(step+1))
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--content', type=str, default='./png/content.png')
+ parser.add_argument('--style', type=str, default='./png/style.png')
+ parser.add_argument('--max_size', type=int, default=400)
+ parser.add_argument('--total_step', type=int, default=5000)
+ parser.add_argument('--log_step', type=int, default=10)
+ parser.add_argument('--sample_step', type=int, default=1000)
+ parser.add_argument('--style_weight', type=float, default=100)
+ parser.add_argument('--lr', type=float, default=0.003)
+ config = parser.parse_args()
+ print(config)
+ main(config)
\ No newline at end of file
diff --git a/tutorials/03-advanced/neural_style_transfer/png/content.png b/tutorials/03-advanced/neural_style_transfer/png/content.png
new file mode 100644
index 0000000..96889a0
Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/content.png differ
diff --git a/tutorials/03-advanced/neural_style_transfer/png/neural_style.png b/tutorials/03-advanced/neural_style_transfer/png/neural_style.png
new file mode 100644
index 0000000..0f5eacd
Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/neural_style.png differ
diff --git a/tutorials/03-advanced/neural_style_transfer/png/neural_style2.png b/tutorials/03-advanced/neural_style_transfer/png/neural_style2.png
new file mode 100644
index 0000000..92bfe81
Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/neural_style2.png differ
diff --git a/tutorials/03-advanced/neural_style_transfer/png/style.png b/tutorials/03-advanced/neural_style_transfer/png/style.png
new file mode 100644
index 0000000..e7d9b4c
Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/style.png differ
diff --git a/tutorials/03-advanced/neural_style_transfer/png/style2.png b/tutorials/03-advanced/neural_style_transfer/png/style2.png
new file mode 100644
index 0000000..eb7df21
Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/style2.png differ
diff --git a/tutorials/03-advanced/neural_style_transfer/png/style3.png b/tutorials/03-advanced/neural_style_transfer/png/style3.png
new file mode 100644
index 0000000..0260be2
Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/style3.png differ
diff --git a/tutorials/03-advanced/neural_style_transfer/png/style4.png b/tutorials/03-advanced/neural_style_transfer/png/style4.png
new file mode 100644
index 0000000..c62fdb3
Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/style4.png differ
diff --git a/tutorials/03-advanced/neural_style_transfer/requirements.txt b/tutorials/03-advanced/neural_style_transfer/requirements.txt
new file mode 100644
index 0000000..131621d
--- /dev/null
+++ b/tutorials/03-advanced/neural_style_transfer/requirements.txt
@@ -0,0 +1,4 @@
+argparse
+torch
+torchvision
+Pillow
diff --git a/tutorials/03-advanced/variational_auto_encoder/README.md b/tutorials/03-advanced/variational_auto_encoder/README.md
new file mode 100644
index 0000000..df7ded7
--- /dev/null
+++ b/tutorials/03-advanced/variational_auto_encoder/README.md
@@ -0,0 +1,24 @@
+## Variational Auto-Encoder
+[Variational Auto-Encoder (VAE)](https://arxiv.org/abs/1312.6114) is one of the most popular generative models. From a neural network perspective, the only difference between the VAE and the auto-encoder (AE) is that the latent vector z in the VAE is stochastically sampled. This solves the problem that the AE tends to learn an identity mapping and therefore fails to learn meaningful representations in the latent space. To keep this sampling step differentiable, the VAE uses the [reparameterization trick](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/variational_auto_encoder/main.py#L40-L44), which enables backpropagation by expressing z as a deterministic function of the mean, the variance, and an auxiliary noise variable rather than sampling z directly.
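+
+As a minimal sketch, the trick (mirroring the `reparametrize` method in main.py) replaces direct sampling of z with a deterministic function of the mean, the log variance, and an auxiliary noise variable eps:
+
+```python
+import torch
+from torch.autograd import Variable
+
+def reparametrize(mu, log_var):
+    # z = mu + eps * sigma, where eps ~ N(0, 1).
+    # The randomness lives in eps, so gradients flow through mu and log_var.
+    eps = Variable(torch.randn(mu.size(0), mu.size(1)))
+    return mu + eps * torch.exp(log_var / 2)
+```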
+
+#### VAE loss
+As in conventional auto-encoders, the VAE minimizes the reconstruction loss between the input image and the generated image. In addition, it pushes the distribution of z toward the standard normal distribution so that the decoder can be used for sampling in the test phase.
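+
+Concretely, the total loss is the sum of a pixel-wise reconstruction term and a closed-form KL divergence term, as in the sketch below (mirroring the loss computation in main.py):
+
+```python
+import torch
+import torch.nn.functional as F
+
+def vae_loss(out, images, mu, log_var):
+    # Reconstruction: binary cross-entropy, summed over pixels and batch
+    reconst_loss = F.binary_cross_entropy(out, images, size_average=False)
+    # KL( N(mu, sigma^2) || N(0, I) ), summed over latent dims and batch
+    kl_divergence = torch.sum(0.5 * (mu**2 + torch.exp(log_var) - log_var - 1))
+    return reconst_loss + kl_divergence
+```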
+
+
+![alt text](png/vae.png)
+
+
+
+## Usage
+
+```bash
+$ pip install -r requirements.txt
+$ python main.py
+```
+
+
+
+## Results
+Real image | Reconstructed image
+:-------------------------:|:-------------------------:
+![alt text](png/real.png) | ![alt text](png/reconst.png)
\ No newline at end of file
diff --git a/tutorials/03-advanced/variational_auto_encoder/main.py b/tutorials/03-advanced/variational_auto_encoder/main.py
new file mode 100644
index 0000000..231377d
--- /dev/null
+++ b/tutorials/03-advanced/variational_auto_encoder/main.py
@@ -0,0 +1,98 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Variable
+from torchvision import datasets
+from torchvision import transforms
+import torchvision
+
+# MNIST dataset
+dataset = datasets.MNIST(root='./data',
+ train=True,
+ transform=transforms.ToTensor(),
+ download=True)
+
+# Data loader
+data_loader = torch.utils.data.DataLoader(dataset=dataset,
+ batch_size=100,
+ shuffle=True)
+
+def to_var(x):
+ if torch.cuda.is_available():
+ x = x.cuda()
+ return Variable(x)
+
+# VAE model
+class VAE(nn.Module):
+ def __init__(self, image_size=784, h_dim=400, z_dim=20):
+ super(VAE, self).__init__()
+ self.encoder = nn.Sequential(
+ nn.Linear(image_size, h_dim),
+ nn.LeakyReLU(0.2),
+ nn.Linear(h_dim, z_dim*2)) # 2 for mean and log variance.
+
+ self.decoder = nn.Sequential(
+ nn.Linear(z_dim, h_dim),
+ nn.ReLU(),
+ nn.Linear(h_dim, image_size),
+ nn.Sigmoid())
+
+ def reparametrize(self, mu, log_var):
+ """"z = mean + eps * sigma where eps is sampled from N(0, 1)."""
+ eps = to_var(torch.randn(mu.size(0), mu.size(1)))
+ z = mu + eps * torch.exp(log_var/2) # 2 for convert var to std
+ return z
+
+ def forward(self, x):
+ h = self.encoder(x)
+ mu, log_var = torch.chunk(h, 2, dim=1) # mean and log variance.
+ z = self.reparametrize(mu, log_var)
+ out = self.decoder(z)
+ return out, mu, log_var
+
+ def sample(self, z):
+ return self.decoder(z)
+
+vae = VAE()
+
+if torch.cuda.is_available():
+ vae.cuda()
+
+optimizer = torch.optim.Adam(vae.parameters(), lr=0.001)
+iter_per_epoch = len(data_loader)
+data_iter = iter(data_loader)
+
+# fixed inputs for debugging
+fixed_z = to_var(torch.randn(100, 20))
+fixed_x, _ = next(data_iter)
+torchvision.utils.save_image(fixed_x.data.cpu(), './data/real_images.png')
+fixed_x = to_var(fixed_x.view(fixed_x.size(0), -1))
+
+for epoch in range(50):
+ for i, (images, _) in enumerate(data_loader):
+
+ images = to_var(images.view(images.size(0), -1))
+ out, mu, log_var = vae(images)
+
+ # Compute reconstruction loss and kl divergence
+ # For kl_divergence, see Appendix B in the paper or http://yunjey47.tistory.com/43
+ reconst_loss = F.binary_cross_entropy(out, images, size_average=False)
+ kl_divergence = torch.sum(0.5 * (mu**2 + torch.exp(log_var) - log_var - 1))
+
+ # Backprop + Optimize
+ total_loss = reconst_loss + kl_divergence
+ optimizer.zero_grad()
+ total_loss.backward()
+ optimizer.step()
+
+ if i % 100 == 0:
+ print ("Epoch[%d/%d], Step [%d/%d], Total Loss: %.4f, "
+ "Reconst Loss: %.4f, KL Div: %.7f"
+ %(epoch+1, 50, i+1, iter_per_epoch, total_loss.data[0],
+ reconst_loss.data[0], kl_divergence.data[0]))
+
+ # Save the reconstructed images
+ reconst_images, _, _ = vae(fixed_x)
+ reconst_images = reconst_images.view(reconst_images.size(0), 1, 28, 28)
+ torchvision.utils.save_image(reconst_images.data.cpu(),
+ './data/reconst_images_%d.png' %(epoch+1))
\ No newline at end of file
diff --git a/tutorials/03-advanced/variational_auto_encoder/png/real.png b/tutorials/03-advanced/variational_auto_encoder/png/real.png
new file mode 100644
index 0000000..25b8ad6
Binary files /dev/null and b/tutorials/03-advanced/variational_auto_encoder/png/real.png differ
diff --git a/tutorials/03-advanced/variational_auto_encoder/png/reconst.png b/tutorials/03-advanced/variational_auto_encoder/png/reconst.png
new file mode 100644
index 0000000..e70c3cb
Binary files /dev/null and b/tutorials/03-advanced/variational_auto_encoder/png/reconst.png differ
diff --git a/tutorials/03-advanced/variational_auto_encoder/png/vae.png b/tutorials/03-advanced/variational_auto_encoder/png/vae.png
new file mode 100644
index 0000000..6ecf999
Binary files /dev/null and b/tutorials/03-advanced/variational_auto_encoder/png/vae.png differ
diff --git a/tutorials/03-advanced/variational_auto_encoder/requirements.txt b/tutorials/03-advanced/variational_auto_encoder/requirements.txt
new file mode 100644
index 0000000..ac988bd
--- /dev/null
+++ b/tutorials/03-advanced/variational_auto_encoder/requirements.txt
@@ -0,0 +1,2 @@
+torch
+torchvision
diff --git a/tutorials/04-utils/tensorboard/README.md b/tutorials/04-utils/tensorboard/README.md
new file mode 100644
index 0000000..19ea749
--- /dev/null
+++ b/tutorials/04-utils/tensorboard/README.md
@@ -0,0 +1,25 @@
+# TensorBoard in PyTorch
+
+In this tutorial, we implement an MNIST classifier using a simple neural network and visualize the training process with [TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard). In the training phase, we plot the loss and accuracy via `scalar_summary` and visualize the training images via `image_summary`. In addition, we visualize the weights and gradients of the network's parameters using `histo_summary`. The PyTorch code that calls these summary functions can be found [here](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/04-utils/tensorboard/main.py#L83-L105).
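+
+As a minimal sketch, the `Logger` class defined in logger.py is used like this (the loss value here is a placeholder):
+
+```python
+from logger import Logger
+
+logger = Logger('./logs')
+for step in range(1, 101):
+    loss = 1.0 / step  # placeholder scalar; in main.py this is the training loss
+    logger.scalar_summary('loss', loss, step)
+```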
+
+
+![alt text](gif/tensorboard.gif)
+
+
+## Usage
+
+#### 1. Install dependencies
+```bash
+$ pip install -r requirements.txt
+```
+
+#### 2. Train the model
+```bash
+$ python main.py
+```
+
+#### 3. Open the TensorBoard
+To run TensorBoard, open a new terminal and run the command below. Then, open http://localhost:6006/ in your web browser.
+```bash
+$ tensorboard --logdir='./logs' --port=6006
+```
\ No newline at end of file
diff --git a/tutorials/09 - Image Captioning/png/r b/tutorials/04-utils/tensorboard/gif/g
similarity index 100%
rename from tutorials/09 - Image Captioning/png/r
rename to tutorials/04-utils/tensorboard/gif/g
diff --git a/tutorials/04-utils/tensorboard/gif/tensorboard.gif b/tutorials/04-utils/tensorboard/gif/tensorboard.gif
new file mode 100644
index 0000000..d6ac609
Binary files /dev/null and b/tutorials/04-utils/tensorboard/gif/tensorboard.gif differ
diff --git a/tutorials/04-utils/tensorboard/logger.py b/tutorials/04-utils/tensorboard/logger.py
new file mode 100644
index 0000000..06e2ce3
--- /dev/null
+++ b/tutorials/04-utils/tensorboard/logger.py
@@ -0,0 +1,71 @@
+# Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514.
+import tensorflow as tf
+import numpy as np
+import scipy.misc
+try:
+ from StringIO import StringIO # Python 2.7
+except ImportError:
+ from io import BytesIO # Python 3.x
+
+
+class Logger(object):
+
+ def __init__(self, log_dir):
+ """Create a summary writer logging to log_dir."""
+ self.writer = tf.summary.FileWriter(log_dir)
+
+ def scalar_summary(self, tag, value, step):
+ """Log a scalar variable."""
+ summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
+ self.writer.add_summary(summary, step)
+
+ def image_summary(self, tag, images, step):
+ """Log a list of images."""
+
+ img_summaries = []
+ for i, img in enumerate(images):
+ # Write the image to a string
+ try:
+ s = StringIO()
+ except NameError: # Python 3: StringIO was not imported, fall back to BytesIO
+ s = BytesIO()
+ scipy.misc.toimage(img).save(s, format="png")
+
+ # Create an Image object
+ img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
+ height=img.shape[0],
+ width=img.shape[1])
+ # Create a Summary value
+ img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))
+
+ # Create and write Summary
+ summary = tf.Summary(value=img_summaries)
+ self.writer.add_summary(summary, step)
+
+ def histo_summary(self, tag, values, step, bins=1000):
+ """Log a histogram of the tensor of values."""
+
+ # Create a histogram using numpy
+ counts, bin_edges = np.histogram(values, bins=bins)
+
+ # Fill the fields of the histogram proto
+ hist = tf.HistogramProto()
+ hist.min = float(np.min(values))
+ hist.max = float(np.max(values))
+ hist.num = int(np.prod(values.shape))
+ hist.sum = float(np.sum(values))
+ hist.sum_squares = float(np.sum(values**2))
+
+ # Drop the start of the first bin
+ bin_edges = bin_edges[1:]
+
+ # Add bin edges and counts
+ for edge in bin_edges:
+ hist.bucket_limit.append(edge)
+ for c in counts:
+ hist.bucket.append(c)
+
+ # Create and write Summary
+ summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
+ self.writer.add_summary(summary, step)
+ self.writer.flush()
\ No newline at end of file
diff --git a/tutorials/04-utils/tensorboard/main.py b/tutorials/04-utils/tensorboard/main.py
new file mode 100644
index 0000000..c8f1042
--- /dev/null
+++ b/tutorials/04-utils/tensorboard/main.py
@@ -0,0 +1,105 @@
+import torch
+import torch.nn as nn
+import torchvision.datasets as dsets
+import torchvision.transforms as transforms
+from torch.autograd import Variable
+from logger import Logger
+
+
+# MNIST Dataset
+dataset = dsets.MNIST(root='./data',
+ train=True,
+ transform=transforms.ToTensor(),
+ download=True)
+
+# Data Loader (Input Pipeline)
+data_loader = torch.utils.data.DataLoader(dataset=dataset,
+ batch_size=100,
+ shuffle=True)
+
+def to_np(x):
+ return x.data.cpu().numpy()
+
+def to_var(x):
+ if torch.cuda.is_available():
+ x = x.cuda()
+ return Variable(x)
+
+# Neural Network Model (1 hidden layer)
+class Net(nn.Module):
+ def __init__(self, input_size=784, hidden_size=500, num_classes=10):
+ super(Net, self).__init__()
+ self.fc1 = nn.Linear(input_size, hidden_size)
+ self.relu = nn.ReLU()
+ self.fc2 = nn.Linear(hidden_size, num_classes)
+
+ def forward(self, x):
+ out = self.fc1(x)
+ out = self.relu(out)
+ out = self.fc2(out)
+ return out
+
+net = Net()
+if torch.cuda.is_available():
+ net.cuda()
+
+# Set the logger
+logger = Logger('./logs')
+
+# Loss and Optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = torch.optim.Adam(net.parameters(), lr=0.00001)
+
+data_iter = iter(data_loader)
+iter_per_epoch = len(data_loader)
+total_step = 50000
+
+# Start training
+for step in range(total_step):
+
+ # Reset the data_iter
+ if (step+1) % iter_per_epoch == 0:
+ data_iter = iter(data_loader)
+
+ # Fetch the images and labels and convert them to variables
+ images, labels = next(data_iter)
+ images, labels = to_var(images.view(images.size(0), -1)), to_var(labels)
+
+ # Forward, backward and optimize
+ optimizer.zero_grad() # zero the gradient buffer
+ outputs = net(images)
+ loss = criterion(outputs, labels)
+ loss.backward()
+ optimizer.step()
+
+ # Compute accuracy
+ _, argmax = torch.max(outputs, 1)
+ accuracy = (labels == argmax.squeeze()).float().mean()
+
+ if (step+1) % 100 == 0:
+ print ('Step [%d/%d], Loss: %.4f, Acc: %.2f'
+ %(step+1, total_step, loss.data[0], accuracy.data[0]))
+
+ #============ TensorBoard logging ============#
+ # (1) Log the scalar values
+ info = {
+ 'loss': loss.data[0],
+ 'accuracy': accuracy.data[0]
+ }
+
+ for tag, value in info.items():
+ logger.scalar_summary(tag, value, step+1)
+
+ # (2) Log values and gradients of the parameters (histogram)
+ for tag, value in net.named_parameters():
+ tag = tag.replace('.', '/')
+ logger.histo_summary(tag, to_np(value), step+1)
+ logger.histo_summary(tag+'/grad', to_np(value.grad), step+1)
+
+ # (3) Log the images
+ info = {
+ 'images': to_np(images.view(-1, 28, 28)[:10])
+ }
+
+ for tag, images in info.items():
+ logger.image_summary(tag, images, step+1)
\ No newline at end of file
diff --git a/tutorials/04-utils/tensorboard/requirements.txt b/tutorials/04-utils/tensorboard/requirements.txt
new file mode 100644
index 0000000..e74a2c9
--- /dev/null
+++ b/tutorials/04-utils/tensorboard/requirements.txt
@@ -0,0 +1,5 @@
+tensorflow
+torch
+torchvision
+scipy
+numpy
diff --git a/tutorials/09 - Image Captioning/README.md b/tutorials/09 - Image Captioning/README.md
deleted file mode 100644
index 1f491e6..0000000
--- a/tutorials/09 - Image Captioning/README.md
+++ /dev/null
@@ -1,48 +0,0 @@
-## Usage
-
-
-#### 1. Clone the repositories
-```bash
-$ git clone https://github.com/pdollar/coco.git
-$ cd coco/PythonAPI/
-$ make
-$ python setup.py build
-$ python setup.py install
-$ cd ../../
-$ git clone https://github.com/yunjey/pytorch-tutorial.git
-$ cd pytorch-tutorial/tutorials/09\ -\ Image\ Captioning
-```
-
-#### 2. Download the dataset
-
-```bash
-$ pip install -r requirements.txt
-$ chmod +x download.sh
-$ ./download.sh
-```
-
-#### 3. Preprocessing
-
-```bash
-$ python build_vocab.py
-$ python resize.py
-```
-
-#### 4. Train the model
-
-```bash
-$ python train.py
-```
-
-#### 5. Generate captions
-
-
-```bash
-$ python sample.py --image='path_for_image'
-```
-
-
-
-## Pretrained model
-
-If you do not want to train the model yourself, you can use a pretrained model. I have provided the pretrained model as a zip file. You can download the file [here](https://www.dropbox.com/s/b7gyo15as6m6s7x/train_model.zip?dl=0) and extract it to `./models/` directory.
diff --git a/tutorials/09 - Image Captioning/png/training phase.png b/tutorials/09 - Image Captioning/png/training phase.png
deleted file mode 100644
index c01f7e4..0000000
Binary files a/tutorials/09 - Image Captioning/png/training phase.png and /dev/null differ
diff --git a/tutorials/10 - Generative Adversarial Network/main-gpu.py b/tutorials/10 - Generative Adversarial Network/main-gpu.py
deleted file mode 100644
index 87a30c6..0000000
--- a/tutorials/10 - Generative Adversarial Network/main-gpu.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import torch
-import torchvision
-import torch.nn as nn
-import torch.nn.functional as F
-import torchvision.datasets as dsets
-import torchvision.transforms as transforms
-from torch.autograd import Variable
-
-# Image Preprocessing
-transform = transforms.Compose([
- transforms.ToTensor(),
- transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])
-
-def denorm(x):
- return (x + 1) / 2
-
-# MNIST Dataset
-train_dataset = dsets.MNIST(root='./data/',
- train=True,
- transform=transform,
- download=True)
-
-# Data Loader (Input Pipeline)
-train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
- batch_size=100,
- shuffle=True)
-
-# Discriminator Model
-class Discriminator(nn.Module):
- def __init__(self):
- super(Discriminator, self).__init__()
- self.fc1 = nn.Linear(784, 256)
- self.fc2 = nn.Linear(256, 256)
- self.fc3 = nn.Linear(256, 1)
-
- def forward(self, x):
- h = F.relu(self.fc1(x))
- h = F.relu(self.fc2(h))
- out = F.sigmoid(self.fc3(h))
- return out
-
-# Generator Model
-class Generator(nn.Module):
- def __init__(self):
- super(Generator, self).__init__()
- self.fc1 = nn.Linear(128, 256)
- self.fc2 = nn.Linear(256, 256)
- self.fc3 = nn.Linear(256, 784)
-
- def forward(self, x):
- h = F.leaky_relu(self.fc1(x))
- h = F.leaky_relu(self.fc2(h))
- out = F.tanh(self.fc3(h))
- return out
-
-discriminator = Discriminator()
-generator = Generator()
-discriminator.cuda()
-generator.cuda()
-
-# Loss and Optimizer
-criterion = nn.BCELoss()
-d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=0.0005)
-g_optimizer = torch.optim.Adam(generator.parameters(), lr=0.0005)
-
-# Training
-for epoch in range(200):
- for i, (images, _) in enumerate(train_loader):
- # Build mini-batch dataset
- images = images.view(images.size(0), -1)
- images = Variable(images.cuda())
- real_labels = Variable(torch.ones(images.size(0)).cuda())
- fake_labels = Variable(torch.zeros(images.size(0)).cuda())
-
- # Train the discriminator
- discriminator.zero_grad()
- outputs = discriminator(images)
- real_loss = criterion(outputs, real_labels)
- real_score = outputs
-
- noise = Variable(torch.randn(images.size(0), 128).cuda())
- fake_images = generator(noise)
- outputs = discriminator(fake_images.detach())
- fake_loss = criterion(outputs, fake_labels)
- fake_score = outputs
-
- d_loss = real_loss + fake_loss
- d_loss.backward()
- d_optimizer.step()
-
- # Train the generator
- generator.zero_grad()
- noise = Variable(torch.randn(images.size(0), 128).cuda())
- fake_images = generator(noise)
- outputs = discriminator(fake_images)
- g_loss = criterion(outputs, real_labels)
- g_loss.backward()
- g_optimizer.step()
-
- if (i+1) % 300 == 0:
- print('Epoch [%d/%d], Step[%d/%d], d_loss: %.4f, g_loss: %.4f, '
- 'D(x): %.2f, D(G(z)): %.2f'
- %(epoch, 200, i+1, 600, d_loss.data[0], g_loss.data[0],
- real_score.data.mean(), fake_score.cpu().data.mean()))
-
- # Save the sampled images
- fake_images = fake_images.view(fake_images.size(0), 1, 28, 28)
- torchvision.utils.save_image(denorm(fake_images.data),
- './data/fake_samples_%d.png' %(epoch+1))
-
-# Save the Models
-torch.save(generator.state_dict(), './generator.pkl')
-torch.save(discriminator.state_dict(), './discriminator.pkl')
\ No newline at end of file
diff --git a/tutorials/10 - Generative Adversarial Network/main.py b/tutorials/10 - Generative Adversarial Network/main.py
deleted file mode 100644
index c7c2636..0000000
--- a/tutorials/10 - Generative Adversarial Network/main.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import torch
-import torchvision
-import torch.nn as nn
-import torch.nn.functional as F
-import torchvision.datasets as dsets
-import torchvision.transforms as transforms
-from torch.autograd import Variable
-
-# Image Preprocessing
-transform = transforms.Compose([
- transforms.ToTensor(),
- transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])
-
-def denorm(x):
- return (x + 1) / 2
-
-# MNIST Dataset
-train_dataset = dsets.MNIST(root='./data/',
- train=True,
- transform=transform,
- download=True)
-
-# Data Loader (Input Pipeline)
-train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
- batch_size=100,
- shuffle=True)
-
-# Discriminator Model
-class Discriminator(nn.Module):
- def __init__(self):
- super(Discriminator, self).__init__()
- self.fc1 = nn.Linear(784, 256)
- self.fc2 = nn.Linear(256, 256)
- self.fc3 = nn.Linear(256, 1)
-
- def forward(self, x):
- h = F.relu(self.fc1(x))
- h = F.relu(self.fc2(h))
- out = F.sigmoid(self.fc3(h))
- return out
-
-# Generator Model
-class Generator(nn.Module):
- def __init__(self):
- super(Generator, self).__init__()
- self.fc1 = nn.Linear(128, 256)
- self.fc2 = nn.Linear(256, 256)
- self.fc3 = nn.Linear(256, 784)
-
- def forward(self, x):
- h = F.leaky_relu(self.fc1(x))
- h = F.leaky_relu(self.fc2(h))
- out = F.tanh(self.fc3(h))
- return out
-
-discriminator = Discriminator()
-generator = Generator()
-
-
-
-# Loss and Optimizer
-criterion = nn.BCELoss()
-d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=0.0005)
-g_optimizer = torch.optim.Adam(generator.parameters(), lr=0.0005)
-
-# Training
-for epoch in range(200):
- for i, (images, _) in enumerate(train_loader):
- # Build mini-batch dataset
- images = images.view(images.size(0), -1)
- images = Variable(images)
- real_labels = Variable(torch.ones(images.size(0)))
- fake_labels = Variable(torch.zeros(images.size(0)))
-
- # Train the discriminator
- discriminator.zero_grad()
- outputs = discriminator(images)
- real_loss = criterion(outputs, real_labels)
- real_score = outputs
-
- noise = Variable(torch.randn(images.size(0), 128))
- fake_images = generator(noise)
- outputs = discriminator(fake_images.detach())
- fake_loss = criterion(outputs, fake_labels)
- fake_score = outputs
-
- d_loss = real_loss + fake_loss
- d_loss.backward()
- d_optimizer.step()
-
- # Train the generator
- generator.zero_grad()
- noise = Variable(torch.randn(images.size(0), 128))
- fake_images = generator(noise)
- outputs = discriminator(fake_images)
- g_loss = criterion(outputs, real_labels)
- g_loss.backward()
- g_optimizer.step()
-
- if (i+1) % 300 == 0:
- print('Epoch [%d/%d], Step[%d/%d], d_loss: %.4f, g_loss: %.4f, '
- 'D(x): %.2f, D(G(z)): %.2f'
- %(epoch, 200, i+1, 600, d_loss.data[0], g_loss.data[0],
- real_score.data.mean(), fake_score.cpu().data.mean()))
-
- # Save the sampled images
- fake_images = fake_images.view(fake_images.size(0), 1, 28, 28)
- torchvision.utils.save_image(denorm(fake_images.data),
- './data/fake_samples_%d.png' %(epoch+1))
-
-# Save the Models
-torch.save(generator.state_dict(), './generator.pkl')
-torch.save(discriminator.state_dict(), './discriminator.pkl')
\ No newline at end of file
diff --git a/tutorials/11 - Deep Convolutional Generative Adversarial Network/main-gpu.py b/tutorials/11 - Deep Convolutional Generative Adversarial Network/main-gpu.py
deleted file mode 100644
index ad56627..0000000
--- a/tutorials/11 - Deep Convolutional Generative Adversarial Network/main-gpu.py
+++ /dev/null
@@ -1,137 +0,0 @@
-import torch
-import torchvision
-import torch.nn as nn
-import torchvision.datasets as dsets
-import torchvision.transforms as transforms
-from torch.autograd import Variable
-
-# Image Preprocessing
-transform = transforms.Compose([
- transforms.Scale(36),
- transforms.RandomCrop(32),
- transforms.ToTensor(),
- transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])
-
-def denorm(x):
- return (x + 1) / 2
-
-# CIFAR-10 Dataset
-train_dataset = dsets.CIFAR10(root='./data/',
- train=True,
- transform=transform,
- download=True)
-
-# Data Loader (Input Pipeline)
-train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
- batch_size=100,
- shuffle=True)
-
-# 4x4 Convolution
-def conv4x4(in_channels, out_channels, stride):
- return nn.Conv2d(in_channels, out_channels, kernel_size=4,
- stride=stride, padding=1, bias=False)
-
-# Discriminator Model
-class Discriminator(nn.Module):
- def __init__(self):
- super(Discriminator, self).__init__()
- self.model = nn.Sequential(
- conv4x4(3, 16, 2),
- nn.LeakyReLU(0.2, inplace=True),
- conv4x4(16, 32, 2),
- nn.BatchNorm2d(32),
- nn.LeakyReLU(0.2, inplace=True),
- conv4x4(32, 64, 2),
- nn.BatchNorm2d(64),
- nn.LeakyReLU(0.2, inplace=True),
- nn.Conv2d(64, 1, kernel_size=4),
- nn.Sigmoid())
-
- def forward(self, x):
- out = self.model(x)
- out = out.view(out.size(0), -1)
- return out
-
-# 4x4 Transpose convolution
-def conv_transpose4x4(in_channels, out_channels, stride=1, padding=1, bias=False):
- return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4,
- stride=stride, padding=padding, bias=bias)
-
-# Generator Model
-class Generator(nn.Module):
- def __init__(self):
- super(Generator, self).__init__()
- self.model = nn.Sequential(
- conv_transpose4x4(128, 64, padding=0),
- nn.BatchNorm2d(64),
- nn.ReLU(inplace=True),
- conv_transpose4x4(64, 32, 2),
- nn.BatchNorm2d(32),
- nn.ReLU(inplace=True),
- conv_transpose4x4(32, 16, 2),
- nn.BatchNorm2d(16),
- nn.ReLU(inplace=True),
- conv_transpose4x4(16, 3, 2, bias=True),
- nn.Tanh())
-
- def forward(self, x):
- x = x.view(x.size(0), 128, 1, 1)
- out = self.model(x)
- return out
-
-discriminator = Discriminator()
-generator = Generator()
-discriminator.cuda()
-generator.cuda()
-
-# Loss and Optimizer
-criterion = nn.BCELoss()
-lr = 0.002
-d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=lr)
-g_optimizer = torch.optim.Adam(generator.parameters(), lr=lr)
-
-# Training
-for epoch in range(50):
- for i, (images, _) in enumerate(train_loader):
- images = Variable(images.cuda())
- real_labels = Variable(torch.ones(images.size(0))).cuda()
- fake_labels = Variable(torch.zeros(images.size(0))).cuda()
-
- # Train the discriminator
- discriminator.zero_grad()
- outputs = discriminator(images)
- real_loss = criterion(outputs, real_labels)
- real_score = outputs
-
- noise = Variable(torch.randn(images.size(0), 128)).cuda()
- fake_images = generator(noise)
- outputs = discriminator(fake_images.detach())
- fake_loss = criterion(outputs, fake_labels)
- fake_score = outputs
-
- d_loss = real_loss + fake_loss
- d_loss.backward()
- d_optimizer.step()
-
- # Train the generator
- generator.zero_grad()
- noise = Variable(torch.randn(images.size(0), 128)).cuda()
- fake_images = generator(noise)
- outputs = discriminator(fake_images)
- g_loss = criterion(outputs, real_labels)
- g_loss.backward()
- g_optimizer.step()
-
- if (i+1) % 100 == 0:
- print('Epoch [%d/%d], Step[%d/%d], d_loss: %.4f, g_loss: %.4f, '
- 'D(x): %.2f, D(G(z)): %.2f'
- %(epoch, 50, i+1, 500, d_loss.data[0], g_loss.data[0],
- real_score.cpu().data.mean(), fake_score.cpu().data.mean()))
-
- # Save the sampled images
- torchvision.utils.save_image(denorm(fake_images.data),
- './data/fake_samples_%d_%d.png' %(epoch+1, i+1))
-
-# Save the Models
-torch.save(generator.state_dict(), './generator.pkl')
-torch.save(discriminator.state_dict(), './discriminator.pkl')
\ No newline at end of file
diff --git a/tutorials/11 - Deep Convolutional Generative Adversarial Network/main.py b/tutorials/11 - Deep Convolutional Generative Adversarial Network/main.py
deleted file mode 100644
index cca50b4..0000000
--- a/tutorials/11 - Deep Convolutional Generative Adversarial Network/main.py
+++ /dev/null
@@ -1,137 +0,0 @@
-import torch
-import torchvision
-import torch.nn as nn
-import torchvision.datasets as dsets
-import torchvision.transforms as transforms
-from torch.autograd import Variable
-
-# Image Preprocessing
-transform = transforms.Compose([
- transforms.Scale(36),
- transforms.RandomCrop(32),
- transforms.ToTensor(),
- transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])
-
-def denorm(x):
- return (x + 1) / 2
-
-# CIFAR-10 Dataset
-train_dataset = dsets.CIFAR10(root='./data/',
- train=True,
- transform=transform,
- download=True)
-
-# Data Loader (Input Pipeline)
-train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
- batch_size=100,
- shuffle=True)
-
-# 4x4 Convolution
-def conv4x4(in_channels, out_channels, stride):
- return nn.Conv2d(in_channels, out_channels, kernel_size=4,
- stride=stride, padding=1, bias=False)
-
-# Discriminator Model
-class Discriminator(nn.Module):
- def __init__(self):
- super(Discriminator, self).__init__()
- self.model = nn.Sequential(
- conv4x4(3, 16, 2),
- nn.LeakyReLU(0.2, inplace=True),
- conv4x4(16, 32, 2),
- nn.BatchNorm2d(32),
- nn.LeakyReLU(0.2, inplace=True),
- conv4x4(32, 64, 2),
- nn.BatchNorm2d(64),
- nn.LeakyReLU(0.2, inplace=True),
- nn.Conv2d(64, 1, kernel_size=4),
- nn.Sigmoid())
-
- def forward(self, x):
- out = self.model(x)
- out = out.view(out.size(0), -1)
- return out
-
-# 4x4 Transpose convolution
-def conv_transpose4x4(in_channels, out_channels, stride=1, padding=1, bias=False):
- return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4,
- stride=stride, padding=padding, bias=bias)
-
-# Generator Model
-class Generator(nn.Module):
- def __init__(self):
- super(Generator, self).__init__()
- self.model = nn.Sequential(
- conv_transpose4x4(128, 64, padding=0),
- nn.BatchNorm2d(64),
- nn.ReLU(inplace=True),
- conv_transpose4x4(64, 32, 2),
- nn.BatchNorm2d(32),
- nn.ReLU(inplace=True),
- conv_transpose4x4(32, 16, 2),
- nn.BatchNorm2d(16),
- nn.ReLU(inplace=True),
- conv_transpose4x4(16, 3, 2, bias=True),
- nn.Tanh())
-
- def forward(self, x):
- x = x.view(x.size(0), 128, 1, 1)
- out = self.model(x)
- return out
-
-discriminator = Discriminator()
-generator = Generator()
-
-
-
-# Loss and Optimizer
-criterion = nn.BCELoss()
-lr = 0.0002
-d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=lr)
-g_optimizer = torch.optim.Adam(generator.parameters(), lr=lr)
-
-# Training
-for epoch in range(50):
- for i, (images, _) in enumerate(train_loader):
- images = Variable(images)
- real_labels = Variable(torch.ones(images.size(0)))
- fake_labels = Variable(torch.zeros(images.size(0)))
-
- # Train the discriminator
- discriminator.zero_grad()
- outputs = discriminator(images)
- real_loss = criterion(outputs, real_labels)
- real_score = outputs
-
- noise = Variable(torch.randn(images.size(0), 128))
- fake_images = generator(noise)
- outputs = discriminator(fake_images.detach())
- fake_loss = criterion(outputs, fake_labels)
- fake_score = outputs
-
- d_loss = real_loss + fake_loss
- d_loss.backward()
- d_optimizer.step()
-
- # Train the generator
- generator.zero_grad()
- noise = Variable(torch.randn(images.size(0), 128))
- fake_images = generator(noise)
- outputs = discriminator(fake_images)
- g_loss = criterion(outputs, real_labels)
- g_loss.backward()
- g_optimizer.step()
-
- if (i+1) % 100 == 0:
- print('Epoch [%d/%d], Step[%d/%d], d_loss: %.4f, g_loss: %.4f, '
- 'D(x): %.2f, D(G(z)): %.2f'
- %(epoch, 50, i+1, 500, d_loss.data[0], g_loss.data[0],
- real_score.data.mean(), fake_score.data.mean()))
-
- # Save the sampled images
- torchvision.utils.save_image(denorm(fake_images.data),
- './data/fake_samples_%d_%d.png' %(epoch+1, i+1))
-
-# Save the Models
-torch.save(generator.state_dict(), './generator.pkl')
-torch.save(discriminator.state_dict(), './discriminator.pkl')
\ No newline at end of file
diff --git a/tutorials/12 - Deep Q Network/dqn13.py b/tutorials/12 - Deep Q Network/dqn13.py
deleted file mode 100644
index 442b609..0000000
--- a/tutorials/12 - Deep Q Network/dqn13.py
+++ /dev/null
@@ -1,124 +0,0 @@
-%matplotlib inline
-
-import torch
-import torch.nn as nn
-import gym
-import random
-import numpy as np
-import torchvision.transforms as transforms
-import matplotlib.pyplot as plt
-from torch.autograd import Variable
-from collections import deque, namedtuple
-
-env = gym.envs.make("CartPole-v0")
-
-class Net(nn.Module):
- def __init__(self):
- super(Net, self).__init__()
- self.fc1 = nn.Linear(4, 128)
- self.tanh = nn.Tanh()
- self.fc2 = nn.Linear(128, 2)
- self.init_weights()
-
- def init_weights(self):
- self.fc1.weight.data.uniform_(-0.1, 0.1)
- self.fc2.weight.data.uniform_(-0.1, 0.1)
-
- def forward(self, x):
- out = self.fc1(x)
- out = self.tanh(out)
- out = self.fc2(out)
- return out
-
-def make_epsilon_greedy_policy(network, epsilon, nA):
- def policy(state):
- sample = random.random()
- if sample < (1-epsilon) + (epsilon/nA):
- q_values = network(state.view(1, -1))
- action = q_values.data.max(1)[1][0, 0]
- else:
- action = random.randrange(nA)
- return action
- return policy
-
-class ReplayMemory(object):
-
- def __init__(self, capacity):
- self.memory = deque()
- self.capacity = capacity
-
- def push(self, transition):
- if len(self.memory) > self.capacity:
- self.memory.popleft()
- self.memory.append(transition)
-
- def sample(self, batch_size):
- return random.sample(self.memory, batch_size)
-
- def __len__(self):
- return len(self.memory)
-
-def to_tensor(ndarray, volatile=False):
- return Variable(torch.from_numpy(ndarray), volatile=volatile).float()
-
-def deep_q_learning(num_episodes=10, batch_size=100,
- discount_factor=0.95, epsilon=0.1, epsilon_decay=0.95):
-
- # Q-Network and memory
- net = Net()
- memory = ReplayMemory(10000)
-
- # Loss and Optimizer
- criterion = nn.MSELoss()
- optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
-
- for i_episode in range(num_episodes):
-
- # Set policy (TODO: decaying epsilon)
- #if (i_episode+1) % 100 == 0:
- # epsilon *= 0.9
-
- policy = make_epsilon_greedy_policy(
- net, epsilon, env.action_space.n)
-
- # Start an episode
- state = env.reset()
-
- for t in range(10000):
-
- # Sample action from epsilon greed policy
- action = policy(to_tensor(state))
- next_state, reward, done, _ = env.step(action)
-
-
- # Restore transition in memory
- memory.push([state, action, reward, next_state])
-
-
- if len(memory) >= batch_size:
- # Sample mini-batch transitions from memory
- batch = memory.sample(batch_size)
- state_batch = np.vstack([trans[0] for trans in batch])
- action_batch =np.vstack([trans[1] for trans in batch])
- reward_batch = np.vstack([trans[2] for trans in batch])
- next_state_batch = np.vstack([trans[3] for trans in batch])
-
- # Forward + Backward + Opimize
- net.zero_grad()
- q_values = net(to_tensor(state_batch))
- next_q_values = net(to_tensor(next_state_batch, volatile=True))
- next_q_values.volatile = False
-
- td_target = to_tensor(reward_batch) + discount_factor * (next_q_values).max(1)[0]
- loss = criterion(q_values.gather(1,
- to_tensor(action_batch).long().view(-1, 1)), td_target)
- loss.backward()
- optimizer.step()
-
- if done:
- break
-
- state = next_state
-
- if len(memory) >= batch_size and (i_episode+1) % 10 == 0:
- print ('episode: %d, time: %d, loss: %.4f' %(i_episode, t, loss.data[0]))
\ No newline at end of file