From 47696884697adbebaf1b35dc72e234d5e88c0335 Mon Sep 17 00:00:00 2001
From: yunjey
Date: Thu, 13 Apr 2017 19:51:14 +0900
Subject: [PATCH] modified the code

---
 .../09 - Image Captioning/build_vocab.py      | 77 +++++++++++++++++++
 tutorials/09 - Image Captioning/resize.py     | 44 +++++++++++
 2 files changed, 121 insertions(+)
 create mode 100644 tutorials/09 - Image Captioning/build_vocab.py
 create mode 100644 tutorials/09 - Image Captioning/resize.py

diff --git a/tutorials/09 - Image Captioning/build_vocab.py b/tutorials/09 - Image Captioning/build_vocab.py
new file mode 100644
index 0000000..612920a
--- /dev/null
+++ b/tutorials/09 - Image Captioning/build_vocab.py
@@ -0,0 +1,77 @@
+import nltk
+import pickle
+import argparse
+from collections import Counter
+from pycocotools.coco import COCO
+
+
+class Vocabulary(object):
+    """Simple vocabulary wrapper."""
+    def __init__(self):
+        self.word2idx = {}
+        self.idx2word = {}
+        self.idx = 0
+
+    def add_word(self, word):
+        if not word in self.word2idx:
+            self.word2idx[word] = self.idx
+            self.idx2word[self.idx] = word
+            self.idx += 1
+
+    def __call__(self, word):
+        if not word in self.word2idx:
+            return self.word2idx['<unk>']
+        return self.word2idx[word]
+
+    def __len__(self):
+        return len(self.word2idx)
+
+def build_vocab(json, threshold):
+    """Build a simple vocabulary wrapper."""
+    coco = COCO(json)
+    counter = Counter()
+    ids = coco.anns.keys()
+    for i, id in enumerate(ids):
+        caption = str(coco.anns[id]['caption'])
+        tokens = nltk.tokenize.word_tokenize(caption.lower())
+        counter.update(tokens)
+
+        if i % 1000 == 0:
+            print("[%d/%d] Tokenized the captions." %(i, len(ids)))
+
+    # If the word frequency is less than 'threshold', then the word is discarded.
+    words = [word for word, cnt in counter.items() if cnt >= threshold]
+
+    # Creates a vocab wrapper and add some special tokens.
+    vocab = Vocabulary()
+    vocab.add_word('<pad>')
+    vocab.add_word('<start>')
+    vocab.add_word('<end>')
+    vocab.add_word('<unk>')
+
+    # Adds the words to the vocabulary.
+    for i, word in enumerate(words):
+        vocab.add_word(word)
+    return vocab
+
+def main(args):
+    vocab = build_vocab(json=args.caption_path,
+                        threshold=args.threshold)
+    vocab_path = args.vocab_path
+    with open(vocab_path, 'wb') as f:
+        pickle.dump(vocab, f, pickle.HIGHEST_PROTOCOL)
+    print("Total vocabulary size: %d" %len(vocab))
+    print("Saved the vocabulary wrapper to '%s'" %vocab_path)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--caption_path', type=str,
+                        default='./data/annotations/captions_train2014.json',
+                        help='path for train annotation file')
+    parser.add_argument('--vocab_path', type=str, default='./data/vocab.pkl',
+                        help='path for saving vocabulary wrapper')
+    parser.add_argument('--threshold', type=int, default=4,
+                        help='minimum word count threshold')
+    args = parser.parse_args()
+    main(args)
\ No newline at end of file
diff --git a/tutorials/09 - Image Captioning/resize.py b/tutorials/09 - Image Captioning/resize.py
new file mode 100644
index 0000000..783a824
--- /dev/null
+++ b/tutorials/09 - Image Captioning/resize.py
@@ -0,0 +1,44 @@
+import argparse
+import os
+from PIL import Image
+
+
+def resize_image(image, size):
+    """Resize an image to the given size."""
+    return image.resize(size, Image.ANTIALIAS)
+
+def resize_images(image_dir, output_dir, size):
+    """Resize the images in 'image_dir' and save into 'output_dir'."""
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    images = os.listdir(image_dir)
+    num_images = len(images)
+    for i, image in enumerate(images):
+        with open(os.path.join(image_dir, image), 'r+b') as f:
+            with Image.open(f) as img:
+                img = resize_image(img, size)
+                img.save(os.path.join(output_dir, image), img.format)
+        if i % 100 == 0:
+            print ("[%d/%d] Resized the images and saved into '%s'."
+                   %(i, num_images, output_dir))
+
+def main(args):
+    splits = ['train', 'val']
+    for split in splits:
+        image_dir = args.image_dir
+        output_dir = args.output_dir
+        image_size = [args.image_size, args.image_size]
+        resize_images(image_dir, output_dir, image_size)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--image_dir', type=str, default='./data/train2014/',
+                        help='directory for train images')
+    parser.add_argument('--output_dir', type=str, default='./data/resized2014/',
+                        help='directory for saving resized images')
+    parser.add_argument('--image_size', type=int, default=256,
+                        help='size for image after processing')
+    args = parser.parse_args()
+    main(args)
\ No newline at end of file