This commit is contained in:
Varuna Jayasiri
2021-08-19 15:21:18 +05:30
parent 1163d64c12
commit f1fe7087f1
156 changed files with 34253 additions and 33602 deletions

View File

@ -64,286 +64,289 @@
</div>
<div class='section' id='section-0'>
<div class='docs doc-strings'>
<div class='section-link'>
<a href='#section-0'>#</a>
</div>
<h1>Train a <a href="index.html">Vision Transformer (ViT)</a> on CIFAR 10</h1>
<p><a href="https://app.labml.ai/run/8b531d9ce3dc11eb84fc87df6756eb8f"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen" /></a></p>
<div class='section-link'>
<a href='#section-0'>#</a>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">13</span><span></span><span class="kn">from</span> <span class="nn">labml</span> <span class="kn">import</span> <span class="n">experiment</span>
<h1>Train a <a href="index.html">Vision Transformer (ViT)</a> on CIFAR 10</h1>
<p><a href="https://app.labml.ai/run/8b531d9ce3dc11eb84fc87df6756eb8f"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen" /></a></p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">13</span><span></span><span class="kn">from</span> <span class="nn">labml</span> <span class="kn">import</span> <span class="n">experiment</span>
<span class="lineno">14</span><span class="kn">from</span> <span class="nn">labml.configs</span> <span class="kn">import</span> <span class="n">option</span>
<span class="lineno">15</span><span class="kn">from</span> <span class="nn">labml_nn.experiments.cifar10</span> <span class="kn">import</span> <span class="n">CIFAR10Configs</span>
<span class="lineno">16</span><span class="kn">from</span> <span class="nn">labml_nn.transformers</span> <span class="kn">import</span> <span class="n">TransformerConfigs</span></pre></div>
</div>
</div>
</div>
<div class='section' id='section-1'>
<div class='docs doc-strings'>
<div class='section-link'>
<a href='#section-1'>#</a>
</div>
<h2>Configurations</h2>
<div class='section-link'>
<a href='#section-1'>#</a>
</div>
<h2>Configurations</h2>
<p>We use <a href="../../experiments/cifar10.html"><code>CIFAR10Configs</code></a> which defines all the
dataset related configurations, optimizer, and a training loop.</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">19</span><span class="k">class</span> <span class="nc">Configs</span><span class="p">(</span><span class="n">CIFAR10Configs</span><span class="p">):</span></pre></div>
</div>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">19</span><span class="k">class</span> <span class="nc">Configs</span><span class="p">(</span><span class="n">CIFAR10Configs</span><span class="p">):</span></pre></div>
</div>
</div>
<div class='section' id='section-2'>
<div class='docs'>
<div class='section-link'>
<a href='#section-2'>#</a>
</div>
<p><a href="../configs.html#TransformerConfigs">Transformer configurations</a>
<div class='docs'>
<div class='section-link'>
<a href='#section-2'>#</a>
</div>
<p><a href="../configs.html#TransformerConfigs">Transformer configurations</a>
to get <a href="../models.html#TransformerLayer">transformer layer</a></p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">29</span> <span class="n">transformer</span><span class="p">:</span> <span class="n">TransformerConfigs</span></pre></div>
</div>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">29</span> <span class="n">transformer</span><span class="p">:</span> <span class="n">TransformerConfigs</span></pre></div>
</div>
</div>
<div class='section' id='section-3'>
<div class='docs'>
<div class='section-link'>
<a href='#section-3'>#</a>
</div>
<p>Size of a patch</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">32</span> <span class="n">patch_size</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">4</span></pre></div>
<div class='docs'>
<div class='section-link'>
<a href='#section-3'>#</a>
</div>
<p>Size of a patch</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">32</span> <span class="n">patch_size</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">4</span></pre></div>
</div>
</div>
<div class='section' id='section-4'>
<div class='docs'>
<div class='section-link'>
<a href='#section-4'>#</a>
</div>
<p>Size of the hidden layer in classification head</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">34</span> <span class="n">n_hidden_classification</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2048</span></pre></div>
<div class='docs'>
<div class='section-link'>
<a href='#section-4'>#</a>
</div>
<p>Size of the hidden layer in classification head</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">34</span> <span class="n">n_hidden_classification</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2048</span></pre></div>
</div>
</div>
<div class='section' id='section-5'>
<div class='docs'>
<div class='section-link'>
<a href='#section-5'>#</a>
</div>
<p>Number of classes in the task</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">36</span> <span class="n">n_classes</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span></pre></div>
<div class='docs'>
<div class='section-link'>
<a href='#section-5'>#</a>
</div>
<p>Number of classes in the task</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">36</span> <span class="n">n_classes</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span></pre></div>
</div>
</div>
<div class='section' id='section-6'>
<div class='docs doc-strings'>
<div class='section-link'>
<a href='#section-6'>#</a>
</div>
<p>Create transformer configs</p>
<div class='section-link'>
<a href='#section-6'>#</a>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">39</span><span class="nd">@option</span><span class="p">(</span><span class="n">Configs</span><span class="o">.</span><span class="n">transformer</span><span class="p">)</span>
<p>Create transformer configs</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">39</span><span class="nd">@option</span><span class="p">(</span><span class="n">Configs</span><span class="o">.</span><span class="n">transformer</span><span class="p">)</span>
<span class="lineno">40</span><span class="k">def</span> <span class="nf">_transformer</span><span class="p">():</span></pre></div>
</div>
</div>
</div>
<div class='section' id='section-7'>
<div class='docs'>
<div class='section-link'>
<a href='#section-7'>#</a>
</div>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">44</span> <span class="k">return</span> <span class="n">TransformerConfigs</span><span class="p">()</span></pre></div>
<div class='docs'>
<div class='section-link'>
<a href='#section-7'>#</a>
</div>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">44</span> <span class="k">return</span> <span class="n">TransformerConfigs</span><span class="p">()</span></pre></div>
</div>
</div>
<div class='section' id='section-8'>
<div class='docs doc-strings'>
<div class='section-link'>
<a href='#section-8'>#</a>
</div>
<h3>Create model</h3>
<div class='section-link'>
<a href='#section-8'>#</a>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">47</span><span class="nd">@option</span><span class="p">(</span><span class="n">Configs</span><span class="o">.</span><span class="n">model</span><span class="p">)</span>
<h3>Create model</h3>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">47</span><span class="nd">@option</span><span class="p">(</span><span class="n">Configs</span><span class="o">.</span><span class="n">model</span><span class="p">)</span>
<span class="lineno">48</span><span class="k">def</span> <span class="nf">_vit</span><span class="p">(</span><span class="n">c</span><span class="p">:</span> <span class="n">Configs</span><span class="p">):</span></pre></div>
</div>
</div>
</div>
<div class='section' id='section-9'>
<div class='docs'>
<div class='section-link'>
<a href='#section-9'>#</a>
</div>
<div class='docs'>
<div class='section-link'>
<a href='#section-9'>#</a>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">52</span> <span class="kn">from</span> <span class="nn">labml_nn.transformers.vit</span> <span class="kn">import</span> <span class="n">VisionTransformer</span><span class="p">,</span> <span class="n">LearnedPositionalEmbeddings</span><span class="p">,</span> <span class="n">ClassificationHead</span><span class="p">,</span> \
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">52</span> <span class="kn">from</span> <span class="nn">labml_nn.transformers.vit</span> <span class="kn">import</span> <span class="n">VisionTransformer</span><span class="p">,</span> <span class="n">LearnedPositionalEmbeddings</span><span class="p">,</span> <span class="n">ClassificationHead</span><span class="p">,</span> \
<span class="lineno">53</span> <span class="n">PatchEmbeddings</span></pre></div>
</div>
</div>
</div>
<div class='section' id='section-10'>
<div class='docs'>
<div class='section-link'>
<a href='#section-10'>#</a>
</div>
<p>Transformer size from <a href="../configs.html#TransformerConfigs">Transformer configurations</a></p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">56</span> <span class="n">d_model</span> <span class="o">=</span> <span class="n">c</span><span class="o">.</span><span class="n">transformer</span><span class="o">.</span><span class="n">d_model</span></pre></div>
<div class='docs'>
<div class='section-link'>
<a href='#section-10'>#</a>
</div>
<p>Transformer size from <a href="../configs.html#TransformerConfigs">Transformer configurations</a></p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">56</span> <span class="n">d_model</span> <span class="o">=</span> <span class="n">c</span><span class="o">.</span><span class="n">transformer</span><span class="o">.</span><span class="n">d_model</span></pre></div>
</div>
</div>
<div class='section' id='section-11'>
<div class='docs'>
<div class='section-link'>
<a href='#section-11'>#</a>
</div>
<p>Create a vision transformer</p>
<div class='docs'>
<div class='section-link'>
<a href='#section-11'>#</a>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">58</span> <span class="k">return</span> <span class="n">VisionTransformer</span><span class="p">(</span><span class="n">c</span><span class="o">.</span><span class="n">transformer</span><span class="o">.</span><span class="n">encoder_layer</span><span class="p">,</span> <span class="n">c</span><span class="o">.</span><span class="n">transformer</span><span class="o">.</span><span class="n">n_layers</span><span class="p">,</span>
<p>Create a vision transformer</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">58</span> <span class="k">return</span> <span class="n">VisionTransformer</span><span class="p">(</span><span class="n">c</span><span class="o">.</span><span class="n">transformer</span><span class="o">.</span><span class="n">encoder_layer</span><span class="p">,</span> <span class="n">c</span><span class="o">.</span><span class="n">transformer</span><span class="o">.</span><span class="n">n_layers</span><span class="p">,</span>
<span class="lineno">59</span> <span class="n">PatchEmbeddings</span><span class="p">(</span><span class="n">d_model</span><span class="p">,</span> <span class="n">c</span><span class="o">.</span><span class="n">patch_size</span><span class="p">,</span> <span class="mi">3</span><span class="p">),</span>
<span class="lineno">60</span> <span class="n">LearnedPositionalEmbeddings</span><span class="p">(</span><span class="n">d_model</span><span class="p">),</span>
<span class="lineno">61</span> <span class="n">ClassificationHead</span><span class="p">(</span><span class="n">d_model</span><span class="p">,</span> <span class="n">c</span><span class="o">.</span><span class="n">n_hidden_classification</span><span class="p">,</span> <span class="n">c</span><span class="o">.</span><span class="n">n_classes</span><span class="p">))</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">c</span><span class="o">.</span><span class="n">device</span><span class="p">)</span></pre></div>
</div>
</div>
</div>
<div class='section' id='section-12'>
<div class='docs'>
<div class='section-link'>
<a href='#section-12'>#</a>
</div>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">64</span><span class="k">def</span> <span class="nf">main</span><span class="p">():</span></pre></div>
<div class='docs'>
<div class='section-link'>
<a href='#section-12'>#</a>
</div>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">64</span><span class="k">def</span> <span class="nf">main</span><span class="p">():</span></pre></div>
</div>
</div>
<div class='section' id='section-13'>
<div class='docs'>
<div class='section-link'>
<a href='#section-13'>#</a>
</div>
<p>Create experiment</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">66</span> <span class="n">experiment</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;ViT&#39;</span><span class="p">,</span> <span class="n">comment</span><span class="o">=</span><span class="s1">&#39;cifar10&#39;</span><span class="p">)</span></pre></div>
<div class='docs'>
<div class='section-link'>
<a href='#section-13'>#</a>
</div>
<p>Create experiment</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">66</span> <span class="n">experiment</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;ViT&#39;</span><span class="p">,</span> <span class="n">comment</span><span class="o">=</span><span class="s1">&#39;cifar10&#39;</span><span class="p">)</span></pre></div>
</div>
</div>
<div class='section' id='section-14'>
<div class='docs'>
<div class='section-link'>
<a href='#section-14'>#</a>
</div>
<p>Create configurations</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">68</span> <span class="n">conf</span> <span class="o">=</span> <span class="n">Configs</span><span class="p">()</span></pre></div>
<div class='docs'>
<div class='section-link'>
<a href='#section-14'>#</a>
</div>
<p>Create configurations</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">68</span> <span class="n">conf</span> <span class="o">=</span> <span class="n">Configs</span><span class="p">()</span></pre></div>
</div>
</div>
<div class='section' id='section-15'>
<div class='docs'>
<div class='section-link'>
<a href='#section-15'>#</a>
</div>
<p>Load configurations</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">70</span> <span class="n">experiment</span><span class="o">.</span><span class="n">configs</span><span class="p">(</span><span class="n">conf</span><span class="p">,</span> <span class="p">{</span></pre></div>
<div class='docs'>
<div class='section-link'>
<a href='#section-15'>#</a>
</div>
<p>Load configurations</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">70</span> <span class="n">experiment</span><span class="o">.</span><span class="n">configs</span><span class="p">(</span><span class="n">conf</span><span class="p">,</span> <span class="p">{</span></pre></div>
</div>
</div>
<div class='section' id='section-16'>
<div class='docs'>
<div class='section-link'>
<a href='#section-16'>#</a>
</div>
<p>Optimizer</p>
<div class='docs'>
<div class='section-link'>
<a href='#section-16'>#</a>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">72</span> <span class="s1">&#39;optimizer.optimizer&#39;</span><span class="p">:</span> <span class="s1">&#39;Adam&#39;</span><span class="p">,</span>
<p>Optimizer</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">72</span> <span class="s1">&#39;optimizer.optimizer&#39;</span><span class="p">:</span> <span class="s1">&#39;Adam&#39;</span><span class="p">,</span>
<span class="lineno">73</span> <span class="s1">&#39;optimizer.learning_rate&#39;</span><span class="p">:</span> <span class="mf">2.5e-4</span><span class="p">,</span></pre></div>
</div>
</div>
</div>
<div class='section' id='section-17'>
<div class='docs'>
<div class='section-link'>
<a href='#section-17'>#</a>
</div>
<p>Transformer embedding size</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">76</span> <span class="s1">&#39;transformer.d_model&#39;</span><span class="p">:</span> <span class="mi">512</span><span class="p">,</span></pre></div>
<div class='docs'>
<div class='section-link'>
<a href='#section-17'>#</a>
</div>
<p>Transformer embedding size</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">76</span> <span class="s1">&#39;transformer.d_model&#39;</span><span class="p">:</span> <span class="mi">512</span><span class="p">,</span></pre></div>
</div>
</div>
<div class='section' id='section-18'>
<div class='docs'>
<div class='section-link'>
<a href='#section-18'>#</a>
</div>
<p>Training epochs and batch size</p>
<div class='docs'>
<div class='section-link'>
<a href='#section-18'>#</a>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">79</span> <span class="s1">&#39;epochs&#39;</span><span class="p">:</span> <span class="mi">1000</span><span class="p">,</span>
<p>Training epochs and batch size</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">79</span> <span class="s1">&#39;epochs&#39;</span><span class="p">:</span> <span class="mi">1000</span><span class="p">,</span>
<span class="lineno">80</span> <span class="s1">&#39;train_batch_size&#39;</span><span class="p">:</span> <span class="mi">64</span><span class="p">,</span></pre></div>
</div>
</div>
</div>
<div class='section' id='section-19'>
<div class='docs'>
<div class='section-link'>
<a href='#section-19'>#</a>
</div>
<p>Augment CIFAR 10 images for training</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">83</span> <span class="s1">&#39;train_dataset&#39;</span><span class="p">:</span> <span class="s1">&#39;cifar10_train_augmented&#39;</span><span class="p">,</span></pre></div>
<div class='docs'>
<div class='section-link'>
<a href='#section-19'>#</a>
</div>
<p>Augment CIFAR 10 images for training</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">83</span> <span class="s1">&#39;train_dataset&#39;</span><span class="p">:</span> <span class="s1">&#39;cifar10_train_augmented&#39;</span><span class="p">,</span></pre></div>
</div>
</div>
<div class='section' id='section-20'>
<div class='docs'>
<div class='section-link'>
<a href='#section-20'>#</a>
</div>
<p>Do not augment CIFAR 10 images for validation</p>
<div class='docs'>
<div class='section-link'>
<a href='#section-20'>#</a>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">85</span> <span class="s1">&#39;valid_dataset&#39;</span><span class="p">:</span> <span class="s1">&#39;cifar10_valid_no_augment&#39;</span><span class="p">,</span>
<p>Do not augment CIFAR 10 images for validation</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">85</span> <span class="s1">&#39;valid_dataset&#39;</span><span class="p">:</span> <span class="s1">&#39;cifar10_valid_no_augment&#39;</span><span class="p">,</span>
<span class="lineno">86</span> <span class="p">})</span></pre></div>
</div>
</div>
</div>
<div class='section' id='section-21'>
<div class='docs'>
<div class='section-link'>
<a href='#section-21'>#</a>
</div>
<p>Set model for saving/loading</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">88</span> <span class="n">experiment</span><span class="o">.</span><span class="n">add_pytorch_models</span><span class="p">({</span><span class="s1">&#39;model&#39;</span><span class="p">:</span> <span class="n">conf</span><span class="o">.</span><span class="n">model</span><span class="p">})</span></pre></div>
<div class='docs'>
<div class='section-link'>
<a href='#section-21'>#</a>
</div>
<p>Set model for saving/loading</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">88</span> <span class="n">experiment</span><span class="o">.</span><span class="n">add_pytorch_models</span><span class="p">({</span><span class="s1">&#39;model&#39;</span><span class="p">:</span> <span class="n">conf</span><span class="o">.</span><span class="n">model</span><span class="p">})</span></pre></div>
</div>
</div>
<div class='section' id='section-22'>
<div class='docs'>
<div class='section-link'>
<a href='#section-22'>#</a>
</div>
<p>Start the experiment and run the training loop</p>
<div class='docs'>
<div class='section-link'>
<a href='#section-22'>#</a>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">90</span> <span class="k">with</span> <span class="n">experiment</span><span class="o">.</span><span class="n">start</span><span class="p">():</span>
<p>Start the experiment and run the training loop</p>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">90</span> <span class="k">with</span> <span class="n">experiment</span><span class="o">.</span><span class="n">start</span><span class="p">():</span>
<span class="lineno">91</span> <span class="n">conf</span><span class="o">.</span><span class="n">run</span><span class="p">()</span></pre></div>
</div>
</div>
</div>
<div class='section' id='section-23'>
<div class='docs'>
<div class='section-link'>
<a href='#section-23'>#</a>
</div>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">95</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="lineno">96</span> <span class="n">main</span><span class="p">()</span></pre></div>
<div class='docs'>
<div class='section-link'>
<a href='#section-23'>#</a>
</div>
</div>
<div class='code'>
<div class="highlight"><pre><span class="lineno">95</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="lineno">96</span> <span class="n">main</span><span class="p">()</span></pre></div>
</div>
</div>
<div class='footer'>
<a href="https://papers.labml.ai">Trending Research Papers</a>
<a href="https://labml.ai">labml.ai</a>
</div>
</div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.4/MathJax.js?config=TeX-AMS_HTML">
@ -362,6 +365,7 @@ to get <a href="../models.html#TransformerLayer">transformer layer</a></p>
displayAlign: 'center',
"HTML-CSS": { fonts: ["TeX"] }
});
</script>
<script>
function handleImages() {