mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-08-06 15:22:21 +08:00
455 lines
23 KiB
HTML
455 lines
23 KiB
HTML
<!DOCTYPE html>
|
|
<html lang="en">
|
|
<head>
|
|
<meta http-equiv="content-type" content="text/html;charset=utf-8"/>
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
|
|
<meta name="description" content="This is a reusable trainer for MNIST dataset"/>
|
|
|
|
<meta name="twitter:card" content="summary"/>
|
|
<meta name="twitter:image:src" content="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4"/>
|
|
<meta name="twitter:title" content="MNIST Experiment"/>
|
|
<meta name="twitter:description" content="This is a reusable trainer for MNIST dataset"/>
|
|
<meta name="twitter:site" content="@labmlai"/>
|
|
<meta name="twitter:creator" content="@labmlai"/>
|
|
|
|
<meta property="og:url" content="https://nn.labml.ai/experiments/mnist.html"/>
|
|
<meta property="og:title" content="MNIST Experiment"/>
|
|
<meta property="og:image" content="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4"/>
|
|
<meta property="og:site_name" content="MNIST Experiment"/>
|
|
<meta property="og:type" content="object"/>
|
|
<meta property="og:title" content="MNIST Experiment"/>
|
|
<meta property="og:description" content="This is a reusable trainer for MNIST dataset"/>
|
|
|
|
<title>MNIST Experiment</title>
|
|
<link rel="shortcut icon" href="/icon.png"/>
|
|
<link rel="stylesheet" href="../pylit.css?v=1">
|
|
<link rel="canonical" href="https://nn.labml.ai/experiments/mnist.html"/>
|
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.13.18/dist/katex.min.css" integrity="sha384-zTROYFVGOfTw7JV7KUu8udsvW2fx4lWOsCEDqhBreBwlHI4ioVRtmIvEThzJHGET" crossorigin="anonymous">
|
|
|
|
<!-- Global site tag (gtag.js) - Google Analytics -->
|
|
<script async src="https://www.googletagmanager.com/gtag/js?id=G-4V3HC8HBLH"></script>
|
|
<script>
|
|
window.dataLayer = window.dataLayer || [];
|
|
|
|
function gtag() {
|
|
dataLayer.push(arguments);
|
|
}
|
|
|
|
gtag('js', new Date());
|
|
|
|
gtag('config', 'G-4V3HC8HBLH');
|
|
</script>
|
|
</head>
|
|
<body>
|
|
<div id='container'>
|
|
<div id="background"></div>
|
|
<div class='section'>
|
|
<div class='docs'>
|
|
<p>
|
|
<a class="parent" href="/">home</a>
|
|
<a class="parent" href="index.html">experiments</a>
|
|
</p>
|
|
<p>
|
|
<a href="https://github.com/labmlai/annotated_deep_learning_paper_implementations" target="_blank">
|
|
<img alt="Github"
|
|
src="https://img.shields.io/github/stars/labmlai/annotated_deep_learning_paper_implementations?style=social"
|
|
style="max-width:100%;"/></a>
|
|
<a href="https://twitter.com/labmlai" rel="nofollow" target="_blank">
|
|
<img alt="Twitter"
|
|
src="https://img.shields.io/twitter/follow/labmlai?style=social"
|
|
style="max-width:100%;"/></a>
|
|
</p>
|
|
<p>
|
|
<a href="https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/experiments/mnist.py" target="_blank">
|
|
View code on Github</a>
|
|
</p>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-0'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-0'>#</a>
|
|
</div>
|
|
<h1>MNIST Experiment</h1>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">11</span><span></span><span class="kn">import</span> <span class="nn">torch.nn</span> <span class="k">as</span> <span class="nn">nn</span>
|
|
<span class="lineno">12</span><span class="kn">import</span> <span class="nn">torch.utils.data</span>
|
|
<span class="lineno">13</span>
|
|
<span class="lineno">14</span><span class="kn">from</span> <span class="nn">labml</span> <span class="kn">import</span> <span class="n">tracker</span>
|
|
<span class="lineno">15</span><span class="kn">from</span> <span class="nn">labml.configs</span> <span class="kn">import</span> <span class="n">option</span>
|
|
<span class="lineno">16</span><span class="kn">from</span> <span class="nn">labml_nn.helpers.datasets</span> <span class="kn">import</span> <span class="n">MNISTConfigs</span> <span class="k">as</span> <span class="n">MNISTDatasetConfigs</span>
|
|
<span class="lineno">17</span><span class="kn">from</span> <span class="nn">labml_nn.helpers.device</span> <span class="kn">import</span> <span class="n">DeviceConfigs</span>
|
|
<span class="lineno">18</span><span class="kn">from</span> <span class="nn">labml_nn.helpers.metrics</span> <span class="kn">import</span> <span class="n">Accuracy</span>
|
|
<span class="lineno">19</span><span class="kn">from</span> <span class="nn">labml_nn.helpers.trainer</span> <span class="kn">import</span> <span class="n">TrainValidConfigs</span><span class="p">,</span> <span class="n">BatchIndex</span>
|
|
<span class="lineno">20</span><span class="kn">from</span> <span class="nn">labml_nn.optimizers.configs</span> <span class="kn">import</span> <span class="n">OptimizerConfigs</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-1'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-1'>#</a>
|
|
</div>
|
|
<p> <a id="MNISTConfigs"></a></p>
|
|
<h2>Trainer configurations</h2>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">23</span><span class="k">class</span> <span class="nc">MNISTConfigs</span><span class="p">(</span><span class="n">MNISTDatasetConfigs</span><span class="p">,</span> <span class="n">TrainValidConfigs</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-2'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-2'>#</a>
|
|
</div>
|
|
<p>Optimizer </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">31</span> <span class="n">optimizer</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">Adam</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-3'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-3'>#</a>
|
|
</div>
|
|
<p>Training device </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">33</span> <span class="n">device</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span> <span class="o">=</span> <span class="n">DeviceConfigs</span><span class="p">()</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-4'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-4'>#</a>
|
|
</div>
|
|
<p>Classification model </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">36</span> <span class="n">model</span><span class="p">:</span> <span class="n">nn</span><span class="o">.</span><span class="n">Module</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-5'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-5'>#</a>
|
|
</div>
|
|
<p>Number of epochs to train for </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">38</span> <span class="n">epochs</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-6'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-6'>#</a>
|
|
</div>
|
|
<p>Number of times to switch between training and validation within an epoch </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">41</span> <span class="n">inner_iterations</span> <span class="o">=</span> <span class="mi">10</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-7'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-7'>#</a>
|
|
</div>
|
|
<p>Accuracy function </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">44</span> <span class="n">accuracy</span> <span class="o">=</span> <span class="n">Accuracy</span><span class="p">()</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-8'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-8'>#</a>
|
|
</div>
|
|
<p>Loss function </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">46</span> <span class="n">loss_func</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">CrossEntropyLoss</span><span class="p">()</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-9'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-9'>#</a>
|
|
</div>
|
|
<h3>Initialization</h3>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">48</span> <span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-10'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-10'>#</a>
|
|
</div>
|
|
<p>Set tracker configurations </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">53</span> <span class="n">tracker</span><span class="o">.</span><span class="n">set_scalar</span><span class="p">(</span><span class="s2">"loss.*"</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
|
<span class="lineno">54</span> <span class="n">tracker</span><span class="o">.</span><span class="n">set_scalar</span><span class="p">(</span><span class="s2">"accuracy.*"</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-11'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-11'>#</a>
|
|
</div>
|
|
<p>Add accuracy as a state module. The name is probably confusing, since it's meant to store states between training and validation for RNNs. This will keep the accuracy metric stats separate for training and validation. </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">59</span> <span class="bp">self</span><span class="o">.</span><span class="n">state_modules</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">accuracy</span><span class="p">]</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-12'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-12'>#</a>
|
|
</div>
|
|
<h3>Training or validation step</h3>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">61</span> <span class="k">def</span> <span class="nf">step</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">batch</span><span class="p">:</span> <span class="nb">any</span><span class="p">,</span> <span class="n">batch_idx</span><span class="p">:</span> <span class="n">BatchIndex</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-13'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-13'>#</a>
|
|
</div>
|
|
<p>Training/Evaluation mode </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">67</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="o">.</span><span class="n">train</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="o">.</span><span class="n">is_train</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-14'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-14'>#</a>
|
|
</div>
|
|
<p>Move data to the device </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">70</span> <span class="n">data</span><span class="p">,</span> <span class="n">target</span> <span class="o">=</span> <span class="n">batch</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">device</span><span class="p">),</span> <span class="n">batch</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">device</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-15'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-15'>#</a>
|
|
</div>
|
|
<p>Update global step (number of samples processed) when in training mode </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">73</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="o">.</span><span class="n">is_train</span><span class="p">:</span>
|
|
<span class="lineno">74</span> <span class="n">tracker</span><span class="o">.</span><span class="n">add_global_step</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">))</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-16'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-16'>#</a>
|
|
</div>
|
|
<p>Get model outputs. </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">77</span> <span class="n">output</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">(</span><span class="n">data</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-17'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-17'>#</a>
|
|
</div>
|
|
<p>Calculate and log loss </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">80</span> <span class="n">loss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loss_func</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">target</span><span class="p">)</span>
|
|
<span class="lineno">81</span> <span class="n">tracker</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="s2">"loss."</span><span class="p">,</span> <span class="n">loss</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-18'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-18'>#</a>
|
|
</div>
|
|
<p>Calculate and log accuracy </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">84</span> <span class="bp">self</span><span class="o">.</span><span class="n">accuracy</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">target</span><span class="p">)</span>
|
|
<span class="lineno">85</span> <span class="bp">self</span><span class="o">.</span><span class="n">accuracy</span><span class="o">.</span><span class="n">track</span><span class="p">()</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-19'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-19'>#</a>
|
|
</div>
|
|
<p>Train the model </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">88</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="o">.</span><span class="n">is_train</span><span class="p">:</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-20'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-20'>#</a>
|
|
</div>
|
|
<p>Calculate gradients </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">90</span> <span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-21'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-21'>#</a>
|
|
</div>
|
|
<p>Take optimizer step </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">92</span> <span class="bp">self</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">step</span><span class="p">()</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-22'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-22'>#</a>
|
|
</div>
|
|
<p>Log the model parameters and gradients on last batch of every epoch </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">94</span> <span class="k">if</span> <span class="n">batch_idx</span><span class="o">.</span><span class="n">is_last</span><span class="p">:</span>
|
|
<span class="lineno">95</span> <span class="n">tracker</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="s1">'model'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-23'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-23'>#</a>
|
|
</div>
|
|
<p>Clear the gradients </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">97</span> <span class="bp">self</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-24'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-24'>#</a>
|
|
</div>
|
|
<p>Save the tracked metrics </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">100</span> <span class="n">tracker</span><span class="o">.</span><span class="n">save</span><span class="p">()</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-25'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-25'>#</a>
|
|
</div>
|
|
<h3>Default optimizer configurations</h3>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">103</span><span class="nd">@option</span><span class="p">(</span><span class="n">MNISTConfigs</span><span class="o">.</span><span class="n">optimizer</span><span class="p">)</span>
|
|
<span class="lineno">104</span><span class="k">def</span> <span class="nf">_optimizer</span><span class="p">(</span><span class="n">c</span><span class="p">:</span> <span class="n">MNISTConfigs</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-26'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-26'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">108</span> <span class="n">opt_conf</span> <span class="o">=</span> <span class="n">OptimizerConfigs</span><span class="p">()</span>
|
|
<span class="lineno">109</span> <span class="n">opt_conf</span><span class="o">.</span><span class="n">parameters</span> <span class="o">=</span> <span class="n">c</span><span class="o">.</span><span class="n">model</span><span class="o">.</span><span class="n">parameters</span><span class="p">()</span>
|
|
<span class="lineno">110</span> <span class="n">opt_conf</span><span class="o">.</span><span class="n">optimizer</span> <span class="o">=</span> <span class="s1">'Adam'</span>
|
|
<span class="lineno">111</span> <span class="k">return</span> <span class="n">opt_conf</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='footer'>
|
|
<a href="https://labml.ai">labml.ai</a>
|
|
</div>
|
|
</div>
|
|
<script src=../interactive.js?v=1"></script>
|
|
<script>
|
|
function handleImages() {
|
|
var images = document.querySelectorAll('p>img')
|
|
|
|
for (var i = 0; i < images.length; ++i) {
|
|
handleImage(images[i])
|
|
}
|
|
}
|
|
|
|
function handleImage(img) {
|
|
img.parentElement.style.textAlign = 'center'
|
|
|
|
var modal = document.createElement('div')
|
|
modal.id = 'modal'
|
|
|
|
var modalContent = document.createElement('div')
|
|
modal.appendChild(modalContent)
|
|
|
|
var modalImage = document.createElement('img')
|
|
modalContent.appendChild(modalImage)
|
|
|
|
var span = document.createElement('span')
|
|
span.classList.add('close')
|
|
span.textContent = 'x'
|
|
modal.appendChild(span)
|
|
|
|
img.onclick = function () {
|
|
console.log('clicked')
|
|
document.body.appendChild(modal)
|
|
modalImage.src = img.src
|
|
}
|
|
|
|
span.onclick = function () {
|
|
document.body.removeChild(modal)
|
|
}
|
|
}
|
|
|
|
handleImages()
|
|
</script>
|
|
</body>
|
|
</html> |