mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-10-28 12:45:07 +08:00
unescape *
This commit is contained in:
@ -24,6 +24,8 @@
|
||||
<link rel="shortcut icon" href="/icon.png"/>
|
||||
<link rel="stylesheet" href="../pylit.css">
|
||||
<link rel="canonical" href="https://nn.labml.ai/distillation/small.html"/>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.13.18/dist/katex.min.css" integrity="sha384-zTROYFVGOfTw7JV7KUu8udsvW2fx4lWOsCEDqhBreBwlHI4ioVRtmIvEThzJHGET" crossorigin="anonymous">
|
||||
|
||||
<!-- Global site tag (gtag.js) - Google Analytics -->
|
||||
<script async src="https://www.googletagmanager.com/gtag/js?id=G-4V3HC8HBLH"></script>
|
||||
<script>
|
||||
@ -68,7 +70,8 @@
|
||||
</div>
|
||||
<h1>Train a small model on CIFAR 10</h1>
|
||||
<p>This trains a small model on CIFAR 10 to test how much <a href="index.html">distillation</a> benefits.</p>
|
||||
<p><a href="https://app.labml.ai/run/3b8fda8edaef11eb951eebc4a6e2bfac"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen" /></a></p>
|
||||
<p><a href="https://app.labml.ai/run/3b8fda8edaef11eb951eebc4a6e2bfac"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen"></a></p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">15</span><span></span><span class="kn">import</span> <span class="nn">torch.nn</span> <span class="k">as</span> <span class="nn">nn</span>
|
||||
@ -85,8 +88,9 @@
|
||||
<a href='#section-1'>#</a>
|
||||
</div>
|
||||
<h2>Configurations</h2>
|
||||
<p>We use <a href="../experiments/cifar10.html"><code>CIFAR10Configs</code></a> which defines all the
|
||||
dataset related configurations, optimizer, and a training loop.</p>
|
||||
<p>We use <a href="../experiments/cifar10.html"><code>CIFAR10Configs</code>
|
||||
</a> which defines all the dataset related configurations, optimizer, and a training loop.</p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">23</span><span class="k">class</span> <span class="nc">Configs</span><span class="p">(</span><span class="n">CIFAR10Configs</span><span class="p">):</span></pre></div>
|
||||
@ -110,6 +114,7 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
</div>
|
||||
<h3>VGG style model for CIFAR-10 classification</h3>
|
||||
<p>This derives from the <a href="../experiments/cifar10.html">generic VGG style architecture</a>.</p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">33</span><span class="k">class</span> <span class="nc">SmallModel</span><span class="p">(</span><span class="n">CIFAR10VGGModel</span><span class="p">):</span></pre></div>
|
||||
@ -120,7 +125,8 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<div class='section-link'>
|
||||
<a href='#section-4'>#</a>
|
||||
</div>
|
||||
<p>Create a convolution layer and the activations</p>
|
||||
<p> Create a convolution layer and the activations</p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">40</span> <span class="k">def</span> <span class="nf">conv_block</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">in_channels</span><span class="p">,</span> <span class="n">out_channels</span><span class="p">)</span> <span class="o">-></span> <span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">:</span></pre></div>
|
||||
@ -142,7 +148,8 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<div class='section-link'>
|
||||
<a href='#section-6'>#</a>
|
||||
</div>
|
||||
<p>Convolution layer</p>
|
||||
<p>Convolution layer </p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">46</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">out_channels</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="mi">1</span><span class="p">),</span></pre></div>
|
||||
@ -153,7 +160,8 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<div class='section-link'>
|
||||
<a href='#section-7'>#</a>
|
||||
</div>
|
||||
<p>Batch normalization</p>
|
||||
<p>Batch normalization </p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">48</span> <span class="n">BatchNorm</span><span class="p">(</span><span class="n">out_channels</span><span class="p">,</span> <span class="n">track_running_stats</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span></pre></div>
|
||||
@ -164,7 +172,8 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<div class='section-link'>
|
||||
<a href='#section-8'>#</a>
|
||||
</div>
|
||||
<p>ReLU activation</p>
|
||||
<p>ReLU activation </p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">50</span> <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">(</span><span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span>
|
||||
@ -187,7 +196,8 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<div class='section-link'>
|
||||
<a href='#section-10'>#</a>
|
||||
</div>
|
||||
<p>Create a model with given convolution sizes (channels)</p>
|
||||
<p>Create a model with given convolution sizes (channels) </p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">55</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">([[</span><span class="mi">32</span><span class="p">,</span> <span class="mi">32</span><span class="p">],</span> <span class="p">[</span><span class="mi">64</span><span class="p">,</span> <span class="mi">64</span><span class="p">],</span> <span class="p">[</span><span class="mi">128</span><span class="p">],</span> <span class="p">[</span><span class="mi">128</span><span class="p">],</span> <span class="p">[</span><span class="mi">128</span><span class="p">]])</span></pre></div>
|
||||
@ -199,6 +209,7 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<a href='#section-11'>#</a>
|
||||
</div>
|
||||
<h3>Create model</h3>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">58</span><span class="nd">@option</span><span class="p">(</span><span class="n">Configs</span><span class="o">.</span><span class="n">model</span><span class="p">)</span>
|
||||
@ -232,7 +243,8 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<div class='section-link'>
|
||||
<a href='#section-14'>#</a>
|
||||
</div>
|
||||
<p>Create experiment</p>
|
||||
<p>Create experiment </p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">68</span> <span class="n">experiment</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">'cifar10'</span><span class="p">,</span> <span class="n">comment</span><span class="o">=</span><span class="s1">'small model'</span><span class="p">)</span></pre></div>
|
||||
@ -243,7 +255,8 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<div class='section-link'>
|
||||
<a href='#section-15'>#</a>
|
||||
</div>
|
||||
<p>Create configurations</p>
|
||||
<p>Create configurations </p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">70</span> <span class="n">conf</span> <span class="o">=</span> <span class="n">Configs</span><span class="p">()</span></pre></div>
|
||||
@ -254,7 +267,8 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<div class='section-link'>
|
||||
<a href='#section-16'>#</a>
|
||||
</div>
|
||||
<p>Load configurations</p>
|
||||
<p>Load configurations </p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">72</span> <span class="n">experiment</span><span class="o">.</span><span class="n">configs</span><span class="p">(</span><span class="n">conf</span><span class="p">,</span> <span class="p">{</span>
|
||||
@ -269,7 +283,8 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<div class='section-link'>
|
||||
<a href='#section-17'>#</a>
|
||||
</div>
|
||||
<p>Set model for saving/loading</p>
|
||||
<p>Set model for saving/loading </p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">78</span> <span class="n">experiment</span><span class="o">.</span><span class="n">add_pytorch_models</span><span class="p">({</span><span class="s1">'model'</span><span class="p">:</span> <span class="n">conf</span><span class="o">.</span><span class="n">model</span><span class="p">})</span></pre></div>
|
||||
@ -280,7 +295,8 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<div class='section-link'>
|
||||
<a href='#section-18'>#</a>
|
||||
</div>
|
||||
<p>Print number of parameters in the model</p>
|
||||
<p>Print number of parameters in the model </p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">80</span> <span class="n">logger</span><span class="o">.</span><span class="n">inspect</span><span class="p">(</span><span class="n">params</span><span class="o">=</span><span class="p">(</span><span class="nb">sum</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">numel</span><span class="p">()</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">conf</span><span class="o">.</span><span class="n">model</span><span class="o">.</span><span class="n">parameters</span><span class="p">()</span> <span class="k">if</span> <span class="n">p</span><span class="o">.</span><span class="n">requires_grad</span><span class="p">)))</span></pre></div>
|
||||
@ -291,7 +307,8 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<div class='section-link'>
|
||||
<a href='#section-19'>#</a>
|
||||
</div>
|
||||
<p>Start the experiment and run the training loop</p>
|
||||
<p>Start the experiment and run the training loop </p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">82</span> <span class="k">with</span> <span class="n">experiment</span><span class="o">.</span><span class="n">start</span><span class="p">():</span>
|
||||
@ -303,7 +320,8 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<div class='section-link'>
|
||||
<a href='#section-20'>#</a>
|
||||
</div>
|
||||
|
||||
<p> </p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">87</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
@ -315,24 +333,6 @@ dataset related configurations, optimizer, and a training loop.</p>
|
||||
<a href="https://labml.ai">labml.ai</a>
|
||||
</div>
|
||||
</div>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.4/MathJax.js?config=TeX-AMS_HTML">
|
||||
</script>
|
||||
<!-- MathJax configuration -->
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
tex2jax: {
|
||||
inlineMath: [ ['$','$'] ],
|
||||
displayMath: [ ['$$','$$'] ],
|
||||
processEscapes: true,
|
||||
processEnvironments: true
|
||||
},
|
||||
// Center justify equations in code and markdown cells. Elsewhere
|
||||
// we use CSS to left justify single line equations in code cells.
|
||||
displayAlign: 'center',
|
||||
"HTML-CSS": { fonts: ["TeX"] }
|
||||
});
|
||||
|
||||
</script>
|
||||
<script>
|
||||
function handleImages() {
|
||||
var images = document.querySelectorAll('p>img')
|
||||
|
||||
Reference in New Issue
Block a user