mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-11-01 12:01:45 +08:00
docs
This commit is contained in:
@ -82,10 +82,9 @@
|
||||
<span class="lineno">17</span>
|
||||
<span class="lineno">18</span><span class="kn">from</span> <span class="nn">labml</span> <span class="kn">import</span> <span class="n">experiment</span>
|
||||
<span class="lineno">19</span><span class="kn">from</span> <span class="nn">labml.configs</span> <span class="kn">import</span> <span class="n">option</span>
|
||||
<span class="lineno">20</span><span class="kn">from</span> <span class="nn">labml_helpers.module</span> <span class="kn">import</span> <span class="n">Module</span>
|
||||
<span class="lineno">21</span><span class="kn">from</span> <span class="nn">labml_nn.experiments.nlp_classification</span> <span class="kn">import</span> <span class="n">NLPClassificationConfigs</span>
|
||||
<span class="lineno">22</span><span class="kn">from</span> <span class="nn">labml_nn.transformers</span> <span class="kn">import</span> <span class="n">Encoder</span>
|
||||
<span class="lineno">23</span><span class="kn">from</span> <span class="nn">labml_nn.transformers</span> <span class="kn">import</span> <span class="n">TransformerConfigs</span></pre></div>
|
||||
<span class="lineno">20</span><span class="kn">from</span> <span class="nn">labml_nn.experiments.nlp_classification</span> <span class="kn">import</span> <span class="n">NLPClassificationConfigs</span>
|
||||
<span class="lineno">21</span><span class="kn">from</span> <span class="nn">labml_nn.transformers</span> <span class="kn">import</span> <span class="n">Encoder</span>
|
||||
<span class="lineno">22</span><span class="kn">from</span> <span class="nn">labml_nn.transformers</span> <span class="kn">import</span> <span class="n">TransformerConfigs</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-1'>
|
||||
@ -97,7 +96,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">26</span><span class="k">class</span> <span class="nc">TransformerClassifier</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">25</span><span class="k">class</span> <span class="nc">TransformerClassifier</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-2'>
|
||||
@ -114,7 +113,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">30</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">encoder</span><span class="p">:</span> <span class="n">Encoder</span><span class="p">,</span> <span class="n">src_embed</span><span class="p">:</span> <span class="n">Module</span><span class="p">,</span> <span class="n">generator</span><span class="p">:</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">):</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">29</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">encoder</span><span class="p">:</span> <span class="n">Encoder</span><span class="p">,</span> <span class="n">src_embed</span><span class="p">:</span> <span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">,</span> <span class="n">generator</span><span class="p">:</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">):</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-3'>
|
||||
@ -125,10 +124,10 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">37</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||||
<span class="lineno">38</span> <span class="bp">self</span><span class="o">.</span><span class="n">src_embed</span> <span class="o">=</span> <span class="n">src_embed</span>
|
||||
<span class="lineno">39</span> <span class="bp">self</span><span class="o">.</span><span class="n">encoder</span> <span class="o">=</span> <span class="n">encoder</span>
|
||||
<span class="lineno">40</span> <span class="bp">self</span><span class="o">.</span><span class="n">generator</span> <span class="o">=</span> <span class="n">generator</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">36</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||||
<span class="lineno">37</span> <span class="bp">self</span><span class="o">.</span><span class="n">src_embed</span> <span class="o">=</span> <span class="n">src_embed</span>
|
||||
<span class="lineno">38</span> <span class="bp">self</span><span class="o">.</span><span class="n">encoder</span> <span class="o">=</span> <span class="n">encoder</span>
|
||||
<span class="lineno">39</span> <span class="bp">self</span><span class="o">.</span><span class="n">generator</span> <span class="o">=</span> <span class="n">generator</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-4'>
|
||||
@ -139,7 +138,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">42</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">41</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-5'>
|
||||
@ -151,7 +150,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">44</span> <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">src_embed</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">43</span> <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">src_embed</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-6'>
|
||||
@ -163,7 +162,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">46</span> <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">encoder</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">45</span> <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">encoder</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-7'>
|
||||
@ -180,7 +179,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">52</span> <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">generator</span><span class="p">(</span><span class="n">x</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">51</span> <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">generator</span><span class="p">(</span><span class="n">x</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-8'>
|
||||
@ -192,7 +191,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">56</span> <span class="k">return</span> <span class="n">x</span><span class="p">,</span> <span class="kc">None</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">55</span> <span class="k">return</span> <span class="n">x</span><span class="p">,</span> <span class="kc">None</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-9'>
|
||||
@ -206,7 +205,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">59</span><span class="k">class</span> <span class="nc">Configs</span><span class="p">(</span><span class="n">NLPClassificationConfigs</span><span class="p">):</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">58</span><span class="k">class</span> <span class="nc">Configs</span><span class="p">(</span><span class="n">NLPClassificationConfigs</span><span class="p">):</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-10'>
|
||||
@ -218,7 +217,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">68</span> <span class="n">model</span><span class="p">:</span> <span class="n">TransformerClassifier</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">67</span> <span class="n">model</span><span class="p">:</span> <span class="n">TransformerClassifier</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-11'>
|
||||
@ -230,7 +229,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">70</span> <span class="n">transformer</span><span class="p">:</span> <span class="n">TransformerConfigs</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">69</span> <span class="n">transformer</span><span class="p">:</span> <span class="n">TransformerConfigs</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-12'>
|
||||
@ -242,8 +241,8 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">73</span><span class="nd">@option</span><span class="p">(</span><span class="n">Configs</span><span class="o">.</span><span class="n">transformer</span><span class="p">)</span>
|
||||
<span class="lineno">74</span><span class="k">def</span> <span class="nf">_transformer_configs</span><span class="p">(</span><span class="n">c</span><span class="p">:</span> <span class="n">Configs</span><span class="p">):</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">72</span><span class="nd">@option</span><span class="p">(</span><span class="n">Configs</span><span class="o">.</span><span class="n">transformer</span><span class="p">)</span>
|
||||
<span class="lineno">73</span><span class="k">def</span> <span class="nf">_transformer_configs</span><span class="p">(</span><span class="n">c</span><span class="p">:</span> <span class="n">Configs</span><span class="p">):</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-13'>
|
||||
@ -255,7 +254,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">81</span> <span class="n">conf</span> <span class="o">=</span> <span class="n">TransformerConfigs</span><span class="p">()</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">80</span> <span class="n">conf</span> <span class="o">=</span> <span class="n">TransformerConfigs</span><span class="p">()</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-14'>
|
||||
@ -267,8 +266,8 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">83</span> <span class="n">conf</span><span class="o">.</span><span class="n">n_src_vocab</span> <span class="o">=</span> <span class="n">c</span><span class="o">.</span><span class="n">n_tokens</span>
|
||||
<span class="lineno">84</span> <span class="n">conf</span><span class="o">.</span><span class="n">n_tgt_vocab</span> <span class="o">=</span> <span class="n">c</span><span class="o">.</span><span class="n">n_tokens</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">82</span> <span class="n">conf</span><span class="o">.</span><span class="n">n_src_vocab</span> <span class="o">=</span> <span class="n">c</span><span class="o">.</span><span class="n">n_tokens</span>
|
||||
<span class="lineno">83</span> <span class="n">conf</span><span class="o">.</span><span class="n">n_tgt_vocab</span> <span class="o">=</span> <span class="n">c</span><span class="o">.</span><span class="n">n_tokens</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-15'>
|
||||
@ -280,7 +279,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">87</span> <span class="k">return</span> <span class="n">conf</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">86</span> <span class="k">return</span> <span class="n">conf</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-16'>
|
||||
@ -293,8 +292,8 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">90</span><span class="nd">@option</span><span class="p">(</span><span class="n">TransformerConfigs</span><span class="o">.</span><span class="n">encoder_attn</span><span class="p">)</span>
|
||||
<span class="lineno">91</span><span class="k">def</span> <span class="nf">fnet_mix</span><span class="p">():</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">89</span><span class="nd">@option</span><span class="p">(</span><span class="n">TransformerConfigs</span><span class="o">.</span><span class="n">encoder_attn</span><span class="p">)</span>
|
||||
<span class="lineno">90</span><span class="k">def</span> <span class="nf">fnet_mix</span><span class="p">():</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-17'>
|
||||
@ -305,8 +304,8 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">97</span> <span class="kn">from</span> <span class="nn">labml_nn.transformers.fnet</span> <span class="kn">import</span> <span class="n">FNetMix</span>
|
||||
<span class="lineno">98</span> <span class="k">return</span> <span class="n">FNetMix</span><span class="p">()</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">96</span> <span class="kn">from</span> <span class="nn">labml_nn.transformers.fnet</span> <span class="kn">import</span> <span class="n">FNetMix</span>
|
||||
<span class="lineno">97</span> <span class="k">return</span> <span class="n">FNetMix</span><span class="p">()</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-18'>
|
||||
@ -318,8 +317,8 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">101</span><span class="nd">@option</span><span class="p">(</span><span class="n">Configs</span><span class="o">.</span><span class="n">model</span><span class="p">)</span>
|
||||
<span class="lineno">102</span><span class="k">def</span> <span class="nf">_model</span><span class="p">(</span><span class="n">c</span><span class="p">:</span> <span class="n">Configs</span><span class="p">):</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">100</span><span class="nd">@option</span><span class="p">(</span><span class="n">Configs</span><span class="o">.</span><span class="n">model</span><span class="p">)</span>
|
||||
<span class="lineno">101</span><span class="k">def</span> <span class="nf">_model</span><span class="p">(</span><span class="n">c</span><span class="p">:</span> <span class="n">Configs</span><span class="p">):</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-19'>
|
||||
@ -330,11 +329,11 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">106</span> <span class="n">m</span> <span class="o">=</span> <span class="n">TransformerClassifier</span><span class="p">(</span><span class="n">c</span><span class="o">.</span><span class="n">transformer</span><span class="o">.</span><span class="n">encoder</span><span class="p">,</span>
|
||||
<span class="lineno">107</span> <span class="n">c</span><span class="o">.</span><span class="n">transformer</span><span class="o">.</span><span class="n">src_embed</span><span class="p">,</span>
|
||||
<span class="lineno">108</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">c</span><span class="o">.</span><span class="n">d_model</span><span class="p">,</span> <span class="n">c</span><span class="o">.</span><span class="n">n_classes</span><span class="p">))</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">c</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
|
||||
<span class="lineno">109</span>
|
||||
<span class="lineno">110</span> <span class="k">return</span> <span class="n">m</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">105</span> <span class="n">m</span> <span class="o">=</span> <span class="n">TransformerClassifier</span><span class="p">(</span><span class="n">c</span><span class="o">.</span><span class="n">transformer</span><span class="o">.</span><span class="n">encoder</span><span class="p">,</span>
|
||||
<span class="lineno">106</span> <span class="n">c</span><span class="o">.</span><span class="n">transformer</span><span class="o">.</span><span class="n">src_embed</span><span class="p">,</span>
|
||||
<span class="lineno">107</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">c</span><span class="o">.</span><span class="n">d_model</span><span class="p">,</span> <span class="n">c</span><span class="o">.</span><span class="n">n_classes</span><span class="p">))</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">c</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
|
||||
<span class="lineno">108</span>
|
||||
<span class="lineno">109</span> <span class="k">return</span> <span class="n">m</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-20'>
|
||||
@ -345,7 +344,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">113</span><span class="k">def</span> <span class="nf">main</span><span class="p">():</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">112</span><span class="k">def</span> <span class="nf">main</span><span class="p">():</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-21'>
|
||||
@ -357,7 +356,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">115</span> <span class="n">experiment</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">"fnet"</span><span class="p">)</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">114</span> <span class="n">experiment</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">"fnet"</span><span class="p">)</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-22'>
|
||||
@ -369,7 +368,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">117</span> <span class="n">conf</span> <span class="o">=</span> <span class="n">Configs</span><span class="p">()</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">116</span> <span class="n">conf</span> <span class="o">=</span> <span class="n">Configs</span><span class="p">()</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-23'>
|
||||
@ -381,7 +380,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">119</span> <span class="n">experiment</span><span class="o">.</span><span class="n">configs</span><span class="p">(</span><span class="n">conf</span><span class="p">,</span> <span class="p">{</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">118</span> <span class="n">experiment</span><span class="o">.</span><span class="n">configs</span><span class="p">(</span><span class="n">conf</span><span class="p">,</span> <span class="p">{</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-24'>
|
||||
@ -393,7 +392,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">121</span> <span class="s1">'tokenizer'</span><span class="p">:</span> <span class="s1">'basic_english'</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">120</span> <span class="s1">'tokenizer'</span><span class="p">:</span> <span class="s1">'basic_english'</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-25'>
|
||||
@ -405,7 +404,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">124</span> <span class="s1">'epochs'</span><span class="p">:</span> <span class="mi">32</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">123</span> <span class="s1">'epochs'</span><span class="p">:</span> <span class="mi">32</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-26'>
|
||||
@ -417,7 +416,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">127</span> <span class="s1">'inner_iterations'</span><span class="p">:</span> <span class="mi">10</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">126</span> <span class="s1">'inner_iterations'</span><span class="p">:</span> <span class="mi">10</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-27'>
|
||||
@ -429,10 +428,10 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">130</span> <span class="s1">'transformer.d_model'</span><span class="p">:</span> <span class="mi">512</span><span class="p">,</span>
|
||||
<span class="lineno">131</span> <span class="s1">'transformer.ffn.d_ff'</span><span class="p">:</span> <span class="mi">2048</span><span class="p">,</span>
|
||||
<span class="lineno">132</span> <span class="s1">'transformer.n_heads'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span>
|
||||
<span class="lineno">133</span> <span class="s1">'transformer.n_layers'</span><span class="p">:</span> <span class="mi">6</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">129</span> <span class="s1">'transformer.d_model'</span><span class="p">:</span> <span class="mi">512</span><span class="p">,</span>
|
||||
<span class="lineno">130</span> <span class="s1">'transformer.ffn.d_ff'</span><span class="p">:</span> <span class="mi">2048</span><span class="p">,</span>
|
||||
<span class="lineno">131</span> <span class="s1">'transformer.n_heads'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span>
|
||||
<span class="lineno">132</span> <span class="s1">'transformer.n_layers'</span><span class="p">:</span> <span class="mi">6</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-28'>
|
||||
@ -444,7 +443,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">137</span> <span class="s1">'transformer.encoder_attn'</span><span class="p">:</span> <span class="s1">'fnet_mix'</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">136</span> <span class="s1">'transformer.encoder_attn'</span><span class="p">:</span> <span class="s1">'fnet_mix'</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-29'>
|
||||
@ -456,9 +455,9 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">140</span> <span class="s1">'optimizer.optimizer'</span><span class="p">:</span> <span class="s1">'Noam'</span><span class="p">,</span>
|
||||
<span class="lineno">141</span> <span class="s1">'optimizer.learning_rate'</span><span class="p">:</span> <span class="mf">1.</span><span class="p">,</span>
|
||||
<span class="lineno">142</span> <span class="p">})</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">139</span> <span class="s1">'optimizer.optimizer'</span><span class="p">:</span> <span class="s1">'Noam'</span><span class="p">,</span>
|
||||
<span class="lineno">140</span> <span class="s1">'optimizer.learning_rate'</span><span class="p">:</span> <span class="mf">1.</span><span class="p">,</span>
|
||||
<span class="lineno">141</span> <span class="p">})</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-30'>
|
||||
@ -470,7 +469,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">145</span> <span class="n">experiment</span><span class="o">.</span><span class="n">add_pytorch_models</span><span class="p">({</span><span class="s1">'model'</span><span class="p">:</span> <span class="n">conf</span><span class="o">.</span><span class="n">model</span><span class="p">})</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">144</span> <span class="n">experiment</span><span class="o">.</span><span class="n">add_pytorch_models</span><span class="p">({</span><span class="s1">'model'</span><span class="p">:</span> <span class="n">conf</span><span class="o">.</span><span class="n">model</span><span class="p">})</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-31'>
|
||||
@ -482,7 +481,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">148</span> <span class="k">with</span> <span class="n">experiment</span><span class="o">.</span><span class="n">start</span><span class="p">():</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">147</span> <span class="k">with</span> <span class="n">experiment</span><span class="o">.</span><span class="n">start</span><span class="p">():</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-32'>
|
||||
@ -494,7 +493,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">150</span> <span class="n">conf</span><span class="o">.</span><span class="n">run</span><span class="p">()</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">149</span> <span class="n">conf</span><span class="o">.</span><span class="n">run</span><span class="p">()</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-33'>
|
||||
@ -506,8 +505,8 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">154</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="lineno">155</span> <span class="n">main</span><span class="p">()</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">153</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="lineno">154</span> <span class="n">main</span><span class="p">()</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='footer'>
|
||||
|
||||
Reference in New Issue
Block a user