mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-08-26 08:41:23 +08:00
RETRO (#110)
This commit is contained in:
@ -394,8 +394,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">112</span> <span class="n">experiment</span><span class="o">.</span><span class="n">configs</span><span class="p">(</span><span class="n">conf</span><span class="p">,</span> <span class="p">{</span>
|
||||
<span class="lineno">113</span> <span class="s1">'device.cuda_device'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">112</span> <span class="n">experiment</span><span class="o">.</span><span class="n">configs</span><span class="p">(</span><span class="n">conf</span><span class="p">,</span> <span class="p">{</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-26'>
|
||||
@ -407,7 +406,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">116</span> <span class="s1">'tokenizer'</span><span class="p">:</span> <span class="s1">'character'</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">115</span> <span class="s1">'tokenizer'</span><span class="p">:</span> <span class="s1">'character'</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-27'>
|
||||
@ -419,7 +418,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">118</span> <span class="s1">'prompt_separator'</span><span class="p">:</span> <span class="s1">''</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">117</span> <span class="s1">'prompt_separator'</span><span class="p">:</span> <span class="s1">''</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-28'>
|
||||
@ -431,7 +430,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">120</span> <span class="s1">'prompt'</span><span class="p">:</span> <span class="s1">'It is '</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">119</span> <span class="s1">'prompt'</span><span class="p">:</span> <span class="s1">'It is '</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-29'>
|
||||
@ -443,7 +442,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">122</span> <span class="s1">'text'</span><span class="p">:</span> <span class="s1">'tiny_shakespeare'</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">121</span> <span class="s1">'text'</span><span class="p">:</span> <span class="s1">'tiny_shakespeare'</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-30'>
|
||||
@ -455,7 +454,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">125</span> <span class="s1">'seq_len'</span><span class="p">:</span> <span class="mi">256</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">124</span> <span class="s1">'seq_len'</span><span class="p">:</span> <span class="mi">512</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-31'>
|
||||
@ -463,11 +462,11 @@
|
||||
<div class='section-link'>
|
||||
<a href='#section-31'>#</a>
|
||||
</div>
|
||||
<p>Train for <span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.64444em;vertical-align:0em;"></span><span class="mord">128</span></span></span></span> epochs </p>
|
||||
<p>Train for 32 epochs </p>
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">127</span> <span class="s1">'epochs'</span><span class="p">:</span> <span class="mi">128</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">126</span> <span class="s1">'epochs'</span><span class="p">:</span> <span class="mi">32</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-32'>
|
||||
@ -479,7 +478,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">129</span> <span class="s1">'batch_size'</span><span class="p">:</span> <span class="mi">32</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">128</span> <span class="s1">'batch_size'</span><span class="p">:</span> <span class="mi">16</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-33'>
|
||||
@ -491,7 +490,7 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">132</span> <span class="s1">'inner_iterations'</span><span class="p">:</span> <span class="mi">10</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">131</span> <span class="s1">'inner_iterations'</span><span class="p">:</span> <span class="mi">10</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-34'>
|
||||
@ -503,8 +502,9 @@
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">135</span> <span class="s1">'d_model'</span><span class="p">:</span> <span class="mi">128</span><span class="p">,</span>
|
||||
<span class="lineno">136</span> <span class="s1">'transformer.ffn.d_ff'</span><span class="p">:</span> <span class="mi">256</span><span class="p">,</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">134</span> <span class="s1">'d_model'</span><span class="p">:</span> <span class="mi">256</span><span class="p">,</span>
|
||||
<span class="lineno">135</span> <span class="s1">'transformer.n_heads'</span><span class="p">:</span> <span class="mi">16</span><span class="p">,</span>
|
||||
<span class="lineno">136</span> <span class="s1">'transformer.ffn.d_ff'</span><span class="p">:</span> <span class="mi">1024</span><span class="p">,</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-35'>
|
||||
|
Reference in New Issue
Block a user