mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-08-26 16:50:39 +08:00
cfr loop
This commit is contained in:
@ -1374,7 +1374,7 @@ T \color{orange}{R^T_i(I, a)} &=
|
||||
<p>Loop for <code>epochs</code> times</p>
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">683</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">monit</span><span class="o">.</span><span class="n">loop</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">epochs</span><span class="p">):</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">683</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">monit</span><span class="o">.</span><span class="n">iterate</span><span class="p">(</span><span class="s1">'Train'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">epochs</span><span class="p">):</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-82'>
|
||||
@ -1397,8 +1397,9 @@ T \color{orange}{R^T_i(I, a)} &=
|
||||
<p>Track data for analytics</p>
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">689</span> <span class="bp">self</span><span class="o">.</span><span class="n">tracker</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">info_sets</span><span class="p">)</span>
|
||||
<span class="lineno">690</span> <span class="n">tracker</span><span class="o">.</span><span class="n">save</span><span class="p">()</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">689</span> <span class="n">tracker</span><span class="o">.</span><span class="n">add_global_step</span><span class="p">()</span>
|
||||
<span class="lineno">690</span> <span class="bp">self</span><span class="o">.</span><span class="n">tracker</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">info_sets</span><span class="p">)</span>
|
||||
<span class="lineno">691</span> <span class="n">tracker</span><span class="o">.</span><span class="n">save</span><span class="p">()</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-84'>
|
||||
@ -1409,9 +1410,8 @@ T \color{orange}{R^T_i(I, a)} &=
|
||||
<p>Save checkpoints every $1,000$ iterations</p>
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">693</span> <span class="k">if</span> <span class="p">(</span><span class="n">t</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">%</span> <span class="mi">1_000</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="lineno">694</span> <span class="n">experiment</span><span class="o">.</span><span class="n">save_checkpoint</span><span class="p">()</span>
|
||||
<span class="lineno">695</span> <span class="n">tracker</span><span class="o">.</span><span class="n">new_line</span><span class="p">()</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">694</span> <span class="k">if</span> <span class="p">(</span><span class="n">t</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">%</span> <span class="mi">1_000</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="lineno">695</span> <span class="n">experiment</span><span class="o">.</span><span class="n">save_checkpoint</span><span class="p">()</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-85'>
|
||||
@ -1480,15 +1480,14 @@ T \color{orange}{R^T_i(I, a)} &=
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">719</span> <span class="k">with</span> <span class="n">monit</span><span class="o">.</span><span class="n">section</span><span class="p">(</span><span class="s2">"Track"</span><span class="p">):</span>
|
||||
<span class="lineno">720</span> <span class="k">for</span> <span class="n">I</span> <span class="ow">in</span> <span class="n">info_sets</span><span class="o">.</span><span class="n">values</span><span class="p">():</span>
|
||||
<span class="lineno">721</span> <span class="n">avg_strategy</span> <span class="o">=</span> <span class="n">I</span><span class="o">.</span><span class="n">get_average_strategy</span><span class="p">()</span>
|
||||
<span class="lineno">722</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">I</span><span class="o">.</span><span class="n">actions</span><span class="p">():</span>
|
||||
<span class="lineno">723</span> <span class="n">tracker</span><span class="o">.</span><span class="n">add</span><span class="p">({</span>
|
||||
<span class="lineno">724</span> <span class="sa">f</span><span class="s1">'strategy.</span><span class="si">{</span><span class="n">I</span><span class="o">.</span><span class="n">key</span><span class="si">}</span><span class="s1">.</span><span class="si">{</span><span class="n">a</span><span class="si">}</span><span class="s1">'</span><span class="p">:</span> <span class="n">I</span><span class="o">.</span><span class="n">strategy</span><span class="p">[</span><span class="n">a</span><span class="p">],</span>
|
||||
<span class="lineno">725</span> <span class="sa">f</span><span class="s1">'average_strategy.</span><span class="si">{</span><span class="n">I</span><span class="o">.</span><span class="n">key</span><span class="si">}</span><span class="s1">.</span><span class="si">{</span><span class="n">a</span><span class="si">}</span><span class="s1">'</span><span class="p">:</span> <span class="n">avg_strategy</span><span class="p">[</span><span class="n">a</span><span class="p">],</span>
|
||||
<span class="lineno">726</span> <span class="sa">f</span><span class="s1">'regret.</span><span class="si">{</span><span class="n">I</span><span class="o">.</span><span class="n">key</span><span class="si">}</span><span class="s1">.</span><span class="si">{</span><span class="n">a</span><span class="si">}</span><span class="s1">'</span><span class="p">:</span> <span class="n">I</span><span class="o">.</span><span class="n">regret</span><span class="p">[</span><span class="n">a</span><span class="p">],</span>
|
||||
<span class="lineno">727</span> <span class="p">})</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">719</span> <span class="k">for</span> <span class="n">I</span> <span class="ow">in</span> <span class="n">info_sets</span><span class="o">.</span><span class="n">values</span><span class="p">():</span>
|
||||
<span class="lineno">720</span> <span class="n">avg_strategy</span> <span class="o">=</span> <span class="n">I</span><span class="o">.</span><span class="n">get_average_strategy</span><span class="p">()</span>
|
||||
<span class="lineno">721</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">I</span><span class="o">.</span><span class="n">actions</span><span class="p">():</span>
|
||||
<span class="lineno">722</span> <span class="n">tracker</span><span class="o">.</span><span class="n">add</span><span class="p">({</span>
|
||||
<span class="lineno">723</span> <span class="sa">f</span><span class="s1">'strategy.</span><span class="si">{</span><span class="n">I</span><span class="o">.</span><span class="n">key</span><span class="si">}</span><span class="s1">.</span><span class="si">{</span><span class="n">a</span><span class="si">}</span><span class="s1">'</span><span class="p">:</span> <span class="n">I</span><span class="o">.</span><span class="n">strategy</span><span class="p">[</span><span class="n">a</span><span class="p">],</span>
|
||||
<span class="lineno">724</span> <span class="sa">f</span><span class="s1">'average_strategy.</span><span class="si">{</span><span class="n">I</span><span class="o">.</span><span class="n">key</span><span class="si">}</span><span class="s1">.</span><span class="si">{</span><span class="n">a</span><span class="si">}</span><span class="s1">'</span><span class="p">:</span> <span class="n">avg_strategy</span><span class="p">[</span><span class="n">a</span><span class="p">],</span>
|
||||
<span class="lineno">725</span> <span class="sa">f</span><span class="s1">'regret.</span><span class="si">{</span><span class="n">I</span><span class="o">.</span><span class="n">key</span><span class="si">}</span><span class="s1">.</span><span class="si">{</span><span class="n">a</span><span class="si">}</span><span class="s1">'</span><span class="p">:</span> <span class="n">I</span><span class="o">.</span><span class="n">regret</span><span class="p">[</span><span class="n">a</span><span class="p">],</span>
|
||||
<span class="lineno">726</span> <span class="p">})</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-91'>
|
||||
@ -1499,7 +1498,7 @@ T \color{orange}{R^T_i(I, a)} &=
|
||||
<h3>Configurable CFR module</h3>
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">730</span><span class="k">class</span> <span class="nc">CFRConfigs</span><span class="p">(</span><span class="n">BaseConfigs</span><span class="p">):</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">729</span><span class="k">class</span> <span class="nc">CFRConfigs</span><span class="p">(</span><span class="n">BaseConfigs</span><span class="p">):</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-92'>
|
||||
@ -1510,9 +1509,9 @@ T \color{orange}{R^T_i(I, a)} &=
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">734</span> <span class="n">create_new_history</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[],</span> <span class="n">History</span><span class="p">]</span>
|
||||
<span class="lineno">735</span> <span class="n">epochs</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1_00_000</span>
|
||||
<span class="lineno">736</span> <span class="n">cfr</span><span class="p">:</span> <span class="n">CFR</span> <span class="o">=</span> <span class="s1">'simple_cfr'</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">733</span> <span class="n">create_new_history</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[],</span> <span class="n">History</span><span class="p">]</span>
|
||||
<span class="lineno">734</span> <span class="n">epochs</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1_00_000</span>
|
||||
<span class="lineno">735</span> <span class="n">cfr</span><span class="p">:</span> <span class="n">CFR</span> <span class="o">=</span> <span class="s1">'simple_cfr'</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-93'>
|
||||
@ -1523,8 +1522,8 @@ T \color{orange}{R^T_i(I, a)} &=
|
||||
<p>Initialize <strong>CFR</strong> algorithm</p>
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">739</span><span class="nd">@option</span><span class="p">(</span><span class="n">CFRConfigs</span><span class="o">.</span><span class="n">cfr</span><span class="p">)</span>
|
||||
<span class="lineno">740</span><span class="k">def</span> <span class="nf">simple_cfr</span><span class="p">(</span><span class="n">c</span><span class="p">:</span> <span class="n">CFRConfigs</span><span class="p">):</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">738</span><span class="nd">@option</span><span class="p">(</span><span class="n">CFRConfigs</span><span class="o">.</span><span class="n">cfr</span><span class="p">)</span>
|
||||
<span class="lineno">739</span><span class="k">def</span> <span class="nf">simple_cfr</span><span class="p">(</span><span class="n">c</span><span class="p">:</span> <span class="n">CFRConfigs</span><span class="p">):</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-94'>
|
||||
@ -1535,8 +1534,8 @@ T \color{orange}{R^T_i(I, a)} &=
|
||||
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">744</span> <span class="k">return</span> <span class="n">CFR</span><span class="p">(</span><span class="n">create_new_history</span><span class="o">=</span><span class="n">c</span><span class="o">.</span><span class="n">create_new_history</span><span class="p">,</span>
|
||||
<span class="lineno">745</span> <span class="n">epochs</span><span class="o">=</span><span class="n">c</span><span class="o">.</span><span class="n">epochs</span><span class="p">)</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">743</span> <span class="k">return</span> <span class="n">CFR</span><span class="p">(</span><span class="n">create_new_history</span><span class="o">=</span><span class="n">c</span><span class="o">.</span><span class="n">create_new_history</span><span class="p">,</span>
|
||||
<span class="lineno">744</span> <span class="n">epochs</span><span class="o">=</span><span class="n">c</span><span class="o">.</span><span class="n">epochs</span><span class="p">)</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -721,7 +721,7 @@ other destinations such as Tensorboard can be relatively time consuming.
|
||||
SQLite is enough for our analytics.</p>
|
||||
</div>
|
||||
<div class='code'>
|
||||
<div class="highlight"><pre><span class="lineno">235</span> <span class="n">experiment</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">'kuhn_poker'</span><span class="p">,</span> <span class="n">writers</span><span class="o">=</span><span class="p">{</span><span class="s1">'sqlite'</span><span class="p">,</span> <span class="s1">'screen'</span><span class="p">})</span></pre></div>
|
||||
<div class="highlight"><pre><span class="lineno">235</span> <span class="n">experiment</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">'kuhn_poker'</span><span class="p">,</span> <span class="n">writers</span><span class="o">=</span><span class="p">{</span><span class="s1">'sqlite'</span><span class="p">})</span></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='section' id='section-55'>
|
||||
|
@ -106,21 +106,21 @@
|
||||
|
||||
<url>
|
||||
<loc>https://nn.labml.ai/gan/stylegan/index.html</loc>
|
||||
<lastmod>2021-06-19T16:30:00+00:00</lastmod>
|
||||
<lastmod>2021-06-21T16:30:00+00:00</lastmod>
|
||||
<priority>1.00</priority>
|
||||
</url>
|
||||
|
||||
|
||||
<url>
|
||||
<loc>https://nn.labml.ai/gan/stylegan/readme.html</loc>
|
||||
<lastmod>2021-06-19T16:30:00+00:00</lastmod>
|
||||
<lastmod>2021-06-21T16:30:00+00:00</lastmod>
|
||||
<priority>1.00</priority>
|
||||
</url>
|
||||
|
||||
|
||||
<url>
|
||||
<loc>https://nn.labml.ai/gan/stylegan/experiment.html</loc>
|
||||
<lastmod>2021-06-19T16:30:00+00:00</lastmod>
|
||||
<lastmod>2021-06-21T16:30:00+00:00</lastmod>
|
||||
<priority>1.00</priority>
|
||||
</url>
|
||||
|
||||
@ -148,7 +148,7 @@
|
||||
|
||||
<url>
|
||||
<loc>https://nn.labml.ai/gan/index.html</loc>
|
||||
<lastmod>2021-06-19T16:30:00+00:00</lastmod>
|
||||
<lastmod>2021-06-21T16:30:00+00:00</lastmod>
|
||||
<priority>1.00</priority>
|
||||
</url>
|
||||
|
||||
@ -456,7 +456,7 @@
|
||||
|
||||
<url>
|
||||
<loc>https://nn.labml.ai/index.html</loc>
|
||||
<lastmod>2021-06-19T16:30:00+00:00</lastmod>
|
||||
<lastmod>2021-06-21T16:30:00+00:00</lastmod>
|
||||
<priority>1.00</priority>
|
||||
</url>
|
||||
|
||||
@ -477,7 +477,7 @@
|
||||
|
||||
<url>
|
||||
<loc>https://nn.labml.ai/cfr/infoset_saver.html</loc>
|
||||
<lastmod>2021-06-18T16:30:00+00:00</lastmod>
|
||||
<lastmod>2021-06-21T16:30:00+00:00</lastmod>
|
||||
<priority>1.00</priority>
|
||||
</url>
|
||||
|
||||
@ -505,7 +505,7 @@
|
||||
|
||||
<url>
|
||||
<loc>https://nn.labml.ai/cfr/analytics.html</loc>
|
||||
<lastmod>2021-06-18T16:30:00+00:00</lastmod>
|
||||
<lastmod>2021-06-21T16:30:00+00:00</lastmod>
|
||||
<priority>1.00</priority>
|
||||
</url>
|
||||
|
||||
@ -1002,7 +1002,7 @@
|
||||
|
||||
<url>
|
||||
<loc>https://nn.labml.ai/transformers/fnet/index.html</loc>
|
||||
<lastmod>2021-06-19T16:30:00+00:00</lastmod>
|
||||
<lastmod>2021-06-21T16:30:00+00:00</lastmod>
|
||||
<priority>1.00</priority>
|
||||
</url>
|
||||
|
||||
|
Reference in New Issue
Block a user