mirror of
				https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
				synced 2025-11-01 03:43:09 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			340 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			340 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
| <!DOCTYPE html>
 | |
| <html>
 | |
| <head>
 | |
|     <meta http-equiv="content-type" content="text/html;charset=utf-8"/>
 | |
|     <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
 | |
|     <meta name="description" content="Implementation of neural network model for Deep Q Network (DQN)."/>
 | |
| 
 | |
|     <meta name="twitter:card" content="summary"/>
 | |
|     <meta name="twitter:image:src" content="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4"/>
 | |
|     <meta name="twitter:title" content="Deep Q Network (DQN) Model"/>
 | |
|     <meta name="twitter:description" content="Implementation of neural network model for Deep Q Network (DQN)."/>
 | |
|     <meta name="twitter:site" content="@labmlai"/>
 | |
|     <meta name="twitter:creator" content="@labmlai"/>
 | |
| 
 | |
|     <meta property="og:url" content="https://nn.labml.ai/rl/dqn/model.html"/>
 | |
|     <meta property="og:title" content="Deep Q Network (DQN) Model"/>
 | |
|     <meta property="og:image" content="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4"/>
 | |
|     <meta property="og:site_name" content="LabML Neural Networks"/>
 | |
|     <meta property="og:type" content="object"/>
 | |
|     <meta property="og:title" content="Deep Q Network (DQN) Model"/>
 | |
|     <meta property="og:description" content="Implementation of neural network model for Deep Q Network (DQN)."/>
 | |
| 
 | |
|     <title>Deep Q Network (DQN) Model</title>
 | |
|     <link rel="shortcut icon" href="/icon.png"/>
 | |
|     <link rel="stylesheet" href="../../pylit.css">
 | |
|     <link rel="canonical" href="https://nn.labml.ai/rl/dqn/model.html"/>
 | |
|     <!-- Global site tag (gtag.js) - Google Analytics -->
 | |
|     <script async src="https://www.googletagmanager.com/gtag/js?id=G-4V3HC8HBLH"></script>
 | |
|     <script>
 | |
|         window.dataLayer = window.dataLayer || [];
 | |
| 
 | |
|         function gtag() {
 | |
|             dataLayer.push(arguments);
 | |
|         }
 | |
| 
 | |
|         gtag('js', new Date());
 | |
| 
 | |
|         gtag('config', 'G-4V3HC8HBLH');
 | |
|     </script>
 | |
| </head>
 | |
| <body>
 | |
| <div id='container'>
 | |
|     <div id="background"></div>
 | |
|     <div class='section'>
 | |
|         <div class='docs'>
 | |
|             <p>
 | |
|                 <a class="parent" href="/">home</a>
 | |
|                 <a class="parent" href="../index.html">rl</a>
 | |
|                 <a class="parent" href="index.html">dqn</a>
 | |
|             </p>
 | |
|             <p>
 | |
| 
 | |
|                 <a href="https://github.com/lab-ml/labml_nn/tree/master/labml_nn/rl/dqn/model.py">
 | |
|                     <img alt="Github"
 | |
|                          src="https://img.shields.io/github/stars/lab-ml/nn?style=social"
 | |
|                          style="max-width:100%;"/></a>
 | |
|                 <a href="https://join.slack.com/t/labforml/shared_invite/zt-egj9zvq9-Dl3hhZqobexgT7aVKnD14g/"
 | |
|                    rel="nofollow">
 | |
|                     <img alt="Join Slack"
 | |
|                          src="https://img.shields.io/badge/slack-chat-green.svg?logo=slack"
 | |
|                          style="max-width:100%;"/></a>
 | |
|                 <a href="https://twitter.com/labmlai"
 | |
|                    rel="nofollow">
 | |
|                     <img alt="Twitter"
 | |
|                          src="https://img.shields.io/twitter/follow/labmlai?style=social"
 | |
|                          style="max-width:100%;"/></a>
 | |
|             </p>
 | |
|         </div>
 | |
|     </div>
 | |
|     <div class='section' id='section-0'>
 | |
|         <div class='docs doc-strings'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-0'>#</a>
 | |
|                 </div>
 | |
|                 <h1>Deep Q Network (DQN) Model</h1>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">10</span><span></span><span class="kn">import</span> <span class="nn">torch</span>
 | |
| <span class="lineno">11</span><span class="kn">from</span> <span class="nn">torch</span> <span class="kn">import</span> <span class="n">nn</span>
 | |
| <span class="lineno">12</span>
 | |
| <span class="lineno">13</span><span class="kn">from</span> <span class="nn">labml_helpers.module</span> <span class="kn">import</span> <span class="n">Module</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-1'>
 | |
|         <div class='docs doc-strings'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-1'>#</a>
 | |
|                 </div>
 | |
|                 <h2>Dueling Network ⚔️ Model for $Q$ Values</h2>
 | |
| <p>We are using a <a href="https://arxiv.org/abs/1511.06581">dueling network</a>
 | |
|  to calculate Q-values.
 | |
| The intuition behind the dueling network architecture is that in most states
 | |
|  the action doesn’t matter,
 | |
| and in some states the action is significant. The dueling network allows
 | |
|  this to be represented very well.</p>
 | |
| <p>
 | |
| <script type="math/tex; mode=display">\begin{align}
 | |
|     Q^\pi(s,a) &= V^\pi(s) + A^\pi(s, a)
 | |
|     \\
 | |
|     \mathop{\mathbb{E}}_{a \sim \pi(s)}
 | |
|      \Big[
 | |
|       A^\pi(s, a)
 | |
|      \Big]
 | |
|     &= 0
 | |
| \end{align}</script>
 | |
| </p>
 | |
| <p>So we create two networks for $V$ and $A$ and get $Q$ from them.
 | |
| <script type="math/tex; mode=display">
 | |
|     Q(s, a) = V(s) +
 | |
|     \Big(
 | |
|         A(s, a) - \frac{1}{|\mathcal{A}|} \sum_{a' \in \mathcal{A}} A(s, a')
 | |
|     \Big)
 | |
| </script>
 | |
| We share the initial layers of the $V$ and $A$ networks.</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">16</span><span class="k">class</span> <span class="nc">Model</span><span class="p">(</span><span class="n">Module</span><span class="p">):</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-2'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-2'>#</a>
 | |
|                 </div>
 | |
|                 
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">47</span>    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
 | |
| <span class="lineno">48</span>        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
 | |
| <span class="lineno">49</span>        <span class="bp">self</span><span class="o">.</span><span class="n">conv</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-3'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-3'>#</a>
 | |
|                 </div>
 | |
|                 <p>The first convolution layer takes an
 | |
| $84\times84$ frame and produces a $20\times20$ frame</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">52</span>            <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">out_channels</span><span class="o">=</span><span class="mi">32</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">stride</span><span class="o">=</span><span class="mi">4</span><span class="p">),</span>
 | |
| <span class="lineno">53</span>            <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">(),</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-4'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-4'>#</a>
 | |
|                 </div>
 | |
|                 <p>The second convolution layer takes a
 | |
| $20\times20$ frame and produces a $9\times9$ frame</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">57</span>            <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="o">=</span><span class="mi">32</span><span class="p">,</span> <span class="n">out_channels</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">stride</span><span class="o">=</span><span class="mi">2</span><span class="p">),</span>
 | |
| <span class="lineno">58</span>            <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">(),</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-5'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-5'>#</a>
 | |
|                 </div>
 | |
|                 <p>The third convolution layer takes a
 | |
| $9\times9$ frame and produces a $7\times7$ frame</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">62</span>            <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span> <span class="n">out_channels</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">stride</span><span class="o">=</span><span class="mi">1</span><span class="p">),</span>
 | |
| <span class="lineno">63</span>            <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">(),</span>
 | |
| <span class="lineno">64</span>        <span class="p">)</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-6'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-6'>#</a>
 | |
|                 </div>
 | |
|                 <p>A fully connected layer takes the flattened
 | |
| frame from the third convolution layer, and outputs
 | |
| $512$ features</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">69</span>        <span class="bp">self</span><span class="o">.</span><span class="n">lin</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">in_features</span><span class="o">=</span><span class="mi">7</span> <span class="o">*</span> <span class="mi">7</span> <span class="o">*</span> <span class="mi">64</span><span class="p">,</span> <span class="n">out_features</span><span class="o">=</span><span class="mi">512</span><span class="p">)</span>
 | |
| <span class="lineno">70</span>        <span class="bp">self</span><span class="o">.</span><span class="n">activation</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">()</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-7'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-7'>#</a>
 | |
|                 </div>
 | |
|                 <p>This head gives the state value $V$</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">73</span>        <span class="bp">self</span><span class="o">.</span><span class="n">state_value</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span>
 | |
| <span class="lineno">74</span>            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">in_features</span><span class="o">=</span><span class="mi">512</span><span class="p">,</span> <span class="n">out_features</span><span class="o">=</span><span class="mi">256</span><span class="p">),</span>
 | |
| <span class="lineno">75</span>            <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">(),</span>
 | |
| <span class="lineno">76</span>            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">in_features</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span> <span class="n">out_features</span><span class="o">=</span><span class="mi">1</span><span class="p">),</span>
 | |
| <span class="lineno">77</span>        <span class="p">)</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-8'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-8'>#</a>
 | |
|                 </div>
 | |
|                 <p>This head gives the advantage value $A$</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">79</span>        <span class="bp">self</span><span class="o">.</span><span class="n">action_value</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span>
 | |
| <span class="lineno">80</span>            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">in_features</span><span class="o">=</span><span class="mi">512</span><span class="p">,</span> <span class="n">out_features</span><span class="o">=</span><span class="mi">256</span><span class="p">),</span>
 | |
| <span class="lineno">81</span>            <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">(),</span>
 | |
| <span class="lineno">82</span>            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">in_features</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span> <span class="n">out_features</span><span class="o">=</span><span class="mi">4</span><span class="p">),</span>
 | |
| <span class="lineno">83</span>        <span class="p">)</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-9'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-9'>#</a>
 | |
|                 </div>
 | |
|                 
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">85</span>    <span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obs</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-10'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-10'>#</a>
 | |
|                 </div>
 | |
|                 <p>Convolution</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">87</span>        <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">conv</span><span class="p">(</span><span class="n">obs</span><span class="p">)</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-11'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-11'>#</a>
 | |
|                 </div>
 | |
|                 <p>Reshape for linear layers</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">89</span>        <span class="n">h</span> <span class="o">=</span> <span class="n">h</span><span class="o">.</span><span class="n">reshape</span><span class="p">((</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">7</span> <span class="o">*</span> <span class="mi">7</span> <span class="o">*</span> <span class="mi">64</span><span class="p">))</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-12'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-12'>#</a>
 | |
|                 </div>
 | |
|                 <p>Linear layer</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">92</span>        <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">activation</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lin</span><span class="p">(</span><span class="n">h</span><span class="p">))</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-13'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-13'>#</a>
 | |
|                 </div>
 | |
|                 <p>$A$</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">95</span>        <span class="n">action_value</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">action_value</span><span class="p">(</span><span class="n">h</span><span class="p">)</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-14'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-14'>#</a>
 | |
|                 </div>
 | |
|                 <p>$V$</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">97</span>        <span class="n">state_value</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">state_value</span><span class="p">(</span><span class="n">h</span><span class="p">)</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-15'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-15'>#</a>
 | |
|                 </div>
 | |
|                 <p>$A(s, a) - \frac{1}{|\mathcal{A}|} \sum_{a' \in \mathcal{A}} A(s, a')$</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">100</span>        <span class="n">action_score_centered</span> <span class="o">=</span> <span class="n">action_value</span> <span class="o">-</span> <span class="n">action_value</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">keepdim</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     <div class='section' id='section-16'>
 | |
|             <div class='docs'>
 | |
|                 <div class='section-link'>
 | |
|                     <a href='#section-16'>#</a>
 | |
|                 </div>
 | |
|                 <p>$Q(s, a) = V(s) + \Big(A(s, a) - \frac{1}{|\mathcal{A}|} \sum_{a' \in \mathcal{A}} A(s, a')\Big)$</p>
 | |
|             </div>
 | |
|             <div class='code'>
 | |
|                 <div class="highlight"><pre><span class="lineno">102</span>        <span class="n">q</span> <span class="o">=</span> <span class="n">state_value</span> <span class="o">+</span> <span class="n">action_score_centered</span>
 | |
| <span class="lineno">103</span>
 | |
| <span class="lineno">104</span>        <span class="k">return</span> <span class="n">q</span></pre></div>
 | |
|             </div>
 | |
|         </div>
 | |
|     </div>
 | |
| </div>
 | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.4/MathJax.js?config=TeX-AMS_HTML">
 | |
| </script>
 | |
| <!-- MathJax configuration -->
 | |
| <script type="text/x-mathjax-config">
 | |
|     MathJax.Hub.Config({
 | |
|         tex2jax: {
 | |
|             inlineMath: [ ['$','$'] ],
 | |
|             displayMath: [ ['$$','$$'] ],
 | |
|             processEscapes: true,
 | |
|             processEnvironments: true
 | |
|         },
 | |
|         // Center justify equations in code and markdown cells. Elsewhere
 | |
|         // we use CSS to left justify single line equations in code cells.
 | |
|         displayAlign: 'center',
 | |
|         "HTML-CSS": { fonts: ["TeX"] }
 | |
|     });
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| </script>
 | |
| </body>
 | |
| </html> | 
