mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-08-16 10:51:23 +08:00
753 lines
57 KiB
HTML
753 lines
57 KiB
HTML
<!DOCTYPE html>
|
|
<html lang="si">
|
|
<head>
|
|
<meta http-equiv="content-type" content="text/html;charset=utf-8"/>
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
|
|
<meta name="description" content="මේවා ට්රාන්ස්ෆෝමර් මත පදනම් වූ එන්කෝඩර් සහ විකේතක ආකෘති මෙන්ම වෙනත් ආශ්රිත මොඩියුලවල පයිටෝච් ක්රියාත්මක කිරීම් වේ."/>
|
|
|
|
<meta name="twitter:card" content="summary"/>
|
|
<meta name="twitter:image:src" content="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4"/>
|
|
<meta name="twitter:title" content="ට්රාන්ස්ෆෝමර් එන්කෝඩර් සහ විකේතක ආකෘති"/>
|
|
<meta name="twitter:description" content="මේවා ට්රාන්ස්ෆෝමර් මත පදනම් වූ එන්කෝඩර් සහ විකේතක ආකෘති මෙන්ම වෙනත් ආශ්රිත මොඩියුලවල පයිටෝච් ක්රියාත්මක කිරීම් වේ."/>
|
|
<meta name="twitter:site" content="@labmlai"/>
|
|
<meta name="twitter:creator" content="@labmlai"/>
|
|
|
|
<meta property="og:url" content="https://nn.labml.ai/transformers/models.html"/>
|
|
<meta property="og:title" content="ට්රාන්ස්ෆෝමර් එන්කෝඩර් සහ විකේතක ආකෘති"/>
|
|
<meta property="og:image" content="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4"/>
|
|
<meta property="og:site_name" content="ට්රාන්ස්ෆෝමර් එන්කෝඩර් සහ විකේතක ආකෘති"/>
|
|
<meta property="og:type" content="object"/>
|
|
<meta property="og:title" content="ට්රාන්ස්ෆෝමර් එන්කෝඩර් සහ විකේතක ආකෘති"/>
|
|
<meta property="og:description" content="මේවා ට්රාන්ස්ෆෝමර් මත පදනම් වූ එන්කෝඩර් සහ විකේතක ආකෘති මෙන්ම වෙනත් ආශ්රිත මොඩියුලවල පයිටෝච් ක්රියාත්මක කිරීම් වේ."/>
|
|
|
|
<title>ට්රාන්ස්ෆෝමර් එන්කෝඩර් සහ විකේතක ආකෘති</title>
|
|
<link rel="shortcut icon" href="/icon.png"/>
|
|
<link rel="stylesheet" href="../pylit.css?v=1">
|
|
<link rel="canonical" href="https://nn.labml.ai/transformers/models.html"/>
|
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.13.18/dist/katex.min.css" integrity="sha384-zTROYFVGOfTw7JV7KUu8udsvW2fx4lWOsCEDqhBreBwlHI4ioVRtmIvEThzJHGET" crossorigin="anonymous">
|
|
|
|
<!-- Global site tag (gtag.js) - Google Analytics -->
|
|
<script async src="https://www.googletagmanager.com/gtag/js?id=G-4V3HC8HBLH"></script>
|
|
<script>
|
|
window.dataLayer = window.dataLayer || [];
|
|
|
|
function gtag() {
|
|
dataLayer.push(arguments);
|
|
}
|
|
|
|
gtag('js', new Date());
|
|
|
|
gtag('config', 'G-4V3HC8HBLH');
|
|
</script>
|
|
</head>
|
|
<body>
|
|
<div id='container'>
|
|
<div id="background"></div>
|
|
<div class='section'>
|
|
<div class='docs'>
|
|
<p>
|
|
<a class="parent" href="/">home</a>
|
|
<a class="parent" href="index.html">transformers</a>
|
|
</p>
|
|
<p>
|
|
<a href="https://github.com/labmlai/annotated_deep_learning_paper_implementations" target="_blank">
|
|
<img alt="Github"
|
|
src="https://img.shields.io/github/stars/labmlai/annotated_deep_learning_paper_implementations?style=social"
|
|
style="max-width:100%;"/></a>
|
|
<a href="https://twitter.com/labmlai" rel="nofollow" target="_blank">
|
|
<img alt="Twitter"
|
|
src="https://img.shields.io/twitter/follow/labmlai?style=social"
|
|
style="max-width:100%;"/></a>
|
|
</p>
|
|
<p>
|
|
<a href="https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/transformers/models.py" target="_blank">
|
|
View code on Github</a>
|
|
</p>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-0'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-0'>#</a>
|
|
</div>
|
|
<h1>ට්රාන්ස්ෆෝමර් එන්කෝඩර් සහ විකේතක ආකෘති</h1>
|
|
<p><a href="https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/basic/autoregressive_experiment.ipynb"><img alt="Open In Colab" src="https://colab.research.google.com/assets/colab-badge.svg"></a></p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">13</span><span></span><span class="kn">import</span> <span class="nn">math</span>
|
|
<span class="lineno">14</span>
|
|
<span class="lineno">15</span><span class="kn">import</span> <span class="nn">torch</span>
|
|
<span class="lineno">16</span><span class="kn">import</span> <span class="nn">torch.nn</span> <span class="k">as</span> <span class="nn">nn</span>
|
|
<span class="lineno">17</span>
|
|
<span class="lineno">18</span><span class="kn">from</span> <span class="nn">labml_nn.utils</span> <span class="kn">import</span> <span class="n">clone_module_list</span>
|
|
<span class="lineno">19</span><span class="kn">from</span> <span class="nn">.feed_forward</span> <span class="kn">import</span> <span class="n">FeedForward</span>
|
|
<span class="lineno">20</span><span class="kn">from</span> <span class="nn">.mha</span> <span class="kn">import</span> <span class="n">MultiHeadAttention</span>
|
|
<span class="lineno">21</span><span class="kn">from</span> <span class="nn">.positional_encoding</span> <span class="kn">import</span> <span class="n">get_positional_encoding</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-1'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-1'>#</a>
|
|
</div>
|
|
<p> <a id="EmbeddingsWithPositionalEncoding"></a></p>
|
|
<h2>ටෝකනකාවැද්දීම සහ <a href="positional_encoding.html">ස්ථාවර ස්ථානීය කේතීකරණ</a>එකතු කරන්න</h2>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">24</span><span class="k">class</span> <span class="nc">EmbeddingsWithPositionalEncoding</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-2'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-2'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">31</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">d_model</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">n_vocab</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">max_len</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5000</span><span class="p">):</span>
|
|
<span class="lineno">32</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
|
<span class="lineno">33</span> <span class="bp">self</span><span class="o">.</span><span class="n">linear</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="p">(</span><span class="n">n_vocab</span><span class="p">,</span> <span class="n">d_model</span><span class="p">)</span>
|
|
<span class="lineno">34</span> <span class="bp">self</span><span class="o">.</span><span class="n">d_model</span> <span class="o">=</span> <span class="n">d_model</span>
|
|
<span class="lineno">35</span> <span class="bp">self</span><span class="o">.</span><span class="n">register_buffer</span><span class="p">(</span><span class="s1">'positional_encodings'</span><span class="p">,</span> <span class="n">get_positional_encoding</span><span class="p">(</span><span class="n">d_model</span><span class="p">,</span> <span class="n">max_len</span><span class="p">))</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-3'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-3'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">37</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span>
|
|
<span class="lineno">38</span> <span class="n">pe</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">positional_encodings</span><span class="p">[:</span><span class="n">x</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
|
|
<span class="lineno">39</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">linear</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">*</span> <span class="n">math</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">d_model</span><span class="p">)</span> <span class="o">+</span> <span class="n">pe</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-4'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-4'>#</a>
|
|
</div>
|
|
<p> <a id="EmbeddingsWithLearnedPositionalEncoding"></a></p>
|
|
<h2>ටෝකනකාවැද්දීම සහ පරාමිතිකරණය කළ ස්ථානීය කේතීකරණ එකතු කරන්න</h2>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">42</span><span class="k">class</span> <span class="nc">EmbeddingsWithLearnedPositionalEncoding</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-5'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-5'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">49</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">d_model</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">n_vocab</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">max_len</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5000</span><span class="p">):</span>
|
|
<span class="lineno">50</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
|
<span class="lineno">51</span> <span class="bp">self</span><span class="o">.</span><span class="n">linear</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="p">(</span><span class="n">n_vocab</span><span class="p">,</span> <span class="n">d_model</span><span class="p">)</span>
|
|
<span class="lineno">52</span> <span class="bp">self</span><span class="o">.</span><span class="n">d_model</span> <span class="o">=</span> <span class="n">d_model</span>
|
|
<span class="lineno">53</span> <span class="bp">self</span><span class="o">.</span><span class="n">positional_encodings</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">max_len</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">d_model</span><span class="p">),</span> <span class="n">requires_grad</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-6'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-6'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">55</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span>
|
|
<span class="lineno">56</span> <span class="n">pe</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">positional_encodings</span><span class="p">[:</span><span class="n">x</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span>
|
|
<span class="lineno">57</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">linear</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">*</span> <span class="n">math</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">d_model</span><span class="p">)</span> <span class="o">+</span> <span class="n">pe</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-7'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-7'>#</a>
|
|
</div>
|
|
<p> <a id="TransformerLayer"></a></p>
|
|
<h2>ට්රාන්ස්ෆෝමර්ස්ථරය</h2>
|
|
<p>මෙයඑන්කෝඩර් තට්ටුවක් හෝ විකේතක තට්ටුවක් ලෙස ක්රියා කළ හැකිය. </p>
|
|
<p>🗒කඩදාසි ඇතුළු සමහර ක්රියාත්මක කිරීම්, ස්ථර-සාමාන්යකරණය සිදු කරන ස්ථානයේ වෙනස්කම් ඇති බව පෙනේ. මෙන්න අපි අවධානය සහ පෝෂක ඉදිරියට ජාල පෙර ස්ථරයක් සාමාන්යකරණය කරන්න, සහ මුල් අවශේෂ දෛශික එකතු කරන්න. විකල්පයක් වන්නේ අපද්රව්ය එකතු කිරීමෙන් පසු ස්ථර සාමාන්යකරණය කිරීමයි. නමුත් පුහුණුවීමේදී මෙය අඩු ස්ථාවර බව අපට පෙනී ගියේය. <a href="https://papers.labml.ai/paper/2002.04745">ට්රාන්ස්ෆෝමර් ගෘහ නිර්මාණ ශිල්පයේ ON Layer සාමාන්යකරණය පිළිබඳ</a>පත්රිකාවේ මේ පිළිබඳව සවිස්තරාත්මක සාකච්ඡාවක් අපට හමු විය. </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">60</span><span class="k">class</span> <span class="nc">TransformerLayer</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-8'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-8'>#</a>
|
|
</div>
|
|
<ul><li><code class="highlight"><span></span><span class="n">d_model</span></code>
|
|
ටෝකනය කාවැද්දීමේ ප්රමාණයයි </li>
|
|
<li><code class="highlight"><span></span><span class="n">self_attn</span></code>
|
|
ස්වයං අවධානය මොඩියුලය </li>
|
|
<li><code class="highlight"><span></span><span class="n">src_attn</span></code>
|
|
යනු ප්රභව අවධානය යොමු කිරීමේ මොඩියුලය (මෙය විකේතකයක් තුළ භාවිතා කරන විට) </li>
|
|
<li><code class="highlight"><span></span><span class="n">feed_forward</span></code>
|
|
යනු ආහාර ඉදිරි මොඩියුලයයි </li>
|
|
<li><code class="highlight"><span></span><span class="n">dropout_prob</span></code>
|
|
ස්වයං අවධානයෙන් පසු ඉවත් වීමේ සම්භාවිතාව සහ FFN</li></ul>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">78</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span>
|
|
<span class="lineno">79</span> <span class="n">d_model</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
|
|
<span class="lineno">80</span> <span class="n">self_attn</span><span class="p">:</span> <span class="n">MultiHeadAttention</span><span class="p">,</span>
|
|
<span class="lineno">81</span> <span class="n">src_attn</span><span class="p">:</span> <span class="n">MultiHeadAttention</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="lineno">82</span> <span class="n">feed_forward</span><span class="p">:</span> <span class="n">FeedForward</span><span class="p">,</span>
|
|
<span class="lineno">83</span> <span class="n">dropout_prob</span><span class="p">:</span> <span class="nb">float</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-9'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-9'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">91</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
|
<span class="lineno">92</span> <span class="bp">self</span><span class="o">.</span><span class="n">size</span> <span class="o">=</span> <span class="n">d_model</span>
|
|
<span class="lineno">93</span> <span class="bp">self</span><span class="o">.</span><span class="n">self_attn</span> <span class="o">=</span> <span class="n">self_attn</span>
|
|
<span class="lineno">94</span> <span class="bp">self</span><span class="o">.</span><span class="n">src_attn</span> <span class="o">=</span> <span class="n">src_attn</span>
|
|
<span class="lineno">95</span> <span class="bp">self</span><span class="o">.</span><span class="n">feed_forward</span> <span class="o">=</span> <span class="n">feed_forward</span>
|
|
<span class="lineno">96</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(</span><span class="n">dropout_prob</span><span class="p">)</span>
|
|
<span class="lineno">97</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm_self_attn</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">LayerNorm</span><span class="p">([</span><span class="n">d_model</span><span class="p">])</span>
|
|
<span class="lineno">98</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">src_attn</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="lineno">99</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm_src_attn</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">LayerNorm</span><span class="p">([</span><span class="n">d_model</span><span class="p">])</span>
|
|
<span class="lineno">100</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm_ff</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">LayerNorm</span><span class="p">([</span><span class="n">d_model</span><span class="p">])</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-10'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-10'>#</a>
|
|
</div>
|
|
<p>ආහාරඉදිරි ස්ථරයට ආදානය ඉතිරි කර ගත යුතුද යන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">102</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_save_ff_input</span> <span class="o">=</span> <span class="kc">False</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-11'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-11'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">104</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span>
|
|
<span class="lineno">105</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span>
|
|
<span class="lineno">106</span> <span class="n">mask</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span>
|
|
<span class="lineno">107</span> <span class="n">src</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="lineno">108</span> <span class="n">src_mask</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-12'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-12'>#</a>
|
|
</div>
|
|
<p>ස්වයංඅවධානය යොමු කිරීමට පෙර දෛශික සාමාන්යකරණය කරන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">110</span> <span class="n">z</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm_self_attn</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-13'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-13'>#</a>
|
|
</div>
|
|
<p>ස්වයංඅවධානය හරහා ධාවනය කරන්න, i.e. යතුරු සහ වටිනාකම් ස්වයං සිට </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">112</span> <span class="n">self_attn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">self_attn</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="n">z</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">z</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">z</span><span class="p">,</span> <span class="n">mask</span><span class="o">=</span><span class="n">mask</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-14'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-14'>#</a>
|
|
</div>
|
|
<p>ස්වයංඅවධානය ප්රතිඵල එකතු </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">114</span> <span class="n">x</span> <span class="o">=</span> <span class="n">x</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">self_attn</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-15'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-15'>#</a>
|
|
</div>
|
|
<p>ප්රභවයක්ලබා දෙන්නේ නම්, ප්රභවයට අවධානය යොමු කිරීමෙන් ප්රති results ල ලබා ගන්න. එන්කෝඩර් ප්රතිදානයන් කෙරෙහි අවධානය යොමු කරන විකේතක තට්ටුවක් ඔබට ඇති විට මෙය </p>වේ
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">119</span> <span class="k">if</span> <span class="n">src</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-16'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-16'>#</a>
|
|
</div>
|
|
<p>දෛශිකසාමාන්යකරණය කරන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">121</span> <span class="n">z</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm_src_attn</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-17'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-17'>#</a>
|
|
</div>
|
|
<p>ප්රභවයටඅවධානය යොමු කරන්න. එනම් යතුරු සහ අගයන් ප්රභවයෙන් වේ </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">123</span> <span class="n">attn_src</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">src_attn</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="n">z</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">src</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">src</span><span class="p">,</span> <span class="n">mask</span><span class="o">=</span><span class="n">src_mask</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-18'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-18'>#</a>
|
|
</div>
|
|
<p>මූලාශ්රඅවධානය යොමු ප්රතිඵල එක් කරන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">125</span> <span class="n">x</span> <span class="o">=</span> <span class="n">x</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">attn_src</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-19'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-19'>#</a>
|
|
</div>
|
|
<p>පෝෂණයසඳහා සාමාන්යකරණය කරන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">128</span> <span class="n">z</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm_ff</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-20'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-20'>#</a>
|
|
</div>
|
|
<p>නිශ්චිතවදක්වා ඇත්නම් ආදානය ආහාර ඉදිරි ස්ථරයට සුරකින්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">130</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_save_ff_input</span><span class="p">:</span>
|
|
<span class="lineno">131</span> <span class="bp">self</span><span class="o">.</span><span class="n">ff_input</span> <span class="o">=</span> <span class="n">z</span><span class="o">.</span><span class="n">clone</span><span class="p">()</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-21'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-21'>#</a>
|
|
</div>
|
|
<p>Feed-forwardජාලය හරහා ගමන් කරන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">133</span> <span class="n">ff</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">feed_forward</span><span class="p">(</span><span class="n">z</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-22'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-22'>#</a>
|
|
</div>
|
|
<p>ප්රතිපෝෂණඉදිරි ප්රති results ල නැවත එක් කරන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">135</span> <span class="n">x</span> <span class="o">=</span> <span class="n">x</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">ff</span><span class="p">)</span>
|
|
<span class="lineno">136</span>
|
|
<span class="lineno">137</span> <span class="k">return</span> <span class="n">x</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-23'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-23'>#</a>
|
|
</div>
|
|
<p> <a id="Encoder"></a></p>
|
|
<h2>ට්රාන්ස්ෆෝමර්එන්කෝඩරය</h2>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">140</span><span class="k">class</span> <span class="nc">Encoder</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-24'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-24'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">147</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">layer</span><span class="p">:</span> <span class="n">TransformerLayer</span><span class="p">,</span> <span class="n">n_layers</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
|
|
<span class="lineno">148</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-25'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-25'>#</a>
|
|
</div>
|
|
<p>ට්රාන්ස්ෆෝමර්ස්ථරයේ පිටපත් සාදන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">150</span> <span class="bp">self</span><span class="o">.</span><span class="n">layers</span> <span class="o">=</span> <span class="n">clone_module_list</span><span class="p">(</span><span class="n">layer</span><span class="p">,</span> <span class="n">n_layers</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-26'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-26'>#</a>
|
|
</div>
|
|
<p>අවසානසාමාන්යකරණ ස්තරය </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">152</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">LayerNorm</span><span class="p">([</span><span class="n">layer</span><span class="o">.</span><span class="n">size</span><span class="p">])</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-27'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-27'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">154</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">mask</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-28'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-28'>#</a>
|
|
</div>
|
|
<p>එක්එක් ට්රාන්ස්ෆෝමර් ස්ථරය හරහා ධාවනය කරන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">156</span> <span class="k">for</span> <span class="n">layer</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">layers</span><span class="p">:</span>
|
|
<span class="lineno">157</span> <span class="n">x</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="n">x</span><span class="o">=</span><span class="n">x</span><span class="p">,</span> <span class="n">mask</span><span class="o">=</span><span class="n">mask</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-29'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-29'>#</a>
|
|
</div>
|
|
<p>අවසානවශයෙන්, දෛශික සාමාන්යකරණය කරන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">159</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-30'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-30'>#</a>
|
|
</div>
|
|
<p> <a id="Decoder"></a></p>
|
|
<h2>ට්රාන්ස්ෆෝමර්විකේතකය</h2>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">162</span><span class="k">class</span> <span class="nc">Decoder</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-31'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-31'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">169</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">layer</span><span class="p">:</span> <span class="n">TransformerLayer</span><span class="p">,</span> <span class="n">n_layers</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
|
|
<span class="lineno">170</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-32'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-32'>#</a>
|
|
</div>
|
|
<p>ට්රාන්ස්ෆෝමර්ස්ථරයේ පිටපත් සාදන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">172</span> <span class="bp">self</span><span class="o">.</span><span class="n">layers</span> <span class="o">=</span> <span class="n">clone_module_list</span><span class="p">(</span><span class="n">layer</span><span class="p">,</span> <span class="n">n_layers</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-33'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-33'>#</a>
|
|
</div>
|
|
<p>අවසානසාමාන්යකරණ ස්තරය </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">174</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">LayerNorm</span><span class="p">([</span><span class="n">layer</span><span class="o">.</span><span class="n">size</span><span class="p">])</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-34'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-34'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">176</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">memory</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">src_mask</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">tgt_mask</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-35'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-35'>#</a>
|
|
</div>
|
|
<p>එක්එක් ට්රාන්ස්ෆෝමර් ස්ථරය හරහා ධාවනය කරන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">178</span> <span class="k">for</span> <span class="n">layer</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">layers</span><span class="p">:</span>
|
|
<span class="lineno">179</span> <span class="n">x</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="n">x</span><span class="o">=</span><span class="n">x</span><span class="p">,</span> <span class="n">mask</span><span class="o">=</span><span class="n">tgt_mask</span><span class="p">,</span> <span class="n">src</span><span class="o">=</span><span class="n">memory</span><span class="p">,</span> <span class="n">src_mask</span><span class="o">=</span><span class="n">src_mask</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-36'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-36'>#</a>
|
|
</div>
|
|
<p>අවසානවශයෙන්, දෛශික සාමාන්යකරණය කරන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">181</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-37'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-37'>#</a>
|
|
</div>
|
|
<p> <a id="Generator"></a></p>
|
|
<h2>උත්පාදකයන්ත්රය</h2>
|
|
<p>මෙයටෝකන පුරෝකථනය කරන අතර එම අයගේ සොෆ්ට්මැක්ස් ලබා දෙයි. ඔබ භාවිතා කරන්නේ නම් ඔබට මෙය අවශ්ය නොවේ <code class="highlight"><span></span><span class="n">nn</span><span class="o">.</span><span class="n">CrossEntropyLoss</span></code>
|
|
. </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">184</span><span class="k">class</span> <span class="nc">Generator</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-38'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-38'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">194</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n_vocab</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">d_model</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
|
|
<span class="lineno">195</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
|
<span class="lineno">196</span> <span class="bp">self</span><span class="o">.</span><span class="n">projection</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">d_model</span><span class="p">,</span> <span class="n">n_vocab</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-39'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-39'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">198</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
|
<span class="lineno">199</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">projection</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-40'>
|
|
<div class='docs doc-strings'>
|
|
<div class='section-link'>
|
|
<a href='#section-40'>#</a>
|
|
</div>
|
|
<p> <a id="EncoderDecoder"></a></p>
|
|
<h2>ඒකාබද්ධඑන්කෝඩරය-විකේතකය</h2>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">202</span><span class="k">class</span> <span class="nc">EncoderDecoder</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-41'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-41'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">209</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">encoder</span><span class="p">:</span> <span class="n">Encoder</span><span class="p">,</span> <span class="n">decoder</span><span class="p">:</span> <span class="n">Decoder</span><span class="p">,</span> <span class="n">src_embed</span><span class="p">:</span> <span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">,</span> <span class="n">tgt_embed</span><span class="p">:</span> <span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">,</span> <span class="n">generator</span><span class="p">:</span> <span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
|
|
<span class="lineno">210</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
|
<span class="lineno">211</span> <span class="bp">self</span><span class="o">.</span><span class="n">encoder</span> <span class="o">=</span> <span class="n">encoder</span>
|
|
<span class="lineno">212</span> <span class="bp">self</span><span class="o">.</span><span class="n">decoder</span> <span class="o">=</span> <span class="n">decoder</span>
|
|
<span class="lineno">213</span> <span class="bp">self</span><span class="o">.</span><span class="n">src_embed</span> <span class="o">=</span> <span class="n">src_embed</span>
|
|
<span class="lineno">214</span> <span class="bp">self</span><span class="o">.</span><span class="n">tgt_embed</span> <span class="o">=</span> <span class="n">tgt_embed</span>
|
|
<span class="lineno">215</span> <span class="bp">self</span><span class="o">.</span><span class="n">generator</span> <span class="o">=</span> <span class="n">generator</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-42'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-42'>#</a>
|
|
</div>
|
|
<p>මෙයඔවුන්ගේ කේතයෙන් වැදගත් විය. ග්ලෝරෝට්/fan_avg සමඟ පරාමිතීන් ආරම්භ කරන්න. </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">219</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">parameters</span><span class="p">():</span>
|
|
<span class="lineno">220</span> <span class="k">if</span> <span class="n">p</span><span class="o">.</span><span class="n">dim</span><span class="p">()</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
|
<span class="lineno">221</span> <span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">xavier_uniform_</span><span class="p">(</span><span class="n">p</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-43'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-43'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">223</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">src</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">tgt</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">src_mask</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">tgt_mask</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-44'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-44'>#</a>
|
|
</div>
|
|
<p>එන්කෝඩරයහරහා ප්රභවය ධාවනය කරන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">225</span> <span class="n">enc</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">src</span><span class="p">,</span> <span class="n">src_mask</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-45'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-45'>#</a>
|
|
</div>
|
|
<p>විකේතකයහරහා කේතීකරණ සහ ඉලක්ක ධාවනය කරන්න </p>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">227</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="n">enc</span><span class="p">,</span> <span class="n">src_mask</span><span class="p">,</span> <span class="n">tgt</span><span class="p">,</span> <span class="n">tgt_mask</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-46'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-46'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">229</span> <span class="k">def</span> <span class="nf">encode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">src</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">src_mask</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span>
|
|
<span class="lineno">230</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">encoder</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">src_embed</span><span class="p">(</span><span class="n">src</span><span class="p">),</span> <span class="n">src_mask</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='section' id='section-47'>
|
|
<div class='docs'>
|
|
<div class='section-link'>
|
|
<a href='#section-47'>#</a>
|
|
</div>
|
|
|
|
</div>
|
|
<div class='code'>
|
|
<div class="highlight"><pre><span class="lineno">232</span> <span class="k">def</span> <span class="nf">decode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">memory</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">src_mask</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">tgt</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">tgt_mask</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span>
|
|
<span class="lineno">233</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">decoder</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tgt_embed</span><span class="p">(</span><span class="n">tgt</span><span class="p">),</span> <span class="n">memory</span><span class="p">,</span> <span class="n">src_mask</span><span class="p">,</span> <span class="n">tgt_mask</span><span class="p">)</span></pre></div>
|
|
</div>
|
|
</div>
|
|
<div class='footer'>
|
|
<a href="https://papers.labml.ai">Trending Research Papers</a>
|
|
<a href="https://labml.ai">labml.ai</a>
|
|
</div>
|
|
</div>
|
|
<script src=../interactive.js?v=1"></script>
|
|
<script>
|
|
function handleImages() {
|
|
var images = document.querySelectorAll('p>img')
|
|
|
|
for (var i = 0; i < images.length; ++i) {
|
|
handleImage(images[i])
|
|
}
|
|
}
|
|
|
|
function handleImage(img) {
|
|
img.parentElement.style.textAlign = 'center'
|
|
|
|
var modal = document.createElement('div')
|
|
modal.id = 'modal'
|
|
|
|
var modalContent = document.createElement('div')
|
|
modal.appendChild(modalContent)
|
|
|
|
var modalImage = document.createElement('img')
|
|
modalContent.appendChild(modalImage)
|
|
|
|
var span = document.createElement('span')
|
|
span.classList.add('close')
|
|
span.textContent = 'x'
|
|
modal.appendChild(span)
|
|
|
|
img.onclick = function () {
|
|
console.log('clicked')
|
|
document.body.appendChild(modal)
|
|
modalImage.src = img.src
|
|
}
|
|
|
|
span.onclick = function () {
|
|
document.body.removeChild(modal)
|
|
}
|
|
}
|
|
|
|
handleImages()
|
|
</script>
|
|
</body>
|
|
</html> |