mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-10-30 02:08:50 +08:00
2040 lines
199 KiB
HTML
2040 lines
199 KiB
HTML
<!DOCTYPE html>
|
||
<html lang="si">
|
||
<head>
|
||
<meta http-equiv="content-type" content="text/html;charset=utf-8"/>
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
|
||
<meta name="description" content="ජීපීටී-නියෝක්ස් හි ආදර්ශ අර්ථ දැක්වීම මෙයයි."/>
|
||
|
||
<meta name="twitter:card" content="summary"/>
|
||
<meta name="twitter:image:src" content="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4"/>
|
||
<meta name="twitter:title" content="ජීපීටී-නියෝක්ස් ආදර්ශ අර්ථ දැක්වීම"/>
|
||
<meta name="twitter:description" content="ජීපීටී-නියෝක්ස් හි ආදර්ශ අර්ථ දැක්වීම මෙයයි."/>
|
||
<meta name="twitter:site" content="@labmlai"/>
|
||
<meta name="twitter:creator" content="@labmlai"/>
|
||
|
||
<meta property="og:url" content="https://nn.labml.ai/neox/model.html"/>
|
||
<meta property="og:title" content="ජීපීටී-නියෝක්ස් ආදර්ශ අර්ථ දැක්වීම"/>
|
||
<meta property="og:image" content="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4"/>
|
||
<meta property="og:site_name" content="ජීපීටී-නියෝක්ස් ආදර්ශ අර්ථ දැක්වීම"/>
|
||
<meta property="og:type" content="object"/>
|
||
<meta property="og:title" content="ජීපීටී-නියෝක්ස් ආදර්ශ අර්ථ දැක්වීම"/>
|
||
<meta property="og:description" content="ජීපීටී-නියෝක්ස් හි ආදර්ශ අර්ථ දැක්වීම මෙයයි."/>
|
||
|
||
<title>ජීපීටී-නියෝක්ස් ආදර්ශ අර්ථ දැක්වීම</title>
|
||
<link rel="shortcut icon" href="/icon.png"/>
|
||
<link rel="stylesheet" href="../pylit.css?v=1">
|
||
<link rel="canonical" href="https://nn.labml.ai/neox/model.html"/>
|
||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.13.18/dist/katex.min.css" integrity="sha384-zTROYFVGOfTw7JV7KUu8udsvW2fx4lWOsCEDqhBreBwlHI4ioVRtmIvEThzJHGET" crossorigin="anonymous">
|
||
|
||
<!-- Global site tag (gtag.js) - Google Analytics -->
|
||
<script async src="https://www.googletagmanager.com/gtag/js?id=G-4V3HC8HBLH"></script>
|
||
<script>
|
||
// Reuse the global dataLayer array if one already exists on window;
// otherwise start with an empty array.
window.dataLayer = window.dataLayer || [];
|
||
|
||
// Queue one entry on dataLayer per call.
// The function's own `arguments` object is pushed as a single entry
// (not copied into an array and not spread into separate items).
function gtag() {
|
||
dataLayer.push(arguments);
|
||
}
|
||
|
||
// Queue a 'js' entry carrying the Date at which this script ran.
gtag('js', new Date());
|
||
|
||
// Queue a 'config' entry for G-4V3HC8HBLH, the same id that appears in the
// gtag/js script URL loaded earlier in <head>.
gtag('config', 'G-4V3HC8HBLH');
|
||
</script>
|
||
</head>
|
||
<body>
|
||
<div id='container'>
|
||
<div id="background"></div>
|
||
<div class='section'>
|
||
<div class='docs'>
|
||
<p>
|
||
<a class="parent" href="/">home</a>
|
||
<a class="parent" href="index.html">neox</a>
|
||
</p>
|
||
<p>
|
||
<a href="https://github.com/sponsors/labmlai" target="_blank">
|
||
<img alt="Sponsor"
|
||
src="https://img.shields.io/static/v1?label=Sponsor&message=%E2%9D%A4&logo=GitHub&color=%23fe8e86"
|
||
style="max-width:100%;"/></a>
|
||
<a href="https://github.com/labmlai/annotated_deep_learning_paper_implementations" target="_blank">
|
||
<img alt="Github"
|
||
src="https://img.shields.io/github/stars/labmlai/annotated_deep_learning_paper_implementations?style=social"
|
||
style="max-width:100%;"/></a>
|
||
<a href="https://twitter.com/labmlai" rel="nofollow" target="_blank">
|
||
<img alt="Twitter"
|
||
src="https://img.shields.io/twitter/follow/labmlai?style=social"
|
||
style="max-width:100%;"/></a>
|
||
</p>
|
||
<p>
|
||
<a href="https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/neox/model.py" target="_blank">
|
||
View code on Github</a>
|
||
</p>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-0'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-0'>#</a>
|
||
</div>
|
||
<h1>ජීපීටී-නියෝක්ස් ආකෘතිය</h1>
|
||
<p>ජීපීටී-නියෝක්ස් ආකෘතියේ ස්ථර සඳහා කේතය සහ 20B මුරපොල පූරණය කිරීමේ කේතය මෙන්න. </p>
|
||
<p>ස්ථර <code class="highlight"><span></span><span class="n">load_state</span></code>
|
||
වල ඇති ක්රමය එම ස්ථරයේ මුරපොලවල් පූරණය කරයි. මුරපොලවල් පැටවීමේ සහායකයන් ක්රියාත්මක වේ <a href="checkpoint.html"><code class="highlight"><span></span><span class="n">checkpoint</span><span class="o">.</span><span class="n">py</span></code>
|
||
</a></p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">16</span><span></span><span class="kn">import</span> <span class="nn">copy</span>
|
||
<span class="lineno">17</span><span class="kn">import</span> <span class="nn">math</span>
|
||
<span class="lineno">18</span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">Set</span><span class="p">,</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Generator</span><span class="p">,</span> <span class="n">Tuple</span>
|
||
<span class="lineno">19</span>
|
||
<span class="lineno">20</span><span class="kn">import</span> <span class="nn">torch</span>
|
||
<span class="lineno">21</span><span class="kn">from</span> <span class="nn">torch</span> <span class="kn">import</span> <span class="n">nn</span>
|
||
<span class="lineno">22</span><span class="kn">from</span> <span class="nn">torch.cuda.amp</span> <span class="kn">import</span> <span class="n">autocast</span>
|
||
<span class="lineno">23</span>
|
||
<span class="lineno">24</span><span class="kn">from</span> <span class="nn">labml</span> <span class="kn">import</span> <span class="n">monit</span>
|
||
<span class="lineno">25</span><span class="kn">from</span> <span class="nn">labml_nn.neox</span> <span class="kn">import</span> <span class="n">checkpoint</span>
|
||
<span class="lineno">26</span><span class="kn">from</span> <span class="nn">labml_nn.neox.utils.cache</span> <span class="kn">import</span> <span class="n">get_cache</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-1'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-1'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">29</span><span class="k">class</span> <span class="nc">NeoXModule</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-2'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-2'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">30</span> <span class="k">def</span> <span class="nf">load_state</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">p1</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">],</span> <span class="n">p2</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">]):</span>
|
||
<span class="lineno">31</span> <span class="k">pass</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-3'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-3'>#</a>
|
||
</div>
|
||
<h2>කාවැද්දීම ස්ථරය</h2>
|
||
<p>මෙය මුරපොලට පැටවීම සඳහා කේතය සහිත සම්මත කාවැද්දීම් ස්ථරයකි. </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">34</span><span class="k">class</span> <span class="nc">Embedding</span><span class="p">(</span><span class="n">NeoXModule</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-4'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-4'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">n_vocab</span></code>
|
||
වචන මාලාවේ ප්රමාණය වේ </li>
|
||
<li><code class="highlight"><span></span><span class="n">n_hidden</span></code>
|
||
මෙම කාවැද්දීම් ප්රමාණය</li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">41</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n_vocab</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">50_432</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">6_144</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-5'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-5'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">46</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||
<span class="lineno">47</span>
|
||
<span class="lineno">48</span> <span class="bp">self</span><span class="o">.</span><span class="n">emb</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="p">(</span><span class="n">n_vocab</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-6'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-6'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">x</span></code>
|
||
හැඩයේ ටෝකන් හැඳුනුම් වේ <code class="highlight"><span></span><span class="p">[</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">seq_len</span><span class="p">]</span></code>
|
||
</li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">50</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-7'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-7'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">54</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">emb</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-8'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-8'>#</a>
|
||
</div>
|
||
<p> මුරපොල පූරණය කිරීමට කේතය</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">56</span> <span class="k">def</span> <span class="nf">load_state</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">p1</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">],</span> <span class="n">p2</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">]):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-9'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-9'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">60</span> <span class="k">with</span> <span class="n">monit</span><span class="o">.</span><span class="n">section</span><span class="p">(</span><span class="s1">'Load embedding layer'</span><span class="p">):</span>
|
||
<span class="lineno">61</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_dim_0</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">emb</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="s1">'word_embeddings.weight'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-10'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-10'>#</a>
|
||
</div>
|
||
<h2>රොටරි ස්ථානීය කාවැද්දීම්</h2>
|
||
<p>ජීපීටී-නියෝක්ස් <a href="https://papers.labml.ai/paper/2104.09864">භ්රමණ ස්ථානීය කාවැද්දීම් (RoPE)</a> භාවිතා කරයි. </p>
|
||
<p>අපි න්යාය වැඩි සටහන් සමඟ <a href="https://nn.labml.ai/transformers/rope/index.html">මෙහි</a> RoPE ක්රියාත්මක කිරීම විස්තර කර ඇත. </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">64</span><span class="k">class</span> <span class="nc">RoPE</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-11'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-11'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">d_rope</span></code>
|
||
RoPE කාවැද්දීම් සඳහා විශේෂාංග ගණන </li>
|
||
<li><code class="highlight"><span></span><span class="n">base</span></code>
|
||
සඳහා පදනම වේ <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.84444em;vertical-align:-0.15em;"></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2777777777777778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span></span><span class="base"><span class="strut" style="height:1.12379em;vertical-align:0em;"></span><span class="mord"><span class="mord coloredeq eql" style=""><span class="mord" style="">10000</span></span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:1.12379em;"><span style="top:-3.3973400000000002em;margin-right:0.05em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mtight"><span class="mopen nulldelimiter sizing reset-size3 size6"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.0377857142857143em;"><span style="top:-2.656em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mathnormal 
mtight">d</span></span></span></span><span style="top:-3.2255000000000003em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line mtight" style="border-bottom-width:0.049em;"></span></span><span style="top:-3.5020714285714285em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mtight">2</span><span class="mopen mtight">(</span><span class="mord mathnormal mtight">i</span><span class="mbin mtight">−</span><span class="mord mtight">1</span><span class="mclose mtight">)</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.344em;"><span></span></span></span></span></span><span class="mclose nulldelimiter sizing reset-size3 size6"></span></span></span></span></span></span></span></span></span></span></span></span></span></span>, එය පැහැර හරින <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.64444em;vertical-align:0em;"></span><span class="mord coloredeq eql" style=""><span class="mord" style="">10000</span></span></span></span></span></span></li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">74</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">d_rope</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">base</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">10_000.</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-12'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-12'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">79</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-13'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-13'>#</a>
|
||
</div>
|
||
<p>විශේෂාංග <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.84444em;vertical-align:-0.15em;"></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span></span></span> සඳහා ගබඩා කිරීමට </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">82</span> <span class="bp">self</span><span class="o">.</span><span class="n">theta</span> <span class="o">=</span> <span class="kc">None</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-14'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-14'>#</a>
|
||
</div>
|
||
<p>හැඹිලිය <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.84444em;vertical-align:-0.15em;"></span><span class="mord coloredeq eqf" style=""><span class="mop" style=""><span style="">c</span><span style="">o</span><span style="">s</span></span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span></span></span></span> සහ <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.84444em;vertical-align:-0.15em;"></span><span class="mop">sin</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span 
class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span></span></span> </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">84</span> <span class="bp">self</span><span class="o">.</span><span class="n">cos_cached</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="lineno">85</span> <span class="bp">self</span><span class="o">.</span><span class="n">sin_cached</span> <span class="o">=</span> <span class="kc">None</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-15'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-15'>#</a>
|
||
</div>
|
||
<p>සඳහාමූලික <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.84444em;vertical-align:-0.15em;"></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2777777777777778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span></span><span class="base"><span class="strut" style="height:1.12379em;vertical-align:0em;"></span><span class="mord"><span class="mord coloredeq eql" style=""><span class="mord" style="">10000</span></span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:1.12379em;"><span style="top:-3.3973400000000002em;margin-right:0.05em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mtight"><span class="mopen nulldelimiter sizing reset-size3 size6"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.0377857142857143em;"><span style="top:-2.656em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mathnormal 
mtight">d</span></span></span></span><span style="top:-3.2255000000000003em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line mtight" style="border-bottom-width:0.049em;"></span></span><span style="top:-3.5020714285714285em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mtight">2</span><span class="mopen mtight">(</span><span class="mord mathnormal mtight">i</span><span class="mbin mtight">−</span><span class="mord mtight">1</span><span class="mclose mtight">)</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.344em;"><span></span></span></span></span></span><span class="mclose nulldelimiter sizing reset-size3 size6"></span></span></span></span></span></span></span></span></span></span></span></span></span></span> </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">88</span> <span class="bp">self</span><span class="o">.</span><span class="n">base</span> <span class="o">=</span> <span class="n">base</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-16'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-16'>#</a>
|
||
</div>
|
||
<p>RoPE සඳහා විශේෂාංග ගණන </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">90</span> <span class="bp">self</span><span class="o">.</span><span class="n">d_rope</span> <span class="o">=</span> <span class="n">d_rope</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-17'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-17'>#</a>
|
||
</div>
|
||
<h3>විශේෂාංග කරකවන්න</h3>
|
||
<p><span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:1.22902em;vertical-align:-0.25em;"></span><span class="mopen">[</span><span class="mord">−</span><span class="mord"><span class="mord mathnormal">x</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.97902em;"><span style="top:-3.363em;margin-right:0.05em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mopen mtight">(</span><span class="mord mtight"><span class="mopen nulldelimiter sizing reset-size3 size6"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.8800285714285714em;"><span style="top:-2.656em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mtight">2</span></span></span></span><span style="top:-3.2255000000000003em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line mtight" style="border-bottom-width:0.049em;"></span></span><span style="top:-3.384em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mathnormal mtight">d</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.344em;"><span></span></span></span></span></span><span class="mclose nulldelimiter sizing reset-size3 size6"></span></span><span class="mbin mtight">+</span><span class="mord mtight">1</span><span class="mclose mtight">)</span></span></span></span></span></span></span></span></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord">−</span><span class="mord"><span class="mord mathnormal">x</span><span 
class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.97902em;"><span style="top:-3.363em;margin-right:0.05em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mopen mtight">(</span><span class="mord mtight"><span class="mopen nulldelimiter sizing reset-size3 size6"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.8800285714285714em;"><span style="top:-2.656em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mtight">2</span></span></span></span><span style="top:-3.2255000000000003em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line mtight" style="border-bottom-width:0.049em;"></span></span><span style="top:-3.384em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mathnormal mtight">d</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.344em;"><span></span></span></span></span></span><span class="mclose nulldelimiter sizing reset-size3 size6"></span></span><span class="mbin mtight">+</span><span class="mord mtight">2</span><span class="mclose mtight">)</span></span></span></span></span></span></span></span></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord">...</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord">−</span><span class="mord"><span class="mord mathnormal">x</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.8879999999999999em;"><span style="top:-3.063em;margin-right:0.05em;"><span 
class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mopen mtight">(</span><span class="mord mathnormal mtight">d</span><span class="mclose mtight">)</span></span></span></span></span></span></span></span></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord"><span class="mord mathnormal">x</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.8879999999999999em;"><span style="top:-3.063em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mopen mtight">(</span><span class="mord mtight">1</span><span class="mclose mtight">)</span></span></span></span></span></span></span></span></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord"><span class="mord mathnormal">x</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.8879999999999999em;"><span style="top:-3.063em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mopen mtight">(</span><span class="mord mtight">2</span><span class="mclose mtight">)</span></span></span></span></span></span></span></span></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord">...</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord">−</span><span class="mord"><span class="mord mathnormal">x</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.97902em;"><span style="top:-3.363em;margin-right:0.05em;"><span 
class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mopen mtight">(</span><span class="mord mtight"><span class="mopen nulldelimiter sizing reset-size3 size6"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.8800285714285714em;"><span style="top:-2.656em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mtight">2</span></span></span></span><span style="top:-3.2255000000000003em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line mtight" style="border-bottom-width:0.049em;"></span></span><span style="top:-3.384em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mathnormal mtight">d</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.344em;"><span></span></span></span></span></span><span class="mclose nulldelimiter sizing reset-size3 size6"></span></span><span class="mclose mtight">)</span></span></span></span></span></span></span></span></span><span class="mclose">]</span></span></span></span></span></p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">92</span> <span class="nd">@staticmethod</span>
|
||
<span class="lineno">93</span> <span class="k">def</span> <span class="nf">rotate_half</span><span class="p">(</span><span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-18'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-18'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">99</span> <span class="n">x1</span><span class="p">,</span> <span class="n">x2</span> <span class="o">=</span> <span class="n">x</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">//</span> <span class="mi">2</span><span class="p">],</span> <span class="n">x</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">x</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">//</span> <span class="mi">2</span><span class="p">:]</span>
|
||
<span class="lineno">100</span> <span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="o">-</span><span class="n">x2</span><span class="p">,</span> <span class="n">x1</span><span class="p">),</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-19'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-19'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">x</span></code>
|
||
හි හැඩය <code class="highlight"><span></span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">seq</span><span class="p">,</span> <span class="n">n_heads</span><span class="p">,</span> <span class="n">d_k</span><span class="p">]</span></code> වේ
|
||
</li>
|
||
<li><code class="highlight"><span></span><span class="n">offset</span></code>
|
||
යනු <code class="highlight"><span></span><span class="n">x</span></code> හි ආරම්භක ස්ථානයයි
|
||
. පෙර ස්ථානවල යතුරු සහ විමසුම් අප හැඹිලිගත කර ඇති විට මෙය <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.5782em;vertical-align:-0.0391em;"></span><span class="mrel">&gt;</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span></span><span class="base"><span class="strut" style="height:0.64444em;vertical-align:0em;"></span><span class="mord">0</span></span></span></span></span> වේ</li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">102</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">offset</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-20'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-20'>#</a>
|
||
</div>
|
||
<p>අනුක්‍රමයේ සත්‍ය දිග ලබා ගන්න</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">110</span> <span class="n">seq_len</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">3</span><span class="p">]</span> <span class="o">+</span> <span class="n">offset</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-21'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-21'>#</a>
|
||
</div>
|
||
<p><span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.69444em;vertical-align:0em;"></span><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span></span></span></span></span> ආරම්භ කරන්න</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">113</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">theta</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-22'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-22'>#</a>
|
||
</div>
|
||
<p> <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.84444em;vertical-align:-0.15em;"></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2777777777777778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span></span><span class="base"><span class="strut" style="height:1.12379em;vertical-align:0em;"></span><span class="mord"><span class="mord coloredeq eql" style=""><span class="mord" style="">10000</span></span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:1.12379em;"><span style="top:-3.3973400000000002em;margin-right:0.05em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mtight"><span class="mopen nulldelimiter sizing reset-size3 size6"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.0377857142857143em;"><span style="top:-2.656em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mathnormal mtight">d</span></span></span></span><span 
style="top:-3.2255000000000003em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line mtight" style="border-bottom-width:0.049em;"></span></span><span style="top:-3.5020714285714285em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mtight">2</span><span class="mopen mtight">(</span><span class="mord mathnormal mtight">i</span><span class="mbin mtight">−</span><span class="mord mtight">1</span><span class="mclose mtight">)</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.344em;"><span></span></span></span></span></span><span class="mclose nulldelimiter sizing reset-size3 size6"></span></span></span></span></span></span></span></span></span></span></span></span></span></span> </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">115</span> <span class="n">theta</span> <span class="o">=</span> <span class="mf">1.0</span> <span class="o">/</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base</span> <span class="o">**</span> <span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">d_rope</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">float</span><span class="p">()</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">d_rope</span><span class="p">))</span>
|
||
<span class="lineno">116</span> <span class="bp">self</span><span class="o">.</span><span class="n">theta</span> <span class="o">=</span> <span class="n">theta</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">device</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-23'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-23'>#</a>
|
||
</div>
|
||
<p>ආරම්භකරන්න <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.84444em;vertical-align:-0.15em;"></span><span class="mord coloredeq eqf" style=""><span class="mop" style=""><span style="">c</span><span style="">o</span><span style="">s</span></span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span></span></span></span> සහ <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.84444em;vertical-align:-0.15em;"></span><span class="mop">sin</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span 
class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span></span></span> හැඹිලිය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">119</span> <span class="k">if</span> <span class="p">(</span>
|
||
<span class="lineno">120</span> <span class="bp">self</span><span class="o">.</span><span class="n">cos_cached</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span>
|
||
<span class="lineno">121</span> <span class="n">seq_len</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">cos_cached</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">or</span>
|
||
<span class="lineno">122</span> <span class="bp">self</span><span class="o">.</span><span class="n">cos_cached</span><span class="o">.</span><span class="n">device</span> <span class="o">!=</span> <span class="n">x</span><span class="o">.</span><span class="n">device</span> <span class="ow">or</span>
|
||
<span class="lineno">123</span> <span class="bp">self</span><span class="o">.</span><span class="n">cos_cached</span><span class="o">.</span><span class="n">dtype</span> <span class="o">!=</span> <span class="n">x</span><span class="o">.</span><span class="n">dtype</span>
|
||
<span class="lineno">124</span> <span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-24'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-24'>#</a>
|
||
</div>
|
||
<p>ස්ථාන දර්ශක <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span></span></span></span></span> ලබා ගන්න</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">126</span> <span class="n">seq_idx</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">seq_len</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">x</span><span class="o">.</span><span class="n">device</span><span class="p">)</span><span class="o">.</span><span class="n">type_as</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">theta</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-25'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-25'>#</a>
|
||
</div>
|
||
<p><span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.84444em;vertical-align:-0.15em;"></span><span class="mord coloredeq eqh" style=""><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span></span></span></span> </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">128</span> <span class="n">idx_theta</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">einsum</span><span class="p">(</span><span class="s2">"s,d->sd"</span><span class="p">,</span> <span class="n">seq_idx</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">theta</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-26'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-26'>#</a>
|
||
</div>
|
||
<p><span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span></span></span></span></span> පේළිය සඳහා අපට පහත දැක්වෙන දෙය ලැබෙන පරිදි සංයුක්ත කරන්න:</p>
|
||
<p><span ><span class="katex-display"><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:1.26202em;vertical-align:-0.5120199999999999em;"></span><span class="mopen">[</span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord"><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.30110799999999993em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight">0</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord"><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.30110799999999993em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight">1</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord">...</span><span class="mpunct">,</span><span class="mspace" 
style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord"><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3448em;"><span style="top:-2.7287800000000004em;margin-right:0.05em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mtight"><span class="mopen nulldelimiter sizing reset-size3 size6"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.8800285714285714em;"><span style="top:-2.656em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mtight">2</span></span></span></span><span style="top:-3.2255000000000003em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line mtight" style="border-bottom-width:0.049em;"></span></span><span style="top:-3.384em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mathnormal mtight">d</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.344em;"><span></span></span></span></span></span><span class="mclose nulldelimiter sizing reset-size3 size6"></span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.5120199999999999em;"><span></span></span></span></span></span></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" 
style="">m</span></span><span class="mord"><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.30110799999999993em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight">0</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord"><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.30110799999999993em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight">1</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord">...</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord"><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t 
vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3448em;"><span style="top:-2.7287800000000004em;margin-right:0.05em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mtight"><span class="mopen nulldelimiter sizing reset-size3 size6"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.8800285714285714em;"><span style="top:-2.656em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mtight">2</span></span></span></span><span style="top:-3.2255000000000003em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line mtight" style="border-bottom-width:0.049em;"></span></span><span style="top:-3.384em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mathnormal mtight">d</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.344em;"><span></span></span></span></span></span><span class="mclose nulldelimiter sizing reset-size3 size6"></span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.5120199999999999em;"><span></span></span></span></span></span></span><span class="mclose">]</span></span></span></span></span></span> </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">132</span> <span class="n">idx_theta2</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">idx_theta</span><span class="p">,</span> <span class="n">idx_theta</span><span class="p">),</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">device</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-27'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-27'>#</a>
|
||
</div>
|
||
<p>ගණනයකරන්න <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.84444em;vertical-align:-0.15em;"></span><span class="mord coloredeq eqf" style=""><span class="mop" style=""><span style="">c</span><span style="">o</span><span style="">s</span></span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span></span></span></span> සහ <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.84444em;vertical-align:-0.15em;"></span><span class="mop">sin</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span 
class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span></span></span> fp32 </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">135</span> <span class="k">with</span> <span class="n">autocast</span><span class="p">(</span><span class="n">enabled</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||
<span class="lineno">136</span> <span class="n">idx_theta2</span> <span class="o">=</span> <span class="n">idx_theta2</span><span class="o">.</span><span class="n">float</span><span class="p">()</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-28'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-28'>#</a>
|
||
</div>
|
||
<p>හිස (head) මානයක් එක් කරන්න</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">138</span> <span class="bp">self</span><span class="o">.</span><span class="n">cos_cached</span> <span class="o">=</span> <span class="n">idx_theta2</span><span class="o">.</span><span class="n">cos</span><span class="p">()[:,</span> <span class="kc">None</span><span class="p">,</span> <span class="p">:]</span>
|
||
<span class="lineno">139</span> <span class="bp">self</span><span class="o">.</span><span class="n">sin_cached</span> <span class="o">=</span> <span class="n">idx_theta2</span><span class="o">.</span><span class="n">sin</span><span class="p">()[:,</span> <span class="kc">None</span><span class="p">,</span> <span class="p">:]</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-29'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-29'>#</a>
|
||
</div>
|
||
<p>ඒවා හැඹිලිගත කරන්න</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">142</span> <span class="bp">self</span><span class="o">.</span><span class="n">cos_cached</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">cos_cached</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
|
||
<span class="lineno">143</span> <span class="bp">self</span><span class="o">.</span><span class="n">sin_cached</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sin_cached</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-30'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-30'>#</a>
|
||
</div>
|
||
<p>විශේෂාංග බෙදන්න. <code class="highlight"><span></span><span class="n">d_rope</span></code>
|
||
විශේෂාංග සඳහා පමණක් අපි RoPE යොදන්නෙමු </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">146</span> <span class="n">x_rope</span><span class="p">,</span> <span class="n">x_pass</span> <span class="o">=</span> <span class="n">x</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="p">:</span><span class="bp">self</span><span class="o">.</span><span class="n">d_rope</span><span class="p">],</span> <span class="n">x</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">d_rope</span><span class="p">:]</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-31'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-31'>#</a>
|
||
</div>
|
||
<p>හැඹිලියෙන් sin සහ cos අගයන් ලබා ගන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">149</span> <span class="n">cos</span><span class="p">,</span> <span class="n">sin</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">cos_cached</span><span class="p">[</span><span class="n">offset</span><span class="p">:</span> <span class="n">seq_len</span><span class="p">],</span> <span class="bp">self</span><span class="o">.</span><span class="n">sin_cached</span><span class="p">[</span><span class="n">offset</span><span class="p">:</span> <span class="n">seq_len</span><span class="p">]</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-32'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-32'>#</a>
|
||
</div>
|
||
<p>RoPE කාවැද්දීම්</p>
|
||
<span ><span class="katex-display"><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:3.42324em;vertical-align:-1.4616200000000001em;"></span><span class="mord"><span class="mtable"><span class="col-align-r"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.9616199999999997em;"><span style="top:-3.96162em;"><span class="pstrut" style="height:3.8116199999999996em;"></span><span class="mord"><span class="minner"><span class="mopen delimcenter" style="top:0em;"><span class="delimsizing size4">(</span></span><span class="mord"><span class="mtable"><span class="col-align-c"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.8116199999999998em;"><span style="top:-3.81162em;"><span class="pstrut" style="height:3.20162em;"></span><span class="mord"><span class="mord"><span class="mord mathnormal">x</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.0448em;"><span style="top:-2.5834080000000004em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight coloredeq eqn" style=""><span class="mord mathnormal mtight" style="">m</span></span></span></span><span style="top:-3.2198em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mopen mtight">(</span><span class="mord mathnormal mtight">i</span><span class="mclose mtight">)</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.11659199999999997em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mop">cos</span><span class="mspace" 
style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqh" style=""><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2222222222222222em;"></span><span class="mbin">−</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span><span class="mord"><span class="mord mathnormal">x</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.20162em;"><span style="top:-2.883408em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight coloredeq eqn" style=""><span class="mord mathnormal mtight" style="">m</span></span></span></span><span style="top:-3.5856000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mopen mtight">(</span><span class="mord mathnormal mtight">i</span><span class="mbin mtight">+</span><span class="mord mtight"><span class="mopen nulldelimiter sizing reset-size3 size6"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span 
class="vlist-r"><span class="vlist" style="height:0.8800285714285714em;"><span style="top:-2.656em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mtight">2</span></span></span></span><span style="top:-3.2255000000000003em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line mtight" style="border-bottom-width:0.049em;"></span></span><span style="top:-3.384em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mathnormal mtight">d</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.344em;"><span></span></span></span></span></span><span class="mclose nulldelimiter sizing reset-size3 size6"></span></span><span class="mclose mtight">)</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.11659199999999997em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mop">sin</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqh" style=""><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span 
class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span></span><span style="top:-2.25em;"><span class="pstrut" style="height:3.20162em;"></span><span class="mord"><span class="mord"><span class="mord mathnormal">x</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.20162em;"><span style="top:-2.883408em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight coloredeq eqn" style=""><span class="mord mathnormal mtight" style="">m</span></span></span></span><span style="top:-3.5856000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mopen mtight">(</span><span class="mord mathnormal mtight">i</span><span class="mbin mtight">+</span><span class="mord mtight"><span class="mopen nulldelimiter sizing reset-size3 size6"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.8800285714285714em;"><span style="top:-2.656em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mtight">2</span></span></span></span><span style="top:-3.2255000000000003em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line mtight" style="border-bottom-width:0.049em;"></span></span><span style="top:-3.384em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mtight"><span class="mord mathnormal mtight">d</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.344em;"><span></span></span></span></span></span><span 
class="mclose nulldelimiter sizing reset-size3 size6"></span></span><span class="mclose mtight">)</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.11659199999999997em;"><span></span></span></span></span></span></span><span class="mord coloredeq eqf" style=""><span class="mop" style=""><span style="">c</span><span style="">o</span><span style="">s</span></span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2222222222222222em;"></span><span class="mbin">+</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span><span class="mord"><span class="mord mathnormal">x</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.0448em;"><span style="top:-2.5834080000000004em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight coloredeq eqn" style=""><span class="mord mathnormal mtight" style="">m</span></span></span></span><span 
style="top:-3.2198em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mopen mtight">(</span><span class="mord mathnormal mtight">i</span><span class="mclose mtight">)</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.11659199999999997em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mop">sin</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord coloredeq eqh" style=""><span class="mord coloredeq eqn" style=""><span class="mord mathnormal" style="">m</span></span><span class="mord coloredeq eqi" style=""><span class="mord" style=""><span class="mord coloredeq eqk" style=""><span class="mord mathnormal" style="margin-right:0.02778em">θ</span></span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight" style=""><span class="mord mathnormal mtight" style="">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:1.3116200000000002em;"><span></span></span></span></span></span></span></span><span class="mclose delimcenter" style="top:0em;"><span class="delimsizing size4">)</span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" 
style="height:1.4616200000000001em;"><span></span></span></span></span></span></span></span></span></span></span></span></span><p>සඳහා <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.69862em;vertical-align:-0.0391em;"></span><span class="mord mathnormal">i</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span><span class="mrel">∈</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span></span><span class="base"><span class="strut" style="height:1.2251079999999999em;vertical-align:-0.345em;"></span><span class="mord"><span class="mord">1</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord">2</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord">...</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord"><span class="mopen nulldelimiter"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.8801079999999999em;"><span style="top:-2.6550000000000002em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mtight">2</span></span></span></span><span style="top:-3.23em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line" style="border-bottom-width:0.04em;"></span></span><span style="top:-3.394em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mathnormal mtight">d</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.345em;"><span></span></span></span></span></span><span class="mclose 
nulldelimiter"></span></span></span></span></span></span></span> </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">161</span> <span class="n">x_rope</span> <span class="o">=</span> <span class="p">(</span><span class="n">x_rope</span> <span class="o">*</span> <span class="n">cos</span><span class="p">)</span> <span class="o">+</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">rotate_half</span><span class="p">(</span><span class="n">x_rope</span><span class="p">)</span> <span class="o">*</span> <span class="n">sin</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-33'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-33'>#</a>
|
||
</div>
|
||
<p>RoPE කාවැද්දීම් ලබා නොගත් විශේෂාංග සමඟ සංයුක්ත කරන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">164</span> <span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">x_rope</span><span class="p">,</span> <span class="n">x_pass</span><span class="p">),</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-34'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-34'>#</a>
|
||
</div>
|
||
<h2>අවධානය ස්ථරය</h2>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">167</span><span class="k">class</span> <span class="nc">AttentionLayer</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-35'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-35'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">n_hidden</span></code>
|
||
කාවැද්දීම් වල විශේෂාංග ගණන </li>
|
||
<li><code class="highlight"><span></span><span class="n">n_heads</span></code>
|
||
අවධානය හිස් සංඛ්‍යාව </li>
|
||
<li><code class="highlight"><span></span><span class="n">rope_percentage</span></code>
|
||
RoPE කාවැද්දීම් එකතු කිරීම සඳහා විශේෂාංග ප්‍රතිශතය </li>
|
||
<li><code class="highlight"><span></span><span class="n">mask_fill</span></code>
|
||
අවධානය න්‍යාසය සඳහා ආවරණ පිරවුම් අගය</li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">172</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">6_144</span><span class="p">,</span> <span class="n">n_heads</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">64</span><span class="p">,</span> <span class="n">rope_percentage</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.25</span><span class="p">,</span>
|
||
<span class="lineno">173</span> <span class="n">mask_fill</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">-</span><span class="mf">10_000.0</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-36'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-36'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">180</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||
<span class="lineno">181</span>
|
||
<span class="lineno">182</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_heads</span> <span class="o">=</span> <span class="n">n_heads</span>
|
||
<span class="lineno">183</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_fill</span> <span class="o">=</span> <span class="n">mask_fill</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-37'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-37'>#</a>
|
||
</div>
|
||
<p>විමසුම, යතුර සහ වටිනාකම සඳහා රේඛීය ස්ථරය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">186</span> <span class="bp">self</span><span class="o">.</span><span class="n">qkv_lin</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">n_hidden</span><span class="p">,</span> <span class="n">n_hidden</span> <span class="o">*</span> <span class="mi">3</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-38'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-38'>#</a>
|
||
</div>
|
||
<p>අවසාන රේඛීය ස්ථරය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">188</span> <span class="bp">self</span><span class="o">.</span><span class="n">output</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">n_hidden</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-39'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-39'>#</a>
|
||
</div>
|
||
<p>හිසකට විශේෂාංග ගණන </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">191</span> <span class="n">d_k</span> <span class="o">=</span> <span class="n">n_hidden</span> <span class="o">//</span> <span class="n">n_heads</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-40'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-40'>#</a>
|
||
</div>
|
||
<p>RoPE කාවැද්දීම් මොඩියුලය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">193</span> <span class="bp">self</span><span class="o">.</span><span class="n">rope</span> <span class="o">=</span> <span class="n">RoPE</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">d_k</span> <span class="o">*</span> <span class="n">rope_percentage</span><span class="p">))</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-41'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-41'>#</a>
|
||
</div>
|
||
<p>අවධානය පරිමාණ සාධකය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">196</span> <span class="bp">self</span><span class="o">.</span><span class="n">scale</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">/</span> <span class="n">math</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">d_k</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-42'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-42'>#</a>
|
||
</div>
|
||
<p>හේතු ආවරණ හැඹිලි කිරීමට </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">199</span> <span class="bp">self</span><span class="o">.</span><span class="n">causal_mask</span> <span class="o">=</span> <span class="kc">None</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-43'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-43'>#</a>
|
||
</div>
|
||
<p>අවධානය සොෆ්ට්මැක්ස් මොඩියුලය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">202</span> <span class="bp">self</span><span class="o">.</span><span class="n">softmax</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Softmax</span><span class="p">(</span><span class="n">dim</span><span class="o">=-</span><span class="mi">2</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-44'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-44'>#</a>
|
||
</div>
|
||
<h4>හේතු ආවරණ ගණනය කරන්න</h4>
|
||
<ul><li><code class="highlight"><span></span><span class="n">attn</span></code>
|
||
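හැඩය ඇත <code class="highlight"><span></span><span class="p">[</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">query_seq_len</span><span class="p">,</span> <span class="n">key_seq_len</span><span class="p">,</span> <span class="n">n_heads</span><span class="p">]</span></code></li></ul>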
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">204</span> <span class="k">def</span> <span class="nf">_get_mask</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">attn</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-45'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-45'>#</a>
|
||
</div>
|
||
<p>විමසුම සහ යතුරු දිග </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">212</span> <span class="n">nq</span><span class="p">,</span> <span class="n">nk</span> <span class="o">=</span> <span class="n">attn</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="mi">3</span><span class="p">]</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-46'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-46'>#</a>
|
||
</div>
|
||
<p>ආවරණය සාදන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">215</span> <span class="k">if</span> <span class="p">(</span>
|
||
<span class="lineno">216</span> <span class="bp">self</span><span class="o">.</span><span class="n">causal_mask</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span>
|
||
<span class="lineno">217</span> <span class="bp">self</span><span class="o">.</span><span class="n">causal_mask</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">!=</span> <span class="n">nq</span> <span class="ow">or</span>
|
||
<span class="lineno">218</span> <span class="bp">self</span><span class="o">.</span><span class="n">causal_mask</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">!=</span> <span class="n">nk</span> <span class="ow">or</span>
|
||
<span class="lineno">219</span> <span class="bp">self</span><span class="o">.</span><span class="n">causal_mask</span><span class="o">.</span><span class="n">device</span> <span class="o">!=</span> <span class="n">attn</span><span class="o">.</span><span class="n">device</span>
|
||
<span class="lineno">220</span> <span class="p">):</span>
|
||
<span class="lineno">221</span> <span class="bp">self</span><span class="o">.</span><span class="n">causal_mask</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">triu</span><span class="p">(</span><span class="n">attn</span><span class="o">.</span><span class="n">new_ones</span><span class="p">([</span><span class="n">nq</span><span class="p">,</span> <span class="n">nk</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">bool</span><span class="p">),</span> <span class="mi">1</span> <span class="o">+</span> <span class="n">nk</span> <span class="o">-</span> <span class="n">nq</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-47'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-47'>#</a>
|
||
</div>
|
||
<p>හැඹිලියෙන් ආපසු ලබා දෙන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">224</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">causal_mask</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="p">:,</span> <span class="p">:,</span> <span class="kc">None</span><span class="p">]</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-48'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-48'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">x</span></code>
|
||
හැඩය ඇත <code class="highlight"><span></span><span class="p">[</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">seq_len</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">]</span></code>
|
||
</li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">226</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-49'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-49'>#</a>
|
||
</div>
|
||
<p>විමසුම, යතුර සහ වටිනාකම් කාවැද්දීම් ලබා ගන්න (සියල්ල සංයුක්ත කර ඇත). අවසාන මානයේ ප්‍රමාණය <code>n_hidden</code> සිට මෙය දක්වා වෙනස් වනු ඇත: <code class="highlight"><span></span><span class="mi">3</span> <span class="n">x</span> <span class="n">n_hidden</span></code>
|
||
</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">232</span> <span class="n">qkv</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">qkv_lin</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-50'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-50'>#</a>
|
||
</div>
|
||
<p>හැඩය වෙනස් කිරීමෙන් හිස් වලට බෙදන්න <code class="highlight"><span></span><span class="p">[</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">seq_len</span><span class="p">,</span> <span class="n">n_heads</span><span class="p">,</span> <span class="mi">3</span> <span class="o">*</span> <span class="n">d_k</span><span class="p">]</span></code>
|
||
</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">235</span> <span class="n">qkv</span> <span class="o">=</span> <span class="n">qkv</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">*</span><span class="n">qkv</span><span class="o">.</span><span class="n">shape</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_heads</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-51'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-51'>#</a>
|
||
</div>
|
||
<p>විමසුම, යතුර සහ අගය ලෙස බෙදන්න; එක් එකෙහි හැඩය <code class="highlight"><span></span><span class="p">[</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">seq_len</span><span class="p">,</span> <span class="n">n_heads</span><span class="p">,</span> <span class="n">d_k</span><span class="p">]</span></code>
|
||
</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">237</span> <span class="n">q</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">qkv</span><span class="p">,</span> <span class="n">qkv</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">//</span> <span class="mi">3</span><span class="p">,</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-52'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-52'>#</a>
|
||
</div>
|
||
<p>අපි පෙර ටෝකන වල තත්වයන් හැඹිලි කරන්නේ නම් </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">240</span> <span class="k">if</span> <span class="n">get_cache</span><span class="p">()</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'use_cache'</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-53'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-53'>#</a>
|
||
</div>
|
||
<p>තත්ව හැඳුනුම්පත් (state ids) ලබා ගන්න. පෙර තත්වයන් ලබා ගැනීමට සහ ඊළඟ තත්වයන් ගබඩා කිරීමට අපි ඒවා භාවිතා කරමු </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">242</span> <span class="n">prev_state_id</span><span class="p">,</span> <span class="n">next_state_id</span> <span class="o">=</span> <span class="n">get_cache</span><span class="p">()</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'state_ids'</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-54'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-54'>#</a>
|
||
</div>
|
||
<p>හැඹිලිය තිබේ නම් </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">244</span> <span class="k">if</span> <span class="n">prev_state_id</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-55'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-55'>#</a>
|
||
</div>
|
||
<p>අතීත යතුරු සහ අගයන් ලබා ගන්න. මේවාට හැඩය ඇත <code class="highlight"><span></span><span class="p">[</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">prev_seq_len</span><span class="p">,</span> <span class="n">n_heads</span><span class="p">,</span> <span class="n">d_k</span><span class="p">]</span></code>
|
||
</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">246</span> <span class="n">k_past</span><span class="p">,</span> <span class="n">v_past</span> <span class="o">=</span> <span class="n">get_cache</span><span class="p">()</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="sa">f</span><span class="s1">'attn_kv_</span><span class="si">{</span><span class="n">prev_state_id</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-56'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-56'>#</a>
|
||
</div>
|
||
<p>වත්මන් කාවැද්දීම් වල ඕෆ්සෙට් </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">248</span> <span class="n">offset</span> <span class="o">=</span> <span class="n">k_past</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-57'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-57'>#</a>
|
||
</div>
|
||
<p>RoPE කාවැද්දීම් එකතු කරන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">251</span> <span class="n">q</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rope</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">offset</span><span class="o">=</span><span class="n">offset</span><span class="p">)</span>
|
||
<span class="lineno">252</span> <span class="n">k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rope</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="n">offset</span><span class="o">=</span><span class="n">offset</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-58'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-58'>#</a>
|
||
</div>
|
||
<p>අතීතය සංයුක්ත කරන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">255</span> <span class="n">k</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">([</span><span class="n">k_past</span><span class="p">,</span> <span class="n">k</span><span class="p">],</span> <span class="n">dim</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||
<span class="lineno">256</span> <span class="n">v</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">([</span><span class="n">v_past</span><span class="p">,</span> <span class="n">v</span><span class="p">],</span> <span class="n">dim</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||
<span class="lineno">257</span> <span class="k">else</span><span class="p">:</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-59'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-59'>#</a>
|
||
</div>
|
||
<p>RoPE කාවැද්දීම් එකතු කරන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">259</span> <span class="n">q</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rope</span><span class="p">(</span><span class="n">q</span><span class="p">)</span>
|
||
<span class="lineno">260</span> <span class="n">k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rope</span><span class="p">(</span><span class="n">k</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-60'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-60'>#</a>
|
||
</div>
|
||
<p>වත්මන් තත්වය සුරකින්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">263</span> <span class="n">get_cache</span><span class="p">()</span><span class="o">.</span><span class="n">push</span><span class="p">(</span><span class="sa">f</span><span class="s1">'attn_kv_</span><span class="si">{</span><span class="n">next_state_id</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> <span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="n">v</span><span class="p">))</span>
|
||
<span class="lineno">264</span> <span class="k">else</span><span class="p">:</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-61'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-61'>#</a>
|
||
</div>
|
||
<p>හැඹිලියක් නැත - RoPE කාවැද්දීම් එකතු කරන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">266</span> <span class="n">q</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rope</span><span class="p">(</span><span class="n">q</span><span class="p">)</span>
|
||
<span class="lineno">267</span> <span class="n">k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rope</span><span class="p">(</span><span class="n">k</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-62'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-62'>#</a>
|
||
</div>
|
||
<p>අවධානය ගණනය කිරීම සඳහා fp16 වෙත ස්වයංක්‍රීය-වාත්තු කිරීම අක්‍රීය කරන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">270</span> <span class="k">with</span> <span class="n">autocast</span><span class="p">(</span><span class="n">enabled</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||
<span class="lineno">271</span> <span class="k">if</span> <span class="n">q</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="n">torch</span><span class="o">.</span><span class="n">float16</span><span class="p">:</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-63'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-63'>#</a>
|
||
</div>
|
||
<p>වත්මන් dtype fp16 නම් fp32 බවට පරිවර්තනය කරන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">273</span> <span class="n">attn</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">einsum</span><span class="p">(</span><span class="s1">'bihk,bjhk->bijh'</span><span class="p">,</span> <span class="n">q</span><span class="o">.</span><span class="n">float</span><span class="p">(),</span> <span class="n">k</span><span class="o">.</span><span class="n">float</span><span class="p">())</span>
|
||
<span class="lineno">274</span> <span class="k">else</span><span class="p">:</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-64'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-64'>#</a>
|
||
</div>
|
||
<p>bfloat සඳහා වාත්තු නොකරන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">276</span> <span class="n">attn</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">einsum</span><span class="p">(</span><span class="s1">'bihk,bjhk->bijh'</span><span class="p">,</span> <span class="n">q</span><span class="p">,</span> <span class="n">k</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-65'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-65'>#</a>
|
||
</div>
|
||
<p>අවධානය පරිමාණය කරන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">279</span> <span class="n">attn</span> <span class="o">=</span> <span class="n">attn</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scale</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-66'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-66'>#</a>
|
||
</div>
|
||
<p>හේතුක වෙස්මුහුණ (causal mask) ලබා ගන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">282</span> <span class="n">mask</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_mask</span><span class="p">(</span><span class="n">attn</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-67'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-67'>#</a>
|
||
</div>
|
||
<p>වෙස්මුහුණ යොදන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">284</span> <span class="n">attn</span><span class="o">.</span><span class="n">masked_fill_</span><span class="p">(</span><span class="n">mask</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_fill</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-68'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-68'>#</a>
|
||
</div>
|
||
<p>අවධානය සොෆ්ට්මැක්ස් </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">287</span> <span class="n">attn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">attn</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-69'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-69'>#</a>
|
||
</div>
|
||
<p>අවධානයෙන් බර තැබූ අගයන් ලබා ගන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">290</span> <span class="n">output</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">einsum</span><span class="p">(</span><span class="s1">'bijh,bjhk->bihk'</span><span class="p">,</span> <span class="n">attn</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">v</span><span class="o">.</span><span class="n">dtype</span><span class="p">),</span> <span class="n">v</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-70'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-70'>#</a>
|
||
</div>
|
||
<p><code class="highlight"><span></span><span class="p">[</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">seq_len</span><span class="p">,</span> <span class="n">n_heads</span><span class="p">,</span> <span class="n">d_k</span><span class="p">]</span></code>
|
||
සිට <code class="highlight"><span></span><span class="p">[</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">seq_len</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">]</span></code> වෙත නැවත සකස් කරන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">293</span> <span class="n">output</span> <span class="o">=</span> <span class="n">output</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="o">*</span><span class="n">x</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-71'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-71'>#</a>
|
||
</div>
|
||
<p>අවසාන රේඛීය ස්ථරය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">296</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">output</span><span class="p">(</span><span class="n">output</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-72'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-72'>#</a>
|
||
</div>
|
||
<h2>ඉදිරි-පෝෂණ (feedforward) ජාලය</h2>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">299</span><span class="k">class</span> <span class="nc">FFNLayer</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-73'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-73'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">n_hidden</span></code>
|
||
කාවැද්දීමේ ප්‍රමාණය වේ</li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">304</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">6_144</span><span class="p">,</span> <span class="n">d_ff</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-74'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-74'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">308</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||
<span class="lineno">309</span>
|
||
<span class="lineno">310</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">d_ff</span><span class="p">:</span>
|
||
<span class="lineno">311</span> <span class="n">d_ff</span> <span class="o">=</span> <span class="n">n_hidden</span> <span class="o">*</span> <span class="mi">4</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-75'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-75'>#</a>
|
||
</div>
|
||
<p>පුළුල් කිරීමේ රේඛීය ස්ථරය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">314</span> <span class="bp">self</span><span class="o">.</span><span class="n">dense_h_h4</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">n_hidden</span><span class="p">,</span> <span class="n">d_ff</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-76'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-76'>#</a>
|
||
</div>
|
||
<p>GELU සක්‍රිය කිරීම </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">316</span> <span class="bp">self</span><span class="o">.</span><span class="n">activation</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">GELU</span><span class="p">()</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-77'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-77'>#</a>
|
||
</div>
|
||
<p>සංකෝචන රේඛීය ස්ථරය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">318</span> <span class="bp">self</span><span class="o">.</span><span class="n">dense_h4_h</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">d_ff</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-78'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-78'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">x</span></code>
|
||
හැඩය ඇත <code class="highlight"><span></span><span class="p">[</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">seq_len</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">]</span></code>
|
||
</li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">320</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-79'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-79'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">324</span> <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dense_h_h4</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
||
<span class="lineno">325</span> <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">activation</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
||
<span class="lineno">326</span> <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dense_h4_h</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
||
<span class="lineno">327</span>
|
||
<span class="lineno">328</span> <span class="k">return</span> <span class="n">x</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-80'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-80'>#</a>
|
||
</div>
|
||
<h2>ට්‍රාන්ස්ෆෝමර් ස්ථරය</h2>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">331</span><span class="k">class</span> <span class="nc">TransformerLayer</span><span class="p">(</span><span class="n">NeoXModule</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-81'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-81'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">n_hidden</span></code>
|
||
කාවැද්දීමේ ප්‍රමාණය වේ </li>
|
||
<li><code class="highlight"><span></span><span class="n">n_heads</span></code>
|
||
හිස් සංඛ්‍යාව වේ</li></ul>
|
||
<p><em>අපගේ ක්‍රියාත්මක කිරීමට dropout ඇතුළත් නොවේ</em>. </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">336</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">6_144</span><span class="p">,</span> <span class="n">n_heads</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">64</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-82'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-82'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">343</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-83'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-83'>#</a>
|
||
</div>
|
||
<p>අවධානයට පෙර ස්ථර සාමාන්‍යකරණය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">346</span> <span class="bp">self</span><span class="o">.</span><span class="n">pre_ln_attn</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">LayerNorm</span><span class="p">(</span><span class="n">n_hidden</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-84'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-84'>#</a>
|
||
</div>
|
||
<p>FFN ට පෙර ස්ථර සාමාන්‍යකරණය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">348</span> <span class="bp">self</span><span class="o">.</span><span class="n">pre_ln_ffn</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">LayerNorm</span><span class="p">(</span><span class="n">n_hidden</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-85'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-85'>#</a>
|
||
</div>
|
||
<p>අවධානය ස්ථරය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">351</span> <span class="bp">self</span><span class="o">.</span><span class="n">attention</span> <span class="o">=</span> <span class="n">AttentionLayer</span><span class="p">(</span><span class="n">n_hidden</span><span class="p">,</span> <span class="n">n_heads</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-86'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-86'>#</a>
|
||
</div>
|
||
<p>FFN ස්ථරය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">353</span> <span class="bp">self</span><span class="o">.</span><span class="n">ffn</span> <span class="o">=</span> <span class="n">FFNLayer</span><span class="p">(</span><span class="n">n_hidden</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-87'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-87'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">x</span></code>
|
||
හැඩයේ කාවැද්දීම් වේ <code class="highlight"><span></span><span class="p">[</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">seq_len</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">]</span></code>
|
||
</li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">355</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-88'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-88'>#</a>
|
||
</div>
|
||
<p>අවශේෂ සම්බන්ධතාවය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">361</span> <span class="n">residual</span> <span class="o">=</span> <span class="n">x</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-89'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-89'>#</a>
|
||
</div>
|
||
<p>නියෝක්ස් සමාන්තරව අවධානය සහ ඉදිරි-පෝෂණ (feedforward) ජාලය ක්‍රියාත්මක කරයි </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">363</span> <span class="n">attn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">attention</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pre_ln_attn</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
|
||
<span class="lineno">364</span> <span class="n">ffn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ffn</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pre_ln_ffn</span><span class="p">(</span><span class="n">x</span><span class="p">))</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-90'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-90'>#</a>
|
||
</div>
|
||
<p>ඒවා සහ අවශේෂ සම්බන්ධතාවය එකතු කරන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">366</span> <span class="k">return</span> <span class="n">attn</span> <span class="o">+</span> <span class="n">ffn</span> <span class="o">+</span> <span class="n">residual</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-91'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-91'>#</a>
|
||
</div>
|
||
<p>චෙක්පොයින්ට් (checkpoint) පූරණය කිරීමට කේතය</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">368</span> <span class="k">def</span> <span class="nf">load_state</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">p1</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">],</span> <span class="n">p2</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">]):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-92'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-92'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">372</span> <span class="k">with</span> <span class="n">monit</span><span class="o">.</span><span class="n">section</span><span class="p">(</span><span class="s1">'Load transformer layer'</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-93'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-93'>#</a>
|
||
</div>
|
||
<p>අවධානය ප්‍රතිදාන පරිණාමනය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">374</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_sum</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">attention</span><span class="o">.</span><span class="n">output</span><span class="o">.</span><span class="n">bias</span><span class="p">,</span> <span class="s1">'attention.dense.bias'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span>
|
||
<span class="lineno">375</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_dim_1</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">attention</span><span class="o">.</span><span class="n">output</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="s1">'attention.dense.weight'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-94'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-94'>#</a>
|
||
</div>
|
||
<p>අවධානය විමසුම, යතුර සහ අගය පරිණාමනය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">378</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_dim_0</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">attention</span><span class="o">.</span><span class="n">qkv_lin</span><span class="o">.</span><span class="n">bias</span><span class="p">,</span> <span class="s1">'attention.query_key_value.bias'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span>
|
||
<span class="lineno">379</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_dim_0</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">attention</span><span class="o">.</span><span class="n">qkv_lin</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="s1">'attention.query_key_value.weight'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-95'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-95'>#</a>
|
||
</div>
|
||
<p>අවධානයට පෙර ස්ථර සම්මතය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">382</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_duplicate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pre_ln_attn</span><span class="o">.</span><span class="n">bias</span><span class="p">,</span> <span class="s1">'input_layernorm.bias'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span>
|
||
<span class="lineno">383</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_duplicate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pre_ln_attn</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="s1">'input_layernorm.weight'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-96'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-96'>#</a>
|
||
</div>
|
||
<p>FFN පළමු පරිණාමනය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">386</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_dim_0</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ffn</span><span class="o">.</span><span class="n">dense_h_h4</span><span class="o">.</span><span class="n">bias</span><span class="p">,</span> <span class="s1">'mlp.dense_h_to_4h.bias'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span>
|
||
<span class="lineno">387</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_dim_0</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ffn</span><span class="o">.</span><span class="n">dense_h_h4</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="s1">'mlp.dense_h_to_4h.weight'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-97'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-97'>#</a>
|
||
</div>
|
||
<p>FFN දෙවන පරිවර්තනය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">390</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_sum</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ffn</span><span class="o">.</span><span class="n">dense_h4_h</span><span class="o">.</span><span class="n">bias</span><span class="p">,</span> <span class="s1">'mlp.dense_4h_to_h.bias'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span>
|
||
<span class="lineno">391</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_dim_1</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ffn</span><span class="o">.</span><span class="n">dense_h4_h</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="s1">'mlp.dense_4h_to_h.weight'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-98'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-98'>#</a>
|
||
</div>
|
||
<p>FFNට පෙර ස්ථර සම්මතය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">394</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_duplicate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pre_ln_ffn</span><span class="o">.</span><span class="n">bias</span><span class="p">,</span> <span class="s1">'post_attention_layernorm.bias'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span>
|
||
<span class="lineno">395</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_duplicate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pre_ln_ffn</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="s1">'post_attention_layernorm.weight'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-99'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-99'>#</a>
|
||
</div>
|
||
<h2>අවසාන සාමාන්‍යකරණ ස්තරය</h2>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">398</span><span class="k">class</span> <span class="nc">FinalNorm</span><span class="p">(</span><span class="n">NeoXModule</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-100'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-100'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">n_hidden</span></code>
|
||
කාවැද්දීම ප්‍රමාණය වේ</li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">403</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">6_144</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-101'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-101'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">407</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||
<span class="lineno">408</span>
|
||
<span class="lineno">409</span> <span class="bp">self</span><span class="o">.</span><span class="n">ln</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">LayerNorm</span><span class="p">(</span><span class="n">n_hidden</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-102'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-102'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">x</span></code>
|
||
හැඩයේ කාවැද්දීම් වේ <code class="highlight"><span></span><span class="p">[</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">seq_len</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">]</span></code>
|
||
</li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">411</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-103'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-103'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">415</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">ln</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-104'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-104'>#</a>
|
||
</div>
|
||
<p>මුරපොල පූරණය කිරීමට කේතය</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">417</span> <span class="k">def</span> <span class="nf">load_state</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">p1</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">],</span> <span class="n">p2</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">]):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-105'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-105'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">421</span> <span class="k">with</span> <span class="n">monit</span><span class="o">.</span><span class="n">section</span><span class="p">(</span><span class="s1">'Load final normalization layer'</span><span class="p">):</span>
|
||
<span class="lineno">422</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_duplicate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ln</span><span class="o">.</span><span class="n">bias</span><span class="p">,</span> <span class="s1">'norm.bias'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span>
|
||
<span class="lineno">423</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_duplicate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ln</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="s1">'norm.weight'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-106'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-106'>#</a>
|
||
</div>
|
||
<p>කියවීමේ ස්ථරය</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">426</span><span class="k">class</span> <span class="nc">ReadoutLayer</span><span class="p">(</span><span class="n">NeoXModule</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-107'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-107'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">n_hidden</span></code>
|
||
කාවැද්දීම ප්‍රමාණය වේ </li>
|
||
<li><code class="highlight"><span></span><span class="n">n_vocab</span></code>
|
||
වචන මාලාවේ ප්‍රමාණය වේ</li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">431</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">6_144</span><span class="p">,</span> <span class="n">n_vocab</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">50_432</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-108'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-108'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">436</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||
<span class="lineno">437</span>
|
||
<span class="lineno">438</span> <span class="bp">self</span><span class="o">.</span><span class="n">linear</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">n_hidden</span><span class="p">,</span> <span class="n">n_vocab</span><span class="p">,</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-109'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-109'>#</a>
|
||
</div>
|
||
<ul><li><code class="highlight"><span></span><span class="n">x</span></code>
|
||
හැඩයේ කාවැද්දීම් වේ <code class="highlight"><span></span><span class="p">[</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">seq_len</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">]</span></code>
|
||
</li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">440</span> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-110'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-110'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">444</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">linear</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-111'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-111'>#</a>
|
||
</div>
|
||
<p>මුරපොල පූරණය කිරීමට කේතය</p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">446</span> <span class="k">def</span> <span class="nf">load_state</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">p1</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">],</span> <span class="n">p2</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">]):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-112'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-112'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">450</span> <span class="k">with</span> <span class="n">monit</span><span class="o">.</span><span class="n">section</span><span class="p">(</span><span class="s1">'Load final linear layer'</span><span class="p">):</span>
|
||
<span class="lineno">451</span> <span class="n">checkpoint</span><span class="o">.</span><span class="n">merge_params_dim_0</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">linear</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="s1">'final_linear.weight'</span><span class="p">,</span> <span class="n">p1</span><span class="p">,</span> <span class="n">p2</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-113'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-113'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">454</span><span class="k">class</span> <span class="nc">LayerGenerator</span><span class="p">:</span>
|
||
<span class="lineno">455</span> <span class="n">pre_created_layers</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="n">Any</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="n">NeoXModule</span><span class="p">]]</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-114'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-114'>#</a>
|
||
</div>
|
||
<h3>ස්ථර නිර්මාණය කිරීමට උත්පාදක යන්ත්‍රය</h3>
|
||
<p>ස්ථර ජනනය කරනු ලබන්නේ මුරපොලවල් මෙන් එකම අනුපිළිවෙලකි. </p>
|
||
<p>ස්ථරයක් නොමැති විට එය <code class="highlight"><span></span><span class="kc">None</span></code>
|
||
ලබා දෙයි; අපි ස්ථර දර්ශක නියෝක්ස් ලෙස භාවිතා කරන අතර අපගේ ක්‍රියාත්මක කිරීමේදී අපට අවශ්‍ය නොවන පරිවර්තන ස්ථර දෙකක් තිබේ. </p>
|
||
<ul><li><code class="highlight"><span></span><span class="n">n_vocab</span></code>
|
||
යනු වචන මාලාවේ ටෝකන ගණන </li>
|
||
<li><code class="highlight"><span></span><span class="n">n_hidden</span></code>
|
||
කාවැද්දීම් වල ඇති ලක්ෂණ ගණන </li>
|
||
<li><code class="highlight"><span></span><span class="n">n_layers</span></code>
|
||
ට්‍රාන්ස්ෆෝමර් ස්ථර ගණන </li>
|
||
<li><code class="highlight"><span></span><span class="n">n_heads</span></code>
|
||
අවධාන ශීර්ෂ (attention heads) සංඛ්‍යාව වේ </li>
|
||
<li><code class="highlight"><span></span><span class="n">filter_layers</span></code>
|
||
භාවිතා කළ යුතු ස්ථර සමූහයයි. කිසිවක් නොමැති නම් සියලුම ස්ථර භාවිතා කරනු ඇත. අඩු ස්ථර සහිත ආකෘතියේ කුඩා අනුවාදයන් පරීක්ෂා කිරීමට මෙය භාවිතා කරයි </li>
|
||
<li><code class="highlight"><span></span><span class="n">is_clone_layers</span></code>
|
||
ට්‍රාන්ස්ෆෝමර් ස්ථර ක්ලෝන කළ යුතුද යන්න නියම කරයි (ටිකක් වේගවත්) </li>
|
||
<li><code class="highlight"><span></span><span class="n">dtype</span></code>
|
||
ආකෘතියේ දත්ත වර්ගයයි </li>
|
||
<li><code class="highlight"><span></span><span class="n">device</span></code>
|
||
ආකෘතියේ උපාංගය වේ </li>
|
||
<li><code class="highlight"><span></span><span class="n">is_llm_int8</span></code>
|
||
INT8 ප්‍රමාණකරණය භාවිතා කළ යුතුද යන්න නියම කරයි </li>
|
||
<li><code class="highlight"><span></span><span class="n">llm_int8_threshold</span></code>
|
||
යනු පිටත විශේෂාංග වෙන් කිරීම සඳහා භාවිතා කරන එළිපත්ත <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathnormal" style="margin-right:0.0037em;">α</span></span></span></span></span></li></ul>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">457</span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">n_vocab</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">50_432</span><span class="p">,</span> <span class="n">n_hidden</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">6_144</span><span class="p">,</span>
|
||
<span class="lineno">458</span> <span class="n">n_layers</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">44</span><span class="p">,</span> <span class="n">n_heads</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">64</span><span class="p">,</span>
|
||
<span class="lineno">459</span> <span class="n">filter_layers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Set</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
||
<span class="lineno">460</span> <span class="n">is_clone_layers</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
|
||
<span class="lineno">461</span> <span class="n">dtype</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">dtype</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">,</span>
|
||
<span class="lineno">462</span> <span class="n">device</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="s1">'cpu'</span><span class="p">),</span>
|
||
<span class="lineno">463</span> <span class="n">is_llm_int8</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
|
||
<span class="lineno">464</span> <span class="n">llm_int8_threshold</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">6.0</span><span class="p">,</span>
|
||
<span class="lineno">465</span> <span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-115'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-115'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">486</span> <span class="k">if</span> <span class="n">filter_layers</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="lineno">487</span> <span class="n">filter_layers</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="n">n_layers</span> <span class="o">+</span> <span class="mi">3</span><span class="p">))</span>
|
||
<span class="lineno">488</span>
|
||
<span class="lineno">489</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_vocab</span> <span class="o">=</span> <span class="n">n_vocab</span>
|
||
<span class="lineno">490</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_hidden</span> <span class="o">=</span> <span class="n">n_hidden</span>
|
||
<span class="lineno">491</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_layers</span> <span class="o">=</span> <span class="n">n_layers</span>
|
||
<span class="lineno">492</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_heads</span> <span class="o">=</span> <span class="n">n_heads</span>
|
||
<span class="lineno">493</span> <span class="bp">self</span><span class="o">.</span><span class="n">filter_layers</span> <span class="o">=</span> <span class="n">filter_layers</span>
|
||
<span class="lineno">494</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_clone_layers</span> <span class="o">=</span> <span class="n">is_clone_layers</span>
|
||
<span class="lineno">495</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span> <span class="o">=</span> <span class="n">dtype</span>
|
||
<span class="lineno">496</span> <span class="bp">self</span><span class="o">.</span><span class="n">device</span> <span class="o">=</span> <span class="n">device</span>
|
||
<span class="lineno">497</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_llm_int8</span> <span class="o">=</span> <span class="n">is_llm_int8</span>
|
||
<span class="lineno">498</span> <span class="bp">self</span><span class="o">.</span><span class="n">llm_int8_threshold</span> <span class="o">=</span> <span class="n">llm_int8_threshold</span>
|
||
<span class="lineno">499</span>
|
||
<span class="lineno">500</span> <span class="bp">self</span><span class="o">.</span><span class="n">pre_created_layers</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span>
|
||
<span class="lineno">501</span> <span class="n">transformer_layer</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
||
<span class="lineno">502</span> <span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-116'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-116'>#</a>
|
||
</div>
|
||
<h4>භාවිතය සඳහා ස්තරය සකස් කරයි</h4>
|
||
<p>අපි ස්තරය උපාංගය වෙත ගෙන ගොස් නිවැරදි දත්ත වර්ගයට පරිවර්තනය කරමු</p>
|
||
<ul><li><code class="highlight"><span></span><span class="n">layer</span></code>
|
||
සකස් කළ යුතු ස්ථරයයි </li>
|
||
</ul><p>සකස් කළ ස්තරය <em>ආපසු ලබා දෙයි</em> </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">504</span> <span class="k">def</span> <span class="nf">_prepare_layer</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">layer</span><span class="p">:</span> <span class="n">NeoXModule</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-117'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-117'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">513</span> <span class="k">return</span> <span class="n">layer</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-118'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-118'>#</a>
|
||
</div>
|
||
<p> <a id="post_load_prepare"></a></p>
|
||
<h3>පිරික්සුම් ස්ථානය පැටවීමෙන් පසු ස්ථර පරිවර්තනයන්</h3>
|
||
<p>මෙම ශ්‍රිතය පිරික්සුම් ස්ථානය පැටවීමෙන් පසු ස්ථර පරිවර්තනයන් ක්‍රියාත්මක කරයි. </p>
|
||
<p>දැනට එය අදාළ වන්නේ INT8 ප්‍රමාණකරණයට පමණි. </p>
|
||
<ul><li><code class="highlight"><span></span><span class="n">layer</span></code>
|
||
සකස් කළ යුතු ස්ථරයයි </li>
|
||
<li><code class="highlight"><span></span><span class="n">is_llm_int8</span></code>
|
||
INT8 ප්‍රමාණකරණය භාවිතා කළ යුතුද යන්න නියම කරයි </li>
|
||
<li><code class="highlight"><span></span><span class="n">device</span></code>
|
||
ආකෘතියේ උපාංගය වේ </li>
|
||
<li><code class="highlight"><span></span><span class="n">llm_int8_threshold</span></code>
|
||
යනු පිටත විශේෂාංග වෙන් කිරීම සඳහා භාවිතා කරන එළිපත්ත <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathnormal" style="margin-right:0.0037em;">α</span></span></span></span></span> </li>
|
||
</ul><p>සකස් කළ ස්තරය <em>ආපසු ලබා දෙයි</em> </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">515</span> <span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
|
||
<span class="lineno">516</span> <span class="k">def</span> <span class="nf">post_load_prepare</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">layer</span><span class="p">:</span> <span class="n">NeoXModule</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span>
|
||
<span class="lineno">517</span> <span class="n">is_llm_int8</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
||
<span class="lineno">518</span> <span class="n">device</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
||
<span class="lineno">519</span> <span class="n">llm_int8_threshold</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
||
<span class="lineno">520</span> <span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-119'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-119'>#</a>
|
||
</div>
|
||
<p>නියම කර නොමැති නම් පෙරනිමි අගයන් ලබා ගන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">538</span> <span class="k">if</span> <span class="n">is_llm_int8</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="lineno">539</span> <span class="n">is_llm_int8</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_llm_int8</span>
|
||
<span class="lineno">540</span> <span class="k">if</span> <span class="n">device</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="lineno">541</span> <span class="n">device</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">device</span>
|
||
<span class="lineno">542</span> <span class="k">if</span> <span class="n">llm_int8_threshold</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="lineno">543</span> <span class="n">llm_int8_threshold</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">llm_int8_threshold</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-120'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-120'>#</a>
|
||
</div>
|
||
<p>INT8 ප්‍රමාණකරණය භාවිතා නොකරන්නේ නම් මඟ හරින්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">546</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">is_llm_int8</span><span class="p">:</span>
|
||
<span class="lineno">547</span> <span class="k">return</span> <span class="n">layer</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-121'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-121'>#</a>
|
||
</div>
|
||
<p>ට්‍රාන්ස්ෆෝමර් ස්ථර වල රේඛීය ස්ථර පමණක් පරිවර්තනය කරන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">550</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">layer</span><span class="p">,</span> <span class="n">TransformerLayer</span><span class="p">):</span>
|
||
<span class="lineno">551</span> <span class="k">return</span> <span class="n">layer</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-122'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-122'>#</a>
|
||
</div>
|
||
<p><a href="./utils/llm_int8.html">උපයෝගිතා</a>වල <code class="highlight"><span></span><span class="n">make_llm_int8_linear</span></code>
|
||
අර්ථ දක්වා ඇති භාවිතා කරන්න. </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">554</span> <span class="kn">from</span> <span class="nn">labml_nn.neox.utils.llm_int8</span> <span class="kn">import</span> <span class="n">make_llm_int8_linear</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-123'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-123'>#</a>
|
||
</div>
|
||
<p>රේඛීය ස්ථර පරිවර්තනය කරන්න </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">557</span> <span class="k">with</span> <span class="n">monit</span><span class="o">.</span><span class="n">section</span><span class="p">(</span><span class="s1">'Convert to int8'</span><span class="p">):</span>
|
||
<span class="lineno">558</span> <span class="n">layer</span><span class="o">.</span><span class="n">attention</span><span class="o">.</span><span class="n">output</span> <span class="o">=</span> <span class="n">make_llm_int8_linear</span><span class="p">(</span><span class="n">layer</span><span class="o">.</span><span class="n">attention</span><span class="o">.</span><span class="n">output</span><span class="p">,</span>
|
||
<span class="lineno">559</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">,</span>
|
||
<span class="lineno">560</span> <span class="n">threshold</span><span class="o">=</span><span class="n">llm_int8_threshold</span><span class="p">)</span>
|
||
<span class="lineno">561</span> <span class="n">layer</span><span class="o">.</span><span class="n">attention</span><span class="o">.</span><span class="n">qkv_lin</span> <span class="o">=</span> <span class="n">make_llm_int8_linear</span><span class="p">(</span><span class="n">layer</span><span class="o">.</span><span class="n">attention</span><span class="o">.</span><span class="n">qkv_lin</span><span class="p">,</span>
|
||
<span class="lineno">562</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">,</span>
|
||
<span class="lineno">563</span> <span class="n">threshold</span><span class="o">=</span><span class="n">llm_int8_threshold</span><span class="p">)</span>
|
||
<span class="lineno">564</span> <span class="n">layer</span><span class="o">.</span><span class="n">ffn</span><span class="o">.</span><span class="n">dense_h_h4</span> <span class="o">=</span> <span class="n">make_llm_int8_linear</span><span class="p">(</span><span class="n">layer</span><span class="o">.</span><span class="n">ffn</span><span class="o">.</span><span class="n">dense_h_h4</span><span class="p">,</span>
|
||
<span class="lineno">565</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">,</span>
|
||
<span class="lineno">566</span> <span class="n">threshold</span><span class="o">=</span><span class="n">llm_int8_threshold</span><span class="p">)</span>
|
||
<span class="lineno">567</span> <span class="n">layer</span><span class="o">.</span><span class="n">ffn</span><span class="o">.</span><span class="n">dense_h4_h</span> <span class="o">=</span> <span class="n">make_llm_int8_linear</span><span class="p">(</span><span class="n">layer</span><span class="o">.</span><span class="n">ffn</span><span class="o">.</span><span class="n">dense_h4_h</span><span class="p">,</span>
|
||
<span class="lineno">568</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">,</span>
|
||
<span class="lineno">569</span> <span class="n">threshold</span><span class="o">=</span><span class="n">llm_int8_threshold</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-124'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-124'>#</a>
|
||
</div>
|
||
<p> </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">571</span> <span class="k">return</span> <span class="n">layer</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-125'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-125'>#</a>
|
||
</div>
|
||
<h4>ස්තරයක් නිර්මාණය කර හැඹිලිගත කරයි</h4>
|
||
<p>පරාමිතීන් ආරම්භ කිරීමට කාලය ගතවන නිසා හැඹිලි ස්ථර පිටපත් කිරීම නව ස්ථර ආරම්භ කිරීමට වඩා වේගවත් වේ. </p>
|
||
<ul><li><code class="highlight"><span></span><span class="n">name</span></code>
|
||
යනු ස්තරයේ නමයි </li>
|
||
<li><code class="highlight"><span></span><span class="n">creator</span></code>
|
||
ස්තරය නිර්මාණය කිරීමේ කාර්යයයි </li>
|
||
</ul>
<p>සාදන ලද ස්තරය හෝ හැඹිලි ස්ථරයේ පිටපතක් <em>ආපසු ලබා දෙයි</em> </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">573</span> <span class="k">def</span> <span class="nf">_create_and_cache_layer</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">creator</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[],</span> <span class="n">NeoXModule</span><span class="p">]):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-126'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-126'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">585</span> <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_clone_layers</span><span class="p">:</span>
|
||
<span class="lineno">586</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_prepare_layer</span><span class="p">(</span><span class="n">creator</span><span class="p">())</span>
|
||
<span class="lineno">587</span>
|
||
<span class="lineno">588</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">pre_created_layers</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="lineno">589</span> <span class="bp">self</span><span class="o">.</span><span class="n">pre_created_layers</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_prepare_layer</span><span class="p">(</span><span class="n">creator</span><span class="p">())</span>
|
||
<span class="lineno">590</span>
|
||
<span class="lineno">591</span> <span class="n">layer</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pre_created_layers</span><span class="p">[</span><span class="n">name</span><span class="p">])</span>
|
||
<span class="lineno">592</span> <span class="k">return</span> <span class="n">layer</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-127'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-127'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">594</span> <span class="k">def</span> <span class="nf">_create_transformer_layer</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="lineno">595</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_create_and_cache_layer</span><span class="p">(</span>
|
||
<span class="lineno">596</span> <span class="s1">'transformer_layer'</span><span class="p">,</span>
|
||
<span class="lineno">597</span> <span class="k">lambda</span><span class="p">:</span> <span class="n">TransformerLayer</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_hidden</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_heads</span><span class="p">)</span>
|
||
<span class="lineno">598</span> <span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-128'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-128'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">600</span> <span class="k">def</span> <span class="nf">_create_embedding_layer</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="lineno">601</span> <span class="k">return</span> <span class="n">Embedding</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_vocab</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_hidden</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-129'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-129'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">603</span> <span class="k">def</span> <span class="nf">_create_final_norm_layer</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="lineno">604</span> <span class="k">return</span> <span class="n">FinalNorm</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_hidden</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-130'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-130'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">606</span> <span class="k">def</span> <span class="nf">_create_readout_layer</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="lineno">607</span> <span class="k">return</span> <span class="n">ReadoutLayer</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_hidden</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_vocab</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-131'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-131'>#</a>
|
||
</div>
|
||
<h3>ස්ථර ලබා ගැනීම සඳහා උත්පාදක යන්ත්‍රය</h3>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">609</span> <span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
|
||
<span class="lineno">610</span> <span class="k">def</span> <span class="nf">get_layers</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Generator</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="n">NeoXModule</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]],</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">]:</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-132'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-132'>#</a>
|
||
</div>
|
||
<p>කාවැද්දීම ස්ථරය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">615</span> <span class="k">if</span> <span class="mi">0</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">filter_layers</span><span class="p">:</span>
|
||
<span class="lineno">616</span> <span class="k">with</span> <span class="n">monit</span><span class="o">.</span><span class="n">section</span><span class="p">(</span><span class="s1">'Embedding layer'</span><span class="p">):</span>
|
||
<span class="lineno">617</span> <span class="n">layer</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_prepare_layer</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_create_embedding_layer</span><span class="p">())</span>
|
||
<span class="lineno">618</span> <span class="k">yield</span> <span class="n">layer</span><span class="p">,</span> <span class="p">(</span><span class="s1">'layer_00-model_00-model_states.pt'</span><span class="p">,</span> <span class="s1">'layer_00-model_01-model_states.pt'</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-133'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-133'>#</a>
|
||
</div>
|
||
<p>ට්‍රාන්ස්ෆෝමර් ස්ථර </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">621</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_layers</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-134'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-134'>#</a>
|
||
</div>
|
||
<p>ට්‍රාන්ස්ෆෝමර් ස්ථරය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">623</span> <span class="k">if</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">filter_layers</span><span class="p">:</span>
|
||
<span class="lineno">624</span> <span class="k">with</span> <span class="n">monit</span><span class="o">.</span><span class="n">section</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Transformer Layer </span><span class="si">{</span><span class="n">i</span><span class="si">}</span><span class="s1">'</span><span class="p">):</span>
|
||
<span class="lineno">625</span> <span class="k">yield</span> <span class="bp">self</span><span class="o">.</span><span class="n">_create_transformer_layer</span><span class="p">(),</span> \
|
||
<span class="lineno">626</span> <span class="p">(</span><span class="sa">f</span><span class="s1">'layer_</span><span class="si">{</span><span class="n">i</span> <span class="o">+</span> <span class="mi">2</span> <span class="si">:</span><span class="s1">02d</span><span class="si">}</span><span class="s1">-model_00-model_states.pt'</span><span class="p">,</span>
|
||
<span class="lineno">627</span> <span class="sa">f</span><span class="s1">'layer_</span><span class="si">{</span><span class="n">i</span> <span class="o">+</span> <span class="mi">2</span> <span class="si">:</span><span class="s1">02d</span><span class="si">}</span><span class="s1">-model_01-model_states.pt'</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-135'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-135'>#</a>
|
||
</div>
|
||
<p>අවසාන සාමාන්‍යකරණ ස්තරය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">630</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_layers</span> <span class="o">+</span> <span class="mi">1</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">filter_layers</span><span class="p">:</span>
|
||
<span class="lineno">631</span> <span class="k">with</span> <span class="n">monit</span><span class="o">.</span><span class="n">section</span><span class="p">(</span><span class="s1">'Final norm layer'</span><span class="p">):</span>
|
||
<span class="lineno">632</span> <span class="n">layer</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_prepare_layer</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_create_final_norm_layer</span><span class="p">())</span>
|
||
<span class="lineno">633</span> <span class="k">yield</span> <span class="n">layer</span><span class="p">,</span> <span class="p">(</span><span class="s1">'layer_47-model_00-model_states.pt'</span><span class="p">,</span> <span class="s1">'layer_47-model_01-model_states.pt'</span><span class="p">)</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-136'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-136'>#</a>
|
||
</div>
|
||
<p>කියවීමේ ස්ථරය </p>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">636</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_layers</span> <span class="o">+</span> <span class="mi">2</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">filter_layers</span><span class="p">:</span>
|
||
<span class="lineno">637</span> <span class="k">with</span> <span class="n">monit</span><span class="o">.</span><span class="n">section</span><span class="p">(</span><span class="s1">'Readout layer'</span><span class="p">):</span>
|
||
<span class="lineno">638</span> <span class="n">layer</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_prepare_layer</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_create_readout_layer</span><span class="p">())</span>
|
||
<span class="lineno">639</span> <span class="k">yield</span> <span class="n">layer</span><span class="p">,</span> <span class="p">(</span><span class="s1">'layer_48-model_00-model_states.pt'</span><span class="p">,</span> <span class="s1">'layer_48-model_01-model_states.pt'</span><span class="p">)</span>
|
||
<span class="lineno">640</span>
|
||
<span class="lineno">641</span> <span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">pre_created_layers</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
|
||
<span class="lineno">642</span> <span class="bp">self</span><span class="o">.</span><span class="n">pre_created_layers</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-137'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-137'>#</a>
|
||
</div>
|
||
<h3>මුළු ස්ථර ගණන නැවත ලබා දෙයි</h3>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">644</span> <span class="nd">@property</span>
|
||
<span class="lineno">645</span> <span class="k">def</span> <span class="nf">total_layers</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-138'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-138'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">649</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_layers</span> <span class="o">+</span> <span class="mi">3</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-139'>
|
||
<div class='docs doc-strings'>
|
||
<div class='section-link'>
|
||
<a href='#section-139'>#</a>
|
||
</div>
|
||
<h3>ස්ථර පැටවීමට උත්පාදක යන්ත්‍රය</h3>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">651</span> <span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
|
||
<span class="lineno">652</span> <span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Generator</span><span class="p">[</span><span class="n">NeoXModule</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">]:</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='section' id='section-140'>
|
||
<div class='docs'>
|
||
<div class='section-link'>
|
||
<a href='#section-140'>#</a>
|
||
</div>
|
||
|
||
</div>
|
||
<div class='code'>
|
||
<div class="highlight"><pre><span class="lineno">656</span> <span class="k">with</span> <span class="n">monit</span><span class="o">.</span><span class="n">section</span><span class="p">(</span><span class="s2">"Layers"</span><span class="p">):</span>
|
||
<span class="lineno">657</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">layer</span><span class="p">,</span> <span class="n">files</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">get_layers</span><span class="p">()):</span>
|
||
<span class="lineno">658</span> <span class="k">if</span> <span class="n">files</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="lineno">659</span> <span class="n">layer</span><span class="o">.</span><span class="n">load_state</span><span class="p">(</span><span class="o">*</span><span class="n">checkpoint</span><span class="o">.</span><span class="n">load_checkpoint_files</span><span class="p">(</span><span class="n">files</span><span class="p">))</span>
|
||
<span class="lineno">660</span>
|
||
<span class="lineno">661</span> <span class="n">layer</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">post_load_prepare</span><span class="p">(</span><span class="n">layer</span><span class="p">)</span>
|
||
<span class="lineno">662</span>
|
||
<span class="lineno">663</span> <span class="n">monit</span><span class="o">.</span><span class="n">progress</span><span class="p">(</span><span class="nb">min</span><span class="p">(</span><span class="mf">0.99</span><span class="p">,</span> <span class="p">(</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">total_layers</span><span class="p">))</span>
|
||
<span class="lineno">664</span> <span class="k">yield</span> <span class="n">layer</span></pre></div>
|
||
</div>
|
||
</div>
|
||
<div class='footer'>
|
||
<a href="https://papers.labml.ai">Trending Research Papers</a>
|
||
<a href="https://labml.ai">labml.ai</a>
|
||
</div>
|
||
</div>
|
||
    <script src="../interactive.js?v=1"></script>
|
||
<script>
|
||
function handleImages() {
|
||
var images = document.querySelectorAll('p>img')
|
||
|
||
for (var i = 0; i < images.length; ++i) {
|
||
handleImage(images[i])
|
||
}
|
||
}
|
||
|
||
function handleImage(img) {
|
||
img.parentElement.style.textAlign = 'center'
|
||
|
||
var modal = document.createElement('div')
|
||
modal.id = 'modal'
|
||
|
||
var modalContent = document.createElement('div')
|
||
modal.appendChild(modalContent)
|
||
|
||
var modalImage = document.createElement('img')
|
||
modalContent.appendChild(modalImage)
|
||
|
||
var span = document.createElement('span')
|
||
span.classList.add('close')
|
||
span.textContent = 'x'
|
||
modal.appendChild(span)
|
||
|
||
img.onclick = function () {
|
||
console.log('clicked')
|
||
document.body.appendChild(modal)
|
||
modalImage.src = img.src
|
||
}
|
||
|
||
span.onclick = function () {
|
||
document.body.removeChild(modal)
|
||
}
|
||
}
|
||
|
||
handleImages()
|
||
</script>
|
||
</body>
|
||
</html> |