mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-08-14 09:31:42 +08:00
fix value pe double rotation
This commit is contained in:
@ -412,7 +412,7 @@ M834 80h400000v40h-400000z"></path></svg></span></span></span><span class="vlist
|
|||||||
|
|
||||||
</div>
|
</div>
|
||||||
<div class='code'>
|
<div class='code'>
|
||||||
<div class="highlight"><pre><span class="lineno">234</span> <span class="n">x</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">einsum</span><span class="p">(</span><span class="s2">"ijbh,jbhd->ibhd"</span><span class="p">,</span> <span class="n">attn</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">value_rotary_pe</span><span class="p">(</span><span class="n">value</span><span class="p">))</span></pre></div>
|
<div class="highlight"><pre><span class="lineno">234</span> <span class="n">x</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">einsum</span><span class="p">(</span><span class="s2">"ijbh,jbhd->ibhd"</span><span class="p">,</span> <span class="n">attn</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span></pre></div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class='section' id='section-21'>
|
<div class='section' id='section-21'>
|
||||||
|
@ -231,7 +231,7 @@ class RotaryValuePEMultiHeadAttention(RotaryPEMultiHeadAttention):
|
|||||||
|
|
||||||
# Multiply by values
|
# Multiply by values
|
||||||
# $$\underset{seq}{softmax}\Bigg(\frac{Q K^\top}{\sqrt{d_k}}\Bigg)V$$
|
# $$\underset{seq}{softmax}\Bigg(\frac{Q K^\top}{\sqrt{d_k}}\Bigg)V$$
|
||||||
x = torch.einsum("ijbh,jbhd->ibhd", attn, self.value_rotary_pe(value))
|
x = torch.einsum("ijbh,jbhd->ibhd", attn, value)
|
||||||
|
|
||||||
# Rotate in the opposite direction so that each embedding holds the relative positions
|
# Rotate in the opposite direction so that each embedding holds the relative positions
|
||||||
x = self.value_reverse_rotary_pe(x)
|
x = self.value_reverse_rotary_pe(x)
|
||||||
|
Reference in New Issue
Block a user