diff --git a/labml_nn/transformers/fast_weights/__init__.py b/labml_nn/transformers/fast_weights/__init__.py
index 52d4bf3d..0324d4d3 100644
--- a/labml_nn/transformers/fast_weights/__init__.py
+++ b/labml_nn/transformers/fast_weights/__init__.py
@@ -181,7 +181,7 @@ class FastWeightsAttention(Module):
     The model first retrieves the current value
     $\bar{v}^{(i)}$ paired with the key $k^{(i)}$.
     Then stores a combination $v^{(i)}_{new}$
-    of the retrieved value $\bar{v}^{̄(i)}$ and the input $v^{(i)}$.
+    of the retrieved value $\bar{v}^{(i)}$ and the input $v^{(i)}$.
 
     \begin{align}
     k^{(i)}, v^{(i)}, q^{(i)} &=
diff --git a/labml_nn/transformers/positional_encoding.py b/labml_nn/transformers/positional_encoding.py
index f974b096..fdcb99bc 100644
--- a/labml_nn/transformers/positional_encoding.py
+++ b/labml_nn/transformers/positional_encoding.py
@@ -50,7 +50,7 @@ def get_positional_encoding(d_model: int, max_len: int = 5000):
     position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
     # $2 * i$
     two_i = torch.arange(0, d_model, 2, dtype=torch.float32)
-    # $10000^{\frac{2i}{d_{model}}$
+    # $10000^{\frac{2i}{d_{model}}}$
     div_term = torch.exp(two_i * -(math.log(10000.0) / d_model))
     # $PE_{p,2i} = sin\Bigg(\frac{p}{10000^{\frac{2i}{d_{model}}}}\Bigg)$
     encodings[:, 0::2] = torch.sin(position * div_term)
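
Not part of the patch: a minimal standalone sketch of the positional-encoding computation touched by the second hunk, checking that the exp/log form of `div_term` really equals the direct power named in the corrected comment, $\frac{1}{10000^{\frac{2i}{d_{model}}}}$. The values of `d_model` and `max_len` here are arbitrary, chosen only for illustration.

import math

import torch

# Assumed parameters, for illustration only
d_model = 512
max_len = 5000

# Positions $p$: shape `[max_len, 1]`
position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
# Even dimension indices $2i$: shape `[d_model / 2]`
two_i = torch.arange(0, d_model, 2, dtype=torch.float32)
# exp/log form of $\frac{1}{10000^{\frac{2i}{d_{model}}}}$, as in the patched file
div_term = torch.exp(two_i * -(math.log(10000.0) / d_model))
# Sanity check: matches the direct power from the corrected comment
assert torch.allclose(div_term, 10000.0 ** -(two_i / d_model))

encodings = torch.zeros(max_len, d_model)
# $PE_{p,2i} = \sin\Bigg(\frac{p}{10000^{\frac{2i}{d_{model}}}}\Bigg)$
encodings[:, 0::2] = torch.sin(position * div_term)
# $PE_{p,2i+1} = \cos\Bigg(\frac{p}{10000^{\frac{2i}{d_{model}}}}\Bigg)$
encodings[:, 1::2] = torch.cos(position * div_term)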