diff --git a/labml_nn/transformers/mha.py b/labml_nn/transformers/mha.py
index 846d4471..68ba05f3 100644
--- a/labml_nn/transformers/mha.py
+++ b/labml_nn/transformers/mha.py
@@ -65,7 +65,7 @@ class MultiHeadAttention(Module):
 
     This computes scaled multi-headed attention for given `query`, `key` and `value` vectors.
 
-    $$Attention(Q, K, V) = \underset{seq}{softmax}\Bigg(\frac{Q K^T}{\sqrt{d_k}}\Bigg)V$$
+    $$Attention(Q, K, V) = \\underset{seq}{softmax}\Bigg(\frac{Q K^T}{\sqrt{d_k}}\Bigg)V$$
 
     In simple terms, it finds keys that matches the query, and get the values of those keys.
 
diff --git a/setup.py b/setup.py
index c32a99c2..4ad00768 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ with open("readme.md", "r") as f:
 
 setuptools.setup(
     name='labml_nn',
-    version='0.4.72',
+    version='0.4.73',
     author="Varuna Jayasiri, Nipun Wijerathne",
     author_email="vpjayasiri@gmail.com, hnipun@gmail.com",
     description="A collection of PyTorch implementations of neural network architectures and layers.",