diff --git a/docs/graphs/gat/index.html b/docs/graphs/gat/index.html
index 673796e9..ef133506 100644
--- a/docs/graphs/gat/index.html
+++ b/docs/graphs/gat/index.html
@@ -544,7 +544,7 @@
+We then normalize attention scores (or coefficients) $$\alpha_{ij} = \operatorname{softmax}_j(e_{ij}) = \frac{\exp(e_{ij})}{\sum_{k \in \mathcal{N}_i} \exp(e_{ik})}$$
where $\mathcal{N}_i$ is the set of nodes connected to $i$.
We do this by setting unconnected $e_{ij}$ to $-\infty$ which makes $\exp(e_{ij}) \sim 0$ for unconnected pairs.
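A minimal sketch in PyTorch of this masked-softmax normalization (the function name and the dense `[n_nodes, n_nodes]` tensor layout are illustrative assumptions, not the repository's code):

```python
import torch

def normalize_attention(e: torch.Tensor, adj: torch.Tensor) -> torch.Tensor:
    # e:   [n_nodes, n_nodes] raw attention scores e_ij
    # adj: [n_nodes, n_nodes] boolean adjacency, True where node j is connected to node i
    # Set e_ij = -inf for unconnected pairs, so exp(e_ij) is 0 after the softmax
    e = e.masked_fill(~adj, float('-inf'))
    # Softmax over each node's neighborhood N_i (the j dimension)
    return torch.softmax(e, dim=-1)
```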
diff --git a/docs/graphs/gatv2/index.html b/docs/graphs/gatv2/index.html
index bd776238..fbd1f7b4 100644
--- a/docs/graphs/gatv2/index.html
+++ b/docs/graphs/gatv2/index.html
@@ -646,7 +646,7 @@
+We then normalize attention scores (or coefficients) $$\alpha_{ij} = \operatorname{softmax}_j(e_{ij}) = \frac{\exp(e_{ij})}{\sum_{k \in \mathcal{N}_i} \exp(e_{ik})}$$
where $\mathcal{N}_i$ is the set of nodes connected to $i$.
We do this by setting unconnected $e_{ij}$ to $-\infty$ which makes $\exp(e_{ij}) \sim 0$ for unconnected pairs.
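As a quick toy check of the masking behaviour described above (an illustrative example, not taken from the repository): a pair whose score is set to $-\infty$ receives exactly zero attention weight after normalization.

```python
import torch

# Scores for two connected neighbours plus one unconnected pair masked to -inf
e = torch.tensor([2.0, 1.0, float('-inf')])
alpha = torch.softmax(e, dim=-1)
print(alpha)  # tensor([0.7311, 0.2689, 0.0000]) -- the unconnected pair contributes nothing
```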
diff --git a/docs/index.html b/docs/index.html
index 833d9c0f..223bce20 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -78,6 +78,7 @@
This implements Transformer XL model using relative multi-head attention
This implements Rotary Positional Embeddings (RoPE)
+This implements the Retrieval-Enhanced Transformer (RETRO).
This is an implementation of compressive transformer that extends upon Transformer XL by compressing oldest memories to give a longer attention span.
-106from .configs import TransformerConfigs
-107from .models import TransformerLayer, Encoder, Decoder, Generator, EncoderDecoder
-108from .mha import MultiHeadAttention
-109from labml_nn.transformers.xl.relative_mha import RelativeMultiHeadAttention
+109from .configs import TransformerConfigs
+110from .models import TransformerLayer, Encoder, Decoder, Generator, EncoderDecoder
+111from .mha import MultiHeadAttention
+112from labml_nn.transformers.xl.relative_mha import RelativeMultiHeadAttention