diff --git a/docs/index.html b/docs/index.html index 1c500ed6..a6e0faf1 100644 --- a/docs/index.html +++ b/docs/index.html @@ -90,7 +90,8 @@
This is an implementation of the paper An Image Is Worth 16x16 Words: Transformers For Image Recognition At Scale.
This is an implementation of the paper Primer: Searching for Efficient Transformers for Language Modeling.
+This is an implementation of the paper Hierarchical Transformers Are More Efficient Language Models.
98from .configs import TransformerConfigs
-99from .models import TransformerLayer, Encoder, Decoder, Generator, EncoderDecoder
-100from .mha import MultiHeadAttention
-101from labml_nn.transformers.xl.relative_mha import RelativeMultiHeadAttention
103from .configs import TransformerConfigs
+104from .models import TransformerLayer, Encoder, Decoder, Generator, EncoderDecoder
+105from .mha import MultiHeadAttention
+106from labml_nn.transformers.xl.relative_mha import RelativeMultiHeadAttention