diff --git a/docs/index.html b/docs/index.html index 1c500ed6..a6e0faf1 100644 --- a/docs/index.html +++ b/docs/index.html @@ -90,7 +90,8 @@
  • MLP-Mixer: An all-MLP Architecture for Vision
  • Pay Attention to MLPs (gMLP)
  • Vision Transformer (ViT)
  • -
  • Primer EZ
  • +
  • Primer EZ
  • +
  • Hourglass
  • Recurrent Highway Networks

    LSTM

    HyperNetworks - HyperLSTM

    diff --git a/docs/transformers/index.html b/docs/transformers/index.html index dce7dcc4..33989c35 100644 --- a/docs/transformers/index.html +++ b/docs/transformers/index.html @@ -104,13 +104,15 @@

    This is an implementation of the paper An Image Is Worth 16x16 Words: Transformers For Image Recognition At Scale.

    Primer EZ

    This is an implementation of the paper Primer: Searching for Efficient Transformers for Language Modeling.

    +

    Hourglass

    +

    This is an implementation of the paper Hierarchical Transformers Are More Efficient Language Models.

    -
    98from .configs import TransformerConfigs
    -99from .models import TransformerLayer, Encoder, Decoder, Generator, EncoderDecoder
    -100from .mha import MultiHeadAttention
    -101from labml_nn.transformers.xl.relative_mha import RelativeMultiHeadAttention
    +
    103from .configs import TransformerConfigs
    +104from .models import TransformerLayer, Encoder, Decoder, Generator, EncoderDecoder
    +105from .mha import MultiHeadAttention
    +106from labml_nn.transformers.xl.relative_mha import RelativeMultiHeadAttention