From 3a54a2099d46db7cc59daff64a7cadc41c77c2db Mon Sep 17 00:00:00 2001
From: Varuna Jayasiri
Date: Sat, 6 Nov 2021 14:29:38 +0530
Subject: [PATCH] links

---
 docs/index.html                   |  3 ++-
 docs/transformers/index.html      | 10 ++++++----
 labml_nn/__init__.py              |  1 +
 labml_nn/transformers/__init__.py |  5 +++++
 readme.md                         |  1 +
 5 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/docs/index.html b/docs/index.html
index 1c500ed6..a6e0faf1 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -90,7 +90,8 @@
  • MLP-Mixer: An all-MLP Architecture for Vision
  • Pay Attention to MLPs (gMLP)
  • Vision Transformer (ViT)
- • Primer EZ
+ • Primer EZ
+ • Hourglass
  • Recurrent Highway Networks

    LSTM

    HyperNetworks - HyperLSTM

diff --git a/docs/transformers/index.html b/docs/transformers/index.html
index dce7dcc4..33989c35 100644
--- a/docs/transformers/index.html
+++ b/docs/transformers/index.html
@@ -104,13 +104,15 @@

    This is an implementation of the paper An Image Is Worth 16x16 Words: Transformers For Image Recognition At Scale.

    Primer EZ

    This is an implementation of the paper Primer: Searching for Efficient Transformers for Language Modeling.

+    Hourglass
+
+    This is an implementation of the paper Hierarchical Transformers Are More Efficient Language Models

-98   from .configs import TransformerConfigs
-99   from .models import TransformerLayer, Encoder, Decoder, Generator, EncoderDecoder
-100  from .mha import MultiHeadAttention
-101  from labml_nn.transformers.xl.relative_mha import RelativeMultiHeadAttention
+103  from .configs import TransformerConfigs
+104  from .models import TransformerLayer, Encoder, Decoder, Generator, EncoderDecoder
+105  from .mha import MultiHeadAttention
+106  from labml_nn.transformers.xl.relative_mha import RelativeMultiHeadAttention
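A minimal usage sketch for the package-level exports shown in the renumbered listing above. The constructor arguments MultiHeadAttention(heads, d_model) and the [seq_len, batch_size, d_model] input layout are assumptions based on the labml_nn documentation, not something this patch confirms.

    # Sketch: self-attention with the re-exported MultiHeadAttention.
    # Assumes MultiHeadAttention(heads, d_model) and query/key/value tensors
    # shaped [seq_len, batch_size, d_model]; check the labml_nn docs for the exact API.
    import torch
    from labml_nn.transformers import MultiHeadAttention

    seq_len, batch_size, d_model = 16, 2, 512
    mha = MultiHeadAttention(heads=8, d_model=d_model)

    x = torch.randn(seq_len, batch_size, d_model)
    out = mha(query=x, key=x, value=x)   # self-attention: q, k and v are the same tensor
    print(out.shape)                     # expected: torch.Size([16, 2, 512])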