This commit is contained in:
Varuna Jayasiri
2021-10-21 15:01:58 +05:30
parent d0c6b369fe
commit c5b13162cf
191 changed files with 908 additions and 80050 deletions

View File

@@ -44,9 +44,8 @@ from labml_nn.utils import clone_module_list
class ConvMixerLayer(Module):
"""
<a id="ConvMixerLayer">
<a id="ConvMixerLayer"></a>
## ConvMixer layer
</a>
This is a single ConvMixer layer. The model will have a series of these.
"""
@@ -100,9 +99,8 @@ class ConvMixerLayer(Module):
class PatchEmbeddings(Module):
"""
<a id="PatchEmbeddings">
<a id="PatchEmbeddings"></a>
## Get patch embeddings
</a>
This splits the image into patches of size $p \times p$ and gives an embedding for each patch.
"""
@@ -140,9 +138,8 @@ class PatchEmbeddings(Module):
class ClassificationHead(Module):
"""
<a id="ClassificationHead">
<a id="ClassificationHead"></a>
## Classification Head
</a>
This applies average pooling (taking the mean of all patch embeddings) followed by a final linear transformation
to predict the log-probabilities of the image classes.
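A minimal sketch of that pooling-plus-linear head, assuming a `[batch, d_model, h, w]` feature map; it returns class logits, with the log-probabilities coming from a log-softmax or cross-entropy loss downstream.

```python
import torch
import torch.nn as nn

class AvgPoolClassifierSketch(nn.Module):
    """Mean over all patch positions, then a linear layer to class logits."""
    def __init__(self, d_model: int, n_classes: int):
        super().__init__()
        self.linear = nn.Linear(d_model, n_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [batch, d_model, h, w] -> mean over spatial positions -> [batch, d_model]
        x = x.mean(dim=(2, 3))
        return self.linear(x)
```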

View File

@@ -23,9 +23,8 @@ from labml_nn.optimizers.configs import OptimizerConfigs
class MNISTConfigs(MNISTDatasetConfigs, TrainValidConfigs):
"""
<a id="MNISTConfigs">
<a id="MNISTConfigs"></a>
## Trainer configurations
</a>
"""
# Optimizer

View File

@@ -40,9 +40,8 @@ class CrossEntropyLoss(Module):
class NLPAutoRegressionConfigs(TrainValidConfigs):
"""
<a id="NLPAutoRegressionConfigs">
<a id="NLPAutoRegressionConfigs"></a>
## Trainer configurations
</a>
This has the basic configurations for NLP auto-regressive task training.
All the properties are configurable.

View File

@@ -28,9 +28,8 @@ from labml_nn.optimizers.configs import OptimizerConfigs
class NLPClassificationConfigs(TrainValidConfigs):
"""
<a id="NLPClassificationConfigs">
<a id="NLPClassificationConfigs"></a>
## Trainer configurations
</a>
This has the basic configurations for NLP classification task training.
All the properties are configurable.

View File

@@ -17,9 +17,8 @@ from labml_nn.optimizers.amsgrad import AMSGrad
class AdamWarmupCosineDecay(AMSGrad):
"""
<a id="EmbeddingsWithPositionalEncoding">
<a id="EmbeddingsWithPositionalEncoding"></a>
## Adam Optimizer with Warmup and Cosine Decay
</a>
This class extends the AMSGrad optimizer defined in [`amsgrad.py`](amsgrad.html).
"""

View File

@@ -17,9 +17,8 @@ from labml_nn.optimizers import WeightDecay
class OptimizerConfigs(BaseConfigs):
"""
<a id="OptimizerConfigs">
<a id="OptimizerConfigs"></a>
## Optimizer Configurations
</a>
"""
# Optimizer

View File

@@ -16,7 +16,10 @@ import numpy as np
class Game:
"""
## <a name="game-environment"></a>Game environment
<a name="game-environment"></a>
## Game environment
This is a wrapper for an OpenAI Gym game environment.
We do a few things here:

View File

@@ -20,9 +20,8 @@ from .models import EmbeddingsWithPositionalEncoding, EmbeddingsWithLearnedPosit
class FeedForwardConfigs(BaseConfigs):
"""
<a id="FFN">
<a id="FFN"></a>
## FFN Configurations
</a>
Creates a Position-wise FeedForward Network defined in
[`feed_forward.py`](feed_forward.html).
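For context, the kind of module these configurations construct is a position-wise feed-forward network applied independently at every token position. A minimal sketch with a plain ReLU follows; the GLU variants configured here (e.g. SwiGLU) replace this activation, and the dimensions are illustrative assumptions.

```python
import torch.nn as nn

class PositionWiseFFNSketch(nn.Module):
    """Two linear layers applied independently at each position."""
    def __init__(self, d_model: int, d_ff: int, dropout: float = 0.1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_ff, d_model),
        )

    def forward(self, x):
        # x: [..., d_model] -> [..., d_model]
        return self.net(x)
```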
@@ -143,10 +142,9 @@ aggregate(FeedForwardConfigs.glu_variant, 'SwiGLU',
class TransformerConfigs(BaseConfigs):
"""
<a id="TransformerConfigs">
<a id="TransformerConfigs"></a>
## Transformer Configurations
</a>
This defines configurations for a transformer.
The configurations are calculated using option functions.
These are lazy loaded and therefore only the necessary modules

View File

@@ -311,9 +311,8 @@ class FeedbackTransformer(Module):
return self.norm(res)
# <a id="shared_kv">
# <a id="shared_kv"></a>
# # Shared keys and values among layers
# </a>
class StackFunction(torch.autograd.Function):
"""

View File

@@ -29,9 +29,8 @@ from labml_helpers.module import Module
class PrepareForMultiHeadAttention(Module):
"""
<a id="PrepareMHA">
<a id="PrepareMHA"></a>
## Prepare for multi-head attention
</a>
This module does a linear transformation and splits the vector into a given
number of heads for multi-head attention.
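A minimal sketch of this projection-and-split step, assuming a `[seq_len, batch, d_model]` input layout; the actual module also handles bias options and other details.

```python
import torch
import torch.nn as nn

class PrepareForMHASketch(nn.Module):
    """Linear projection followed by a reshape into `(..., heads, d_k)`."""
    def __init__(self, d_model: int, heads: int, d_k: int):
        super().__init__()
        self.linear = nn.Linear(d_model, heads * d_k)
        self.heads, self.d_k = heads, d_k

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [seq_len, batch, d_model] -> [seq_len, batch, heads, d_k]
        head_shape = x.shape[:-1]
        x = self.linear(x)
        return x.view(*head_shape, self.heads, self.d_k)
```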
@@ -65,9 +64,8 @@ class PrepareForMultiHeadAttention(Module):
class MultiHeadAttention(Module):
r"""
<a id="MHA">
<a id="MHA"></a>
## Multi-Head Attention Module
</a>
This computes scaled multi-headed attention for the given `query`, `key` and `value` vectors.
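The per-head computation is the standard scaled dot-product attention, $\mathrm{softmax}\left(\frac{QK^\top}{\sqrt{d_k}}\right)V$. A single-head sketch follows; the real module adds head splitting, mask handling and dropout.

```python
import math
import torch

def scaled_dot_product_attention(q, k, v, mask=None):
    """softmax(QK^T / sqrt(d_k)) V -- a minimal single-head sketch."""
    d_k = q.shape[-1]
    # Similarity scores between queries and keys, scaled by sqrt(d_k)
    scores = q @ k.transpose(-2, -1) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, float('-inf'))
    attn = torch.softmax(scores, dim=-1)
    # Weighted sum of the values
    return attn @ v
```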

View File

@@ -22,9 +22,8 @@ from .positional_encoding import get_positional_encoding
class EmbeddingsWithPositionalEncoding(Module):
"""
<a id="EmbeddingsWithPositionalEncoding">
<a id="EmbeddingsWithPositionalEncoding"></a>
## Embed tokens and add [fixed positional encoding](positional_encoding.html)
</a>
"""
def __init__(self, d_model: int, n_vocab: int, max_len: int = 5000):
@@ -40,9 +39,8 @@ class EmbeddingsWithPositionalEncoding(Module):
class EmbeddingsWithLearnedPositionalEncoding(Module):
"""
<a id="EmbeddingsWithLearnedPositionalEncoding">
<a id="EmbeddingsWithLearnedPositionalEncoding"></a>
## Embed tokens and add parameterized positional encodings
</a>
"""
def __init__(self, d_model: int, n_vocab: int, max_len: int = 5000):
@@ -58,9 +56,8 @@ class EmbeddingsWithLearnedPositionalEncoding(Module):
class TransformerLayer(Module):
"""
<a id="TransformerLayer">
<a id="TransformerLayer"></a>
## Transformer Layer
</a>
This can act as an encoder layer or a decoder layer.
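A rough sketch of such a layer, using `nn.MultiheadAttention` for brevity: self-attention plus an optional cross-attention block (used only in the decoder role) and a feed-forward network, each with a residual connection. The pre-norm arrangement and dimensions are assumptions for illustration.

```python
import torch.nn as nn

class TransformerLayerSketch(nn.Module):
    """Self-attention, optional cross-attention (decoder role), then an FFN,
    each wrapped in a residual connection (pre-norm, illustrative)."""
    def __init__(self, d_model: int, n_heads: int, d_ff: int, dropout: float = 0.1):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
        self.src_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
        self.ffn = nn.Sequential(nn.Linear(d_model, d_ff), nn.ReLU(), nn.Linear(d_ff, d_model))
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)

    def forward(self, x, src=None):
        # Self-attention with a residual connection
        z = self.norm1(x)
        x = x + self.self_attn(z, z, z, need_weights=False)[0]
        # Cross-attention over encoder outputs, only when acting as a decoder layer
        if src is not None:
            z = self.norm2(x)
            x = x + self.src_attn(z, src, src, need_weights=False)[0]
        # Position-wise feed-forward network
        return x + self.ffn(self.norm3(x))
```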
@@ -138,9 +135,8 @@ class TransformerLayer(Module):
class Encoder(Module):
"""
<a id="Encoder">
<a id="Encoder"></a>
## Transformer Encoder
</a>
"""
def __init__(self, layer: TransformerLayer, n_layers: int):
@@ -160,9 +156,8 @@ class Encoder(Module):
class Decoder(Module):
"""
<a id="Decoder">
<a id="Decoder"></a>
## Transformer Decoder
</a>
"""
def __init__(self, layer: TransformerLayer, n_layers: int):
@@ -182,9 +177,8 @@ class Decoder(Module):
class Generator(Module):
"""
<a id="Generator">
<a id="Generator"></a>
## Generator
</a>
This predicts the tokens and gives the log softmax of those.
You don't need this if you are using `nn.CrossEntropyLoss`.
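A minimal sketch of such a generator: a linear projection to vocabulary size followed by a log-softmax (names and layout are illustrative).

```python
import torch.nn as nn
import torch.nn.functional as F

class GeneratorSketch(nn.Module):
    """Project decoder outputs to vocabulary logits and take the log-softmax."""
    def __init__(self, d_model: int, n_vocab: int):
        super().__init__()
        self.projection = nn.Linear(d_model, n_vocab)

    def forward(self, x):
        # x: [..., d_model] -> log-probabilities over the vocabulary
        return F.log_softmax(self.projection(x), dim=-1)
```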
@@ -200,9 +194,8 @@ class Generator(Module):
class EncoderDecoder(Module):
"""
<a id="EncoderDecoder">
<a id="EncoderDecoder"></a>
## Combined Encoder-Decoder
</a>
"""
def __init__(self, encoder: Encoder, decoder: Decoder, src_embed: Module, tgt_embed: Module, generator: Module):

View File

@@ -52,9 +52,8 @@ from labml_nn.utils import clone_module_list
class PatchEmbeddings(Module):
"""
<a id="PatchEmbeddings">
<a id="PatchEmbeddings"></a>
## Get patch embeddings
</a>
The paper splits the image into patches of equal size and does a linear transformation
on the flattened pixels of each patch.
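A minimal sketch of that patch-embedding step; a convolution with kernel size and stride equal to the patch size is equivalent to flattening each patch and applying a shared linear layer. Layout and names here are assumptions.

```python
import torch
import torch.nn as nn

class PatchEmbeddingsSketch(nn.Module):
    """Split an image into p x p patches and linearly embed each patch."""
    def __init__(self, d_model: int, patch_size: int, in_channels: int = 3):
        super().__init__()
        # Kernel = stride = patch size, so each patch is embedded independently
        self.conv = nn.Conv2d(in_channels, d_model, patch_size, stride=patch_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [batch, channels, h, w] -> [batch, d_model, h/p, w/p]
        x = self.conv(x)
        # Flatten the spatial grid into a sequence of patch embeddings
        return x.flatten(2).transpose(1, 2)  # [batch, n_patches, d_model]
```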
@@ -93,9 +92,8 @@ class PatchEmbeddings(Module):
class LearnedPositionalEmbeddings(Module):
"""
<a id="LearnedPositionalEmbeddings">
<a id="LearnedPositionalEmbeddings"></a>
## Add parameterized positional encodings
</a>
This adds learned positional embeddings to patch embeddings.
"""
@@ -121,9 +119,8 @@ class LearnedPositionalEmbeddings(Module):
class ClassificationHead(Module):
"""
<a id="ClassificationHead">
<a id="ClassificationHead"></a>
## MLP Classification Head
</a>
This is the two-layer MLP head to classify the image based on the `[CLS]` token embedding.
"""

View File

@@ -5,9 +5,8 @@ from labml.configs import BaseConfigs, option
class TokenizerConfigs(BaseConfigs):
"""
<a id="OptimizerConfigs">
## Optimizer Configurations
</a>
<a id="TokenizerConfigs"></a>
## Tokenizer Configurations
"""
tokenizer: Callable = 'character'