Mirror of https://github.com/labmlai/annotated_deep_learning_paper_implementations.git (synced 2025-10-31 02:39:16 +08:00)

Commit: anchors
@@ -44,9 +44,8 @@ from labml_nn.utils import clone_module_list
 
 class ConvMixerLayer(Module):
     """
-    <a id="ConvMixerLayer">
+    <a id="ConvMixerLayer"></a>
     ## ConvMixer layer
-    </a>
 
     This is a single ConvMixer layer. The model will have a series of these.
     """
@@ -100,9 +99,8 @@ class ConvMixerLayer(Module):
 
 class PatchEmbeddings(Module):
     """
-    <a id="PatchEmbeddings">
+    <a id="PatchEmbeddings"></a>
     ## Get patch embeddings
-    </a>
 
     This splits the image into patches of size $p \times p$ and gives an embedding for each patch.
     """
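The docstring above describes splitting an image into $p \times p$ patches and embedding each one. As a rough sketch (not part of this commit, and not the repository's exact code — the class name, sizes, and the use of a strided convolution are illustrative assumptions), the idea can be written as:

```python
import torch
import torch.nn as nn

class PatchEmbeddingsSketch(nn.Module):
    """Illustrative sketch: embed each p x p patch of the image."""
    def __init__(self, in_channels: int = 3, d_model: int = 256, patch_size: int = 7):
        super().__init__()
        # A convolution whose kernel size and stride both equal the patch size
        # is equivalent to a linear projection of each non-overlapping patch.
        self.conv = nn.Conv2d(in_channels, d_model, kernel_size=patch_size, stride=patch_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [batch, channels, height, width] -> [batch, d_model, height / p, width / p]
        return self.conv(x)

# e.g. a 28x28 image with patch size 7 gives a 4x4 grid of patch embeddings
emb = PatchEmbeddingsSketch()(torch.randn(1, 3, 28, 28))
print(emb.shape)  # torch.Size([1, 256, 4, 4])
```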
@@ -140,9 +138,8 @@ class PatchEmbeddings(Module):
 
 class ClassificationHead(Module):
     """
-    <a id="ClassificationHead">
+    <a id="ClassificationHead"></a>
     ## Classification Head
-    </a>
 
     They do average pooling (taking the mean of all patch embeddings) and a final linear transformation
     to predict the log-probabilities of the image classes.
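The head described above is just mean pooling over patch embeddings followed by a linear layer. A minimal sketch under assumed dimensions (the class and parameter names are hypothetical, not the commit's code):

```python
import torch
import torch.nn as nn

class ClassificationHeadSketch(nn.Module):
    """Illustrative sketch: average-pool patch embeddings, then classify."""
    def __init__(self, d_model: int = 256, n_classes: int = 10):
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d((1, 1))   # mean over all patches
        self.linear = nn.Linear(d_model, n_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [batch, d_model, h, w] -> [batch, d_model]
        x = self.pool(x).flatten(1)
        # final linear transformation; log-probabilities via log-softmax
        return torch.log_softmax(self.linear(x), dim=-1)
```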
@@ -23,9 +23,8 @@ from labml_nn.optimizers.configs import OptimizerConfigs
 
 class MNISTConfigs(MNISTDatasetConfigs, TrainValidConfigs):
     """
-    <a id="MNISTConfigs">
+    <a id="MNISTConfigs"></a>
     ## Trainer configurations
-    </a>
     """
 
     # Optimizer
@@ -40,9 +40,8 @@ class CrossEntropyLoss(Module):
 
 class NLPAutoRegressionConfigs(TrainValidConfigs):
     """
-    <a id="NLPAutoRegressionConfigs">
+    <a id="NLPAutoRegressionConfigs"></a>
     ## Trainer configurations
-    </a>
 
     This has the basic configurations for NLP auto-regressive task training.
     All the properties are configurable.
@@ -28,9 +28,8 @@ from labml_nn.optimizers.configs import OptimizerConfigs
 
 class NLPClassificationConfigs(TrainValidConfigs):
     """
-    <a id="NLPClassificationConfigs">
+    <a id="NLPClassificationConfigs"></a>
     ## Trainer configurations
-    </a>
 
     This has the basic configurations for NLP classification task training.
     All the properties are configurable.
@@ -17,9 +17,8 @@ from labml_nn.optimizers.amsgrad import AMSGrad
 
 class AdamWarmupCosineDecay(AMSGrad):
     """
-    <a id="EmbeddingsWithPositionalEncoding">
+    <a id="EmbeddingsWithPositionalEncoding"></a>
     ## Adam Optimizer with Warmup and Cosine Decay
-    </a>
 
     This class extends from AMSGrad optimizer defined in [`amsgrad.py`](amsgrad.html).
     """
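As background for the class named in this hunk, one common way to combine linear warmup with cosine decay is sketched below; this is a generic formulation, not necessarily the exact schedule implemented by `AdamWarmupCosineDecay`, and the function name and parameters are made up for illustration:

```python
import math

def warmup_cosine_lr(step: int, base_lr: float, warmup_steps: int, total_steps: int) -> float:
    """A common warmup + cosine-decay learning-rate schedule (illustrative sketch)."""
    if step < warmup_steps:
        # linear warmup from 0 up to base_lr
        return base_lr * step / max(1, warmup_steps)
    # cosine decay from base_lr down to 0 over the remaining steps
    progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
    return base_lr * 0.5 * (1.0 + math.cos(math.pi * progress))
```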
@@ -17,9 +17,8 @@ from labml_nn.optimizers import WeightDecay
 
 class OptimizerConfigs(BaseConfigs):
     """
-    <a id="OptimizerConfigs">
+    <a id="OptimizerConfigs"></a>
     ## Optimizer Configurations
-    </a>
     """
 
     # Optimizer
@@ -16,7 +16,10 @@ import numpy as np
 
 class Game:
     """
-    ## <a name="game-environment"></a>Game environment
+    <a name="game-environment"></a>
+
+    ## Game environment
+
     This is a wrapper for OpenAI gym game environment.
     We do a few things here:
 
@@ -20,9 +20,8 @@ from .models import EmbeddingsWithPositionalEncoding, EmbeddingsWithLearnedPosit
 
 class FeedForwardConfigs(BaseConfigs):
     """
-    <a id="FFN">
+    <a id="FFN"></a>
     ## FFN Configurations
-    </a>
 
     Creates a Position-wise FeedForward Network defined in
     [`feed_forward.py`](feed_forward.html).
@@ -143,10 +142,9 @@ aggregate(FeedForwardConfigs.glu_variant, 'SwiGLU',
 
 class TransformerConfigs(BaseConfigs):
     """
-    <a id="TransformerConfigs">
+    <a id="TransformerConfigs"></a>
     ## Transformer Configurations
-    </a>
 
     This defines configurations for a transformer.
     The configurations are calculate using option functions.
     These are lazy loaded and therefore only the necessary modules
@@ -311,9 +311,8 @@ class FeedbackTransformer(Module):
         return self.norm(res)
 
 
-# <a id="shared_kv">
+# <a id="shared_kv"></a>
 # # Shared keys and values among layers
-# </a>
 
 class StackFunction(torch.autograd.Function):
     """
@@ -29,9 +29,8 @@ from labml_helpers.module import Module
 
 class PrepareForMultiHeadAttention(Module):
     """
-    <a id="PrepareMHA">
+    <a id="PrepareMHA"></a>
     ## Prepare for multi-head attention
-    </a>
 
     This module does a linear transformation and splits the vector into given
     number of heads for multi-head attention.
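The docstring above describes one linear transformation whose output is split into heads. A minimal sketch of that idea (class name, shapes, and defaults here are assumptions for illustration, not the repository's exact module):

```python
import torch
import torch.nn as nn

class PrepareHeadsSketch(nn.Module):
    """Illustrative sketch: project a vector and split it into `heads` vectors of size `d_k`."""
    def __init__(self, d_model: int = 512, heads: int = 8, d_k: int = 64, bias: bool = True):
        super().__init__()
        self.linear = nn.Linear(d_model, heads * d_k, bias=bias)
        self.heads, self.d_k = heads, d_k

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [seq_len, batch, d_model] -> [seq_len, batch, heads, d_k]
        head_shape = x.shape[:-1]
        x = self.linear(x)
        return x.view(*head_shape, self.heads, self.d_k)
```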
@@ -65,9 +64,8 @@ class PrepareForMultiHeadAttention(Module):
 
 class MultiHeadAttention(Module):
     r"""
-    <a id="MHA">
+    <a id="MHA"></a>
     ## Multi-Head Attention Module
-    </a>
 
     This computes scaled multi-headed attention for given `query`, `key` and `value` vectors.
 
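For reference, the scaled attention the docstring mentions is $\mathrm{softmax}\left(\frac{QK^\top}{\sqrt{d_k}}\right)V$. A small free-standing sketch of that computation (not the module in this commit; the function name and tensor layout are assumptions):

```python
import math
import torch

def scaled_dot_product_attention(q, k, v, mask=None):
    """Illustrative sketch: softmax(Q K^T / sqrt(d_k)) V for [batch, heads, seq, d_k] tensors."""
    d_k = q.shape[-1]
    # attention scores between every query and key position
    scores = torch.einsum('bhid,bhjd->bhij', q, k) / math.sqrt(d_k)
    if mask is not None:
        # block masked-out positions before the softmax
        scores = scores.masked_fill(mask == 0, float('-inf'))
    attn = torch.softmax(scores, dim=-1)
    # weighted sum of the values
    return torch.einsum('bhij,bhjd->bhid', attn, v)
```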
@@ -22,9 +22,8 @@ from .positional_encoding import get_positional_encoding
 
 class EmbeddingsWithPositionalEncoding(Module):
     """
-    <a id="EmbeddingsWithPositionalEncoding">
+    <a id="EmbeddingsWithPositionalEncoding"></a>
     ## Embed tokens and add [fixed positional encoding](positional_encoding.html)
-    </a>
     """
 
     def __init__(self, d_model: int, n_vocab: int, max_len: int = 5000):
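As an aside to the hunk above, "fixed positional encoding" refers to the sinusoidal encodings of the original Transformer paper. A self-contained sketch of token embedding plus fixed encodings, under assumed names and an even `d_model` (not the repository's exact code):

```python
import math
import torch
import torch.nn as nn

def sinusoidal_encoding(d_model: int, max_len: int = 5000) -> torch.Tensor:
    """Illustrative sketch: fixed sinusoidal encodings, shape [max_len, 1, d_model] (even d_model assumed)."""
    pe = torch.zeros(max_len, d_model)
    position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float32) * (-math.log(10000.0) / d_model))
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    return pe.unsqueeze(1)

class EmbedWithFixedPositionsSketch(nn.Module):
    def __init__(self, d_model: int, n_vocab: int, max_len: int = 5000):
        super().__init__()
        self.emb = nn.Embedding(n_vocab, d_model)
        self.d_model = d_model
        self.register_buffer('pe', sinusoidal_encoding(d_model, max_len))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [seq_len, batch] of token ids; scale embeddings and add the fixed encodings
        return self.emb(x) * math.sqrt(self.d_model) + self.pe[:x.shape[0]]
```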
@@ -40,9 +39,8 @@ class EmbeddingsWithPositionalEncoding(Module):
 
 class EmbeddingsWithLearnedPositionalEncoding(Module):
     """
-    <a id="EmbeddingsWithLearnedPositionalEncoding">
+    <a id="EmbeddingsWithLearnedPositionalEncoding"></a>
     ## Embed tokens and add parameterized positional encodings
-    </a>
     """
 
     def __init__(self, d_model: int, n_vocab: int, max_len: int = 5000):
@@ -58,9 +56,8 @@ class EmbeddingsWithLearnedPositionalEncoding(Module):
 
 class TransformerLayer(Module):
     """
-    <a id="TransformerLayer">
+    <a id="TransformerLayer"></a>
     ## Transformer Layer
-    </a>
 
     This can act as an encoder layer or a decoder layer.
 
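To make "encoder layer or decoder layer" concrete, below is a rough pre-norm layer sketch. It uses `nn.MultiheadAttention` as a stand-in for the repository's own attention module, shows only the self-attention path (a decoder layer would add a cross-attention sub-block after it), and all names and defaults are illustrative assumptions:

```python
import torch
import torch.nn as nn

class TransformerLayerSketch(nn.Module):
    """Illustrative sketch: pre-norm self-attention followed by a feed-forward block."""
    def __init__(self, d_model: int = 512, heads: int = 8, d_ff: int = 2048, dropout: float = 0.1):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, heads, dropout=dropout)
        self.ff = nn.Sequential(nn.Linear(d_model, d_ff), nn.ReLU(), nn.Linear(d_ff, d_model))
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
        # x: [seq_len, batch, d_model]
        z = self.norm1(x)
        attn_out, _ = self.self_attn(z, z, z, attn_mask=mask)
        x = x + self.dropout(attn_out)          # residual connection around attention
        z = self.norm2(x)
        return x + self.dropout(self.ff(z))     # residual connection around the FFN
```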
@@ -138,9 +135,8 @@ class TransformerLayer(Module):
 
 class Encoder(Module):
     """
-    <a id="Encoder">
+    <a id="Encoder"></a>
     ## Transformer Encoder
-    </a>
     """
 
     def __init__(self, layer: TransformerLayer, n_layers: int):
@@ -160,9 +156,8 @@ class Encoder(Module):
 
 class Decoder(Module):
     """
-    <a id="Decoder">
+    <a id="Decoder"></a>
     ## Transformer Decoder
-    </a>
     """
 
     def __init__(self, layer: TransformerLayer, n_layers: int):
@@ -182,9 +177,8 @@ class Decoder(Module):
 
 class Generator(Module):
     """
-    <a id="Generator">
+    <a id="Generator"></a>
     ## Generator
-    </a>
 
     This predicts the tokens and gives the lof softmax of those.
     You don't need this if you are using `nn.CrossEntropyLoss`.
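The generator described above is a single projection to the vocabulary followed by log-softmax. A minimal sketch with assumed names and sizes (not the commit's code):

```python
import torch
import torch.nn as nn

class GeneratorSketch(nn.Module):
    """Illustrative sketch: project decoder outputs to vocabulary logits and take log-softmax."""
    def __init__(self, d_model: int = 512, n_vocab: int = 10000):
        super().__init__()
        self.projection = nn.Linear(d_model, n_vocab)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Not needed with nn.CrossEntropyLoss, which applies log-softmax internally.
        return torch.log_softmax(self.projection(x), dim=-1)
```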
@@ -200,9 +194,8 @@ class Generator(Module):
 
 class EncoderDecoder(Module):
     """
-    <a id="EncoderDecoder">
+    <a id="EncoderDecoder"></a>
     ## Combined Encoder-Decoder
-    </a>
     """
 
     def __init__(self, encoder: Encoder, decoder: Decoder, src_embed: Module, tgt_embed: Module, generator: Module):
@@ -52,9 +52,8 @@ from labml_nn.utils import clone_module_list
 
 class PatchEmbeddings(Module):
     """
-    <a id="PatchEmbeddings">
+    <a id="PatchEmbeddings"></a>
     ## Get patch embeddings
-    </a>
 
     The paper splits the image into patches of equal size and do a linear transformation
     on the flattened pixels for each patch.
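For this ViT-style patch embedding, the linear transformation over flattened patch pixels can again be expressed as a strided convolution. A sketch under assumed names and sizes (not the repository's exact code):

```python
import torch
import torch.nn as nn

class ViTPatchEmbeddingsSketch(nn.Module):
    """Illustrative sketch: split the image into equal patches and linearly project each one."""
    def __init__(self, d_model: int = 768, patch_size: int = 16, in_channels: int = 3):
        super().__init__()
        # equivalent to flattening each patch and applying a shared linear layer
        self.conv = nn.Conv2d(in_channels, d_model, kernel_size=patch_size, stride=patch_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [batch, channels, h, w] -> [n_patches, batch, d_model]
        x = self.conv(x)                   # [batch, d_model, h / p, w / p]
        x = x.flatten(2).permute(2, 0, 1)  # [n_patches, batch, d_model]
        return x
```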
@@ -93,9 +92,8 @@ class PatchEmbeddings(Module):
 
 class LearnedPositionalEmbeddings(Module):
     """
-    <a id="LearnedPositionalEmbeddings">
+    <a id="LearnedPositionalEmbeddings"></a>
     ## Add parameterized positional encodings
-    </a>
 
     This adds learned positional embeddings to patch embeddings.
     """
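"Learned positional embeddings" here means a trainable table indexed by position that is added to the patch embeddings. A minimal sketch with assumed names and shapes (not the commit's code):

```python
import torch
import torch.nn as nn

class LearnedPositionsSketch(nn.Module):
    """Illustrative sketch: add a trainable positional embedding to each patch embedding."""
    def __init__(self, d_model: int = 768, max_len: int = 5000):
        super().__init__()
        # one learnable vector per position, broadcast over the batch dimension
        self.positional_encodings = nn.Parameter(torch.zeros(max_len, 1, d_model))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [n_patches, batch, d_model]; slice the table to the sequence length and add
        return x + self.positional_encodings[:x.shape[0]]
```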
@@ -121,9 +119,8 @@ class LearnedPositionalEmbeddings(Module):
 
 class ClassificationHead(Module):
     """
-    <a id="ClassificationHead">
+    <a id="ClassificationHead"></a>
     ## MLP Classification Head
-    </a>
 
     This is the two layer MLP head to classify the image based on `[CLS]` token embedding.
     """
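The two-layer MLP head on the `[CLS]` token can be sketched as follows; the names, hidden size, and activation are assumptions for illustration, not the repository's exact code:

```python
import torch
import torch.nn as nn

class MLPHeadSketch(nn.Module):
    """Illustrative sketch: two-layer MLP that classifies the image from the [CLS] token embedding."""
    def __init__(self, d_model: int = 768, d_hidden: int = 2048, n_classes: int = 1000):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(d_model, d_hidden),
            nn.ReLU(),
            nn.Linear(d_hidden, n_classes),
        )

    def forward(self, cls_token: torch.Tensor) -> torch.Tensor:
        # cls_token: [batch, d_model] -> class scores [batch, n_classes]
        return self.mlp(cls_token)
```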
@@ -5,9 +5,8 @@ from labml.configs import BaseConfigs, option
 
 class TokenizerConfigs(BaseConfigs):
     """
-    <a id="OptimizerConfigs">
-    ## Optimizer Configurations
-    </a>
+    <a id="TokenizerConfigs"></a>
+    ## Tokenizer Configurations
     """
 
     tokenizer: Callable = 'character'