mirror of https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-08-14 09:31:42 +08:00
from labml.configs import BaseConfigs


class RWKVConfigs(BaseConfigs):
    """
    ## Transformer Configurations

    This defines configurations for a transformer.
    The configurations are calculated using option functions.
    These are lazily loaded, and therefore only the necessary
    modules are calculated.
    """
    # Number of attention heads
    n_heads: int = 8
    # Transformer embedding size
    d_model: int = 512
    # Number of layers
    n_layers: int = 6
    # Dropout probability
    dropout: float = 0.1
    # Number of tokens in the source vocabulary (for token embeddings)
    n_src_vocab: int
    # Number of tokens in the target vocabulary (to generate logits for prediction)
    n_tgt_vocab: int
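
The docstring above refers to labml's option functions: calculators registered for config items that run lazily, only when an item is actually needed. Below is a minimal sketch of how one could be attached to this class; it is illustrative only, and the `_n_src_vocab` helper and the choice to tie the source vocabulary size to the target vocabulary size are assumptions, not part of the original file.

from labml.configs import option


# Hypothetical example: register a lazy calculator for `n_src_vocab`.
# labml runs this function only if `n_src_vocab` is actually accessed,
# which is the lazy evaluation the docstring describes.
@option(RWKVConfigs.n_src_vocab)
def _n_src_vocab(c: RWKVConfigs):
    # Assumption for this sketch: source and target share one vocabulary
    return c.n_tgt_vocab

In the usual labml workflow, explicit values are supplied with `experiment.configs(conf, {...})`; any item without an explicit value falls back to its default or to a registered option function such as the one above.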