Files
Varuna Jayasiri dc4762161d Clean up LoRA
2024-08-02 15:32:02 +05:30

25 lines
724 B
Python

from labml.configs import BaseConfigs
class RWKVConfigs(BaseConfigs):
    """
    ## RWKV Configurations

    This defines configurations for an RWKV model.
    The configurations are calculated using option functions.
    These are lazy loaded and therefore only the necessary modules
    are calculated.
    """
    # Number of attention heads
    n_heads: int = 8
    # Model embedding size
    d_model: int = 512
    # Number of layers
    n_layers: int = 6
    # Dropout probability
    dropout: float = 0.1
    # Number of tokens in the source vocabulary (for token embeddings);
    # no default — must be supplied via an option function or by the caller
    n_src_vocab: int
    # Number of tokens in the target vocabulary (to generate logits for prediction);
    # no default — must be supplied via an option function or by the caller
    n_tgt_vocab: int