mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-10-30 02:08:50 +08:00
hyper parameters
This commit is contained in:
@ -74,7 +74,7 @@ class Configs(NLPAutoRegressionConfigs):
|
|||||||
model: AutoregressiveTransformer
|
model: AutoregressiveTransformer
|
||||||
|
|
||||||
# Number of layers
|
# Number of layers
|
||||||
n_layers: int = 64
|
n_layers: int = 32
|
||||||
|
|
||||||
# $\alpha$ and $\beta$ for DeepNorm
|
# $\alpha$ and $\beta$ for DeepNorm
|
||||||
deep_norm_alpha: float
|
deep_norm_alpha: float
|
||||||
@ -153,9 +153,13 @@ def main():
|
|||||||
# Switch between training and validation for $10$ times per epoch
|
# Switch between training and validation for $10$ times per epoch
|
||||||
'inner_iterations': 10,
|
'inner_iterations': 10,
|
||||||
|
|
||||||
|
# Number of layers
|
||||||
|
'n_layers': 50,
|
||||||
|
|
||||||
|
|
||||||
# Adam optimizer with no warmup
|
# Adam optimizer with no warmup
|
||||||
'optimizer.optimizer': 'Adam',
|
'optimizer.optimizer': 'Adam',
|
||||||
'optimizer.learning_rate': 3e-4,
|
'optimizer.learning_rate': 1.25e-4,
|
||||||
})
|
})
|
||||||
|
|
||||||
# Set model(s) for saving and loading
|
# Set model(s) for saving and loading
|
||||||
|
|||||||
Reference in New Issue
Block a user