Mirror of https://github.com/labmlai/annotated_deep_learning_paper_implementations.git, synced 2025-10-31 10:48:49 +08:00
Commit: hyper parameters
@@ -74,7 +74,7 @@ class Configs(NLPAutoRegressionConfigs):
     model: AutoregressiveTransformer

     # Number of layers
-    n_layers: int = 64
+    n_layers: int = 32

     # $\alpha$ and $\beta$ for DeepNorm
     deep_norm_alpha: float
@@ -153,9 +153,13 @@ def main():
         # Switch between training and validation for $10$ times per epoch
         'inner_iterations': 10,

+        # Number of layers
+        'n_layers': 50,
+
+
         # Adam optimizer with no warmup
         'optimizer.optimizer': 'Adam',
-        'optimizer.learning_rate': 3e-4,
+        'optimizer.learning_rate': 1.25e-4,
     })

     # Set model(s) for saving and loading
Author: Varuna Jayasiri