Mirror of https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
unoptimized adam
@@ -168,6 +168,7 @@ class Adam(GenericAdaptiveOptimizer):
        # Bias correction term for $\hat{v}_t$, $1 - \beta_2^t$
        bias_correction2 = 1 - beta2 ** state['step']

        # Whether to optimize the computation
        if self.optimized_update:
            # $\sqrt{v_t} + \hat{\epsilon}$
            denominator = v.sqrt().add_(group['eps'])
@@ -176,6 +177,7 @@ class Adam(GenericAdaptiveOptimizer):
            # $\theta_t \leftarrow \theta_{t-1} - \alpha \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \cdot
            #  \frac{m_t}{\sqrt{v_t} + \hat{\epsilon}}$
            param.data.addcdiv_(m, denominator, value=-step_size)
        # Computation without optimization
        else:
            # $\frac{\sqrt{v_t}}{\sqrt{1-\beta_2^t}} + \epsilon$
            denominator = (v.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
@@ -185,7 +187,6 @@ class Adam(GenericAdaptiveOptimizer):
            # \frac{\hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon}$
            param.data.addcdiv_(m, denominator, value=-step_size)

    def step_param(self, state: Dict[str, any], group: Dict[str, any], grad: torch.Tensor, param: torch.nn.Parameter):
        """
        ### Take an update step for a given parameter tensor
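For reference, the two branches in the diff compute the same step up to how $\epsilon$ enters the denominator. Multiplying the numerator and denominator of the unoptimized form by $\sqrt{1-\beta_2^t}$ gives

$$\frac{\alpha}{1-\beta_1^t} \cdot \frac{m_t}{\sqrt{v_t}/\sqrt{1-\beta_2^t} + \epsilon}
= \alpha \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \cdot \frac{m_t}{\sqrt{v_t} + \epsilon\sqrt{1-\beta_2^t}}$$

so the optimized branch is the same update with $\hat{\epsilon} = \epsilon\sqrt{1-\beta_2^t}$ folded into the denominator, which avoids dividing the whole $\sqrt{v_t}$ tensor by $\sqrt{1-\beta_2^t}$.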
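A minimal standalone sketch, assuming PyTorch, that checks the two formulations against each other for a single step. The tensor values and variable names below are illustrative and not taken from the repository.

# Minimal sketch, assuming PyTorch is available. All values are illustrative;
# m and v stand in for the first and second moment estimates.
import math

import torch

lr, eps = 1e-3, 1e-8
beta1, beta2 = 0.9, 0.999
step = 10                                # t, the current step count
m = torch.tensor([0.02, -0.01])          # m_t
v = torch.tensor([4e-4, 1e-4])           # v_t
theta = torch.tensor([0.5, -0.3])        # theta_{t-1}

bias_correction1 = 1 - beta1 ** step     # 1 - beta_1^t
bias_correction2 = 1 - beta2 ** step     # 1 - beta_2^t

# Optimized computation: sqrt(1 - beta_2^t) is folded into the step size,
# so eps plays the role of eps_hat in the denominator.
denom_opt = v.sqrt() + eps
step_size_opt = lr * math.sqrt(bias_correction2) / bias_correction1
theta_opt = theta - step_size_opt * m / denom_opt

# Computation without optimization: explicit bias-corrected sqrt(v_hat_t) + eps.
denom_plain = v.sqrt() / math.sqrt(bias_correction2) + eps
step_size_plain = lr / bias_correction1
theta_plain = theta - step_size_plain * m / denom_plain

# The two results differ only in where eps is applied, so they agree closely.
print(torch.allclose(theta_opt, theta_plain, atol=1e-6))  # True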