Source code for chemprop.schedulers

from torch.optim import Optimizer
from torch.optim.lr_scheduler import LambdaLR


[docs] def build_NoamLike_LRSched( optimizer: Optimizer, warmup_steps: int, cooldown_steps: int, init_lr: float, max_lr: float, final_lr: float, ): r"""Build a Noam-like learning rate scheduler which schedules the learning rate with a piecewise linear followed by an exponential decay. The learning rate increases linearly from ``init_lr`` to ``max_lr`` over the course of the first warmup_steps then decreases exponentially to ``final_lr`` over the course of the remaining ``total_steps - warmup_steps`` (where ``total_steps = total_epochs * steps_per_epoch``). This is roughly based on the learning rate schedule from [1]_, section 5.3. Formally, the learning rate schedule is defined as: .. math:: \mathtt{lr}(i) &= \begin{cases} \mathtt{init\_lr} + \delta \cdot i &\text{if } i < \mathtt{warmup\_steps} \\ \mathtt{max\_lr} \cdot \left( \frac{\mathtt{final\_lr}}{\mathtt{max\_lr}} \right)^{\gamma(i)} &\text{otherwise} \\ \end{cases} \\ \delta &\mathrel{:=} \frac{\mathtt{max\_lr} - \mathtt{init\_lr}}{\mathtt{warmup\_steps}} \\ \gamma(i) &\mathrel{:=} \frac{i - \mathtt{warmup\_steps}}{\mathtt{total\_steps} - \mathtt{warmup\_steps}} Parameters ----------- optimizer : Optimizer A PyTorch optimizer. warmup_steps : int The number of steps during which to linearly increase the learning rate. cooldown_steps : int The number of steps during which to exponential decay the learning rate. init_lr : float The initial learning rate. max_lr : float The maximum learning rate (achieved after ``warmup_steps``). final_lr : float The final learning rate (achieved after ``cooldown_steps``). References ---------- .. [1] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I. "Attention is all you need." Advances in neural information processing systems, 2017, 30. https://arxiv.org/abs/1706.03762 """ def lr_lambda(step: int): if step < warmup_steps: warmup_factor = (max_lr - init_lr) / warmup_steps return step * warmup_factor / init_lr + 1 elif warmup_steps <= step < warmup_steps + cooldown_steps: cooldown_factor = (final_lr / max_lr) ** (1 / cooldown_steps) return (max_lr * (cooldown_factor ** (step - warmup_steps))) / init_lr else: return final_lr / init_lr return LambdaLR(optimizer, lr_lambda)