# coding=utf-8
# Copyright 2021 The OneFlow Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging

import oneflow as flow

logger = logging.getLogger(__name__)


def WarmupCosineLR(
optimizer: flow.optim.Optimizer,
max_iter: int,
warmup_factor: float,
warmup_iter: int,
alpha: float = 0.0,
warmup_method: str = "linear",
):
"""Create a schedule with a learning rate that decreases following
the values of the Cosine function between the initial lr set in the
optimizer to 0, after a warmup period during which it increases linearly
between 0 and the initial lr set in the optimizer.
Args:
optimizer (flow.optim.Optimizer): Wrapped optimizer.
max_iter (int): Total training iters.
warmup_factor (float): The warmup factor.
warmup_iter (int): The number of warmup steps.
alpha (float, optional): The learning rate scale factor (:math:`\\alpha`). Defaults to 0.0.
        warmup_method (str, optional): The method of warmup; choose "linear" or "constant".
            In linear mode, the multiplication factor starts at warmup_factor in the
            first epoch and then increases linearly to reach 1. Defaults to "linear".
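
    For example, a minimal usage sketch (the linear model, SGD optimizer, and the
    hyperparameter values below are illustrative placeholders):

    .. code-block:: python

        model = flow.nn.Linear(16, 16)
        optimizer = flow.optim.SGD(model.parameters(), lr=0.1)
        # warm up linearly for 100 iters, then follow a cosine decay over 1000 iters in total
        lr_scheduler = WarmupCosineLR(
            optimizer, max_iter=1000, warmup_factor=0.001, warmup_iter=100
        )
        for _ in range(1000):
            # ... forward / backward / optimizer.step() ...
            lr_scheduler.step()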
"""
cosine_decay_lr = flow.optim.lr_scheduler.CosineDecayLR(
optimizer, decay_steps=max_iter, alpha=alpha
)
if warmup_iter == 0:
logger.warning("warmup iters equals to zero, return CosineLR")
return cosine_decay_lr
elif warmup_iter > max_iter:
logger.warning("warmup iters is larger than the total training iters")
warmup_cosine_lr = flow.optim.lr_scheduler.WarmUpLR(
cosine_decay_lr,
warmup_factor=warmup_factor,
warmup_iters=warmup_iter,
warmup_method=warmup_method,
)
return warmup_cosine_lr


def WarmupCosineAnnealingLR(
optimizer: flow.optim.Optimizer,
max_iter: int,
warmup_factor: float,
warmup_iter: int,
eta_min: float = 0.0,
warmup_method: str = "linear",
):
"""Create a schedule with a learning rate that decreases following
the values of the Cosine Annealing function between the initial
lr set in the optimizer to 0, after a warmup period during which
it increases linearly between 0 and the initial lr set in the optimizer.
Args:
optimizer (flow.optim.Optimizer): Wrapped optimizer.
max_iter (int): Total training iters.
warmup_factor (float): The warmup factor.
warmup_iter (int): The number of warmup steps.
eta_min (float, optional): Minimum learning rate. Defaults to 0.0.
        warmup_method (str, optional): The method of warmup; choose "linear" or "constant".
            In linear mode, the multiplication factor starts at warmup_factor in the first
            epoch and then increases linearly to reach 1. Defaults to "linear".
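
    For example, a minimal usage sketch (the model, optimizer, and hyperparameter values
    below are illustrative placeholders):

    .. code-block:: python

        model = flow.nn.Linear(16, 16)
        optimizer = flow.optim.SGD(model.parameters(), lr=0.1)
        # warm up for 50 iters, then anneal from lr=0.1 down to eta_min=1e-5 over 1000 iters
        lr_scheduler = WarmupCosineAnnealingLR(
            optimizer, max_iter=1000, warmup_factor=0.001, warmup_iter=50, eta_min=1e-5
        )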
"""
cosine_annealing_lr = flow.optim.lr_scheduler.CosineAnnealingLR(
optimizer, T_max=max_iter, eta_min=eta_min
)
if warmup_iter == 0:
logger.warning("warmup iters equals to zero, return CosineAnnealingLR")
return cosine_annealing_lr
warmup_cosine_annealing_lr = flow.optim.lr_scheduler.WarmUpLR(
cosine_annealing_lr,
warmup_factor=warmup_factor,
warmup_iters=warmup_iter,
warmup_method=warmup_method,
)
return warmup_cosine_annealing_lr


def WarmupStepLR(
optimizer: flow.optim.Optimizer,
max_iter: int,
warmup_factor: float,
warmup_iter: int,
step_size: int,
gamma: float = 0.1,
warmup_method: str = "linear",
):
"""Create a schedule with a learning rate that decreases following the values of the Step
function between the initial lr set in the optimizer to 0, after a warmup period during which
it increases linearly between 0 and the initial lr set in the optimizer.
Args:
optimizer (flow.optim.Optimizer): Wrapped optimizer.
max_iter (int): Total training iters.
warmup_factor (float): The warmup factor.
warmup_iter (int): The number of warmup steps.
step_size (int): Period of learning rate decay.
gamma (float, optional): Multiplicative factor of learning rate decay. Defaults to 0.1.
        warmup_method (str, optional): The method of warmup; choose "linear" or "constant".
            In linear mode, the multiplication factor starts at warmup_factor in the first
            epoch and then increases linearly to reach 1. Defaults to "linear".
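
    For example, a minimal usage sketch (the model, optimizer, and hyperparameter values
    below are illustrative placeholders):

    .. code-block:: python

        model = flow.nn.Linear(16, 16)
        optimizer = flow.optim.SGD(model.parameters(), lr=0.1)
        # warm up for 100 iters, then multiply the lr by gamma=0.1 every 300 iters
        lr_scheduler = WarmupStepLR(
            optimizer, max_iter=1000, warmup_factor=0.001, warmup_iter=100, step_size=300
        )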
"""
step_lr = flow.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
if warmup_iter == 0:
logger.warning("warmup iters equals to zero, return StepLR")
return step_lr
warmup_step_lr = flow.optim.lr_scheduler.WarmUpLR(
step_lr,
warmup_factor=warmup_factor,
warmup_iters=warmup_iter,
warmup_method=warmup_method,
)
return warmup_step_lr


def WarmupMultiStepLR(
optimizer: flow.optim.Optimizer,
max_iter: int,
warmup_factor: float,
warmup_iter: int,
milestones: list,
gamma: float = 0.1,
warmup_method: str = "linear",
):
"""Create a schedule with a learning rate that decreases following the values of the MultiStep
function between the initial lr set in the optimizer to 0, after a warmup period during which
it increases linearly between 0 and the initial lr set in the optimizer.
Args:
optimizer (flow.optim.Optimizer): Wrapped optimizer.
max_iter (int): Total training iters.
warmup_factor (float): The warmup factor.
warmup_iter (int): The number of warmup steps.
milestones (list): List of step indices. Must be increasing.
gamma (float, optional): Multiplicative factor of learning rate decay. Defaults to 0.1.
        warmup_method (str, optional): The method of warmup; choose "linear" or "constant".
            In linear mode, the multiplication factor starts at warmup_factor in the first
            epoch and then increases linearly to reach 1. Defaults to "linear".
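
    For example, a minimal usage sketch (the model, optimizer, and hyperparameter values
    below are illustrative placeholders):

    .. code-block:: python

        model = flow.nn.Linear(16, 16)
        optimizer = flow.optim.SGD(model.parameters(), lr=0.1)
        # warm up for 100 iters, then multiply the lr by gamma=0.1 at iters 600 and 800
        lr_scheduler = WarmupMultiStepLR(
            optimizer, max_iter=1000, warmup_factor=0.001, warmup_iter=100, milestones=[600, 800]
        )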
"""
multistep_lr = flow.optim.lr_scheduler.MultiStepLR(
optimizer, milestones=milestones, gamma=gamma
)
if warmup_iter == 0:
logger.warning("warmup iters equals to zero, return MultiStepLR")
return multistep_lr
warmup_multistep_lr = flow.optim.lr_scheduler.WarmUpLR(
multistep_lr,
warmup_factor=warmup_factor,
warmup_iters=warmup_iter,
warmup_method=warmup_method,
)
return warmup_multistep_lr


def WarmupExponentialLR(
optimizer: flow.optim.Optimizer,
max_iter: int,
gamma: float,
warmup_factor: float,
warmup_iter: int,
warmup_method: str = "linear",
):
"""Create a schedule with a learning rate that decreases following the values of
the Exponential function between the initial lr set in the optimizer to 0,
after a warmup period during which it increases linearly between 0 and the
initial lr set in the optimizer.
Args:
optimizer (flow.optim.Optimizer): Wrapped optimizer.
max_iter (int): Total training iters.
gamma (float): Multiplicative factor of learning rate decay.
warmup_factor (float): The warmup factor.
warmup_iter (int): The number of warmup steps.
        warmup_method (str, optional): The method of warmup; choose "linear" or "constant".
            In linear mode, the multiplication factor starts at warmup_factor in the first
            epoch and then increases linearly to reach 1. Defaults to "linear".
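
    For example, a minimal usage sketch (the model, optimizer, and hyperparameter values
    below are illustrative placeholders):

    .. code-block:: python

        model = flow.nn.Linear(16, 16)
        optimizer = flow.optim.SGD(model.parameters(), lr=0.1)
        # warm up for 100 iters, then multiply the lr by gamma=0.99 at every iter
        lr_scheduler = WarmupExponentialLR(
            optimizer, max_iter=1000, gamma=0.99, warmup_factor=0.001, warmup_iter=100
        )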
"""
exponential_lr = flow.optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma)
if warmup_iter == 0:
logger.warning("warmup iters equals to zero, return ExponentialLR")
return exponential_lr
warmup_exponential_lr = flow.optim.lr_scheduler.WarmUpLR(
exponential_lr,
warmup_factor=warmup_factor,
warmup_iters=warmup_iter,
warmup_method=warmup_method,
)
return warmup_exponential_lr


def WarmupPolynomialLR(
optimizer: flow.optim.Optimizer,
max_iter: int,
warmup_factor: float,
warmup_iter: int,
end_learning_rate: float = 0.0001,
power: float = 1.0,
cycle: bool = False,
warmup_method: str = "linear",
):
"""Create a schedule with a learning rate that decreases as a polynomial decay from
the initial lr set in the optimizer to end lr defined by `lr_end`,
after a warmup period during which it increases linearly from 0 to the
initial lr set in the optimizer.
Args:
optimizer (flow.optim.Optimizer): Wrapped optimizer.
max_iter (int): Total training iters.
warmup_factor (float): The warmup factor.
warmup_iter (int): The number of warmup steps.
end_learning_rate (float, optional): The final learning rate. Defaults to 0.0001.
power (float, optional): The power of polynomial. Defaults to 1.0.
        cycle (bool, optional): If True, the scheduler decays the learning rate cyclically,
            repeating the decay every decay period. Defaults to False.
        warmup_method (str, optional): The method of warmup; choose "linear" or "constant".
            In linear mode, the multiplication factor starts at warmup_factor in the first
            epoch and then increases linearly to reach 1. Defaults to "linear".
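
    For example, a minimal usage sketch (the model, optimizer, and hyperparameter values
    below are illustrative placeholders):

    .. code-block:: python

        model = flow.nn.Linear(16, 16)
        optimizer = flow.optim.SGD(model.parameters(), lr=0.1)
        # warm up for 100 iters, then decay linearly (power=1.0) from lr=0.1 to 1e-4 over 1000 iters
        lr_scheduler = WarmupPolynomialLR(
            optimizer,
            max_iter=1000,
            warmup_factor=0.001,
            warmup_iter=100,
            end_learning_rate=1e-4,
            power=1.0,
        )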
"""
polynomial_lr = flow.optim.lr_scheduler.PolynomialLR(
optimizer,
decay_batch=max_iter,
end_learning_rate=end_learning_rate,
power=power,
cycle=cycle,
)
if warmup_iter == 0:
logger.warning("warmup iters equals to zero, return PolynomialLR")
return polynomial_lr
warmup_polynomial_lr = flow.optim.lr_scheduler.WarmUpLR(
polynomial_lr,
warmup_factor=warmup_factor,
warmup_iters=warmup_iter,
warmup_method=warmup_method,
)
return warmup_polynomial_lr