-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathscheduler_test.py
128 lines (109 loc) · 4 KB
/
scheduler_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from timm.scheduler.scheduler import Scheduler
import math
import torch
import matplotlib.pyplot as plt
class CosineLRScheduler(Scheduler):
"""
Cosine decay with restarts.
This is described in the paper https://arxiv.org/abs/1608.03983.
Inspiration from
https://github.com/allenai/allennlp/blob/master/allennlp/training/learning_rate_schedulers/cosine.py
"""
def __init__(self,
optimizer: torch.optim.Optimizer,
t_initial: int,
t_mul: float = 1.,
lr_min: float = 0.,
decay_rate: float = 1.,
warmup_t=0,
warmup_lr_init=0,
warmup_prefix=False,
cycle_limit=0,
t_in_epochs=True,
initialize=True) -> None:
super().__init__(optimizer, param_group_field="lr", initialize=initialize)
assert t_initial > 0
assert lr_min >= 0
if t_initial == 1 and t_mul == 1 and decay_rate == 1:
print("Cosine annealing scheduler will have no effect on the learning "
"rate since t_initial = t_mul = eta_mul = 1.")
self.t_initial = t_initial
self.t_mul = t_mul
self.lr_min = lr_min
self.decay_rate = decay_rate
self.cycle_limit = cycle_limit
self.warmup_t = warmup_t
self.warmup_lr_init = warmup_lr_init
self.warmup_prefix = warmup_prefix
self.t_in_epochs = t_in_epochs
if self.warmup_t:
self.warmup_steps = [(v - warmup_lr_init) / self.warmup_t for v in self.base_values]
super().update_groups(self.warmup_lr_init)
else:
self.warmup_steps = [1 for _ in self.base_values]
def _get_lr(self, t):
if t < self.warmup_t:
lrs = [self.warmup_lr_init + t * s for s in self.warmup_steps]
else:
if self.warmup_prefix:
t = t - self.warmup_t
if self.t_mul != 1:
i = math.floor(math.log(1 - t / self.t_initial * (1 - self.t_mul), self.t_mul))
t_i = self.t_mul ** i * self.t_initial
t_curr = t - (1 - self.t_mul ** i) / (1 - self.t_mul) * self.t_initial
else:
i = t // self.t_initial
t_i = self.t_initial
t_curr = t - (self.t_initial * i)
gamma = self.decay_rate ** i
lr_min = self.lr_min * gamma
lr_max_values = [v * gamma for v in self.base_values]
if self.cycle_limit == 0 or (self.cycle_limit > 0 and i < self.cycle_limit):
lrs = [
lr_min + 0.5 * (lr_max - lr_min) * (1 + math.cos(math.pi * t_curr / t_i)) for lr_max in lr_max_values
]
else:
lrs = [self.lr_min for _ in self.base_values]
return lrs
def get_epoch_values(self, epoch: int):
if self.t_in_epochs:
return self._get_lr(epoch)
else:
return None
def get_update_values(self, num_updates: int):
if not self.t_in_epochs:
return self._get_lr(num_updates)
else:
return None
def get_cycle_length(self, cycles=0):
if not cycles:
cycles = self.cycle_limit
assert cycles > 0
if self.t_mul == 1.0:
return self.t_initial * cycles
else:
return int(math.floor(-self.t_initial * (self.t_mul ** cycles - 1) / (1 - self.t_mul)))
optimizer = torch.optim.SGD(params=[torch.tensor([1.0,2]), torch.tensor(2)],lr=1)
lr_scheduler = CosineLRScheduler(
optimizer,
t_initial=150,
t_mul=2,
lr_min=5e-1,
decay_rate=1,
warmup_lr_init=0,
warmup_t=0,
cycle_limit=1,
t_in_epochs=True,
)
num_epochs = lr_scheduler.get_cycle_length()
lr = []
T = 350
for t in range(T):
lr.append(lr_scheduler.get_epoch_values(t))
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(list(range(T)), lr)
plt.show()
print(num_epochs)
print('Finished')
lr_scheduler.get_cycle_length()