wavedl 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wavedl/__init__.py +1 -1
- wavedl/hpo.py +451 -451
- wavedl/{hpc.py → launcher.py} +135 -61
- wavedl/models/__init__.py +28 -0
- wavedl/models/{_timm_utils.py → _pretrained_utils.py} +128 -0
- wavedl/models/base.py +48 -0
- wavedl/models/caformer.py +1 -1
- wavedl/models/cnn.py +2 -27
- wavedl/models/convnext.py +5 -18
- wavedl/models/convnext_v2.py +6 -22
- wavedl/models/densenet.py +5 -18
- wavedl/models/efficientnetv2.py +315 -315
- wavedl/models/efficientvit.py +398 -0
- wavedl/models/fastvit.py +6 -39
- wavedl/models/mamba.py +44 -24
- wavedl/models/maxvit.py +51 -48
- wavedl/models/mobilenetv3.py +295 -295
- wavedl/models/regnet.py +406 -406
- wavedl/models/resnet.py +14 -56
- wavedl/models/resnet3d.py +258 -258
- wavedl/models/swin.py +443 -443
- wavedl/models/tcn.py +393 -409
- wavedl/models/unet.py +1 -5
- wavedl/models/unireplknet.py +491 -0
- wavedl/models/vit.py +3 -3
- wavedl/train.py +1427 -1430
- wavedl/utils/config.py +367 -367
- wavedl/utils/cross_validation.py +530 -530
- wavedl/utils/losses.py +216 -216
- wavedl/utils/optimizers.py +216 -216
- wavedl/utils/schedulers.py +251 -251
- {wavedl-1.6.0.dist-info → wavedl-1.6.2.dist-info}/METADATA +150 -113
- wavedl-1.6.2.dist-info/RECORD +46 -0
- {wavedl-1.6.0.dist-info → wavedl-1.6.2.dist-info}/entry_points.txt +2 -2
- wavedl-1.6.0.dist-info/RECORD +0 -44
- {wavedl-1.6.0.dist-info → wavedl-1.6.2.dist-info}/LICENSE +0 -0
- {wavedl-1.6.0.dist-info → wavedl-1.6.2.dist-info}/WHEEL +0 -0
- {wavedl-1.6.0.dist-info → wavedl-1.6.2.dist-info}/top_level.txt +0 -0
wavedl/utils/schedulers.py
CHANGED
@@ -1,251 +1,251 @@
"""
Learning Rate Schedulers
========================

Provides a comprehensive set of learning rate schedulers with a factory
function for easy selection via CLI arguments.

Supported Schedulers:
- plateau: ReduceLROnPlateau (default, adaptive)
- cosine: CosineAnnealingLR
- cosine_restarts: CosineAnnealingWarmRestarts
- onecycle: OneCycleLR
- step: StepLR
- multistep: MultiStepLR
- exponential: ExponentialLR
- linear_warmup: LinearLR (warmup phase)

Author: Ductho Le (ductho.le@outlook.com)
Version: 1.0.0
"""

import torch.optim as optim
from torch.optim.lr_scheduler import (
    CosineAnnealingLR,
    CosineAnnealingWarmRestarts,
    ExponentialLR,
    LinearLR,
    LRScheduler,
    MultiStepLR,
    OneCycleLR,
    ReduceLROnPlateau,
    SequentialLR,
    StepLR,
)


# ==============================================================================
# SCHEDULER REGISTRY
# ==============================================================================
def list_schedulers() -> list[str]:
    """
    Return list of available scheduler names.

    Returns:
        List of registered scheduler names
    """
    return [
        "plateau",
        "cosine",
        "cosine_restarts",
        "onecycle",
        "step",
        "multistep",
        "exponential",
        "linear_warmup",
    ]


def get_scheduler(
    name: str,
    optimizer: optim.Optimizer,
    # Common parameters
    epochs: int = 100,
    steps_per_epoch: int | None = None,
    min_lr: float = 1e-6,
    # ReduceLROnPlateau parameters
    patience: int = 10,
    factor: float = 0.5,
    # Cosine parameters
    T_max: int | None = None,
    T_0: int = 10,
    T_mult: int = 2,
    # OneCycleLR parameters
    max_lr: float | None = None,
    pct_start: float = 0.3,
    # Step/MultiStep parameters
    step_size: int = 30,
    milestones: list[int] | None = None,
    gamma: float = 0.1,
    # Linear warmup parameters
    warmup_epochs: int = 5,
    start_factor: float = 0.1,
    **kwargs,
) -> LRScheduler:
    """
    Factory function to create learning rate scheduler by name.

    Args:
        name: Scheduler name (see list_schedulers())
        optimizer: Optimizer instance to schedule
        epochs: Total training epochs (for cosine, onecycle)
        steps_per_epoch: Steps per epoch (required for onecycle)
        min_lr: Minimum learning rate (eta_min for cosine)
        patience: Patience for ReduceLROnPlateau
        factor: Reduction factor for plateau/step
        T_max: Period for CosineAnnealingLR (default: epochs)
        T_0: Initial period for CosineAnnealingWarmRestarts
        T_mult: Period multiplier for warm restarts
        max_lr: Maximum LR for OneCycleLR (default: optimizer's initial LR)
        pct_start: Percentage of cycle spent increasing LR (OneCycleLR)
        step_size: Period for StepLR
        milestones: Epochs to decay LR for MultiStepLR
        gamma: Decay factor for step/multistep/exponential
        warmup_epochs: Number of warmup epochs for linear_warmup
        start_factor: Starting LR factor for warmup (LR * start_factor)
        **kwargs: Additional arguments passed to scheduler

    Returns:
        Instantiated learning rate scheduler

    Raises:
        ValueError: If scheduler name is not recognized

    Example:
        >>> scheduler = get_scheduler("plateau", optimizer, patience=15)
        >>> scheduler = get_scheduler("cosine", optimizer, epochs=100)
        >>> scheduler = get_scheduler(
        ...     "onecycle", optimizer, epochs=100, steps_per_epoch=1000, max_lr=1e-3
        ... )
    """
    name_lower = name.lower().replace("-", "_")

    # Get initial LR from optimizer
    base_lr = optimizer.param_groups[0]["lr"]

    if name_lower == "plateau":
        return ReduceLROnPlateau(
            optimizer,
            mode="min",
            factor=factor,
            patience=patience,
            min_lr=min_lr,
            **kwargs,
        )

    elif name_lower == "cosine":
        return CosineAnnealingLR(
            optimizer,
            T_max=T_max if T_max is not None else epochs,
            eta_min=min_lr,
            **kwargs,
        )

    elif name_lower == "cosine_restarts":
        return CosineAnnealingWarmRestarts(
            optimizer, T_0=T_0, T_mult=T_mult, eta_min=min_lr, **kwargs
        )

    elif name_lower == "onecycle":
        if steps_per_epoch is None:
            raise ValueError(
                "OneCycleLR requires 'steps_per_epoch'. "
                "Pass len(train_dataloader) as steps_per_epoch."
            )
        return OneCycleLR(
            optimizer,
            max_lr=max_lr if max_lr is not None else base_lr,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            pct_start=pct_start,
            **kwargs,
        )

    elif name_lower == "step":
        return StepLR(optimizer, step_size=step_size, gamma=gamma, **kwargs)

    elif name_lower == "multistep":
        if milestones is None:
            # Default milestones at 30%, 60%, 90% of epochs
            milestones = [int(epochs * 0.3), int(epochs * 0.6), int(epochs * 0.9)]
        return MultiStepLR(optimizer, milestones=milestones, gamma=gamma, **kwargs)

    elif name_lower == "exponential":
        return ExponentialLR(optimizer, gamma=gamma, **kwargs)

    elif name_lower == "linear_warmup":
        return LinearLR(
            optimizer,
            start_factor=start_factor,
            end_factor=1.0,
            total_iters=warmup_epochs,
            **kwargs,
        )

    else:
        available = ", ".join(list_schedulers())
        raise ValueError(f"Unknown scheduler: '{name}'. Available options: {available}")


def get_scheduler_with_warmup(
    name: str,
    optimizer: optim.Optimizer,
    warmup_epochs: int = 5,
    start_factor: float = 0.1,
    **kwargs,
) -> LRScheduler:
    """
    Create a scheduler with linear warmup phase.

    Combines LinearLR warmup with any other scheduler using SequentialLR.

    Args:
        name: Main scheduler name (after warmup)
        optimizer: Optimizer instance
        warmup_epochs: Number of warmup epochs
        start_factor: Starting LR factor for warmup
        **kwargs: Arguments for main scheduler (see get_scheduler)

    Returns:
        SequentialLR combining warmup and main scheduler

    Example:
        >>> scheduler = get_scheduler_with_warmup(
        ...     "cosine", optimizer, warmup_epochs=5, epochs=100
        ... )
    """
    # Create warmup scheduler
    warmup_scheduler = LinearLR(
        optimizer,
        start_factor=start_factor,
        end_factor=1.0,
        total_iters=warmup_epochs,
    )

    # Create main scheduler
    main_scheduler = get_scheduler(name, optimizer, **kwargs)

    # Combine with SequentialLR
    return SequentialLR(
        optimizer,
        schedulers=[warmup_scheduler, main_scheduler],
        milestones=[warmup_epochs],
    )


def is_epoch_based(name: str) -> bool:
    """
    Check if scheduler should be stepped per epoch (True) or per batch (False).

    Args:
        name: Scheduler name

    Returns:
        True if scheduler should step per epoch, False for per batch
    """
    name_lower = name.lower().replace("-", "_")

    # OneCycleLR steps per batch, all others step per epoch
    per_batch_schedulers = {"onecycle"}

    return name_lower not in per_batch_schedulers
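Taken together, get_scheduler, get_scheduler_with_warmup, and is_epoch_based cover both constructing a scheduler and deciding where scheduler.step() belongs. The sketch below is a minimal illustration of that wiring, not wavedl's own train.py: the toy model, data, loss, and epoch count are placeholders, and only the factory signatures shown above are assumed.

    import torch
    from torch import nn, optim

    from wavedl.utils.schedulers import get_scheduler, is_epoch_based

    # Toy stand-ins for a real model and dataloader (placeholders, not wavedl code).
    model = nn.Linear(16, 1)
    optimizer = optim.AdamW(model.parameters(), lr=1e-3)
    loader = [(torch.randn(8, 16), torch.randn(8, 1)) for _ in range(10)]
    loss_fn = nn.MSELoss()

    EPOCHS = 20
    name = "onecycle"  # any entry from list_schedulers() works here
    scheduler = get_scheduler(
        name, optimizer, epochs=EPOCHS, steps_per_epoch=len(loader), max_lr=1e-3
    )

    for epoch in range(EPOCHS):
        epoch_loss = 0.0
        for x, y in loader:
            optimizer.zero_grad()
            loss = loss_fn(model(x), y)
            loss.backward()
            optimizer.step()
            if not is_epoch_based(name):
                scheduler.step()  # OneCycleLR advances once per batch
            epoch_loss += loss.item()
        if is_epoch_based(name):
            if name == "plateau":
                scheduler.step(epoch_loss / len(loader))  # plateau monitors a metric
            else:
                scheduler.step()  # every other scheduler advances once per epoch

Adding warmup is a one-line swap: get_scheduler_with_warmup("cosine", optimizer, warmup_epochs=5, epochs=EPOCHS) returns a SequentialLR that is still stepped once per epoch, so the loop above is unchanged.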