torchloop 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {torchloop-0.1.0 → torchloop-0.2.0}/PKG-INFO +1 -1
- {torchloop-0.1.0 → torchloop-0.2.0}/pyproject.toml +1 -1
- {torchloop-0.1.0 → torchloop-0.2.0}/src/torchloop/trainer.py +78 -16
- {torchloop-0.1.0 → torchloop-0.2.0}/tests/test_trainer.py +39 -1
- {torchloop-0.1.0 → torchloop-0.2.0}/.github/workflows/ci.yml +0 -0
- {torchloop-0.1.0 → torchloop-0.2.0}/.github/workflows/publish.yml +0 -0
- {torchloop-0.1.0 → torchloop-0.2.0}/.gitignore +0 -0
- {torchloop-0.1.0 → torchloop-0.2.0}/LICENSE +0 -0
- {torchloop-0.1.0 → torchloop-0.2.0}/README.md +0 -0
- {torchloop-0.1.0 → torchloop-0.2.0}/src/torchloop/__init__.py +0 -0
- {torchloop-0.1.0 → torchloop-0.2.0}/src/torchloop/evaluator.py +0 -0
- {torchloop-0.1.0 → torchloop-0.2.0}/src/torchloop/exporter.py +0 -0
- {torchloop-0.1.0 → torchloop-0.2.0}/tests/__init__.py +0 -0
- {torchloop-0.1.0 → torchloop-0.2.0}/tests/test_evaluator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: torchloop
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Lightweight PyTorch utility library for training, evaluation, and TFLite export — without the framework lock-in.
|
|
5
5
|
Project-URL: Homepage, https://github.com/Tharun007-TK/torchloop
|
|
6
6
|
Project-URL: Repository, https://github.com/Tharun007-TK/torchloop
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "torchloop"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "Lightweight PyTorch utility library for training, evaluation, and TFLite export — without the framework lock-in."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -1,13 +1,22 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
2
|
+
torchlite.trainer
|
|
3
3
|
-----------------
|
|
4
4
|
Wraps the PyTorch training loop so you stop rewriting it.
|
|
5
5
|
|
|
6
6
|
Usage:
|
|
7
7
|
from torchloop import Trainer
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
import torch.optim.lr_scheduler as sched
|
|
9
|
+
|
|
10
|
+
scheduler = sched.StepLR(optimizer, step_size=5, gamma=0.1)
|
|
11
|
+
|
|
12
|
+
trainer = Trainer(
|
|
13
|
+
model, optimizer, criterion,
|
|
14
|
+
device="cuda",
|
|
15
|
+
scheduler=scheduler,
|
|
16
|
+
amp=True,
|
|
17
|
+
patience=5,
|
|
18
|
+
)
|
|
19
|
+
trainer.fit(train_loader, val_loader, epochs=30)
|
|
11
20
|
trainer.save("best.pt")
|
|
12
21
|
"""
|
|
13
22
|
|
|
@@ -32,8 +41,12 @@ class Trainer:
|
|
|
32
41
|
optimizer : Any torch.optim optimizer.
|
|
33
42
|
criterion : Loss function (nn.Module or callable).
|
|
34
43
|
device : 'cuda', 'cpu', or 'mps'. Auto-detects if None.
|
|
35
|
-
metric_fn : Optional callable(preds, targets)
|
|
44
|
+
metric_fn : Optional callable(preds, targets) -> float for val metric.
|
|
36
45
|
patience : Early stopping patience (epochs). None = disabled.
|
|
46
|
+
scheduler : Any torch.optim.lr_scheduler. Steps once per epoch.
|
|
47
|
+
ReduceLROnPlateau is handled automatically.
|
|
48
|
+
amp : If True, enables automatic mixed precision (CUDA only).
|
|
49
|
+
Silently ignored on CPU/MPS.
|
|
37
50
|
"""
|
|
38
51
|
|
|
39
52
|
def __init__(
|
|
@@ -44,6 +57,8 @@ class Trainer:
|
|
|
44
57
|
device: Optional[str] = None,
|
|
45
58
|
metric_fn: Optional[Callable] = None,
|
|
46
59
|
patience: Optional[int] = None,
|
|
60
|
+
scheduler: Optional[object] = None,
|
|
61
|
+
amp: bool = False,
|
|
47
62
|
):
|
|
48
63
|
self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
|
|
49
64
|
self.model = model.to(self.device)
|
|
@@ -51,11 +66,17 @@ class Trainer:
|
|
|
51
66
|
self.criterion = criterion
|
|
52
67
|
self.metric_fn = metric_fn
|
|
53
68
|
self.patience = patience
|
|
69
|
+
self.scheduler = scheduler
|
|
70
|
+
|
|
71
|
+
# AMP only works on CUDA — silently disable elsewhere
|
|
72
|
+
self.amp = amp and self.device == "cuda"
|
|
73
|
+
self.scaler = torch.cuda.amp.GradScaler() if self.amp else None
|
|
54
74
|
|
|
55
75
|
self.history: dict[str, list] = {
|
|
56
76
|
"train_loss": [],
|
|
57
77
|
"val_loss": [],
|
|
58
78
|
"val_metric": [],
|
|
79
|
+
"lr": [],
|
|
59
80
|
}
|
|
60
81
|
self._best_val_loss = float("inf")
|
|
61
82
|
self._best_state: Optional[dict] = None
|
|
@@ -75,7 +96,7 @@ class Trainer:
|
|
|
75
96
|
Train the model.
|
|
76
97
|
|
|
77
98
|
Returns:
|
|
78
|
-
history dict with train_loss, val_loss, val_metric per epoch.
|
|
99
|
+
history dict with train_loss, val_loss, val_metric, lr per epoch.
|
|
79
100
|
"""
|
|
80
101
|
for epoch in range(1, epochs + 1):
|
|
81
102
|
t0 = time.time()
|
|
@@ -89,7 +110,14 @@ class Trainer:
|
|
|
89
110
|
self.history["val_metric"].append(val_metric)
|
|
90
111
|
self._checkpoint(val_loss)
|
|
91
112
|
|
|
92
|
-
self.
|
|
113
|
+
self._step_scheduler(val_loss)
|
|
114
|
+
current_lr = self._current_lr()
|
|
115
|
+
self.history["lr"].append(current_lr)
|
|
116
|
+
|
|
117
|
+
self._log(
|
|
118
|
+
epoch, epochs, train_loss, val_loss,
|
|
119
|
+
val_metric, current_lr, time.time() - t0,
|
|
120
|
+
)
|
|
93
121
|
|
|
94
122
|
if self._should_stop():
|
|
95
123
|
print(f" Early stopping triggered at epoch {epoch}.")
|
|
@@ -108,7 +136,9 @@ class Trainer:
|
|
|
108
136
|
|
|
109
137
|
def load(self, path: str | Path) -> None:
|
|
110
138
|
"""Load model state dict from path."""
|
|
111
|
-
self.model.load_state_dict(
|
|
139
|
+
self.model.load_state_dict(
|
|
140
|
+
torch.load(path, map_location=self.device)
|
|
141
|
+
)
|
|
112
142
|
print(f" Loaded ← {path}")
|
|
113
143
|
|
|
114
144
|
# ------------------------------------------------------------------
|
|
@@ -121,10 +151,20 @@ class Trainer:
|
|
|
121
151
|
for inputs, targets in tqdm(loader, desc=" train", leave=False):
|
|
122
152
|
inputs, targets = inputs.to(self.device), targets.to(self.device)
|
|
123
153
|
self.optimizer.zero_grad()
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
154
|
+
|
|
155
|
+
if self.amp:
|
|
156
|
+
with torch.cuda.amp.autocast():
|
|
157
|
+
outputs = self.model(inputs)
|
|
158
|
+
loss = self.criterion(outputs, targets)
|
|
159
|
+
self.scaler.scale(loss).backward()
|
|
160
|
+
self.scaler.step(self.optimizer)
|
|
161
|
+
self.scaler.update()
|
|
162
|
+
else:
|
|
163
|
+
outputs = self.model(inputs)
|
|
164
|
+
loss = self.criterion(outputs, targets)
|
|
165
|
+
loss.backward()
|
|
166
|
+
self.optimizer.step()
|
|
167
|
+
|
|
128
168
|
total_loss += loss.item() * inputs.size(0)
|
|
129
169
|
return total_loss / len(loader.dataset)
|
|
130
170
|
|
|
@@ -134,7 +174,9 @@ class Trainer:
|
|
|
134
174
|
all_preds, all_targets = [], []
|
|
135
175
|
with torch.no_grad():
|
|
136
176
|
for inputs, targets in tqdm(loader, desc=" val ", leave=False):
|
|
137
|
-
inputs, targets = inputs.to(self.device), targets.to(self.device)
|
|
177
|
+
inputs, targets = (
|
|
178
|
+
inputs.to(self.device), targets.to(self.device)
|
|
179
|
+
)
|
|
138
180
|
outputs = self.model(inputs)
|
|
139
181
|
loss = self.criterion(outputs, targets)
|
|
140
182
|
total_loss += loss.item() * inputs.size(0)
|
|
@@ -159,6 +201,19 @@ class Trainer:
|
|
|
159
201
|
else:
|
|
160
202
|
self._no_improve_count += 1
|
|
161
203
|
|
|
204
|
+
def _step_scheduler(self, val_loss: Optional[float]) -> None:
|
|
205
|
+
if self.scheduler is None:
|
|
206
|
+
return
|
|
207
|
+
plateau = "ReduceLROnPlateau"
|
|
208
|
+
if type(self.scheduler).__name__ == plateau:
|
|
209
|
+
if val_loss is not None:
|
|
210
|
+
self.scheduler.step(val_loss)
|
|
211
|
+
else:
|
|
212
|
+
self.scheduler.step()
|
|
213
|
+
|
|
214
|
+
def _current_lr(self) -> float:
|
|
215
|
+
return self.optimizer.param_groups[0]["lr"]
|
|
216
|
+
|
|
162
217
|
def _should_stop(self) -> bool:
|
|
163
218
|
return (
|
|
164
219
|
self.patience is not None
|
|
@@ -166,11 +221,18 @@ class Trainer:
|
|
|
166
221
|
)
|
|
167
222
|
|
|
168
223
|
@staticmethod
|
|
169
|
-
def _log(
|
|
170
|
-
|
|
224
|
+
def _log(
|
|
225
|
+
epoch, epochs, train_loss, val_loss,
|
|
226
|
+
val_metric, lr, elapsed,
|
|
227
|
+
) -> None:
|
|
228
|
+
parts = [
|
|
229
|
+
f"Epoch [{epoch:>3}/{epochs}]",
|
|
230
|
+
f"train_loss={train_loss:.4f}",
|
|
231
|
+
]
|
|
171
232
|
if val_loss is not None:
|
|
172
233
|
parts.append(f"val_loss={val_loss:.4f}")
|
|
173
234
|
if val_metric is not None:
|
|
174
235
|
parts.append(f"val_metric={val_metric:.4f}")
|
|
236
|
+
parts.append(f"lr={lr:.2e}")
|
|
175
237
|
parts.append(f"({elapsed:.1f}s)")
|
|
176
|
-
print(" " + " ".join(parts))
|
|
238
|
+
print(" " + " ".join(parts))
|
|
@@ -59,4 +59,42 @@ def test_trainer_with_metric_fn():
|
|
|
59
59
|
trainer = Trainer(model, optimizer, criterion, device="cpu", metric_fn=metric_fn)
|
|
60
60
|
history = trainer.fit(_make_loader(), _make_loader(), epochs=2)
|
|
61
61
|
assert "val_metric" in history
|
|
62
|
-
assert len(history["val_metric"]) == 2
|
|
62
|
+
assert len(history["val_metric"]) == 2
|
|
63
|
+
|
|
64
|
+
def test_trainer_with_scheduler():
|
|
65
|
+
import torch.optim.lr_scheduler as sched
|
|
66
|
+
model = _make_model()
|
|
67
|
+
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
|
|
68
|
+
criterion = nn.CrossEntropyLoss()
|
|
69
|
+
scheduler = sched.StepLR(optimizer, step_size=1, gamma=0.5)
|
|
70
|
+
trainer = Trainer(
|
|
71
|
+
model, optimizer, criterion, device="cpu", scheduler=scheduler
|
|
72
|
+
)
|
|
73
|
+
history = trainer.fit(_make_loader(), _make_loader(), epochs=3)
|
|
74
|
+
assert history["lr"][2] < history["lr"][0]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def test_trainer_with_plateau_scheduler():
|
|
78
|
+
import torch.optim.lr_scheduler as sched
|
|
79
|
+
model = _make_model()
|
|
80
|
+
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
|
|
81
|
+
criterion = nn.CrossEntropyLoss()
|
|
82
|
+
scheduler = sched.ReduceLROnPlateau(optimizer, patience=1)
|
|
83
|
+
trainer = Trainer(
|
|
84
|
+
model, optimizer, criterion, device="cpu", scheduler=scheduler
|
|
85
|
+
)
|
|
86
|
+
history = trainer.fit(_make_loader(), _make_loader(), epochs=3)
|
|
87
|
+
assert "lr" in history
|
|
88
|
+
assert len(history["lr"]) == 3
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_amp_silently_disabled_on_cpu():
|
|
92
|
+
model = _make_model()
|
|
93
|
+
optimizer = torch.optim.Adam(model.parameters())
|
|
94
|
+
criterion = nn.CrossEntropyLoss()
|
|
95
|
+
trainer = Trainer(
|
|
96
|
+
model, optimizer, criterion, device="cpu", amp=True
|
|
97
|
+
)
|
|
98
|
+
assert trainer.amp is False
|
|
99
|
+
history = trainer.fit(_make_loader(), epochs=2)
|
|
100
|
+
assert len(history["train_loss"]) == 2
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|