trainlib 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trainlib-0.1.0/PKG-INFO +140 -0
- trainlib-0.1.0/README.md +105 -0
- trainlib-0.1.0/pyproject.toml +84 -0
- trainlib-0.1.0/setup.cfg +4 -0
- trainlib-0.1.0/trainlib/__init__.py +0 -0
- trainlib-0.1.0/trainlib/dataset.py +964 -0
- trainlib-0.1.0/trainlib/datasets/__init__.py +0 -0
- trainlib-0.1.0/trainlib/datasets/disk.py +179 -0
- trainlib-0.1.0/trainlib/datasets/memory.py +210 -0
- trainlib-0.1.0/trainlib/domain.py +85 -0
- trainlib-0.1.0/trainlib/domains/__init__.py +0 -0
- trainlib-0.1.0/trainlib/domains/disk.py +37 -0
- trainlib-0.1.0/trainlib/domains/functional.py +58 -0
- trainlib-0.1.0/trainlib/estimator.py +195 -0
- trainlib-0.1.0/trainlib/estimators/__init__.py +0 -0
- trainlib-0.1.0/trainlib/estimators/rnn.py +491 -0
- trainlib-0.1.0/trainlib/estimators/tdnn.py +114 -0
- trainlib-0.1.0/trainlib/trainer.py +509 -0
- trainlib-0.1.0/trainlib/transform.py +11 -0
- trainlib-0.1.0/trainlib/utils/__init__.py +0 -0
- trainlib-0.1.0/trainlib/utils/job.py +54 -0
- trainlib-0.1.0/trainlib/utils/module.py +25 -0
- trainlib-0.1.0/trainlib/utils/text.py +8 -0
- trainlib-0.1.0/trainlib/utils/type.py +52 -0
- trainlib-0.1.0/trainlib.egg-info/PKG-INFO +140 -0
- trainlib-0.1.0/trainlib.egg-info/SOURCES.txt +28 -0
- trainlib-0.1.0/trainlib.egg-info/dependency_links.txt +1 -0
- trainlib-0.1.0/trainlib.egg-info/entry_points.txt +2 -0
- trainlib-0.1.0/trainlib.egg-info/requires.txt +19 -0
- trainlib-0.1.0/trainlib.egg-info/top_level.txt +1 -0
trainlib-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: trainlib
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Minimal framework for ML modeling. Supports advanced dataset operations and streamlined training.
|
|
5
|
+
Author-email: Sam Griesemer <git@olog.io>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://doc.olog.io/trainlib
|
|
8
|
+
Project-URL: Documentation, https://doc.olog.io/trainlib
|
|
9
|
+
Project-URL: Repository, https://git.olog.io/olog/trainlib
|
|
10
|
+
Project-URL: Issues, https://git.olog.io/olog/trainlib/issues
|
|
11
|
+
Keywords: machine-learning
|
|
12
|
+
Classifier: Programming Language :: Python
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Development Status :: 3 - Alpha
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
17
|
+
Requires-Python: >=3.13
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Requires-Dist: colorama>=0.4.6
|
|
20
|
+
Requires-Dist: matplotlib>=3.10.8
|
|
21
|
+
Requires-Dist: numpy>=2.4.1
|
|
22
|
+
Requires-Dist: tensorboard>=2.20.0
|
|
23
|
+
Requires-Dist: torch>=2.5.1
|
|
24
|
+
Requires-Dist: tqdm>=4.67.1
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: ipykernel; extra == "dev"
|
|
27
|
+
Provides-Extra: doc
|
|
28
|
+
Requires-Dist: furo; extra == "doc"
|
|
29
|
+
Requires-Dist: myst-parser; extra == "doc"
|
|
30
|
+
Requires-Dist: sphinx; extra == "doc"
|
|
31
|
+
Requires-Dist: sphinx-togglebutton; extra == "doc"
|
|
32
|
+
Requires-Dist: sphinx-autodoc-typehints; extra == "doc"
|
|
33
|
+
Provides-Extra: test
|
|
34
|
+
Requires-Dist: pytest; extra == "test"
|
|
35
|
+
|
|
36
|
+
# Overview
|
|
37
|
+
Package summary goes here, ideally with a diagram
|
|
38
|
+
|
|
39
|
+
# Install
|
|
40
|
+
Installation instructions
|
|
41
|
+
|
|
42
|
+
```sh
|
|
43
|
+
pip install <package>
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
or as a CLI tool
|
|
47
|
+
|
|
48
|
+
```sh
|
|
49
|
+
uv tool install <package>
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
# Development
|
|
53
|
+
- Initialize/synchronize the project with `uv sync`, creating a virtual
|
|
54
|
+
environment with base package dependencies.
|
|
55
|
+
- Depending on needs, install the development dependencies with `uv sync
|
|
56
|
+
--extra dev`.
|
|
57
|
+
|
|
58
|
+
# Testing
|
|
59
|
+
- To run the unit tests, make sure to first have the test dependencies
|
|
60
|
+
installed with `uv sync --extra test`, then run `make test`.
|
|
61
|
+
- For notebook testing, run `make install-kernel` to make the environment
|
|
62
|
+
available as a Jupyter kernel (to be selected when running notebooks).
|
|
63
|
+
|
|
64
|
+
# Documentation
|
|
65
|
+
- Install the documentation dependencies with `uv sync --extra doc`.
|
|
66
|
+
- Run `make docs-build` (optionally preceded by `make docs-clean`), and serve
|
|
67
|
+
locally with `make docs-serve`.
|
|
68
|
+
|
|
69
|
+
# Development remarks
|
|
70
|
+
- Across `Trainer` / `Estimator` / `Dataset`, I've considered a
|
|
71
|
+
`ParamSpec`-based typing scheme to better orchestrate alignment in the
|
|
72
|
+
`Trainer.train()` loop, e.g., so we can statically check whether a dataset
|
|
73
|
+
appears to be fulfilling the argument requirements for the estimator's
|
|
74
|
+
`loss()` / `metrics()` methods. Something like
|
|
75
|
+
|
|
76
|
+
```py
|
|
77
|
+
class Estimator[**P](nn.Module):
|
|
78
|
+
def loss(
|
|
79
|
+
self,
|
|
80
|
+
input: Tensor,
|
|
81
|
+
*args: P.args,
|
|
82
|
+
**kwargs: P.kwargs,
|
|
83
|
+
) -> Generator:
|
|
84
|
+
...
|
|
85
|
+
|
|
86
|
+
class Trainer[**P]:
|
|
87
|
+
def __init__(
|
|
88
|
+
self,
|
|
89
|
+
estimator: Estimator[P],
|
|
90
|
+
...
|
|
91
|
+
): ...
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
might be how we begin threading signatures. But ensuring dataset items can
|
|
95
|
+
match `P` is challenging. You can consider a "packed" object where we
|
|
96
|
+
obfuscate passing data through `P`-signatures:
|
|
97
|
+
|
|
98
|
+
```py
|
|
99
|
+
class PackedItem[**P]:
|
|
100
|
+
def __init__(self, *args: P.args, **kwargs: P.kwargs) -> None:
|
|
101
|
+
self._args = args
|
|
102
|
+
self._kwargs = kwargs
|
|
103
|
+
|
|
104
|
+
def apply[R](self, func: Callable[P, R]) -> R:
|
|
105
|
+
return func(*self._args, **self._kwargs)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class BatchedDataset[U, R, I, **P](Dataset):
|
|
109
|
+
@abstractmethod
|
|
110
|
+
def _process_item_data(
|
|
111
|
+
self,
|
|
112
|
+
item_data: I,
|
|
113
|
+
item_index: int,
|
|
114
|
+
) -> PackedItem[P]:
|
|
115
|
+
...
|
|
116
|
+
|
|
117
|
+
def __iter__(self) -> Iterator[PackedItem[P]]:
|
|
118
|
+
...
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Meaningfully shaping those signatures is what remains, but you can't really
|
|
122
|
+
do this, not with typical type expression flexibility. For instance, if I'm
|
|
123
|
+
trying to appropriately type my base `TupleDataset`:
|
|
124
|
+
|
|
125
|
+
```py
|
|
126
|
+
class SequenceDataset[I, **P](HomogenousDataset[int, I, I, P]):
|
|
127
|
+
...
|
|
128
|
+
|
|
129
|
+
class TupleDataset[I](SequenceDataset[tuple[I, ...], ??]):
|
|
130
|
+
...
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Here there's no way for me to shape a `ParamSpec` to indicate arbitrarily
|
|
134
|
+
many arguments of a fixed type (`I` in this case) to allow me to unpack my
|
|
135
|
+
item tuples into an appropriate `PackedItem`.
|
|
136
|
+
|
|
137
|
+
Until this (among other issues) becomes clearer, I'm setting up around a
|
|
138
|
+
simpler `TypedDict` type variable. We won't have particularly strong static
|
|
139
|
+
checks for item alignment inside `Trainer`, but this seems about as good as I
|
|
140
|
+
can get around the current infrastructure.
|
trainlib-0.1.0/README.md
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# Overview
|
|
2
|
+
Package summary goes here, ideally with a diagram
|
|
3
|
+
|
|
4
|
+
# Install
|
|
5
|
+
Installation instructions
|
|
6
|
+
|
|
7
|
+
```sh
|
|
8
|
+
pip install <package>
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
or as a CLI tool
|
|
12
|
+
|
|
13
|
+
```sh
|
|
14
|
+
uv tool install <package>
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
# Development
|
|
18
|
+
- Initialize/synchronize the project with `uv sync`, creating a virtual
|
|
19
|
+
environment with base package dependencies.
|
|
20
|
+
- Depending on needs, install the development dependencies with `uv sync
|
|
21
|
+
--extra dev`.
|
|
22
|
+
|
|
23
|
+
# Testing
|
|
24
|
+
- To run the unit tests, make sure to first have the test dependencies
|
|
25
|
+
installed with `uv sync --extra test`, then run `make test`.
|
|
26
|
+
- For notebook testing, run `make install-kernel` to make the environment
|
|
27
|
+
available as a Jupyter kernel (to be selected when running notebooks).
|
|
28
|
+
|
|
29
|
+
# Documentation
|
|
30
|
+
- Install the documentation dependencies with `uv sync --extra doc`.
|
|
31
|
+
- Run `make docs-build` (optionally preceded by `make docs-clean`), and serve
|
|
32
|
+
locally with `make docs-serve`.
|
|
33
|
+
|
|
34
|
+
# Development remarks
|
|
35
|
+
- Across `Trainer` / `Estimator` / `Dataset`, I've considered a
|
|
36
|
+
`ParamSpec`-based typing scheme to better orchestrate alignment in the
|
|
37
|
+
`Trainer.train()` loop, e.g., so we can statically check whether a dataset
|
|
38
|
+
appears to be fulfilling the argument requirements for the estimator's
|
|
39
|
+
`loss()` / `metrics()` methods. Something like
|
|
40
|
+
|
|
41
|
+
```py
|
|
42
|
+
class Estimator[**P](nn.Module):
|
|
43
|
+
def loss(
|
|
44
|
+
self,
|
|
45
|
+
input: Tensor,
|
|
46
|
+
*args: P.args,
|
|
47
|
+
**kwargs: P.kwargs,
|
|
48
|
+
) -> Generator:
|
|
49
|
+
...
|
|
50
|
+
|
|
51
|
+
class Trainer[**P]:
|
|
52
|
+
def __init__(
|
|
53
|
+
self,
|
|
54
|
+
estimator: Estimator[P],
|
|
55
|
+
...
|
|
56
|
+
): ...
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
might be how we begin threading signatures. But ensuring dataset items can
|
|
60
|
+
match `P` is challenging. You can consider a "packed" object where we
|
|
61
|
+
obfuscate passing data through `P`-signatures:
|
|
62
|
+
|
|
63
|
+
```py
|
|
64
|
+
class PackedItem[**P]:
|
|
65
|
+
def __init__(self, *args: P.args, **kwargs: P.kwargs) -> None:
|
|
66
|
+
self._args = args
|
|
67
|
+
self._kwargs = kwargs
|
|
68
|
+
|
|
69
|
+
def apply[R](self, func: Callable[P, R]) -> R:
|
|
70
|
+
return func(*self._args, **self._kwargs)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class BatchedDataset[U, R, I, **P](Dataset):
|
|
74
|
+
@abstractmethod
|
|
75
|
+
def _process_item_data(
|
|
76
|
+
self,
|
|
77
|
+
item_data: I,
|
|
78
|
+
item_index: int,
|
|
79
|
+
) -> PackedItem[P]:
|
|
80
|
+
...
|
|
81
|
+
|
|
82
|
+
def __iter__(self) -> Iterator[PackedItem[P]]:
|
|
83
|
+
...
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Meaningfully shaping those signatures is what remains, but you can't really
|
|
87
|
+
do this, not with typical type expression flexibility. For instance, if I'm
|
|
88
|
+
trying to appropriately type my base `TupleDataset`:
|
|
89
|
+
|
|
90
|
+
```py
|
|
91
|
+
class SequenceDataset[I, **P](HomogenousDataset[int, I, I, P]):
|
|
92
|
+
...
|
|
93
|
+
|
|
94
|
+
class TupleDataset[I](SequenceDataset[tuple[I, ...], ??]):
|
|
95
|
+
...
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Here there's no way for me to shape a `ParamSpec` to indicate arbitrarily
|
|
99
|
+
many arguments of a fixed type (`I` in this case) to allow me to unpack my
|
|
100
|
+
item tuples into an appropriate `PackedItem`.
|
|
101
|
+
|
|
102
|
+
Until this (among other issues) becomes clearer, I'm setting up around a
|
|
103
|
+
simpler `TypedDict` type variable. We won't have particularly strong static
|
|
104
|
+
checks for item alignment inside `Trainer`, but this seems about as good as I
|
|
105
|
+
can get around the current infrastructure.
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "trainlib"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Minimal framework for ML modeling. Supports advanced dataset operations and streamlined training."
|
|
9
|
+
requires-python = ">=3.13"
|
|
10
|
+
authors = [
|
|
11
|
+
{ name="Sam Griesemer", email="git@olog.io" },
|
|
12
|
+
]
|
|
13
|
+
readme = "README.md"
|
|
14
|
+
license = "MIT"
|
|
15
|
+
keywords = [
|
|
16
|
+
"machine-learning",
|
|
17
|
+
]
|
|
18
|
+
classifiers = [
|
|
19
|
+
"Programming Language :: Python",
|
|
20
|
+
"Operating System :: OS Independent",
|
|
21
|
+
"Development Status :: 3 - Alpha",
|
|
22
|
+
|
|
23
|
+
"Intended Audience :: Developers",
|
|
24
|
+
"Intended Audience :: End Users/Desktop",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"colorama>=0.4.6",
|
|
28
|
+
"matplotlib>=3.10.8",
|
|
29
|
+
"numpy>=2.4.1",
|
|
30
|
+
"tensorboard>=2.20.0",
|
|
31
|
+
"torch>=2.5.1",
|
|
32
|
+
"tqdm>=4.67.1",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.scripts]
|
|
36
|
+
trainlib = "trainlib.__main__:main"
|
|
37
|
+
|
|
38
|
+
[project.optional-dependencies]
|
|
39
|
+
dev = [
|
|
40
|
+
"ipykernel",
|
|
41
|
+
]
|
|
42
|
+
doc = [
|
|
43
|
+
"furo",
|
|
44
|
+
"myst-parser",
|
|
45
|
+
"sphinx",
|
|
46
|
+
"sphinx-togglebutton",
|
|
47
|
+
"sphinx-autodoc-typehints",
|
|
48
|
+
]
|
|
49
|
+
test = [
|
|
50
|
+
"pytest",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
[project.urls]
|
|
54
|
+
Homepage = "https://doc.olog.io/trainlib"
|
|
55
|
+
Documentation = "https://doc.olog.io/trainlib"
|
|
56
|
+
Repository = "https://git.olog.io/olog/trainlib"
|
|
57
|
+
Issues = "https://git.olog.io/olog/trainlib/issues"
|
|
58
|
+
|
|
59
|
+
[tool.setuptools.packages.find]
|
|
60
|
+
include = ["trainlib*"]
|
|
61
|
+
|
|
62
|
+
# for static data files under package root
|
|
63
|
+
# [tool.setuptools.package-data]
|
|
64
|
+
# "<package>" = ["data/*.toml"]
|
|
65
|
+
|
|
66
|
+
[tool.ruff]
|
|
67
|
+
line-length = 79
|
|
68
|
+
|
|
69
|
+
[tool.ruff.lint]
|
|
70
|
+
select = ["ANN", "E", "F", "UP", "B", "SIM", "I", "C4", "PERF"]
|
|
71
|
+
|
|
72
|
+
[tool.ruff.lint.isort]
|
|
73
|
+
length-sort = true
|
|
74
|
+
order-by-type = false
|
|
75
|
+
force-sort-within-sections = false
|
|
76
|
+
|
|
77
|
+
[tool.ruff.lint.per-file-ignores]
|
|
78
|
+
"tests/**" = ["S101"]
|
|
79
|
+
"**/__init__.py" = ["F401"]
|
|
80
|
+
|
|
81
|
+
[tool.ruff.format]
|
|
82
|
+
quote-style = "double"
|
|
83
|
+
indent-style = "space"
|
|
84
|
+
docstring-code-format = true
|
trainlib-0.1.0/setup.cfg
ADDED
|
File without changes
|