trainlib 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
1
+ Metadata-Version: 2.4
2
+ Name: trainlib
3
+ Version: 0.1.0
4
+ Summary: Minimal framework for ML modeling. Supports advanced dataset operations and streamlined training.
5
+ Author-email: Sam Griesemer <git@olog.io>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://doc.olog.io/trainlib
8
+ Project-URL: Documentation, https://doc.olog.io/trainlib
9
+ Project-URL: Repository, https://git.olog.io/olog/trainlib
10
+ Project-URL: Issues, https://git.olog.io/olog/trainlib/issues
11
+ Keywords: machine-learning
12
+ Classifier: Programming Language :: Python
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: End Users/Desktop
17
+ Requires-Python: >=3.13
18
+ Description-Content-Type: text/markdown
19
+ Requires-Dist: colorama>=0.4.6
20
+ Requires-Dist: matplotlib>=3.10.8
21
+ Requires-Dist: numpy>=2.4.1
22
+ Requires-Dist: tensorboard>=2.20.0
23
+ Requires-Dist: torch>=2.5.1
24
+ Requires-Dist: tqdm>=4.67.1
25
+ Provides-Extra: dev
26
+ Requires-Dist: ipykernel; extra == "dev"
27
+ Provides-Extra: doc
28
+ Requires-Dist: furo; extra == "doc"
29
+ Requires-Dist: myst-parser; extra == "doc"
30
+ Requires-Dist: sphinx; extra == "doc"
31
+ Requires-Dist: sphinx-togglebutton; extra == "doc"
32
+ Requires-Dist: sphinx-autodoc-typehints; extra == "doc"
33
+ Provides-Extra: test
34
+ Requires-Dist: pytest; extra == "test"
35
+
36
+ # Overview
37
+ trainlib is a minimal framework for ML modeling, supporting advanced dataset operations and streamlined training.
38
+
39
+ # Install
40
+ Installation instructions
41
+
42
+ ```sh
43
+ pip install trainlib
44
+ ```
45
+
46
+ or as a CLI tool
47
+
48
+ ```sh
49
+ uv tool install trainlib
50
+ ```
51
+
52
+ # Development
53
+ - Initialize/synchronize the project with `uv sync`, creating a virtual
54
+ environment with base package dependencies.
55
+ - Depending on needs, install the development dependencies with `uv sync
56
+ --extra dev`.
57
+
58
+ # Testing
59
+ - To run the unit tests, make sure to first have the test dependencies
60
+ installed with `uv sync --extra test`, then run `make test`.
61
+ - For notebook testing, run `make install-kernel` to make the environment
62
+ available as a Jupyter kernel (to be selected when running notebooks).
63
+
64
+ # Documentation
65
+ - Install the documentation dependencies with `uv sync --extra doc`.
66
+ - Run `make docs-build` (optionally preceded by `make docs-clean`), and serve
67
+ locally with `make docs-serve`.
68
+
69
+ # Development remarks
70
+ - Across `Trainer` / `Estimator` / `Dataset`, I've considered a
71
+ `ParamSpec`-based typing scheme to better orchestrate alignment in the
72
+ `Trainer.train()` loop, e.g., so we can statically check whether a dataset
73
+ appears to be fulfilling the argument requirements for the estimator's
74
+ `loss()` / `metrics()` methods. Something like
75
+
76
+ ```py
77
+ class Estimator[**P](nn.Module):
78
+ def loss(
79
+ self,
80
+ input: Tensor,
81
+ *args: P.args,
82
+ **kwargs: P.kwargs,
83
+ ) -> Generator:
84
+ ...
85
+
86
+ class Trainer[**P]:
87
+ def __init__(
88
+ self,
89
+ estimator: Estimator[P],
90
+ ...
91
+ ): ...
92
+ ```
93
+
94
+ might be how we begin threading signatures. But ensuring dataset items can
95
+ match `P` is challenging. You can consider a "packed" object where we
96
+ obfuscate passing data through `P`-signatures:
97
+
98
+ ```py
99
+ class PackedItem[**P]:
100
+ def __init__(self, *args: P.args, **kwargs: P.kwargs) -> None:
101
+ self._args = args
102
+ self._kwargs = kwargs
103
+
104
+ def apply[R](self, func: Callable[P, R]) -> R:
105
+ return func(*self._args, **self._kwargs)
106
+
107
+
108
+ class BatchedDataset[U, R, I, **P](Dataset):
109
+ @abstractmethod
110
+ def _process_item_data(
111
+ self,
112
+ item_data: I,
113
+ item_index: int,
114
+ ) -> PackedItem[P]:
115
+ ...
116
+
117
+ def __iter__(self) -> Iterator[PackedItem[P]]:
118
+ ...
119
+ ```
120
+
121
+ Meaningfully shaping those signatures is what remains, but you can't really
122
+ do this, not with typical type expression flexibility. For instance, if I'm
123
+ trying to appropriately type my base `TupleDataset`:
124
+
125
+ ```py
126
+ class SequenceDataset[I, **P](HomogenousDataset[int, I, I, P]):
127
+ ...
128
+
129
+ class TupleDataset[I](SequenceDataset[tuple[I, ...], ??]):
130
+ ...
131
+ ```
132
+
133
+ Here there's no way for me to shape a `ParamSpec` to indicate arbitrarily
134
+ many arguments of a fixed type (`I` in this case) to allow me to unpack my
135
+ item tuples into an appropriate `PackedItem`.
136
+
137
+ Until this (among other issues) becomes clearer, I'm setting up around a
138
+ simpler `TypedDict` type variable. We won't have particularly strong static
139
+ checks for item alignment inside `Trainer`, but this seems about as good as I
140
+ can get with the current infrastructure.
@@ -0,0 +1,105 @@
1
+ # Overview
2
+ trainlib is a minimal framework for ML modeling, supporting advanced dataset operations and streamlined training.
3
+
4
+ # Install
5
+ Installation instructions
6
+
7
+ ```sh
8
+ pip install trainlib
9
+ ```
10
+
11
+ or as a CLI tool
12
+
13
+ ```sh
14
+ uv tool install trainlib
15
+ ```
16
+
17
+ # Development
18
+ - Initialize/synchronize the project with `uv sync`, creating a virtual
19
+ environment with base package dependencies.
20
+ - Depending on needs, install the development dependencies with `uv sync
21
+ --extra dev`.
22
+
23
+ # Testing
24
+ - To run the unit tests, make sure to first have the test dependencies
25
+ installed with `uv sync --extra test`, then run `make test`.
26
+ - For notebook testing, run `make install-kernel` to make the environment
27
+ available as a Jupyter kernel (to be selected when running notebooks).
28
+
29
+ # Documentation
30
+ - Install the documentation dependencies with `uv sync --extra doc`.
31
+ - Run `make docs-build` (optionally preceded by `make docs-clean`), and serve
32
+ locally with `make docs-serve`.
33
+
34
+ # Development remarks
35
+ - Across `Trainer` / `Estimator` / `Dataset`, I've considered a
36
+ `ParamSpec`-based typing scheme to better orchestrate alignment in the
37
+ `Trainer.train()` loop, e.g., so we can statically check whether a dataset
38
+ appears to be fulfilling the argument requirements for the estimator's
39
+ `loss()` / `metrics()` methods. Something like
40
+
41
+ ```py
42
+ class Estimator[**P](nn.Module):
43
+ def loss(
44
+ self,
45
+ input: Tensor,
46
+ *args: P.args,
47
+ **kwargs: P.kwargs,
48
+ ) -> Generator:
49
+ ...
50
+
51
+ class Trainer[**P]:
52
+ def __init__(
53
+ self,
54
+ estimator: Estimator[P],
55
+ ...
56
+ ): ...
57
+ ```
58
+
59
+ might be how we begin threading signatures. But ensuring dataset items can
60
+ match `P` is challenging. You can consider a "packed" object where we
61
+ obfuscate passing data through `P`-signatures:
62
+
63
+ ```py
64
+ class PackedItem[**P]:
65
+ def __init__(self, *args: P.args, **kwargs: P.kwargs) -> None:
66
+ self._args = args
67
+ self._kwargs = kwargs
68
+
69
+ def apply[R](self, func: Callable[P, R]) -> R:
70
+ return func(*self._args, **self._kwargs)
71
+
72
+
73
+ class BatchedDataset[U, R, I, **P](Dataset):
74
+ @abstractmethod
75
+ def _process_item_data(
76
+ self,
77
+ item_data: I,
78
+ item_index: int,
79
+ ) -> PackedItem[P]:
80
+ ...
81
+
82
+ def __iter__(self) -> Iterator[PackedItem[P]]:
83
+ ...
84
+ ```
85
+
86
+ Meaningfully shaping those signatures is what remains, but you can't really
87
+ do this, not with typical type expression flexibility. For instance, if I'm
88
+ trying to appropriately type my base `TupleDataset`:
89
+
90
+ ```py
91
+ class SequenceDataset[I, **P](HomogenousDataset[int, I, I, P]):
92
+ ...
93
+
94
+ class TupleDataset[I](SequenceDataset[tuple[I, ...], ??]):
95
+ ...
96
+ ```
97
+
98
+ Here there's no way for me to shape a `ParamSpec` to indicate arbitrarily
99
+ many arguments of a fixed type (`I` in this case) to allow me to unpack my
100
+ item tuples into an appropriate `PackedItem`.
101
+
102
+ Until this (among other issues) becomes clearer, I'm setting up around a
103
+ simpler `TypedDict` type variable. We won't have particularly strong static
104
+ checks for item alignment inside `Trainer`, but this seems about as good as I
105
+ can get around the current infrastructure.
@@ -0,0 +1,84 @@
1
+ [build-system]
2
+ requires = ["setuptools", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "trainlib"
7
+ version = "0.1.0"
8
+ description = "Minimal framework for ML modeling. Supports advanced dataset operations and streamlined training."
9
+ requires-python = ">=3.13"
10
+ authors = [
11
+ { name="Sam Griesemer", email="git@olog.io" },
12
+ ]
13
+ readme = "README.md"
14
+ license = "MIT"
15
+ keywords = [
16
+ "machine-learning",
17
+ ]
18
+ classifiers = [
19
+ "Programming Language :: Python",
20
+ "Operating System :: OS Independent",
21
+ "Development Status :: 3 - Alpha",
22
+
23
+ "Intended Audience :: Developers",
24
+ "Intended Audience :: End Users/Desktop",
25
+ ]
26
+ dependencies = [
27
+ "colorama>=0.4.6",
28
+ "matplotlib>=3.10.8",
29
+ "numpy>=2.4.1",
30
+ "tensorboard>=2.20.0",
31
+ "torch>=2.5.1",
32
+ "tqdm>=4.67.1",
33
+ ]
34
+
35
+ [project.scripts]
36
+ trainlib = "trainlib.__main__:main"
37
+
38
+ [project.optional-dependencies]
39
+ dev = [
40
+ "ipykernel",
41
+ ]
42
+ doc = [
43
+ "furo",
44
+ "myst-parser",
45
+ "sphinx",
46
+ "sphinx-togglebutton",
47
+ "sphinx-autodoc-typehints",
48
+ ]
49
+ test = [
50
+ "pytest",
51
+ ]
52
+
53
+ [project.urls]
54
+ Homepage = "https://doc.olog.io/trainlib"
55
+ Documentation = "https://doc.olog.io/trainlib"
56
+ Repository = "https://git.olog.io/olog/trainlib"
57
+ Issues = "https://git.olog.io/olog/trainlib/issues"
58
+
59
+ [tool.setuptools.packages.find]
60
+ include = ["trainlib*"]
61
+
62
+ # for static data files under package root
63
+ # [tool.setuptools.package-data]
64
+ # "<package>" = ["data/*.toml"]
65
+
66
+ [tool.ruff]
67
+ line-length = 79
68
+
69
+ [tool.ruff.lint]
70
+ select = ["ANN", "E", "F", "UP", "B", "SIM", "I", "C4", "PERF"]
71
+
72
+ [tool.ruff.lint.isort]
73
+ length-sort = true
74
+ order-by-type = false
75
+ force-sort-within-sections = false
76
+
77
+ [tool.ruff.lint.per-file-ignores]
78
+ "tests/**" = ["S101"]
79
+ "**/__init__.py" = ["F401"]
80
+
81
+ [tool.ruff.format]
82
+ quote-style = "double"
83
+ indent-style = "space"
84
+ docstring-code-format = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
File without changes