tanml 0.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tanml-0.1.10/LICENSE +21 -0
- tanml-0.1.10/MANIFEST.in +4 -0
- tanml-0.1.10/PKG-INFO +240 -0
- tanml-0.1.10/README.md +184 -0
- tanml-0.1.10/pyproject.toml +112 -0
- tanml-0.1.10/setup.cfg +4 -0
- tanml-0.1.10/tanml/__init__.py +22 -0
- tanml-0.1.10/tanml/__main__.py +4 -0
- tanml-0.1.10/tanml/analysis/__init__.py +46 -0
- tanml-0.1.10/tanml/analysis/clustering.py +172 -0
- tanml-0.1.10/tanml/analysis/correlation.py +204 -0
- tanml-0.1.10/tanml/analysis/drift.py +196 -0
- tanml-0.1.10/tanml/checks/__init__.py +0 -0
- tanml-0.1.10/tanml/checks/base.py +179 -0
- tanml-0.1.10/tanml/checks/explainability/shap_check.py +307 -0
- tanml-0.1.10/tanml/checks/stress_test.py +168 -0
- tanml-0.1.10/tanml/cli/__init__.py +0 -0
- tanml-0.1.10/tanml/cli/main.py +138 -0
- tanml-0.1.10/tanml/models/registry.py +352 -0
- tanml-0.1.10/tanml/ui/__init__.py +109 -0
- tanml-0.1.10/tanml/ui/app.py +104 -0
- tanml-0.1.10/tanml/ui/assets/logo.png +0 -0
- tanml-0.1.10/tanml/ui/components/__init__.py +31 -0
- tanml-0.1.10/tanml/ui/components/forms.py +121 -0
- tanml-0.1.10/tanml/ui/components/metrics.py +144 -0
- tanml-0.1.10/tanml/ui/components/renderers.py +208 -0
- tanml-0.1.10/tanml/ui/config.py +277 -0
- tanml-0.1.10/tanml/ui/glossary.py +172 -0
- tanml-0.1.10/tanml/ui/helpers/__init__.py +272 -0
- tanml-0.1.10/tanml/ui/helpers/tvr.py +159 -0
- tanml-0.1.10/tanml/ui/narratives.py +285 -0
- tanml-0.1.10/tanml/ui/reports/__init__.py +39 -0
- tanml-0.1.10/tanml/ui/reports/base.py +138 -0
- tanml-0.1.10/tanml/ui/reports/generators.py +876 -0
- tanml-0.1.10/tanml/ui/services/__init__.py +4 -0
- tanml-0.1.10/tanml/ui/services/cv.py +183 -0
- tanml-0.1.10/tanml/ui/services/data.py +92 -0
- tanml-0.1.10/tanml/ui/services/session.py +35 -0
- tanml-0.1.10/tanml/ui/views/__init__.py +3 -0
- tanml-0.1.10/tanml/ui/views/evaluation/__init__.py +15 -0
- tanml-0.1.10/tanml/ui/views/evaluation/main.py +308 -0
- tanml-0.1.10/tanml/ui/views/evaluation/tabs/__init__.py +125 -0
- tanml-0.1.10/tanml/ui/views/evaluation/tabs/benchmark.py +289 -0
- tanml-0.1.10/tanml/ui/views/evaluation/tabs/cluster.py +280 -0
- tanml-0.1.10/tanml/ui/views/evaluation/tabs/drift.py +129 -0
- tanml-0.1.10/tanml/ui/views/evaluation/tabs/metrics.py +98 -0
- tanml-0.1.10/tanml/ui/views/evaluation/tabs/plots.py +184 -0
- tanml-0.1.10/tanml/ui/views/evaluation/tabs/shap.py +98 -0
- tanml-0.1.10/tanml/ui/views/evaluation/tabs/stress.py +81 -0
- tanml-0.1.10/tanml/ui/views/model_dev.py +848 -0
- tanml-0.1.10/tanml/ui/views/preprocessing.py +227 -0
- tanml-0.1.10/tanml/ui/views/profiling.py +372 -0
- tanml-0.1.10/tanml/ui/views/ranking.py +372 -0
- tanml-0.1.10/tanml/ui/views/setup.py +213 -0
- tanml-0.1.10/tanml/utils/__init__.py +0 -0
- tanml-0.1.10/tanml/utils/data_loader.py +160 -0
- tanml-0.1.10/tanml.egg-info/PKG-INFO +240 -0
- tanml-0.1.10/tanml.egg-info/SOURCES.txt +61 -0
- tanml-0.1.10/tanml.egg-info/dependency_links.txt +1 -0
- tanml-0.1.10/tanml.egg-info/entry_points.txt +2 -0
- tanml-0.1.10/tanml.egg-info/requires.txt +23 -0
- tanml-0.1.10/tanml.egg-info/top_level.txt +2 -0
- tanml-0.1.10/tests/test_final_verification.py +112 -0
tanml-0.1.10/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Tanmay Sah and Dolly Sah
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
tanml-0.1.10/MANIFEST.in
ADDED
tanml-0.1.10/PKG-INFO
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tanml
|
|
3
|
+
Version: 0.1.10
|
|
4
|
+
Summary: Automated validation toolkit for tabular ML models—MRM, credit risk, insurance, and other regulated use cases.
|
|
5
|
+
Author: Tanmay Sah, Dolly Sah
|
|
6
|
+
Maintainer: Tanmay Sah, Dolly Sah
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/tdlabs-ai/tanml
|
|
9
|
+
Project-URL: Source, https://github.com/tdlabs-ai/tanml
|
|
10
|
+
Project-URL: Issues, https://github.com/tdlabs-ai/tanml/issues
|
|
11
|
+
Project-URL: Documentation, https://github.com/tdlabs-ai/tanml#readme
|
|
12
|
+
Keywords: model validation,model risk management,model governance,SR 11-7,tabular ML,credit risk,insurance analytics,explainability,XAI,SHAP,stress testing,reporting,docx,streamlit,xgboost,lightgbm,catboost
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
16
|
+
Classifier: Natural Language :: English
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
27
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
28
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
29
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
30
|
+
Requires-Python: >=3.8
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
License-File: LICENSE
|
|
33
|
+
Requires-Dist: numpy>=1.26
|
|
34
|
+
Requires-Dist: scipy>=1.11
|
|
35
|
+
Requires-Dist: pandas>=2.0
|
|
36
|
+
Requires-Dist: scikit-learn>=1.3
|
|
37
|
+
Requires-Dist: statsmodels>=0.14
|
|
38
|
+
Requires-Dist: xgboost>=2.0
|
|
39
|
+
Requires-Dist: lightgbm>=4.3
|
|
40
|
+
Requires-Dist: catboost>=1.2
|
|
41
|
+
Requires-Dist: shap>=0.44
|
|
42
|
+
Requires-Dist: numba>=0.58
|
|
43
|
+
Requires-Dist: matplotlib>=3.8
|
|
44
|
+
Requires-Dist: seaborn>=0.13
|
|
45
|
+
Requires-Dist: Pillow>=10.0
|
|
46
|
+
Requires-Dist: python-docx>=1.1.2
|
|
47
|
+
Requires-Dist: pyarrow>=14.0
|
|
48
|
+
Requires-Dist: openpyxl>=3.1
|
|
49
|
+
Requires-Dist: pyreadstat>=1.2
|
|
50
|
+
Requires-Dist: streamlit>=1.36
|
|
51
|
+
Provides-Extra: dev
|
|
52
|
+
Requires-Dist: pytest; extra == "dev"
|
|
53
|
+
Requires-Dist: black; extra == "dev"
|
|
54
|
+
Requires-Dist: isort; extra == "dev"
|
|
55
|
+
Dynamic: license-file
|
|
56
|
+
|
|
57
|
+
# TanML: Automated Model Validation Toolkit for Tabular Machine Learning
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
[PyPI](https://pypi.org/project/tanml/)
|
|
64
|
+
[Downloads](https://pepy.tech/project/tanml)
|
|
65
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
66
|
+
[Feedback form](https://forms.gle/oG2JHvt7tLXE5Atu7)
|
|
67
|
+
[DOI: 10.5281/zenodo.17317165](https://doi.org/10.5281/zenodo.17317165)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
> **TanML bridges the gap between data science tools (for building models) and governance requirements (for validating them). It's not just an ML library—it's a validation workflow with built-in documentation.**
|
|
71
|
+
|
|
72
|
+
* **Status:** Beta (`0.x`)
|
|
73
|
+
* **License:** MIT
|
|
74
|
+
* **Python:** 3.8–3.13
|
|
75
|
+
* **OS:** Linux / macOS / Windows (incl. WSL)
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Table of Contents
|
|
80
|
+
|
|
81
|
+
- Why TanML?
|
|
82
|
+
- Install
|
|
83
|
+
- Quick Start (UI)
|
|
84
|
+
- Optional CLI Flags
|
|
85
|
+
- Reports
|
|
86
|
+
- Data Privacy
|
|
87
|
+
- Troubleshooting
|
|
88
|
+
- Contributing
|
|
89
|
+
- License & Citation
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## Why TanML?
|
|
94
|
+
|
|
95
|
+
* **End-to-end workflow:** Data Profiling → Preprocessing → Feature Ranking → Model Development → Evaluation → Reports—all in one UI.
|
|
96
|
+
* **Audit-ready Word reports:** Generate editable .docx documents for stakeholders and compliance reviews.
|
|
97
|
+
* **Built for regulated industries:** Designed for MRM, credit risk, insurance, and SR 11-7 contexts.
|
|
98
|
+
* **No code required:** Fully UI-driven—no Python knowledge needed.
|
|
99
|
+
* **Robust evaluation:** Drift detection, stress testing, SHAP explainability, cluster coverage.
|
|
100
|
+
* **Works with your stack:** scikit-learn, XGBoost, LightGBM, CatBoost.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Install
|
|
105
|
+
|
|
106
|
+
We strongly recommend using a virtual environment to isolate dependencies:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
# Create and activate virtual environment
|
|
110
|
+
python -m venv .venv
|
|
111
|
+
source .venv/bin/activate # Linux/Mac
|
|
112
|
+
# .venv\Scripts\activate # Windows
|
|
113
|
+
|
|
114
|
+
# Install TanML
|
|
115
|
+
pip install tanml
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Quick Start (UI)
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
tanml ui
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
* Opens at **[http://127.0.0.1:8501](http://127.0.0.1:8501)**
|
|
125
|
+
* **Upload limit ~2 GB** (preconfigured)
|
|
126
|
+
* **Telemetry disabled by default**
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Optional CLI Flags
|
|
131
|
+
|
|
132
|
+
Most users just run `tanml ui`. These help on teams/servers:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
# Share on LAN
|
|
136
|
+
tanml ui --public
|
|
137
|
+
|
|
138
|
+
# Different port
|
|
139
|
+
tanml ui --port 9000
|
|
140
|
+
|
|
141
|
+
# Headless (server/CI; no auto-open browser)
|
|
142
|
+
tanml ui --headless
|
|
143
|
+
|
|
144
|
+
# Set the upload limit in MB (2048 MB = 2 GB, which is also the default)
|
|
145
|
+
tanml ui --max-mb 2048
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Env var equivalents (Linux/macOS bash):
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
TANML_SERVER_ADDRESS=0.0.0.0 TANML_PORT=9000 TANML_MAX_MB=2048 tanml ui
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Windows PowerShell:
|
|
155
|
+
|
|
156
|
+
```powershell
|
|
157
|
+
$env:TANML_SERVER_ADDRESS="0.0.0.0"; $env:TANML_PORT="9000"; $env:TANML_MAX_MB="2048"; tanml ui
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
**Defaults:** address `127.0.0.1`, port `8501`, limit `2048 MB`, telemetry **OFF**.
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Reports
|
|
165
|
+
|
|
166
|
+
TanML generates audit-ready Word reports (.docx) programmatically:
|
|
167
|
+
|
|
168
|
+
* **Model Development Report** — Cross-validation metrics, diagnostics, and performance summary
|
|
169
|
+
* **Model Evaluation Report** — Train/Test comparison, drift analysis, stress testing, SHAP explainability
|
|
170
|
+
* **Feature Power Ranking Report** — Feature importance scores, correlation analysis
|
|
171
|
+
|
|
172
|
+
Reports are generated via `tanml/ui/reports/generators.py` and exported directly from the UI.
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## Data Privacy
|
|
177
|
+
|
|
178
|
+
- TanML runs locally; no data is sent to external services.
|
|
179
|
+
- Telemetry is disabled by default (and can be forced off via `--no-telemetry`).
|
|
180
|
+
- UI artifacts and reports are written under `./tanml_runs/<session>/` in your working directory.
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## Troubleshooting
|
|
185
|
+
|
|
186
|
+
* **Page didn’t open?** Visit `http://127.0.0.1:8501` or run `tanml ui --port 9000`.
|
|
187
|
+
* **Large CSVs are slow/heavy?** Prefer **Parquet**; CSV → DataFrame can use several GB RAM.
|
|
188
|
+
* **Artifacts missing?** Check `./tanml_runs/<session>/artifacts/`.
|
|
189
|
+
* **Corporate networks:** use `tanml ui --public` to share on LAN.
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## Contributing
|
|
194
|
+
|
|
195
|
+
We welcome issues and PRs!
|
|
196
|
+
|
|
197
|
+
- Create a virtual environment and install dev extras:
|
|
198
|
+
- `python -m venv .venv && source .venv/bin/activate` (or `.venv\Scripts\activate` on Windows)
|
|
199
|
+
- `pip install -e .[dev]`
|
|
200
|
+
- Format/lint: `black . && isort .`
|
|
201
|
+
- Run tests: `pytest`
|
|
202
|
+
|
|
203
|
+
Before opening a PR, please describe the change and include a brief test or reproduction steps where applicable.
|
|
204
|
+
|
|
205
|
+
---
|
|
206
|
+
|
|
207
|
+
## License & Citation
|
|
208
|
+
|
|
209
|
+
**License:** MIT. See [LICENSE](https://github.com/tdlabs-ai/tanml/blob/main/LICENSE).
|
|
210
|
+
SPDX-License-Identifier: MIT
|
|
211
|
+
|
|
212
|
+
© 2025 Tanmay Sah and Dolly Sah. You may use, modify, and distribute this software with appropriate attribution.
|
|
213
|
+
|
|
214
|
+
### How to cite
|
|
215
|
+
|
|
216
|
+
If TanML helps your work or publications, please cite:
|
|
217
|
+
|
|
218
|
+
> Sah, T., & Sah, D. (2025). *TanML: Automated Model Validation Toolkit for Tabular Machine Learning* [Software]. Zenodo. [https://doi.org/10.5281/zenodo.17317165](https://doi.org/10.5281/zenodo.17317165)
|
|
219
|
+
|
|
220
|
+
Or in BibTeX (version-agnostic):
|
|
221
|
+
|
|
222
|
+
```bibtex
|
|
223
|
+
@software{tanml_2025,
|
|
224
|
+
author = {Sah, Tanmay and Sah, Dolly},
|
|
225
|
+
title = {TanML: Automated Model Validation Toolkit for Tabular Machine Learning},
|
|
226
|
+
year = {2025},
|
|
227
|
+
publisher = {Zenodo},
|
|
228
|
+
doi = {10.5281/zenodo.17317165},
|
|
229
|
+
url = {https://doi.org/10.5281/zenodo.17317165},
|
|
230
|
+
license = {MIT}
|
|
231
|
+
}
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
A machine-readable citation file (`CITATION.cff`) is included for citation tools and GitHub’s “Cite this repository” button.
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
|
tanml-0.1.10/README.md
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# TanML: Automated Model Validation Toolkit for Tabular Machine Learning
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
[PyPI](https://pypi.org/project/tanml/)
|
|
8
|
+
[Downloads](https://pepy.tech/project/tanml)
|
|
9
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
10
|
+
[Feedback form](https://forms.gle/oG2JHvt7tLXE5Atu7)
|
|
11
|
+
[DOI: 10.5281/zenodo.17317165](https://doi.org/10.5281/zenodo.17317165)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
> **TanML bridges the gap between data science tools (for building models) and governance requirements (for validating them). It's not just an ML library—it's a validation workflow with built-in documentation.**
|
|
15
|
+
|
|
16
|
+
* **Status:** Beta (`0.x`)
|
|
17
|
+
* **License:** MIT
|
|
18
|
+
* **Python:** 3.8–3.13
|
|
19
|
+
* **OS:** Linux / macOS / Windows (incl. WSL)
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Table of Contents
|
|
24
|
+
|
|
25
|
+
- Why TanML?
|
|
26
|
+
- Install
|
|
27
|
+
- Quick Start (UI)
|
|
28
|
+
- Optional CLI Flags
|
|
29
|
+
- Reports
|
|
30
|
+
- Data Privacy
|
|
31
|
+
- Troubleshooting
|
|
32
|
+
- Contributing
|
|
33
|
+
- License & Citation
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Why TanML?
|
|
38
|
+
|
|
39
|
+
* **End-to-end workflow:** Data Profiling → Preprocessing → Feature Ranking → Model Development → Evaluation → Reports—all in one UI.
|
|
40
|
+
* **Audit-ready Word reports:** Generate editable .docx documents for stakeholders and compliance reviews.
|
|
41
|
+
* **Built for regulated industries:** Designed for MRM, credit risk, insurance, and SR 11-7 contexts.
|
|
42
|
+
* **No code required:** Fully UI-driven—no Python knowledge needed.
|
|
43
|
+
* **Robust evaluation:** Drift detection, stress testing, SHAP explainability, cluster coverage.
|
|
44
|
+
* **Works with your stack:** scikit-learn, XGBoost, LightGBM, CatBoost.
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Install
|
|
49
|
+
|
|
50
|
+
We strongly recommend using a virtual environment to isolate dependencies:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Create and activate virtual environment
|
|
54
|
+
python -m venv .venv
|
|
55
|
+
source .venv/bin/activate # Linux/Mac
|
|
56
|
+
# .venv\Scripts\activate # Windows
|
|
57
|
+
|
|
58
|
+
# Install TanML
|
|
59
|
+
pip install tanml
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Quick Start (UI)
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
tanml ui
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
* Opens at **[http://127.0.0.1:8501](http://127.0.0.1:8501)**
|
|
69
|
+
* **Upload limit ~2 GB** (preconfigured)
|
|
70
|
+
* **Telemetry disabled by default**
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Optional CLI Flags
|
|
75
|
+
|
|
76
|
+
Most users just run `tanml ui`. These help on teams/servers:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# Share on LAN
|
|
80
|
+
tanml ui --public
|
|
81
|
+
|
|
82
|
+
# Different port
|
|
83
|
+
tanml ui --port 9000
|
|
84
|
+
|
|
85
|
+
# Headless (server/CI; no auto-open browser)
|
|
86
|
+
tanml ui --headless
|
|
87
|
+
|
|
88
|
+
# Set the upload limit in MB (2048 MB = 2 GB, which is also the default)
|
|
89
|
+
tanml ui --max-mb 2048
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Env var equivalents (Linux/macOS bash):
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
TANML_SERVER_ADDRESS=0.0.0.0 TANML_PORT=9000 TANML_MAX_MB=2048 tanml ui
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Windows PowerShell:
|
|
99
|
+
|
|
100
|
+
```powershell
|
|
101
|
+
$env:TANML_SERVER_ADDRESS="0.0.0.0"; $env:TANML_PORT="9000"; $env:TANML_MAX_MB="2048"; tanml ui
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
**Defaults:** address `127.0.0.1`, port `8501`, limit `2048 MB`, telemetry **OFF**.
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
## Reports
|
|
109
|
+
|
|
110
|
+
TanML generates audit-ready Word reports (.docx) programmatically:
|
|
111
|
+
|
|
112
|
+
* **Model Development Report** — Cross-validation metrics, diagnostics, and performance summary
|
|
113
|
+
* **Model Evaluation Report** — Train/Test comparison, drift analysis, stress testing, SHAP explainability
|
|
114
|
+
* **Feature Power Ranking Report** — Feature importance scores, correlation analysis
|
|
115
|
+
|
|
116
|
+
Reports are generated via `tanml/ui/reports/generators.py` and exported directly from the UI.
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Data Privacy
|
|
121
|
+
|
|
122
|
+
- TanML runs locally; no data is sent to external services.
|
|
123
|
+
- Telemetry is disabled by default (and can be forced off via `--no-telemetry`).
|
|
124
|
+
- UI artifacts and reports are written under `./tanml_runs/<session>/` in your working directory.
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## Troubleshooting
|
|
129
|
+
|
|
130
|
+
* **Page didn’t open?** Visit `http://127.0.0.1:8501` or run `tanml ui --port 9000`.
|
|
131
|
+
* **Large CSVs are slow/heavy?** Prefer **Parquet**; CSV → DataFrame can use several GB RAM.
|
|
132
|
+
* **Artifacts missing?** Check `./tanml_runs/<session>/artifacts/`.
|
|
133
|
+
* **Corporate networks:** use `tanml ui --public` to share on LAN.
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## Contributing
|
|
138
|
+
|
|
139
|
+
We welcome issues and PRs!
|
|
140
|
+
|
|
141
|
+
- Create a virtual environment and install dev extras:
|
|
142
|
+
- `python -m venv .venv && source .venv/bin/activate` (or `.venv\Scripts\activate` on Windows)
|
|
143
|
+
- `pip install -e .[dev]`
|
|
144
|
+
- Format/lint: `black . && isort .`
|
|
145
|
+
- Run tests: `pytest`
|
|
146
|
+
|
|
147
|
+
Before opening a PR, please describe the change and include a brief test or reproduction steps where applicable.
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## License & Citation
|
|
152
|
+
|
|
153
|
+
**License:** MIT. See [LICENSE](https://github.com/tdlabs-ai/tanml/blob/main/LICENSE).
|
|
154
|
+
SPDX-License-Identifier: MIT
|
|
155
|
+
|
|
156
|
+
© 2025 Tanmay Sah and Dolly Sah. You may use, modify, and distribute this software with appropriate attribution.
|
|
157
|
+
|
|
158
|
+
### How to cite
|
|
159
|
+
|
|
160
|
+
If TanML helps your work or publications, please cite:
|
|
161
|
+
|
|
162
|
+
> Sah, T., & Sah, D. (2025). *TanML: Automated Model Validation Toolkit for Tabular Machine Learning* [Software]. Zenodo. [https://doi.org/10.5281/zenodo.17317165](https://doi.org/10.5281/zenodo.17317165)
|
|
163
|
+
|
|
164
|
+
Or in BibTeX (version-agnostic):
|
|
165
|
+
|
|
166
|
+
```bibtex
|
|
167
|
+
@software{tanml_2025,
|
|
168
|
+
author = {Sah, Tanmay and Sah, Dolly},
|
|
169
|
+
title = {TanML: Automated Model Validation Toolkit for Tabular Machine Learning},
|
|
170
|
+
year = {2025},
|
|
171
|
+
publisher = {Zenodo},
|
|
172
|
+
doi = {10.5281/zenodo.17317165},
|
|
173
|
+
url = {https://doi.org/10.5281/zenodo.17317165},
|
|
174
|
+
license = {MIT}
|
|
175
|
+
}
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
A machine-readable citation file (`CITATION.cff`) is included for citation tools and GitHub’s “Cite this repository” button.
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tanml"
|
|
7
|
+
version = "0.1.10"
|
|
8
|
+
description = "Automated validation toolkit for tabular ML models—MRM, credit risk, insurance, and other regulated use cases."
|
|
9
|
+
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
|
+
requires-python = ">=3.9"  # numpy>=1.26, scipy>=1.11 and matplotlib>=3.8 do not support Python 3.8
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
keywords = [
|
|
16
|
+
"model validation",
|
|
17
|
+
"model risk management",
|
|
18
|
+
"model governance",
|
|
19
|
+
"SR 11-7",
|
|
20
|
+
"tabular ML",
|
|
21
|
+
"credit risk",
|
|
22
|
+
"insurance analytics",
|
|
23
|
+
"explainability",
|
|
24
|
+
"XAI",
|
|
25
|
+
"SHAP",
|
|
26
|
+
"stress testing",
|
|
27
|
+
"reporting",
|
|
28
|
+
"docx",
|
|
29
|
+
"streamlit",
|
|
30
|
+
"xgboost",
|
|
31
|
+
"lightgbm",
|
|
32
|
+
"catboost"
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
classifiers = [
|
|
38
|
+
"Development Status :: 4 - Beta",
|
|
39
|
+
"Intended Audience :: Science/Research",
|
|
40
|
+
"Intended Audience :: Financial and Insurance Industry",
|
|
41
|
+
"Natural Language :: English",
|
|
42
|
+
"Operating System :: OS Independent",
|
|
43
|
+
"Programming Language :: Python",
|
|
44
|
+
"Programming Language :: Python :: 3",
|
|
45
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
46
|
+
"Programming Language :: Python :: 3.8",
|
|
47
|
+
"Programming Language :: Python :: 3.9",
|
|
48
|
+
"Programming Language :: Python :: 3.10",
|
|
49
|
+
"Programming Language :: Python :: 3.11",
|
|
50
|
+
"Programming Language :: Python :: 3.12",
|
|
51
|
+
"Programming Language :: Python :: 3.13",
|
|
52
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
53
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
54
|
+
"Topic :: Scientific/Engineering :: Visualization"
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
authors = [
|
|
59
|
+
{ name = "Tanmay Sah" },
|
|
60
|
+
{ name = "Dolly Sah" }
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
maintainers = [
|
|
64
|
+
{ name = "Tanmay Sah" },
|
|
65
|
+
{ name = "Dolly Sah" }
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
dependencies = [
|
|
69
|
+
"numpy>=1.26",
|
|
70
|
+
"scipy>=1.11",
|
|
71
|
+
"pandas>=2.0",
|
|
72
|
+
"scikit-learn>=1.3",
|
|
73
|
+
"statsmodels>=0.14",
|
|
74
|
+
"xgboost>=2.0",
|
|
75
|
+
"lightgbm>=4.3",
|
|
76
|
+
"catboost>=1.2",
|
|
77
|
+
"shap>=0.44",
|
|
78
|
+
"numba>=0.58",
|
|
79
|
+
"matplotlib>=3.8",
|
|
80
|
+
"seaborn>=0.13",
|
|
81
|
+
"Pillow>=10.0",
|
|
82
|
+
"python-docx>=1.1.2",
|
|
83
|
+
|
|
84
|
+
"pyarrow>=14.0",
|
|
85
|
+
"openpyxl>=3.1",
|
|
86
|
+
"pyreadstat>=1.2",
|
|
87
|
+
"streamlit>=1.36"
|
|
88
|
+
]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
[project.optional-dependencies]
|
|
92
|
+
dev = ["pytest", "black", "isort"]
|
|
93
|
+
|
|
94
|
+
[project.scripts]
|
|
95
|
+
tanml = "tanml.cli.main:main"
|
|
96
|
+
|
|
97
|
+
[project.urls]
|
|
98
|
+
Homepage = "https://github.com/tdlabs-ai/tanml"
|
|
99
|
+
Source = "https://github.com/tdlabs-ai/tanml"
|
|
100
|
+
Issues = "https://github.com/tdlabs-ai/tanml/issues"
|
|
101
|
+
Documentation = "https://github.com/tdlabs-ai/tanml#readme"
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
[tool.setuptools]
|
|
105
|
+
include-package-data = true
|
|
106
|
+
|
|
107
|
+
[tool.setuptools.packages.find]
|
|
108
|
+
where = ["."]
|
|
109
|
+
include = ["tanml*"]
|
|
110
|
+
|
|
111
|
+
[tool.setuptools.package-data]
|
|
112
|
+
"tanml.ui.assets" = ["*.png", "*.jpg", "*.svg"]
|
tanml-0.1.10/setup.cfg
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# tanml/__init__.py
|
|
2
|
+
"""
|
|
3
|
+
TanML - Industrial-Grade Model Validation Framework
|
|
4
|
+
|
|
5
|
+
TanML provides comprehensive model validation, testing, and reporting
|
|
6
|
+
for machine learning models in production environments.
|
|
7
|
+
|
|
8
|
+
Quick Start:
|
|
9
|
+
# Launch the UI
|
|
10
|
+
tanml ui
|
|
11
|
+
|
|
12
|
+
# Or run checks programmatically
|
|
13
|
+
from tanml.checks.stress_test import StressTestCheck
|
|
14
|
+
from tanml.checks.explainability.shap_check import SHAPCheck
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
__version__ = "0.1.10"
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"__version__",
|
|
21
|
+
]
|
|
22
|
+
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# tanml/analysis/__init__.py
|
|
2
|
+
"""
|
|
3
|
+
Analysis module for TanML.
|
|
4
|
+
|
|
5
|
+
This module contains reusable business logic for data analysis,
|
|
6
|
+
separated from the UI layer for better maintainability and testability.
|
|
7
|
+
|
|
8
|
+
Modules:
|
|
9
|
+
- drift: Feature drift analysis (PSI, KS statistics)
|
|
10
|
+
- clustering: Input cluster coverage analysis
|
|
11
|
+
- correlation: Feature correlation and VIF analysis
|
|
12
|
+
- benchmarking: Model comparison and benchmarking
|
|
13
|
+
|
|
14
|
+
Example:
|
|
15
|
+
from tanml.analysis import calculate_psi, calculate_ks
|
|
16
|
+
|
|
17
|
+
psi_scores = calculate_psi(train_df, test_df, numeric_cols)
|
|
18
|
+
ks_scores = calculate_ks(train_df, test_df, numeric_cols)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from tanml.analysis.drift import (
|
|
22
|
+
calculate_psi,
|
|
23
|
+
calculate_ks,
|
|
24
|
+
analyze_drift,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
from tanml.analysis.clustering import (
|
|
28
|
+
analyze_cluster_coverage,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
from tanml.analysis.correlation import (
|
|
32
|
+
calculate_correlation_matrix,
|
|
33
|
+
calculate_vif,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
# Drift analysis
|
|
38
|
+
"calculate_psi",
|
|
39
|
+
"calculate_ks",
|
|
40
|
+
"analyze_drift",
|
|
41
|
+
# Clustering
|
|
42
|
+
"analyze_cluster_coverage",
|
|
43
|
+
# Correlation
|
|
44
|
+
"calculate_correlation_matrix",
|
|
45
|
+
"calculate_vif",
|
|
46
|
+
]
|