tanml 0.1.6__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tanml might be problematic. Click here for more details.

Files changed (97) hide show
  1. {tanml-0.1.6 → tanml-0.1.7}/MANIFEST.in +0 -2
  2. tanml-0.1.7/PKG-INFO +164 -0
  3. tanml-0.1.7/README-pypi.md +107 -0
  4. tanml-0.1.7/README.md +148 -0
  5. tanml-0.1.7/pyproject.toml +112 -0
  6. tanml-0.1.7/tanml/__init__.py +1 -0
  7. {tanml-0.1.6 → tanml-0.1.7}/tanml/check_runners/cleaning_repro_runner.py +2 -2
  8. tanml-0.1.7/tanml/check_runners/correlation_runner.py +52 -0
  9. tanml-0.1.7/tanml/check_runners/explainability_runner.py +18 -0
  10. tanml-0.1.7/tanml/check_runners/logistic_stats_runner.py +207 -0
  11. tanml-0.1.7/tanml/check_runners/performance_runner.py +84 -0
  12. {tanml-0.1.6 → tanml-0.1.7}/tanml/check_runners/raw_data_runner.py +29 -14
  13. tanml-0.1.7/tanml/check_runners/regression_metrics_runner.py +195 -0
  14. tanml-0.1.7/tanml/check_runners/stress_test_runner.py +43 -0
  15. tanml-0.1.7/tanml/check_runners/vif_runner.py +60 -0
  16. tanml-0.1.7/tanml/checks/correlation.py +261 -0
  17. tanml-0.1.7/tanml/checks/explainability/shap_check.py +287 -0
  18. tanml-0.1.7/tanml/checks/logit_stats.py +191 -0
  19. tanml-0.1.7/tanml/checks/performance_classification.py +305 -0
  20. tanml-0.1.7/tanml/checks/raw_data.py +82 -0
  21. tanml-0.1.7/tanml/checks/regression_metrics.py +167 -0
  22. tanml-0.1.7/tanml/checks/stress_test.py +168 -0
  23. tanml-0.1.7/tanml/cli/main.py +99 -0
  24. {tanml-0.1.6 → tanml-0.1.7}/tanml/engine/check_agent_registry.py +20 -10
  25. tanml-0.1.7/tanml/engine/core_engine_agent.py +277 -0
  26. tanml-0.1.7/tanml/models/registry.py +329 -0
  27. tanml-0.1.7/tanml/report/report_builder.py +1263 -0
  28. tanml-0.1.7/tanml/report/templates/report_template_cls.docx +0 -0
  29. tanml-0.1.7/tanml/report/templates/report_template_reg.docx +0 -0
  30. tanml-0.1.7/tanml/ui/app.py +1205 -0
  31. tanml-0.1.7/tanml/utils/data_loader.py +107 -0
  32. tanml-0.1.7/tanml.egg-info/PKG-INFO +164 -0
  33. {tanml-0.1.6 → tanml-0.1.7}/tanml.egg-info/SOURCES.txt +9 -16
  34. tanml-0.1.7/tanml.egg-info/requires.txt +25 -0
  35. tanml-0.1.6/PKG-INFO +0 -317
  36. tanml-0.1.6/README.md +0 -284
  37. tanml-0.1.6/pyproject.toml +0 -54
  38. tanml-0.1.6/tanml/__init__.py +0 -1
  39. tanml-0.1.6/tanml/check_runners/correlation_runner.py +0 -15
  40. tanml-0.1.6/tanml/check_runners/explainability_runner.py +0 -28
  41. tanml-0.1.6/tanml/check_runners/logistic_stats_runner.py +0 -28
  42. tanml-0.1.6/tanml/check_runners/performance_runner.py +0 -28
  43. tanml-0.1.6/tanml/check_runners/stress_test_runner.py +0 -26
  44. tanml-0.1.6/tanml/check_runners/vif_runner.py +0 -54
  45. tanml-0.1.6/tanml/checks/correlation.py +0 -61
  46. tanml-0.1.6/tanml/checks/explainability/shap_check.py +0 -55
  47. tanml-0.1.6/tanml/checks/logit_stats.py +0 -59
  48. tanml-0.1.6/tanml/checks/raw_data.py +0 -47
  49. tanml-0.1.6/tanml/checks/stress_test.py +0 -64
  50. tanml-0.1.6/tanml/cli/arg_parser.py +0 -31
  51. tanml-0.1.6/tanml/cli/init_cmd.py +0 -8
  52. tanml-0.1.6/tanml/cli/main.py +0 -27
  53. tanml-0.1.6/tanml/cli/validate_cmd.py +0 -7
  54. tanml-0.1.6/tanml/config_templates/rules_multiple_models_datasets.yaml +0 -144
  55. tanml-0.1.6/tanml/config_templates/rules_one_dataset_segment_column.yaml +0 -140
  56. tanml-0.1.6/tanml/config_templates/rules_one_model_one_dataset.yaml +0 -143
  57. tanml-0.1.6/tanml/engine/core_engine_agent.py +0 -115
  58. tanml-0.1.6/tanml/engine/segmentation_agent.py +0 -118
  59. tanml-0.1.6/tanml/engine/validation_agent.py +0 -91
  60. tanml-0.1.6/tanml/report/report_builder.py +0 -230
  61. tanml-0.1.6/tanml/report/templates/report_template.docx +0 -0
  62. tanml-0.1.6/tanml/utils/data_loader.py +0 -17
  63. tanml-0.1.6/tanml/utils/model_loader.py +0 -35
  64. tanml-0.1.6/tanml/utils/r_loader.py +0 -30
  65. tanml-0.1.6/tanml/utils/sas_loader.py +0 -50
  66. tanml-0.1.6/tanml/utils/yaml_generator.py +0 -34
  67. tanml-0.1.6/tanml/utils/yaml_loader.py +0 -5
  68. tanml-0.1.6/tanml/validate.py +0 -209
  69. tanml-0.1.6/tanml.egg-info/PKG-INFO +0 -317
  70. tanml-0.1.6/tanml.egg-info/requires.txt +0 -21
  71. {tanml-0.1.6 → tanml-0.1.7}/LICENSE +0 -0
  72. {tanml-0.1.6 → tanml-0.1.7}/setup.cfg +0 -0
  73. {tanml-0.1.6 → tanml-0.1.7}/tanml/check_runners/__init__.py +0 -0
  74. {tanml-0.1.6 → tanml-0.1.7}/tanml/check_runners/base_runner.py +0 -0
  75. {tanml-0.1.6 → tanml-0.1.7}/tanml/check_runners/data_quality_runner.py +0 -0
  76. {tanml-0.1.6 → tanml-0.1.7}/tanml/check_runners/eda_runner.py +0 -0
  77. {tanml-0.1.6 → tanml-0.1.7}/tanml/check_runners/input_cluster_runner.py +0 -0
  78. {tanml-0.1.6 → tanml-0.1.7}/tanml/check_runners/model_meta_runner.py +0 -0
  79. {tanml-0.1.6 → tanml-0.1.7}/tanml/check_runners/rule_engine_runner.py +0 -0
  80. {tanml-0.1.6 → tanml-0.1.7}/tanml/checks/__init__.py +0 -0
  81. {tanml-0.1.6 → tanml-0.1.7}/tanml/checks/base.py +0 -0
  82. {tanml-0.1.6 → tanml-0.1.7}/tanml/checks/cleaning_repro.py +0 -0
  83. {tanml-0.1.6 → tanml-0.1.7}/tanml/checks/data_quality.py +0 -0
  84. {tanml-0.1.6 → tanml-0.1.7}/tanml/checks/eda.py +0 -0
  85. {tanml-0.1.6 → tanml-0.1.7}/tanml/checks/input_cluster.py +0 -0
  86. {tanml-0.1.6 → tanml-0.1.7}/tanml/checks/model_contents.py +0 -0
  87. {tanml-0.1.6 → tanml-0.1.7}/tanml/checks/model_meta.py +0 -0
  88. {tanml-0.1.6 → tanml-0.1.7}/tanml/checks/performance.py +0 -0
  89. {tanml-0.1.6 → tanml-0.1.7}/tanml/checks/rule_engine.py +0 -0
  90. {tanml-0.1.6 → tanml-0.1.7}/tanml/checks/vif.py +0 -0
  91. {tanml-0.1.6 → tanml-0.1.7}/tanml/cli/__init__.py +0 -0
  92. {tanml-0.1.6 → tanml-0.1.7}/tanml/config_templates/__init__.py +0 -0
  93. {tanml-0.1.6 → tanml-0.1.7}/tanml/engine/__init__.py +0 -0
  94. {tanml-0.1.6 → tanml-0.1.7}/tanml/utils/__init__.py +0 -0
  95. {tanml-0.1.6 → tanml-0.1.7}/tanml.egg-info/dependency_links.txt +0 -0
  96. {tanml-0.1.6 → tanml-0.1.7}/tanml.egg-info/entry_points.txt +0 -0
  97. {tanml-0.1.6 → tanml-0.1.7}/tanml.egg-info/top_level.txt +0 -0
@@ -1,6 +1,4 @@
1
1
  include README.md
2
2
  include LICENSE
3
3
  include pyproject.toml
4
-
5
4
  include tanml/report/templates/*.docx
6
- include tanml/config_templates/*.yaml
tanml-0.1.7/PKG-INFO ADDED
@@ -0,0 +1,164 @@
1
+ Metadata-Version: 2.4
2
+ Name: tanml
3
+ Version: 0.1.7
4
+ Summary: Automated validation toolkit for tabular ML models—MRM, credit risk, insurance, and other regulated use cases.
5
+ Author: Tanmay Sah, Dolly Sah
6
+ Maintainer: Tanmay Sah, Dolly Sah
7
+ License-Expression: MIT
8
+ Project-URL: Homepage, https://github.com/tdlabs-ai/tanml
9
+ Project-URL: Source, https://github.com/tdlabs-ai/tanml
10
+ Project-URL: Issues, https://github.com/tdlabs-ai/tanml/issues
11
+ Project-URL: Documentation, https://github.com/tdlabs-ai/tanml#readme
12
+ Keywords: model validation,model risk management,model governance,SR 11-7,tabular ML,credit risk,insurance analytics,explainability,XAI,SHAP,stress testing,reporting,docx,streamlit,xgboost,lightgbm,catboost
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Intended Audience :: Financial and Insurance Industry
16
+ Classifier: Natural Language :: English
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3 :: Only
21
+ Classifier: Programming Language :: Python :: 3.8
22
+ Classifier: Programming Language :: Python :: 3.9
23
+ Classifier: Programming Language :: Python :: 3.10
24
+ Classifier: Programming Language :: Python :: 3.11
25
+ Classifier: Programming Language :: Python :: 3.12
26
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
27
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
28
+ Classifier: Topic :: Scientific/Engineering :: Visualization
29
+ Requires-Python: >=3.8
30
+ Description-Content-Type: text/markdown
31
+ License-File: LICENSE
32
+ Requires-Dist: numpy>=1.26
33
+ Requires-Dist: scipy>=1.11
34
+ Requires-Dist: pandas>=2.0
35
+ Requires-Dist: scikit-learn>=1.3
36
+ Requires-Dist: statsmodels>=0.14
37
+ Requires-Dist: xgboost>=2.0
38
+ Requires-Dist: lightgbm>=4.3
39
+ Requires-Dist: catboost>=1.2
40
+ Requires-Dist: shap>=0.44
41
+ Requires-Dist: numba>=0.58
42
+ Requires-Dist: matplotlib>=3.8
43
+ Requires-Dist: seaborn>=0.13
44
+ Requires-Dist: Pillow>=10.0
45
+ Requires-Dist: python-docx>=1.1.2
46
+ Requires-Dist: tzlocal>=5.0
47
+ Requires-Dist: tqdm>=4.66
48
+ Requires-Dist: pyarrow>=14.0
49
+ Requires-Dist: openpyxl>=3.1
50
+ Requires-Dist: pyreadstat>=1.2
51
+ Requires-Dist: streamlit>=1.36
52
+ Provides-Extra: dev
53
+ Requires-Dist: pytest; extra == "dev"
54
+ Requires-Dist: black; extra == "dev"
55
+ Requires-Dist: isort; extra == "dev"
56
+ Dynamic: license-file
57
+
58
+ # TanML: Automated Model Validation Toolkit for Tabular Machine Learning
59
+
60
+ [![Cite this repo](https://img.shields.io/badge/Cite-this_repo-blue)](https://github.com/tdlabs-ai/tanml#license--citation)
61
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
62
+ [![Downloads](https://pepy.tech/badge/tanml)](https://pepy.tech/project/tanml)
63
+
64
+ **TanML** validates tabular ML models with a zero-config **Streamlit UI** and exports an audit-ready, **editable Word report (.docx)**. It covers data quality, correlation/VIF, performance, explainability (SHAP), and robustness/stress tests—built for regulated settings (MRM, credit risk, insurance, etc.).
65
+
66
+ * **Status:** Beta (`0.x`)
67
+ * **License:** MIT
68
+ * **Python:** 3.8–3.12
69
+ * **OS:** Linux / macOS / Windows (incl. WSL)
70
+
71
+ ---
72
+
73
+ ## Why TanML?
74
+
75
+ * **Zero-config UI:** launch Streamlit, upload data, click **Run**—no YAML needed.
76
+ * **Audit-ready outputs:** tables/plots + a polished DOCX your stakeholders can edit.
77
+ * **Regulatory alignment:** supports common Model Risk Management themes (e.g., SR 11-7 style).
78
+ * **Works with your stack:** scikit-learn, XGBoost/LightGBM/CatBoost, etc.
79
+
80
+ ---
81
+
82
+ ## Install
83
+
84
+ ```bash
85
+ pip install tanml
86
+ ```
87
+
88
+ ## Quick Start (UI)
89
+
90
+ ```bash
91
+ tanml ui
92
+ ```
93
+
94
+ * Opens at **[http://127.0.0.1:8501](http://127.0.0.1:8501)**
95
+ * **Upload limit ~1 GB** (preconfigured)
96
+ * **Telemetry disabled by default**
97
+
98
+ ### In the app
99
+
100
+ 1. **Load data** — upload a cleaned CSV/XLSX/Parquet (optional: raw or separate Train/Test).
101
+ 2. **Select target & features** — target auto-suggested; features default to all non-target columns.
102
+ 3. **Pick a model** — choose library/algorithm (scikit-learn, XGBoost, LightGBM, CatBoost) and tweak params.
103
+ 4. **Run validation** — click **▶️ Refit & validate**.
104
+ 5. **Export** — click **⬇️ Download report** to get a **DOCX** (auto-selects classification/regression template).
105
+
106
+ **Outputs**
107
+
108
+ * Report: `./.ui_runs/<session>/tanml_report_*.docx`
109
+ * Artifacts (CSV/PNGs): `./.ui_runs/<session>/artifacts/*`
110
+
111
+ ---
112
+
113
+ ## What TanML Checks
114
+
115
+ * **Raw Data (optional):** rows/cols, missingness, duplicates, constant columns
116
+ * **Data Quality & EDA:** summaries, distributions
117
+ * **Correlation & Multicollinearity:** heatmap, top-pairs CSV, **VIF** table
118
+ * **Performance**
119
+
120
+ * **Classification:** AUC, PR-AUC, KS, decile lift, confusion
121
+ * **Regression:** R², MAE, MSE/RMSE, error stats
122
+ * **Explainability:** SHAP (auto explainer; configurable background size)
123
+ * **Robustness/Stress Tests:** feature perturbations → delta-metrics
124
+ * **Model Metadata:** model class, hyperparameters, features, training info
125
+
126
+ ---
127
+
128
+
129
+ ## Templates
130
+
131
+ TanML ships DOCX templates (packaged in wheel & sdist):
132
+
133
+ * `tanml/report/templates/report_template_cls.docx`
134
+ * `tanml/report/templates/report_template_reg.docx`
135
+
136
+ ---
137
+
138
+
139
+ ## License & Citation
140
+
141
+ **License:** MIT. See [LICENSE](https://github.com/tdlabs-ai/tanml/blob/main/LICENSE).
142
+ SPDX-License-Identifier: MIT
143
+
144
+ © 2025 Tanmay Sah and Dolly Sah. You may use, modify, and distribute this software with appropriate attribution.
145
+
146
+ ### How to cite
147
+
148
+ If TanML helps your work or publications, please cite:
149
+
150
+ > Sah, T., & Sah, D. (2025). *TanML: Automated Model Validation Toolkit for Tabular Machine Learning* [Software]. Available at https://github.com/tdlabs-ai/tanml
151
+
152
+ Or in BibTeX (version-agnostic):
153
+
154
+ ```bibtex
155
+ @misc{tanml,
156
+ author = {Sah, Tanmay and Sah, Dolly},
157
+ title = {TanML: Automated Model Validation Toolkit for Tabular Machine Learning},
158
+ year = {2025},
159
+ note = {Software; MIT License},
160
+ url = {https://github.com/tdlabs-ai/tanml}
161
+ }
162
+ ```
163
+
164
+ A machine-readable citation file (`CITATION.cff`) is included for citation tools and GitHub’s “Cite this repository” button.
@@ -0,0 +1,107 @@
1
+ # TanML: Automated Model Validation Toolkit for Tabular Machine Learning
2
+
3
+ [![Cite this repo](https://img.shields.io/badge/Cite-this_repo-blue)](https://github.com/tdlabs-ai/tanml#license--citation)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
5
+ [![Downloads](https://pepy.tech/badge/tanml)](https://pepy.tech/project/tanml)
6
+
7
+ **TanML** validates tabular ML models with a zero-config **Streamlit UI** and exports an audit-ready, **editable Word report (.docx)**. It covers data quality, correlation/VIF, performance, explainability (SHAP), and robustness/stress tests—built for regulated settings (MRM, credit risk, insurance, etc.).
8
+
9
+ * **Status:** Beta (`0.x`)
10
+ * **License:** MIT
11
+ * **Python:** 3.8–3.12
12
+ * **OS:** Linux / macOS / Windows (incl. WSL)
13
+
14
+ ---
15
+
16
+ ## Why TanML?
17
+
18
+ * **Zero-config UI:** launch Streamlit, upload data, click **Run**—no YAML needed.
19
+ * **Audit-ready outputs:** tables/plots + a polished DOCX your stakeholders can edit.
20
+ * **Regulatory alignment:** supports common Model Risk Management themes (e.g., SR 11-7 style).
21
+ * **Works with your stack:** scikit-learn, XGBoost/LightGBM/CatBoost, etc.
22
+
23
+ ---
24
+
25
+ ## Install
26
+
27
+ ```bash
28
+ pip install tanml
29
+ ```
30
+
31
+ ## Quick Start (UI)
32
+
33
+ ```bash
34
+ tanml ui
35
+ ```
36
+
37
+ * Opens at **[http://127.0.0.1:8501](http://127.0.0.1:8501)**
38
+ * **Upload limit ~1 GB** (preconfigured)
39
+ * **Telemetry disabled by default**
40
+
41
+ ### In the app
42
+
43
+ 1. **Load data** — upload a cleaned CSV/XLSX/Parquet (optional: raw or separate Train/Test).
44
+ 2. **Select target & features** — target auto-suggested; features default to all non-target columns.
45
+ 3. **Pick a model** — choose library/algorithm (scikit-learn, XGBoost, LightGBM, CatBoost) and tweak params.
46
+ 4. **Run validation** — click **▶️ Refit & validate**.
47
+ 5. **Export** — click **⬇️ Download report** to get a **DOCX** (auto-selects classification/regression template).
48
+
49
+ **Outputs**
50
+
51
+ * Report: `./.ui_runs/<session>/tanml_report_*.docx`
52
+ * Artifacts (CSV/PNGs): `./.ui_runs/<session>/artifacts/*`
53
+
54
+ ---
55
+
56
+ ## What TanML Checks
57
+
58
+ * **Raw Data (optional):** rows/cols, missingness, duplicates, constant columns
59
+ * **Data Quality & EDA:** summaries, distributions
60
+ * **Correlation & Multicollinearity:** heatmap, top-pairs CSV, **VIF** table
61
+ * **Performance**
62
+
63
+ * **Classification:** AUC, PR-AUC, KS, decile lift, confusion
64
+ * **Regression:** R², MAE, MSE/RMSE, error stats
65
+ * **Explainability:** SHAP (auto explainer; configurable background size)
66
+ * **Robustness/Stress Tests:** feature perturbations → delta-metrics
67
+ * **Model Metadata:** model class, hyperparameters, features, training info
68
+
69
+ ---
70
+
71
+
72
+ ## Templates
73
+
74
+ TanML ships DOCX templates (packaged in wheel & sdist):
75
+
76
+ * `tanml/report/templates/report_template_cls.docx`
77
+ * `tanml/report/templates/report_template_reg.docx`
78
+
79
+ ---
80
+
81
+
82
+ ## License & Citation
83
+
84
+ **License:** MIT. See [LICENSE](https://github.com/tdlabs-ai/tanml/blob/main/LICENSE).
85
+ SPDX-License-Identifier: MIT
86
+
87
+ © 2025 Tanmay Sah and Dolly Sah. You may use, modify, and distribute this software with appropriate attribution.
88
+
89
+ ### How to cite
90
+
91
+ If TanML helps your work or publications, please cite:
92
+
93
+ > Sah, T., & Sah, D. (2025). *TanML: Automated Model Validation Toolkit for Tabular Machine Learning* [Software]. Available at https://github.com/tdlabs-ai/tanml
94
+
95
+ Or in BibTeX (version-agnostic):
96
+
97
+ ```bibtex
98
+ @misc{tanml,
99
+ author = {Sah, Tanmay and Sah, Dolly},
100
+ title = {TanML: Automated Model Validation Toolkit for Tabular Machine Learning},
101
+ year = {2025},
102
+ note = {Software; MIT License},
103
+ url = {https://github.com/tdlabs-ai/tanml}
104
+ }
105
+ ```
106
+
107
+ A machine-readable citation file (`CITATION.cff`) is included for citation tools and GitHub’s “Cite this repository” button.
tanml-0.1.7/README.md ADDED
@@ -0,0 +1,148 @@
1
+ # TanML: Automated Model Validation Toolkit for Tabular Machine Learning
2
+
3
+ [![Cite this repo](https://img.shields.io/badge/Cite-this_repo-blue)](https://github.com/tdlabs-ai/tanml#license--citation)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
5
+ [![Downloads](https://pepy.tech/badge/tanml)](https://pepy.tech/project/tanml)
6
+
7
+ **TanML** validates tabular ML models with a zero-config **Streamlit UI** and exports an audit-ready, **editable Word report (.docx)**. It covers data quality, correlation/VIF, performance, explainability (SHAP), and robustness/stress tests—built for regulated settings (MRM, credit risk, insurance, etc.).
8
+
9
+ * **Status:** Beta (`0.x`)
10
+ * **License:** MIT
11
+ * **Python:** 3.8–3.12
12
+ * **OS:** Linux / macOS / Windows (incl. WSL)
13
+
14
+ ---
15
+
16
+ ## Why TanML?
17
+
18
+ * **Zero-config UI:** launch Streamlit, upload data, click **Run**—no YAML needed.
19
+ * **Audit-ready outputs:** tables/plots + a polished DOCX your stakeholders can edit.
20
+ * **Regulatory alignment:** supports common Model Risk Management themes (e.g., SR 11-7 style).
21
+ * **Works with your stack:** scikit-learn, XGBoost/LightGBM/CatBoost, etc.
22
+
23
+ ---
24
+
25
+ ## Install
26
+
27
+ ```bash
28
+ pip install tanml
29
+ ```
30
+
31
+ ## Quick Start (UI)
32
+
33
+ ```bash
34
+ tanml ui
35
+ ```
36
+
37
+ * Opens at **[http://127.0.0.1:8501](http://127.0.0.1:8501)**
38
+ * **Upload limit ~1 GB** (preconfigured)
39
+ * **Telemetry disabled by default**
40
+
41
+ ### In the app
42
+
43
+ 1. **Load data** — upload a cleaned CSV/XLSX/Parquet (optional: raw or separate Train/Test).
44
+ 2. **Select target & features** — target auto-suggested; features default to all non-target columns.
45
+ 3. **Pick a model** — choose library/algorithm (scikit-learn, XGBoost, LightGBM, CatBoost) and tweak params.
46
+ 4. **Run validation** — click **▶️ Refit & validate**.
47
+ 5. **Export** — click **⬇️ Download report** to get a **DOCX** (auto-selects classification/regression template).
48
+
49
+ **Outputs**
50
+
51
+ * Report: `./.ui_runs/<session>/tanml_report_*.docx`
52
+ * Artifacts (CSV/PNGs): `./.ui_runs/<session>/artifacts/*`
53
+
54
+ ---
55
+
56
+ ## What TanML Checks
57
+
58
+ * **Raw Data (optional):** rows/cols, missingness, duplicates, constant columns
59
+ * **Data Quality & EDA:** summaries, distributions
60
+ * **Correlation & Multicollinearity:** heatmap, top-pairs CSV, **VIF** table
61
+ * **Performance**
62
+
63
+ * **Classification:** AUC, PR-AUC, KS, decile lift, confusion
64
+ * **Regression:** R², MAE, MSE/RMSE, error stats
65
+ * **Explainability:** SHAP (auto explainer; configurable background size)
66
+ * **Robustness/Stress Tests:** feature perturbations → delta-metrics
67
+ * **Model Metadata:** model class, hyperparameters, features, training info
68
+
69
+ ---
70
+
71
+ ## Optional CLI Flags
72
+
73
+ Most users just run `tanml ui`. These help on teams/servers:
74
+
75
+ ```bash
76
+ # Share on LAN
77
+ tanml ui --public
78
+
79
+ # Different port
80
+ tanml ui --port 9000
81
+
82
+ # Headless (server/CI; no auto-open browser)
83
+ tanml ui --headless
84
+
85
+ # Larger limit (e.g., 2 GB)
86
+ tanml ui --max-mb 2048
87
+ ```
88
+
89
+ Env var equivalents (Linux/macOS bash):
90
+
91
+ ```bash
92
+ TANML_SERVER_ADDRESS=0.0.0.0 TANML_PORT=9000 TANML_MAX_MB=2048 tanml ui
93
+ ```
94
+
95
+ Windows PowerShell:
96
+
97
+ ```powershell
98
+ $env:TANML_SERVER_ADDRESS="0.0.0.0"; $env:TANML_PORT="9000"; $env:TANML_MAX_MB="2048"; tanml ui
99
+ ```
100
+
101
+ **Defaults:** address `127.0.0.1`, port `8501`, limit `1024 MB`, telemetry **OFF**.
102
+
103
+ ---
104
+
105
+ ## Templates
106
+
107
+ TanML ships DOCX templates (packaged in wheel & sdist):
108
+
109
+ * `tanml/report/templates/report_template_cls.docx`
110
+ * `tanml/report/templates/report_template_reg.docx`
111
+
112
+ ---
113
+
114
+ ## Troubleshooting
115
+
116
+ * **Page didn’t open?** Visit `http://127.0.0.1:8501` or run `tanml ui --port 9000`.
117
+ * **Large CSVs are slow/heavy?** Prefer **Parquet**; CSV → DataFrame can use several GB RAM.
118
+ * **Artifacts missing?** Check `./.ui_runs/<session>/artifacts/`.
119
+ * **Corporate networks:** use `tanml ui --public` to share on LAN.
120
+
121
+ ---
122
+
123
+ ## License & Citation
124
+
125
+ **License:** MIT. See [LICENSE](https://github.com/tdlabs-ai/tanml/blob/main/LICENSE).
126
+ SPDX-License-Identifier: MIT
127
+
128
+ © 2025 Tanmay Sah and Dolly Sah. You may use, modify, and distribute this software with appropriate attribution.
129
+
130
+ ### How to cite
131
+
132
+ If TanML helps your work or publications, please cite:
133
+
134
+ > Sah, T., & Sah, D. (2025). *TanML: Automated Model Validation Toolkit for Tabular Machine Learning* [Software]. Available at https://github.com/tdlabs-ai/tanml
135
+
136
+ Or in BibTeX (version-agnostic):
137
+
138
+ ```bibtex
139
+ @misc{tanml,
140
+ author = {Sah, Tanmay and Sah, Dolly},
141
+ title = {TanML: Automated Model Validation Toolkit for Tabular Machine Learning},
142
+ year = {2025},
143
+ note = {Software; MIT License},
144
+ url = {https://github.com/tdlabs-ai/tanml}
145
+ }
146
+ ```
147
+
148
+ A machine-readable citation file (`CITATION.cff`) is included for citation tools and GitHub’s “Cite this repository” button.
@@ -0,0 +1,112 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "tanml"
7
+ version = "0.1.7"
8
+ description = "Automated validation toolkit for tabular ML models—MRM, credit risk, insurance, and other regulated use cases."
9
+ readme = { file = "README-pypi.md", content-type = "text/markdown" }
10
+ requires-python = ">=3.8"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+
14
+
15
+ keywords = [
16
+ "model validation",
17
+ "model risk management",
18
+ "model governance",
19
+ "SR 11-7",
20
+ "tabular ML",
21
+ "credit risk",
22
+ "insurance analytics",
23
+ "explainability",
24
+ "XAI",
25
+ "SHAP",
26
+ "stress testing",
27
+ "reporting",
28
+ "docx",
29
+ "streamlit",
30
+ "xgboost",
31
+ "lightgbm",
32
+ "catboost"
33
+ ]
34
+
35
+
36
+
37
+ classifiers = [
38
+ "Development Status :: 4 - Beta",
39
+ "Intended Audience :: Science/Research",
40
+ "Intended Audience :: Financial and Insurance Industry",
41
+ "Natural Language :: English",
42
+ "Operating System :: OS Independent",
43
+ "Programming Language :: Python",
44
+ "Programming Language :: Python :: 3",
45
+ "Programming Language :: Python :: 3 :: Only",
46
+ "Programming Language :: Python :: 3.8",
47
+ "Programming Language :: Python :: 3.9",
48
+ "Programming Language :: Python :: 3.10",
49
+ "Programming Language :: Python :: 3.11",
50
+ "Programming Language :: Python :: 3.12",
51
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
52
+ "Topic :: Scientific/Engineering :: Information Analysis",
53
+ "Topic :: Scientific/Engineering :: Visualization"
54
+ ]
55
+
56
+
57
+ authors = [
58
+ { name = "Tanmay Sah" },
59
+ { name = "Dolly Sah" }
60
+ ]
61
+
62
+ maintainers = [
63
+ { name = "Tanmay Sah" },
64
+ { name = "Dolly Sah" }
65
+ ]
66
+
67
+ dependencies = [
68
+ "numpy>=1.26",
69
+ "scipy>=1.11",
70
+ "pandas>=2.0",
71
+ "scikit-learn>=1.3",
72
+ "statsmodels>=0.14",
73
+ "xgboost>=2.0",
74
+ "lightgbm>=4.3",
75
+ "catboost>=1.2",
76
+ "shap>=0.44",
77
+ "numba>=0.58",
78
+ "matplotlib>=3.8",
79
+ "seaborn>=0.13",
80
+ "Pillow>=10.0",
81
+ "python-docx>=1.1.2",
82
+ "tzlocal>=5.0",
83
+ "tqdm>=4.66",
84
+ "pyarrow>=14.0",
85
+ "openpyxl>=3.1",
86
+ "pyreadstat>=1.2",
87
+ "streamlit>=1.36"
88
+ ]
89
+
90
+
91
+ [project.optional-dependencies]
92
+ dev = ["pytest", "black", "isort"]
93
+
94
+ [project.scripts]
95
+ tanml = "tanml.cli.main:main"
96
+
97
+ [project.urls]
98
+ Homepage = "https://github.com/tdlabs-ai/tanml"
99
+ Source = "https://github.com/tdlabs-ai/tanml"
100
+ Issues = "https://github.com/tdlabs-ai/tanml/issues"
101
+ Documentation = "https://github.com/tdlabs-ai/tanml#readme"
102
+
103
+
104
+ [tool.setuptools]
105
+ include-package-data = true
106
+
107
+ [tool.setuptools.packages.find]
108
+ where = ["."]
109
+ include = ["tanml*"]
110
+
111
+ [tool.setuptools.package-data]
112
+ "tanml.report.templates" = ["*.docx"]
@@ -0,0 +1 @@
1
+ __version__ = "0.1.7"
@@ -3,12 +3,12 @@ from tanml.checks.cleaning_repro import CleaningReproCheck
3
3
 
4
4
  def run_cleaning_repro_check(model, X_train, X_test, y_train, y_test,
5
5
  config, cleaned_data, *args, **kwargs):
6
- # honour rules.yaml toggle
6
+
7
7
  if not config.get("rules", {}).get("CleaningReproCheck", {}).get("enabled", True):
8
8
  print("ℹ️ CleaningReproCheck skipped (disabled in rules.yaml)")
9
9
  return None
10
10
 
11
- # raw_df can come from rules.yaml *or* via kwargs (passed by ValidationEngine)
11
+
12
12
  raw_data = config.get("raw_data") or kwargs.get("raw_df")
13
13
  if raw_data is None:
14
14
  print("⚠️ Skipping CleaningReproCheck — raw_data missing in config and kwargs")
@@ -0,0 +1,52 @@
1
+ # tanml/check_runners/correlation_runner.py
2
+ from __future__ import annotations
3
+ import os
4
+ from typing import Any, Dict, List
5
+ import pandas as pd
6
+
7
+ from tanml.checks.correlation import CorrelationCheck
8
+
9
def _resolve_outdir(config: Dict[str, Any]) -> str:
    """Return the directory for correlation artifacts, creating it if needed.

    The base directory is taken from ``config["options"]["save_artifacts_dir"]``.
    When the ``options`` block or that key is missing or falsy, ``"reports"``
    is used instead. The returned path is ``<base>/correlation``.
    """
    options = config.get("options") or {}
    artifacts_root = options.get("save_artifacts_dir") or "reports"
    target = os.path.join(artifacts_root, "correlation")
    # exist_ok=True so repeated runs reuse the same directory without error.
    os.makedirs(target, exist_ok=True)
    return target
14
+
15
def _df_features_only(cleaned_df: pd.DataFrame) -> pd.DataFrame:
    """Return ``cleaned_df`` without its last column.

    ``None``, empty frames, and frames with fewer than two columns are
    returned unchanged. Otherwise every column except the last is selected
    by label.

    NOTE(review): presumably the last column is the target and the rest are
    features -- confirm against the caller that builds ``cleaned_df``.
    """
    if cleaned_df is None or cleaned_df.empty:
        return cleaned_df

    column_names = list(cleaned_df.columns)
    if len(column_names) < 2:
        # Nothing to drop: a single column is kept as-is.
        return cleaned_df

    return cleaned_df[column_names[:-1]]
22
+
23
def CorrelationCheckRunner(
    model,
    X_train,
    X_test,
    y_train,
    y_test,
    config: Dict[str, Any],
    cleaned_df: pd.DataFrame,
    raw_df: pd.DataFrame | None = None,
):
    """Build the correlation-check settings from ``config`` and run the check.

    The ``enabled`` flag is read from ``config["CorrelationCheck"]`` and falls
    back to ``config["correlation"]``; when neither sets it, the check runs.
    All other settings come from ``config["CorrelationCheck"]`` only, with the
    defaults shown below. ``save_csv`` and ``save_fig`` are always ``True``.

    ``model``, ``X_train``, ``X_test``, ``y_train``, ``y_test`` and ``raw_df``
    are accepted but not used by this function.

    Returns:
        ``None`` when the check is disabled, otherwise whatever
        ``CorrelationCheck(...).run()`` returns.
    """
    settings: Dict[str, Any] = config.get("CorrelationCheck") or {}
    legacy_settings: Dict[str, Any] = config.get("correlation") or {}

    legacy_enabled = legacy_settings.get("enabled", True)
    if not bool(settings.get("enabled", legacy_enabled)):
        return None

    features = _df_features_only(cleaned_df)

    def _opt(key, default, cast=None):
        # Read one setting, optionally coercing it to the expected type.
        value = settings.get(key, default)
        return value if cast is None else cast(value)

    check_cfg: Dict[str, Any] = {
        "method": _opt("method", "pearson"),
        "high_corr_threshold": _opt("high_corr_threshold", 0.8, float),
        "heatmap_max_features_default": _opt("heatmap_max_features_default", 20, int),
        "heatmap_max_features_limit": _opt("heatmap_max_features_limit", 60, int),
        "subset_strategy": _opt("subset_strategy", "cluster"),
        "sample_rows": _opt("sample_rows", 150_000, int),
        "seed": _opt("seed", 42, int),
        "save_csv": True,
        "save_fig": True,
        "appendix_csv_cap": _opt("appendix_csv_cap", None),
    }

    artifacts_dir = _resolve_outdir(config)
    check = CorrelationCheck(cleaned_data=features, cfg=check_cfg, output_dir=artifacts_dir)
    return check.run()
@@ -0,0 +1,18 @@
1
+ # explainability runner
2
+ from tanml.checks.explainability.shap_check import SHAPCheck
3
+
4
def run_shap_check(model, X_train, X_test, y_train, y_test, rule_config, cleaned_df, *args, **kwargs):
    """Run the SHAP explainability check unless it is disabled in ``rule_config``.

    The ``enabled`` flag is read from ``rule_config["SHAPCheck"]`` first and
    falls back to ``rule_config["explainability"]["shap"]``; when neither
    block sets it, the check runs. A missing or ``None`` value at any level
    (``rule_config`` itself, ``SHAPCheck``, ``explainability`` or ``shap``)
    is treated as an empty block.

    Extra positional and keyword arguments are accepted and ignored.

    Returns:
        ``{"SHAPCheck": {"skipped": True}}`` when the check is disabled,
        ``{"SHAPCheck": <value returned by SHAPCheck.run()>}`` on success, or
        ``{"SHAPCheck": {"status": "error", "error": <message>}}`` when
        anything inside raises.
    """
    try:
        rules = rule_config or {}
        cfg_shapcheck = rules.get("SHAPCheck") or {}
        # Guard the intermediate lookup with ``or {}``: an "explainability"
        # key that is present but None must not raise here and turn a
        # disabled (or perfectly runnable) check into an error result.
        cfg_expl = (rules.get("explainability") or {}).get("shap") or {}
        enabled = cfg_shapcheck.get("enabled", cfg_expl.get("enabled", True))
        if not enabled:
            print("ℹ️ SHAPCheck skipped (disabled)")
            return {"SHAPCheck": {"skipped": True}}

        check = SHAPCheck(model, X_train, X_test, y_train, y_test, rule_config=rule_config, cleaned_df=cleaned_df)
        result = check.run()
        return {"SHAPCheck": result}
    except Exception as e:
        # Deliberate best-effort boundary: a failing SHAP run is reported in
        # the result instead of aborting the caller.
        print(f"⚠️ SHAPCheck failed: {e}")
        return {"SHAPCheck": {"status": "error", "error": str(e)}}