PyPI - tanml - Versions diffs - 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl - Mend

tanml 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of tanml might be problematic. Click here for more details.

Files changed (49)

tanml/__init__.py +1 -1
tanml/check_runners/cleaning_repro_runner.py +2 -2
tanml/check_runners/correlation_runner.py +49 -12
tanml/check_runners/explainability_runner.py +12 -22
tanml/check_runners/logistic_stats_runner.py +196 -17
tanml/check_runners/performance_runner.py +82 -26
tanml/check_runners/raw_data_runner.py +29 -14
tanml/check_runners/regression_metrics_runner.py +195 -0
tanml/check_runners/stress_test_runner.py +23 -6
tanml/check_runners/vif_runner.py +33 -27
tanml/checks/correlation.py +241 -41
tanml/checks/explainability/shap_check.py +261 -29
tanml/checks/logit_stats.py +186 -54
tanml/checks/performance_classification.py +305 -0
tanml/checks/raw_data.py +58 -23
tanml/checks/regression_metrics.py +167 -0
tanml/checks/stress_test.py +157 -53
tanml/cli/main.py +99 -27
tanml/engine/check_agent_registry.py +20 -10
tanml/engine/core_engine_agent.py +199 -37
tanml/models/registry.py +329 -0
tanml/report/report_builder.py +1180 -147
tanml/report/templates/report_template_cls.docx +0 -0
tanml/report/templates/report_template_reg.docx +0 -0
tanml/ui/app.py +1205 -0
tanml/utils/data_loader.py +105 -15
tanml-0.1.7.dist-info/METADATA +164 -0
tanml-0.1.7.dist-info/RECORD +54 -0
tanml/cli/arg_parser.py +0 -31
tanml/cli/init_cmd.py +0 -8
tanml/cli/validate_cmd.py +0 -7
tanml/config_templates/rules_multiple_models_datasets.yaml +0 -144
tanml/config_templates/rules_one_dataset_segment_column.yaml +0 -140
tanml/config_templates/rules_one_model_one_dataset.yaml +0 -143
tanml/engine/segmentation_agent.py +0 -118
tanml/engine/validation_agent.py +0 -91
tanml/report/templates/report_template.docx +0 -0
tanml/utils/model_loader.py +0 -35
tanml/utils/r_loader.py +0 -30
tanml/utils/sas_loader.py +0 -50
tanml/utils/yaml_generator.py +0 -34
tanml/utils/yaml_loader.py +0 -5
tanml/validate.py +0 -209
tanml-0.1.6.dist-info/METADATA +0 -317
tanml-0.1.6.dist-info/RECORD +0 -62
{tanml-0.1.6.dist-info → tanml-0.1.7.dist-info}/WHEEL +0 -0
{tanml-0.1.6.dist-info → tanml-0.1.7.dist-info}/entry_points.txt +0 -0
{tanml-0.1.6.dist-info → tanml-0.1.7.dist-info}/licenses/LICENSE +0 -0
{tanml-0.1.6.dist-info → tanml-0.1.7.dist-info}/top_level.txt +0 -0

tanml/engine/core_engine_agent.py CHANGED Viewed

@@ -1,22 +1,27 @@
-"""
-ValidationEngine – runs all registered check-runners and assembles a
-single results dictionary that the ReportBuilder / Jinja template expects.
-"""
+# tanml/engine/core_engine_agent.py
 from tanml.engine.check_agent_registry import CHECK_RUNNER_REGISTRY
-#from tanml.checks.cleaning_repro import CleaningReproCheck
+import numpy as np
+import pandas as pd
+try:
+    import statsmodels.api as sm
+    _SM_AVAILABLE = True
+except Exception:
+    _SM_AVAILABLE = False
 KEEP_AS_NESTED = {
     "DataQualityCheck",
     "StressTestCheck",
     "InputClusterCheck",
+    "InputClusterCoverageCheck",
     "RawDataCheck",
-    #"CleaningReproCheck",
     "SHAPCheck",
     "VIFCheck",
     "CorrelationCheck",
     "EDACheck",
+    # "RegressionMetrics",  # you can keep nested if desired
 }
@@ -30,8 +35,8 @@ class ValidationEngine:
         y_test,
         config,
         cleaned_data,
-        raw_df=None ,
-        ctx=None
+        raw_df=None,
+        ctx=None
     ):
         self.model = model
         self.X_train = X_train
@@ -40,15 +45,149 @@ class ValidationEngine:
         self.y_test = y_test
         self.config = config
         self.cleaned_data = cleaned_data
-        self.raw_df = raw_df
+        self.raw_df = raw_df
+        # allow resuming if config had check_results
         self.results = dict(config.get("check_results", {}))
         self.ctx = ctx or {}
-    def run_all_checks(self):
+        self.task_type = self._infer_task_type(self.y_train, config, model)
+    # --- better detection logic --------------------------------------------
+    @staticmethod
+    def _infer_task_type(y, config=None, model=None):
+        """
+        Decide if task is classification or regression.
+        Priority:
+        1. config["model"]["type"]
+        2. model._estimator_type (sklearn)
+        3. y values (unique count)
+        """
+        # 1. Config hint
+        try:
+            mtype = (config or {}).get("model", {}).get("type", "")
+            if isinstance(mtype, str):
+                mtype = mtype.lower()
+                if "class" in mtype:
+                    return "classification"
+                if "regress" in mtype:
+                    return "regression"
+        except Exception:
+            pass
+        # 2. Model introspection
+        try:
+            if hasattr(model, "_estimator_type"):
+                est = getattr(model, "_estimator_type", "")
+                if est == "classifier":
+                    return "classification"
+                if est == "regressor":
+                    return "regression"
+            if hasattr(model, "predict_proba") or hasattr(model, "decision_function"):
+                return "classification"
+        except Exception:
+            pass
+        # 3. Label based
+        try:
+            if isinstance(y, (pd.Series, pd.DataFrame)):
+                s = y.squeeze()
+            else:
+                s = np.asarray(y).reshape(-1)
+            unique_vals = pd.Series(s).dropna().unique()
+            # Heuristic: small discrete set -> classification
+            if pd.api.types.is_numeric_dtype(s):
+                if len(unique_vals) <= 10:
+                    return "classification"
+                return "regression"
+            else:
+                # non-numeric target -> classification
+                return "classification"
+        except Exception:
+            pass
+        # Fallback
+        return "classification"
+    # -----------------------------------------------------------------------
+    def _pick(self, *paths, default=None):
+        for path in paths:
+            cur = self.results
+            ok = True
+            for p in path:
+                if isinstance(cur, dict) and p in cur:
+                    cur = cur[p]
+                else:
+                    ok = False
+                    break
+            if ok:
+                return cur
+        return default
+    def _compute_linear_stats(self):
+        """
+        Optional: compute a statsmodels OLS summary + coefficient table for regression runs.
+        Writes results into self.results["LinearStats"].
+        """
+        if self.task_type != "regression":
+            return
+        if not _SM_AVAILABLE:
+            self.results["LinearStats"] = {
+                "error": "statsmodels not available; install `statsmodels` to see OLS summary."
+            }
+            return
+        try:
+            # add constant and fit OLS on TRAIN split to mirror sklearn fit
+            X = self.X_train
+            y = self.y_train
+            Xc = sm.add_constant(X, has_constant="add")
+            ols_model = sm.OLS(y, Xc, missing="drop")
+            ols_res = ols_model.fit()
+            # Build coefficient table (including intercept 'const')
+            params = ols_res.params
+            bse = ols_res.bse
+            tvals = ols_res.tvalues
+            pvals = ols_res.pvalues
+            ci = ols_res.conf_int(alpha=0.05)
+            ci.columns = ["ci_low", "ci_high"]
+            rows = []
+            for name in params.index:
+                rows.append({
+                    "feature": name,
+                    "coef": float(params[name]),
+                    "std err": float(bse.get(name, float("nan"))),
+                    "t": float(tvals.get(name, float("nan"))),
+                    "P>|t|": float(pvals.get(name, float("nan"))),
+                    "ci_low": float(ci.loc[name, "ci_low"]) if name in ci.index else None,
+                    "ci_high": float(ci.loc[name, "ci_high"]) if name in ci.index else None,
+                })
+            self.results["LinearStats"] = {
+                "summary_text": ols_res.summary().as_text(),
+                "coeff_table": rows,
+                "status": "ok",
+            }
+        except Exception as e:
+            self.results["LinearStats"] = {"error": f"OLS stats failed: {e}"}
+    # ------------------------------------------------------------
+    def run_all_checks(self, progress_callback=None):
+        self.results["task_type"] = self.task_type
         for check_name, runner_func in CHECK_RUNNER_REGISTRY.items():
             if check_name in self.config.get("skip_checks", []):
                 continue
+            if progress_callback:
+                try:
+                    progress_callback(f"Running {check_name}…")
+                except Exception:
+                    pass
             print(f"✅ Running {check_name}")
             try:
                 result = runner_func(
@@ -59,30 +198,45 @@ class ValidationEngine:
                     self.y_test,
                     self.config,
                     self.cleaned_data,
-                    raw_df=self.raw_df
+                    raw_df=self.raw_df
                 )
                 self._integrate(check_name, result)
             except Exception as e:
                 print(f"⚠️  {check_name} failed: {e}")
                 self.results[check_name] = {"error": str(e)}
-        # Add CleaningReproCheck manually
-        # if self.raw_df is not None:
-        #     print("✅ Running CleaningReproCheck")
-        #     try:
-        #         check = CleaningReproCheck(self.raw_df, self.cleaned_data)
-        #         self.results["CleaningReproCheck"] = check.run()
-        #     except Exception as e:
-        #         print(f"⚠️ CleaningReproCheck failed: {e}")
-        #         self.results["CleaningReproCheck"] = {"error": str(e)}
-        # else:
-        #     print("⚠️ Skipping CleaningReproCheck — raw_df not provided")
-        #     self.results["CleaningReproCheck"] = {"error": "raw_data not available"}
-        # convenience copy for template
+        # add OLS stats for regression (pretty coef table + p-values)
+        self._compute_linear_stats()
+        # -------- Build summary: TASK-AWARE --------
+        summary = {}
+        if self.task_type == "regression":
+            summary["rmse"] = self._pick(("RegressionMetrics", "rmse"))
+            summary["mae"]  = self._pick(("RegressionMetrics", "mae"))
+            summary["r2"]   = self._pick(("RegressionMetrics", "r2"))
+        else:
+            cls = self._pick(("performance", "classification", "summary")) or {}
+            summary["auc"]    = cls.get("auc")
+            summary["ks"]     = cls.get("ks")
+            summary["f1"]     = cls.get("f1")
+            summary["pr_auc"] = cls.get("pr_auc")
+        # PSI (optional)
+        summary["max_psi"] = self._pick(
+            ("PSICheck", "max_psi"),
+            ("PopulationStabilityCheck", "max_psi"),
+            ("max_psi",)
+        )
+        # Count failed checks
+        failed = 0
+        for k, v in self.results.items():
+            if isinstance(v, dict) and v.get("status") == "fail":
+                failed += 1
+        summary["rules_failed"] = failed
+        self.results["summary"] = summary
         self.results["check_results"] = dict(self.results)
         return self.results
@@ -91,25 +245,33 @@ class ValidationEngine:
         if not result:
             return
-        # Special flatten for LogisticStatsCheck
         if check_name == "LogisticStatsCheck":
             self.results.update(result)
             return
-        # If it's a simple object (rare), store as-is
         if not isinstance(result, dict):
             self.results[check_name] = result
             return
-        # Keep entire dict nested
-        if check_name in KEEP_AS_NESTED:
+        cluster_aliases = {
+            "InputClusterCoverageCheck",
+            "InputClusterCoverage",
+            "ClusterCoverageCheck",
+            "InputClustersCheck",
+        }
+        if check_name in cluster_aliases:
             self.results[check_name] = result
+            self.results["InputClusterCheck"] = result
+            return
+        if set(result.keys()) == {"InputClusterCheck"}:
+            self.results["InputClusterCheck"] = result["InputClusterCheck"]
             return
-        # If runner returns {"CheckName": {...}}, unwrap
-        if set(result.keys()) == {check_name}:
-            self.results[check_name] = result[check_name]
+        if check_name in KEEP_AS_NESTED:
+            self.results[check_name] = result
             return
-        # Default: merge into root
-        self.results.update(result)
+        if isinstance(result, dict):
+            self.results.update(result)
+            return

tanml/models/registry.py ADDED Viewed

@@ -0,0 +1,329 @@
+# tanml/models/registry.py
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, Callable, Dict, Optional, Tuple, Literal
+Task = Literal["classification", "regression"]
+@dataclass(frozen=True)
+class ModelSpec:
+    task: Task
+    import_path: str                      # e.g., "sklearn.ensemble.RandomForestClassifier"
+    defaults: Dict[str, Any] = field(default_factory=dict)
+    # UI schema: param -> (type, choices_or_None, help_or_None)
+    ui_schema: Dict[str, Tuple[str, Optional[Tuple[Any, ...]], Optional[str]]] = field(default_factory=dict)
+    aliases: Dict[str, str] = field(default_factory=dict)  # optional param alias map
+# -------------------- 20 MODELS --------------------
+_REGISTRY: Dict[Tuple[str, str], ModelSpec] = {
+    # -------- Classification (10) --------
+    ("sklearn", "LogisticRegression"): ModelSpec(
+        task="classification",
+        import_path="sklearn.linear_model.LogisticRegression",
+        defaults=dict(penalty="l2", solver="lbfgs", C=1.0, class_weight=None, max_iter=1000, random_state=42),
+        ui_schema={
+            "penalty": ("choice", ("l2", "l1"), "Regularization"),
+            "solver": ("choice", ("lbfgs", "liblinear", "saga"), "Solver"),
+            "C": ("float", None, "Inverse regularization strength"),
+            "class_weight": ("choice", (None, "balanced"), "Imbalance handling"),
+            "max_iter": ("int", None, "Max iterations"),
+            "random_state": ("int", None, "Seed"),
+        },
+    ),
+    ("sklearn", "RandomForestClassifier"): ModelSpec(
+        task="classification",
+        import_path="sklearn.ensemble.RandomForestClassifier",
+        defaults=dict(n_estimators=400, max_depth=16, min_samples_split=2, min_samples_leaf=1,
+                      class_weight=None, random_state=42, n_jobs=-1),
+        ui_schema={
+            "n_estimators": ("int", None, "Number of trees"),
+            "max_depth": ("int", None, "Tree depth (None=unbounded)"),
+            "min_samples_split": ("int", None, "Min samples to split"),
+            "min_samples_leaf": ("int", None, "Min samples per leaf"),
+            "class_weight": ("choice", (None, "balanced", "balanced_subsample"), "Imbalance"),
+            "random_state": ("int", None, "Seed"),
+        },
+    ),
+    ("xgboost", "XGBClassifier"): ModelSpec(
+        task="classification",
+        import_path="xgboost.XGBClassifier",
+        defaults=dict(n_estimators=600, max_depth=6, learning_rate=0.05, subsample=0.8, colsample_bytree=0.8,
+                      min_child_weight=1, reg_lambda=1.0, tree_method="hist", random_state=42, n_jobs=-1),
+        ui_schema={
+            "n_estimators": ("int", None, "Boosting rounds"),
+            "max_depth": ("int", None, "Tree depth"),
+            "learning_rate": ("float", None, "Eta"),
+            "subsample": ("float", None, "Row subsample"),
+            "colsample_bytree": ("float", None, "Column subsample"),
+            "min_child_weight": ("float", None, "Min child weight"),
+            "reg_lambda": ("float", None, "L2 regularization"),
+            "tree_method": ("choice", ("hist", "auto"), "Grow method"),
+            "random_state": ("int", None, "Seed"),
+            "n_jobs": ("int", None, "Threads"),
+        },
+    ),
+    ("lightgbm", "LGBMClassifier"): ModelSpec(
+        task="classification",
+        import_path="lightgbm.LGBMClassifier",
+        defaults=dict(n_estimators=800, num_leaves=31, max_depth=-1, learning_rate=0.05, subsample=0.8,
+                      colsample_bytree=0.8, min_child_samples=20, reg_lambda=1.0, random_state=42, n_jobs=-1),
+        ui_schema={
+            "n_estimators": ("int", None, "Boosting rounds"),
+            "num_leaves": ("int", None, "Max leaves"),
+            "max_depth": ("int", None, "-1 = auto"),
+            "learning_rate": ("float", None, "Shrinkage"),
+            "subsample": ("float", None, "Row subsample"),
+            "colsample_bytree": ("float", None, "Column subsample"),
+            "min_child_samples": ("int", None, "Min child samples"),
+            "reg_lambda": ("float", None, "L2 reg"),
+            "random_state": ("int", None, "Seed"),
+        },
+    ),
+    ("sklearn", "SVC"): ModelSpec(
+        task="classification",
+        import_path="sklearn.svm.SVC",
+        defaults=dict(C=1.0, gamma="scale", kernel="rbf", probability=True, class_weight=None, random_state=42),
+        ui_schema={
+            "C": ("float", None, "Regularization"),
+            "gamma": ("choice", ("scale", "auto"), "RBF width"),
+            "kernel": ("choice", ("rbf", "linear", "poly", "sigmoid"), "Kernel"),
+            "class_weight": ("choice", (None, "balanced"), "Imbalance"),
+            "probability": ("bool", None, "Calibrated probs"),
+            "random_state": ("int", None, "Seed"),
+        },
+    ),
+    ("sklearn", "KNeighborsClassifier"): ModelSpec(
+        task="classification",
+        import_path="sklearn.neighbors.KNeighborsClassifier",
+        defaults=dict(n_neighbors=15, weights="distance", p=2),
+        ui_schema={
+            "n_neighbors": ("int", None, "k"),
+            "weights": ("choice", ("uniform", "distance"), "Weights"),
+            "p": ("int", None, "Minkowski p"),
+        },
+    ),
+    ("sklearn", "GaussianNB"): ModelSpec(
+        task="classification",
+        import_path="sklearn.naive_bayes.GaussianNB",
+        defaults=dict(var_smoothing=1e-9),
+        ui_schema={"var_smoothing": ("float", None, "Variance smoothing")},
+    ),
+    ("catboost", "CatBoostClassifier"): ModelSpec(
+        task="classification",
+        import_path="catboost.CatBoostClassifier",
+        defaults=dict(iterations=800, depth=6, learning_rate=0.05, l2_leaf_reg=3.0, subsample=0.8,
+                      loss_function="Logloss", random_state=42, verbose=False),
+        ui_schema={
+            "iterations": ("int", None, "Rounds"),
+            "depth": ("int", None, "Depth"),
+            "learning_rate": ("float", None, "Eta"),
+            "l2_leaf_reg": ("float", None, "L2 reg"),
+            "subsample": ("float", None, "Row subsample"),
+            "random_state": ("int", None, "Seed"),
+        },
+    ),
+    ("sklearn", "ExtraTreesClassifier"): ModelSpec(
+        task="classification",
+        import_path="sklearn.ensemble.ExtraTreesClassifier",
+        defaults=dict(n_estimators=400, max_depth=None, min_samples_split=2, min_samples_leaf=1,
+                      class_weight=None, random_state=42, n_jobs=-1),
+        ui_schema={
+            "n_estimators": ("int", None, "Trees"),
+            "max_depth": ("int", None, "Depth"),
+            "min_samples_split": ("int", None, "Min split"),
+            "min_samples_leaf": ("int", None, "Min leaf"),
+            "class_weight": ("choice", (None, "balanced", "balanced_subsample"), "Imbalance"),
+            "random_state": ("int", None, "Seed"),
+        },
+    ),
+    ("sklearn", "HistGradientBoostingClassifier"): ModelSpec(
+        task="classification",
+        import_path="sklearn.ensemble.HistGradientBoostingClassifier",
+        defaults=dict(max_depth=None, learning_rate=0.1, max_bins=255, l2_regularization=0.0,
+                      early_stopping=True, random_state=42),
+        ui_schema={
+            "max_depth": ("int", None, "Depth (None=auto)"),
+            "learning_rate": ("float", None, "Eta"),
+            "max_bins": ("int", None, "Bins"),
+            "l2_regularization": ("float", None, "L2 reg"),
+            "early_stopping": ("bool", None, "Early stop"),
+            "random_state": ("int", None, "Seed"),
+        },
+    ),
+    # -------- Regression (10) --------
+    ("sklearn", "LinearRegression"): ModelSpec(
+        task="regression",
+        import_path="sklearn.linear_model.LinearRegression",
+        defaults=dict(fit_intercept=True, positive=False),
+        ui_schema={
+            "fit_intercept": ("bool", None, "Fit intercept"),
+            "positive": ("bool", None, "Positive coef"),
+        },
+    ),
+    ("sklearn", "RandomForestRegressor"): ModelSpec(
+        task="regression",
+        import_path="sklearn.ensemble.RandomForestRegressor",
+        defaults=dict(n_estimators=400, max_depth=16, min_samples_split=2, min_samples_leaf=1,
+                      random_state=42, n_jobs=-1),
+        ui_schema={
+            "n_estimators": ("int", None, "Trees"),
+            "max_depth": ("int", None, "Depth"),
+            "min_samples_split": ("int", None, "Min split"),
+            "min_samples_leaf": ("int", None, "Min leaf"),
+            "random_state": ("int", None, "Seed"),
+        },
+    ),
+    ("xgboost", "XGBRegressor"): ModelSpec(
+        task="regression",
+        import_path="xgboost.XGBRegressor",
+        defaults=dict(n_estimators=800, max_depth=6, learning_rate=0.05, subsample=0.8, colsample_bytree=0.8,
+                      min_child_weight=1, reg_lambda=1.0, tree_method="hist", random_state=42, n_jobs=-1),
+        ui_schema={
+            "n_estimators": ("int", None, "Rounds"),
+            "max_depth": ("int", None, "Depth"),
+            "learning_rate": ("float", None, "Eta"),
+            "subsample": ("float", None, "Row subsample"),
+            "colsample_bytree": ("float", None, "Column subsample"),
+            "min_child_weight": ("float", None, "Min child weight"),
+            "reg_lambda": ("float", None, "L2 reg"),
+            "tree_method": ("choice", ("hist", "auto"), "Grow method"),
+            "random_state": ("int", None, "Seed"),
+            "n_jobs": ("int", None, "Threads"),
+        },
+    ),
+    ("lightgbm", "LGBMRegressor"): ModelSpec(
+        task="regression",
+        import_path="lightgbm.LGBMRegressor",
+        defaults=dict(n_estimators=1200, num_leaves=31, max_depth=-1, learning_rate=0.05, subsample=0.8,
+                      colsample_bytree=0.8, min_child_samples=20, reg_lambda=1.0, random_state=42, n_jobs=-1),
+        ui_schema={
+            "n_estimators": ("int", None, "Rounds"),
+            "num_leaves": ("int", None, "Max leaves"),
+            "max_depth": ("int", None, "-1 = auto"),
+            "learning_rate": ("float", None, "Eta"),
+            "subsample": ("float", None, "Row subsample"),
+            "colsample_bytree": ("float", None, "Column subsample"),
+            "min_child_samples": ("int", None, "Min child samples"),
+            "reg_lambda": ("float", None, "L2 reg"),
+            "random_state": ("int", None, "Seed"),
+        },
+    ),
+    ("sklearn", "SVR"): ModelSpec(
+        task="regression",
+        import_path="sklearn.svm.SVR",
+        defaults=dict(C=1.0, gamma="scale", epsilon=0.1, kernel="rbf"),
+        ui_schema={
+            "C": ("float", None, "Regularization"),
+            "gamma": ("choice", ("scale", "auto"), "RBF width"),
+            "epsilon": ("float", None, "Epsilon tube"),
+            "kernel": ("choice", ("rbf", "linear", "poly", "sigmoid"), "Kernel"),
+        },
+    ),
+    ("sklearn", "KNeighborsRegressor"): ModelSpec(
+        task="regression",
+        import_path="sklearn.neighbors.KNeighborsRegressor",
+        defaults=dict(n_neighbors=15, weights="distance", p=2),
+        ui_schema={
+            "n_neighbors": ("int", None, "k"),
+            "weights": ("choice", ("uniform", "distance"), "Weights"),
+            "p": ("int", None, "Minkowski p"),
+        },
+    ),
+    ("sklearn", "ElasticNet"): ModelSpec(
+        task="regression",
+        import_path="sklearn.linear_model.ElasticNet",
+        defaults=dict(alpha=0.001, l1_ratio=0.5, max_iter=1000, random_state=42),
+        ui_schema={
+            "alpha": ("float", None, "Reg strength"),
+            "l1_ratio": ("float", None, "L1 vs L2 mix"),
+            "max_iter": ("int", None, "Max iterations"),
+            "random_state": ("int", None, "Seed"),
+        },
+    ),
+    ("catboost", "CatBoostRegressor"): ModelSpec(
+        task="regression",
+        import_path="catboost.CatBoostRegressor",
+        defaults=dict(iterations=1000, depth=6, learning_rate=0.05, l2_leaf_reg=3.0, subsample=0.8,
+                      loss_function="RMSE", random_state=42, verbose=False),
+        ui_schema={
+            "iterations": ("int", None, "Rounds"),
+            "depth": ("int", None, "Depth"),
+            "learning_rate": ("float", None, "Eta"),
+            "l2_leaf_reg": ("float", None, "L2 reg"),
+            "subsample": ("float", None, "Row subsample"),
+            "random_state": ("int", None, "Seed"),
+        },
+    ),
+    ("sklearn", "ExtraTreesRegressor"): ModelSpec(
+        task="regression",
+        import_path="sklearn.ensemble.ExtraTreesRegressor",
+        defaults=dict(n_estimators=400, max_depth=None, min_samples_split=2, min_samples_leaf=1,
+                      random_state=42, n_jobs=-1),
+        ui_schema={
+            "n_estimators": ("int", None, "Trees"),
+            "max_depth": ("int", None, "Depth"),
+            "min_samples_split": ("int", None, "Min split"),
+            "min_samples_leaf": ("int", None, "Min leaf"),
+            "random_state": ("int", None, "Seed"),
+        },
+    ),
+    ("sklearn", "HistGradientBoostingRegressor"): ModelSpec(
+        task="regression",
+        import_path="sklearn.ensemble.HistGradientBoostingRegressor",
+        defaults=dict(max_depth=None, learning_rate=0.1, max_bins=255, l2_regularization=0.0,
+                      early_stopping=True, random_state=42),
+        ui_schema={
+            "max_depth": ("int", None, "Depth (None=auto)"),
+            "learning_rate": ("float", None, "Eta"),
+            "max_bins": ("int", None, "Bins"),
+            "l2_regularization": ("float", None, "L2 reg"),
+            "early_stopping": ("bool", None, "Early stop"),
+            "random_state": ("int", None, "Seed"),
+        },
+    ),
+}
+# --------------- Helpers ---------------
+def list_models(task: Optional[Task] = None) -> Dict[Tuple[str, str], ModelSpec]:
+    if task:
+        return {k: v for k, v in _REGISTRY.items() if v.task == task}
+    return dict(_REGISTRY)
+def get_spec(library: str, algo: str) -> ModelSpec:
+    key = (library, algo)
+    if key not in _REGISTRY:
+        raise KeyError(f"Unknown model: {library}.{algo}")
+    return _REGISTRY[key]
+def _lazy_import(import_path: str) -> Callable[..., Any]:
+    mod_name, cls_name = import_path.rsplit(".", 1)
+    mod = __import__(mod_name, fromlist=[cls_name])
+    return getattr(mod, cls_name)
+def build_estimator(library: str, algo: str, params: Optional[Dict[str, Any]] = None):
+    spec = get_spec(library, algo)
+    Cls = _lazy_import(spec.import_path)
+    kwargs = dict(spec.defaults)
+    if params:
+        canon = {}
+        for k, v in params.items():
+            k2 = spec.aliases.get(k, k)
+            canon[k2] = v
+        kwargs.update({k: v for k, v in canon.items() if v is not None})
+    return Cls(**kwargs)
+def ui_schema_for(library: str, algo: str) -> Dict[str, Tuple[str, Optional[Tuple[Any, ...]], Optional[str]]]:
+    return get_spec(library, algo).ui_schema
+def infer_task_from_target(y) -> Task:
+    try:
+        n = int(y.nunique())  # pandas Series fast path
+    except Exception:
+        try:
+            n = len(set(y))
+        except Exception:
+            n = 10
+    return "classification" if n <= 3 else "regression"

tanml 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

Potentially problematic release.

tanml 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl