PyPI - wizata-dsapi - Versions diffs - 2.0.0.dev24__tar.gz → 2.0.0.dev25__tar.gz - Mend

wizata-dsapi 2.0.0.dev24 (tar.gz) → 2.0.0.dev25 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

{wizata_dsapi-2.0.0.dev24/wizata_dsapi.egg-info → wizata_dsapi-2.0.0.dev25}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wizata_dsapi
-Version: 2.0.0.dev24
+Version: 2.0.0.dev25
 Summary: Wizata Data Science Toolkit
 Author: Wizata S.A.
 Author-email: info@wizata.com

wizata_dsapi-2.0.0.dev25/wizata_dsapi/models/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .common import linear_regression, logistic_regression, isolation_forest, gradiant_boost_classifier, setpoint_optimizer, SetpointOptimizer

wizata_dsapi-2.0.0.dev25/wizata_dsapi/models/common.py ADDED Viewed

@@ -0,0 +1,272 @@
+import wizata_dsapi
+import pandas
+import numpy
+import sklearn
+import sklearn.linear_model
+import sklearn.ensemble
+import sklearn.neighbors
+import sklearn.pipeline
+import sklearn.preprocessing
+def extract_target_feat(context: wizata_dsapi.Context, single: bool = True):
+    """
+    Read the ``target_feat`` property of the context and normalise its shape.
+    :param context: execution context whose ``properties`` mapping holds ``target_feat``.
+    :param single: when True return one column name (str); when False return a list of column names.
+    :return: the target column name if ``single``, otherwise a flat list of target column names.
+    :raises ValueError: if ``target_feat`` is missing, or ``single`` is True and the list does not
+        contain exactly one name.
+    :raises TypeError: if ``target_feat`` is neither a str nor a list.
+    """
+    if "target_feat" not in context.properties:
+        raise ValueError(f"training script requires a proper target_feat")
+    target_feat = context.properties["target_feat"]
+    if isinstance(target_feat, str):
+        return target_feat if single else [target_feat]
+    if isinstance(target_feat, list):
+        if not single:
+            # return a flat copy; wrapping the list again would give [[...]] instead of a list of names
+            return list(target_feat)
+        if len(target_feat) == 1:
+            return target_feat[0]
+        raise ValueError(f"expecting only one target_feat but found {len(target_feat)}")
+    raise TypeError(f'target_feat must be a str or a list of str but found {target_feat.__class__.__name__}')
+def linear_regression(context: wizata_dsapi.Context):
+    """
+    Train a linear regression model on all features to predict a single target column.
+    The target column name comes from the ``target_feat`` property, given either as a str or as a
+    one-element list. Every other column of the context dataframe is used as a feature, and the
+    fitted model is registered through ``context.set_model`` with those feature column names.
+    :param context: execution context providing the dataframe, the model config and the properties.
+    :raises ValueError: if the model config has no target feat, or more than one target is configured.
+    :raises TypeError: if ``target_feat`` is neither a str nor a list.
+    """
+    df = context.dataframe
+    model_config = context.get_model_config()
+    if not model_config.has_target_feat():
+        raise ValueError(f'linear_regression requires a target feat')
+    # go through the shared helper so a one-element list is accepted like a plain str
+    target_feat_name = extract_target_feat(context, single=True)
+    x = df.drop(columns=[target_feat_name])
+    y = df[target_feat_name]
+    model = sklearn.linear_model.LinearRegression()
+    model.fit(x, y)
+    context.set_model(model, features=x.columns)
+def logistic_regression(context: wizata_dsapi.Context):
+    """
+    Train a logistic regression classifier on all features to predict a binary target column.
+    The target column name comes from the ``target_feat`` property, given either as a str or as a
+    one-element list. The target values are cast to int before fitting. Every other column of the
+    context dataframe is used as a feature, and the fitted model is registered through
+    ``context.set_model`` with those feature column names.
+    :param context: execution context providing the dataframe, the model config and the properties.
+    :raises ValueError: if the model config has no target feat, or more than one target is configured.
+    :raises TypeError: if ``target_feat`` is neither a str nor a list.
+    """
+    df = context.dataframe
+    model_config = context.get_model_config()
+    if not model_config.has_target_feat():
+        raise ValueError(f'logistic_regression requires a target feat')
+    # go through the shared helper so a one-element list is accepted like a plain str
+    target_feat_name = extract_target_feat(context, single=True)
+    x = df.drop(columns=[target_feat_name])
+    y = df[target_feat_name]
+    model = sklearn.linear_model.LogisticRegression()
+    model.fit(x, y.astype(int))
+    context.set_model(model, features=x.columns)
+def isolation_forest(context: wizata_dsapi.Context):
+    """
+    Train an Isolation Forest for unsupervised anomaly detection using a sensitivity level (1-5).
+    The ``sensitivity`` property selects the contamination passed to the forest:
+    1 -> 0.05, 2 -> 0.15, 3 -> 0.25, 4 -> 0.35, 5 -> 0.4.
+    :param context: execution context providing the dataframe, the model config and the properties.
+    :return: a copy of the context dataframe with an added ``isolation_forest_predict`` column holding
+        the output of ``fit_predict``.
+    :raises ValueError: if a target feat is configured, or ``sensitivity`` is missing, None, not
+        convertible to int, or outside 1-5.
+    """
+    model_config = context.get_model_config()
+    if model_config.has_target_feat():
+        raise ValueError(f'isolation_forest does not requires a target feat')
+    sensitivities = [0.05, 0.15, 0.25, 0.35, 0.4]
+    try:
+        if context.properties['sensitivity'] is None:
+            raise KeyError("sensitivity is none")
+        sensitivity = int(context.properties['sensitivity'])
+        # reject 0 and negatives explicitly: negative indexing would otherwise silently pick a level
+        if not 1 <= sensitivity <= len(sensitivities):
+            raise IndexError(f"sensitivity {sensitivity} is out of range")
+        contamination = sensitivities[sensitivity - 1]
+    except Exception as e:
+        raise ValueError(f'cannot extract sensitivity integer from 1 to 5 due to {e}') from e
+    df = context.dataframe.copy()
+    # capture the training columns before the prediction column is appended to the frame
+    feature_names = df.columns
+    model = sklearn.ensemble.IsolationForest(contamination=contamination)
+    df['isolation_forest_predict'] = model.fit_predict(df)
+    context.set_model(model, features=feature_names)
+    return df
+def gradiant_boost_classifier(context: wizata_dsapi.Context):
+    """
+    Train a Gradient Boosting classifier on all features to predict a target column.
+    The target column name comes from the ``target_feat`` property, given either as a str or as a
+    one-element list. Every other column of the context dataframe is used as a feature, and the
+    fitted model is registered through ``context.set_model`` with those feature column names.
+    :param context: execution context providing the dataframe, the model config and the properties.
+    :raises ValueError: if the model config has no target feat, or more than one target is configured.
+    :raises TypeError: if ``target_feat`` is neither a str nor a list.
+    """
+    df = context.dataframe
+    model_config = context.get_model_config()
+    if not model_config.has_target_feat():
+        raise ValueError(f'gradiant_boost_classifier requires a target feat')
+    # go through the shared helper so a one-element list is accepted like a plain str
+    target_feat_name = extract_target_feat(context, single=True)
+    x = df.drop(columns=[target_feat_name])
+    y = df[target_feat_name]
+    # random_state is fixed so repeated trainings on the same data give the same model
+    model = sklearn.ensemble.GradientBoostingClassifier(random_state=0).fit(x, y)
+    # register only the columns the model was fitted on; the target column is not a feature
+    context.set_model(model, features=x.columns)
+class SetpointOptimizer:
+    """
+    Wraps a KNN quality forecaster (StandardScaler + KNeighborsRegressor) with a grid-search
+    setpoint recommendation method.
+    At training time, the underlying pipeline learns quality = f(telemetry + setpoints) from
+    historical data. At inference, recommend(X) keeps each row's telemetry fixed and grid-searches
+    over stored setpoint bounds (5th-95th percentile of train data) to find the combination that
+    minimizes (or maximizes) predicted quality.
+    :ivar pipeline: fitted sklearn Pipeline (StandardScaler -> KNeighborsRegressor).
+    :ivar setpoint_cols: ordered list of setpoint column names auto-detected at train time.
+    :ivar feature_cols: ordered list of all feature columns used at training (telemetry + setpoints).
+    :ivar bounds: dict mapping each setpoint column name to a (low, high) tuple.
+    :ivar direction: 'minimize' or 'maximize' the target quality.
+    :ivar grid_size: number of points per setpoint axis in the grid search (at least 1).
+    """
+    def __init__(self, pipeline, setpoint_cols, feature_cols, bounds, direction, grid_size):
+        """
+        Store the fitted pipeline and the grid-search settings.
+        :raises ValueError: if grid_size is lower than 1.
+        """
+        grid_size = int(grid_size)
+        if grid_size < 1:
+            # an empty grid would leave recommend() without any candidate to choose from
+            raise ValueError(f"grid_size must be a positive integer, got {grid_size}")
+        self.pipeline = pipeline
+        self.setpoint_cols = list(setpoint_cols)
+        self.feature_cols = list(feature_cols)
+        self.bounds = dict(bounds)
+        self.direction = direction
+        self.grid_size = grid_size
+    def predict(self, X):
+        """Return predicted quality for each row — dual-use for validation or regular predict-mode pipelines."""
+        return self.pipeline.predict(X)
+    def recommend(self, X):
+        """
+        For each row of X, return the grid-search best setpoint combination.
+        Every row is repeated once per grid point, its setpoint columns are overwritten with the grid
+        values, and the candidate with the lowest (or highest, when direction is 'maximize')
+        pipeline prediction is kept.
+        :param X: features matrix (DataFrame or ndarray) with columns matching feature_cols. A DataFrame
+            that contains every feature column is selected by name, so column order and extra
+            columns do not matter; otherwise values are taken positionally.
+        :return: ndarray of shape [n_rows, n_setpoints] in the order of self.setpoint_cols.
+        """
+        if not isinstance(X, pandas.DataFrame):
+            X = pandas.DataFrame(X, columns=self.feature_cols)
+        elif all(col in X.columns for col in self.feature_cols):
+            # select by name: the candidates below are relabelled positionally with feature_cols,
+            # so a reordered or wider frame would otherwise be silently mislabelled
+            X = X[self.feature_cols]
+        grids = [
+            numpy.linspace(self.bounds[sp][0], self.bounds[sp][1], self.grid_size)
+            for sp in self.setpoint_cols
+        ]
+        # one row per grid point, one column per setpoint
+        mesh = numpy.array(numpy.meshgrid(*grids)).reshape(len(self.setpoint_cols), -1).T
+        recs = numpy.zeros((len(X), len(self.setpoint_cols)))
+        pick_best = numpy.argmax if self.direction == "maximize" else numpy.argmin
+        for i, (_, row) in enumerate(X.iterrows()):
+            candidates = pandas.DataFrame(
+                numpy.tile(row.values, (len(mesh), 1)),
+                columns=self.feature_cols
+            )
+            for j, sp in enumerate(self.setpoint_cols):
+                candidates[sp] = mesh[:, j]
+            preds = self.pipeline.predict(candidates)
+            recs[i] = mesh[int(pick_best(preds))]
+        return recs
+    def output_names(self, suffix: str = "_recommended"):
+        """Suggest MLModelConfig.output_columns_names matching the setpoint order (e.g. for UI prefill)."""
+        return [f"{sp}{suffix}" for sp in self.setpoint_cols]
+    def get_inference_contract(self):
+        """Self-describing inference contract — the platform reads this to override MLModelConfig at runtime.
+        Returns the function to invoke (.recommend) and the output column names that will be produced, in order."""
+        return {
+            "function": "recommend",
+            "output_columns_names": self.output_names(),
+        }
+def setpoint_optimizer(context: wizata_dsapi.Context):
+    """Train a KNN-based setpoint optimizer that learns quality = f(telemetry + setpoints) and at
+    inference recommends optimal setpoint values. Setpoint columns are auto-detected via
+    BusinessType.SET_POINTS on context.datapoints; bounds are the 5th-95th percentile of train data.
+    Required MLModelConfig:
+      - train_script = 'wizata.models.setpoint_optimizer'
+      - target_feat  = '<quality column name>'
+      - function     = 'recommend'  (or 'predict' for quality forecasting only)
+      - output_columns_names = ['<sp1>_recommended', '<sp2>_recommended', ...] in the order setpoints appear.
+        The trained model exposes `.output_names()` to suggest a matching default.
+    Properties:
+      - k: KNN neighbors (default 5)
+      - grid_size: points per setpoint axis (default 10 — total cost is grid_size^n_setpoints per row)
+      - direction: 'minimize' (default) or 'maximize' the target
+    :param context: execution context providing the dataframe, datapoints, model config and properties.
+    :raises ValueError: if no single target column is configured or it is missing from the dataframe,
+        if no setpoint column is found, or if k, grid_size or direction is invalid.
+    """
+    df = context.dataframe
+    model_config = context.get_model_config()
+    if not model_config.has_target_feat():
+        raise ValueError(f'setpoint_optimizer requires a target_feat (the quality column to optimize)')
+    target = context.properties["target_feat"]
+    if isinstance(target, list):
+        if len(target) != 1:
+            raise ValueError(f'setpoint_optimizer requires exactly one target_feat column')
+        target = target[0]
+    if target not in df.columns:
+        raise ValueError(f"target_feat '{target}' not found in dataframe columns")
+    datapoints = context.datapoints or {}
+    # keep only setpoint datapoints that are present as dataframe columns and are not the target
+    setpoint_cols = [
+        col for col, dp in datapoints.items()
+        if col in df.columns
+        and col != target
+        and dp.business_type == wizata_dsapi.BusinessType.SET_POINTS
+    ]
+    if not setpoint_cols:
+        raise ValueError(
+            "no setpoint datapoints found in context (BusinessType.SET_POINTS) — "
+            "the optimizer needs at least one setpoint column to optimize"
+        )
+    k = int(context.properties.get("k", 5))
+    if k < 1:
+        raise ValueError(f"k must be a positive integer, got {k}")
+    grid_size = int(context.properties.get("grid_size", 10))
+    if grid_size < 1:
+        # fail at train time: an empty grid would only surface later as an obscure inference error
+        raise ValueError(f"grid_size must be a positive integer, got {grid_size}")
+    direction = context.properties.get("direction", "minimize")
+    if direction not in ("minimize", "maximize"):
+        raise ValueError(f"direction must be 'minimize' or 'maximize', got '{direction}'")
+    x = df.drop(columns=[target])
+    y = df[target]
+    pipeline = sklearn.pipeline.Pipeline([
+        ("scaler", sklearn.preprocessing.StandardScaler()),
+        ("knn", sklearn.neighbors.KNeighborsRegressor(n_neighbors=k)),
+    ])
+    pipeline.fit(x, y)
+    # search range per setpoint: 5th to 95th percentile of the training values
+    bounds = {
+        sp: (float(x[sp].quantile(0.05)), float(x[sp].quantile(0.95)))
+        for sp in setpoint_cols
+    }
+    optimizer = SetpointOptimizer(
+        pipeline=pipeline,
+        setpoint_cols=setpoint_cols,
+        feature_cols=list(x.columns),
+        bounds=bounds,
+        direction=direction,
+        grid_size=grid_size,
+    )
+    context.set_model(optimizer, features=x.columns)

wizata_dsapi-2.0.0.dev25/wizata_dsapi/version.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "2.0.0.dev25"

{wizata_dsapi-2.0.0.dev24 → wizata_dsapi-2.0.0.dev25/wizata_dsapi.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wizata_dsapi
-Version: 2.0.0.dev24
+Version: 2.0.0.dev25
 Summary: Wizata Data Science Toolkit
 Author: Wizata S.A.
 Author-email: info@wizata.com

wizata_dsapi-2.0.0.dev24/wizata_dsapi/models/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- from .common import linear_regression, logistic_regression, isolation_forest, gradiant_boost_classifier

wizata_dsapi-2.0.0.dev24/wizata_dsapi/models/common.py DELETED Viewed

@@ -1,116 +0,0 @@
-import wizata_dsapi
-import pandas
-import numpy
-import sklearn
-import sklearn.linear_model
-import sklearn.ensemble
-def extract_target_feat(context: wizata_dsapi.Context, single: bool = True):
-    """
-    return a list of target_feat columns names if not single value or the single value target feat name
-    raise an error if configuration mismatch
-    """
-    if "target_feat" not in context.properties:
-        raise ValueError(f"training script requires a proper target_feat")
-    target_feat = context.properties["target_feat"]
-    if isinstance(target_feat, str):
-        if single:
-            return target_feat
-        else:
-            return [target_feat]
-    elif isinstance(target_feat, list):
-        if single:
-            if len(target_feat) == 1:
-                return target_feat[0]
-            else:
-                raise ValueError(f"expecting only one target_feat but found {len(target_feat)}")
-        else:
-            return [target_feat]
-    else:
-        raise TypeError(f'target_feat must be a str or a list of str but found {target_feat.__class__.__name__}')
-def linear_regression(context: wizata_dsapi.Context):
-    """Train a linear regression model on all features to predict a single target column."""
-    df = context.dataframe
-    model_config = context.get_model_config()
-    if not model_config.has_target_feat():
-        raise ValueError(f'linear_regression requires a target feat')
-    target_feat_name = context.properties["target_feat"]
-    x = df.drop(columns=[target_feat_name])
-    y = df[target_feat_name]
-    model = sklearn.linear_model.LinearRegression()
-    model.fit(x, y)
-    context.set_model(model, features=x.columns)
-def logistic_regression(context: wizata_dsapi.Context):
-    """Train a logistic regression classifier on all features to predict a binary target column."""
-    df = context.dataframe
-    model_config = context.get_model_config()
-    if not model_config.has_target_feat():
-        raise ValueError(f'logistic_regression requires a target feat')
-    target_feat_name = context.properties["target_feat"]
-    x = df.drop(columns=[target_feat_name])
-    y = df[target_feat_name]
-    model = sklearn.linear_model.LogisticRegression()
-    model.fit(x, y.astype(int))
-    context.set_model(model, features=x.columns)
-def isolation_forest(context: wizata_dsapi.Context):
-    """Train an Isolation Forest for unsupervised anomaly detection using a sensitivity level (1-5)."""
-    model_config = context.get_model_config()
-    if model_config.has_target_feat():
-        raise ValueError(f'isolation_forest does not requires a target feat')
-    try:
-        if context.properties['sensitivity'] is None:
-            raise KeyError("sensitivity is none")
-        sensitivity = int(context.properties['sensitivity'])
-        sensitivities = [0.05, 0.15, 0.25, 0.35, 0.4]
-        contamination = sensitivities[sensitivity - 1]
-    except Exception as e:
-        raise ValueError(f'cannot extract sensitivity integer from 0 to 4 due to {e}')
-    df = context.dataframe.copy()
-    model = sklearn.ensemble.IsolationForest(contamination=contamination)
-    df['isolation_forest_predict'] = model.fit_predict(df)
-    context.set_model(model, features=df.columns)
-    return df
-def gradiant_boost_classifier(context: wizata_dsapi.Context):
-    """Train a Gradient Boosting classifier on all features to predict a target column."""
-    df = context.dataframe
-    model_config = context.get_model_config()
-    if not model_config.has_target_feat():
-        raise ValueError(f'gradiant_boost_classifier requires a target feat')
-    target_feat_name = context.properties["target_feat"]
-    x = df.drop(columns=[target_feat_name])
-    y = df[target_feat_name]
-    model = sklearn.ensemble.GradientBoostingClassifier(random_state=0).fit(x, y)
-    context.set_model(model, features=df.columns)