wizata-dsapi 2.0.0.dev23__tar.gz → 2.0.0.dev25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. {wizata_dsapi-2.0.0.dev23/wizata_dsapi.egg-info → wizata_dsapi-2.0.0.dev25}/PKG-INFO +1 -1
  2. wizata_dsapi-2.0.0.dev25/wizata_dsapi/models/__init__.py +1 -0
  3. wizata_dsapi-2.0.0.dev25/wizata_dsapi/models/common.py +272 -0
  4. wizata_dsapi-2.0.0.dev25/wizata_dsapi/scripts/__init__.py +1 -0
  5. wizata_dsapi-2.0.0.dev25/wizata_dsapi/scripts/common.py +372 -0
  6. wizata_dsapi-2.0.0.dev25/wizata_dsapi/version.py +1 -0
  7. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25/wizata_dsapi.egg-info}/PKG-INFO +1 -1
  8. wizata_dsapi-2.0.0.dev23/wizata_dsapi/models/__init__.py +0 -1
  9. wizata_dsapi-2.0.0.dev23/wizata_dsapi/models/common.py +0 -116
  10. wizata_dsapi-2.0.0.dev23/wizata_dsapi/scripts/__init__.py +0 -1
  11. wizata_dsapi-2.0.0.dev23/wizata_dsapi/scripts/common.py +0 -122
  12. wizata_dsapi-2.0.0.dev23/wizata_dsapi/version.py +0 -1
  13. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/LICENSE.txt +0 -0
  14. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/README.rst +0 -0
  15. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/setup.cfg +0 -0
  16. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/setup.py +0 -0
  17. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/__init__.py +0 -0
  18. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/api_config.py +0 -0
  19. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/api_dto.py +0 -0
  20. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/api_interface.py +0 -0
  21. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/bucket.py +0 -0
  22. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/business_label.py +0 -0
  23. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/context.py +0 -0
  24. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/dashboard.py +0 -0
  25. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/dataframe_toolkit.py +0 -0
  26. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/datapoint.py +0 -0
  27. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/datastore.py +0 -0
  28. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/ds_dataframe.py +0 -0
  29. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/dsapi_json_encoder.py +0 -0
  30. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/edge_config.py +0 -0
  31. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/edge_device.py +0 -0
  32. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/edge_module.py +0 -0
  33. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/evaluation.py +0 -0
  34. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/execution.py +0 -0
  35. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/execution_log.py +0 -0
  36. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/experiment.py +0 -0
  37. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/graylog_log.py +0 -0
  38. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/group_system.py +0 -0
  39. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/ilogger.py +0 -0
  40. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/insight.py +0 -0
  41. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/mlmodel.py +0 -0
  42. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/mobile_asset.py +0 -0
  43. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/model_toolkit.py +0 -0
  44. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/notification.py +0 -0
  45. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/paged_query_result.py +0 -0
  46. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/pipeline.py +0 -0
  47. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/pipeline_image.py +0 -0
  48. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/plot.py +0 -0
  49. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/plots/__init__.py +0 -0
  50. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/plots/common.py +0 -0
  51. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/plots/theme.py +0 -0
  52. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/request.py +0 -0
  53. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/script.py +0 -0
  54. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/search.py +0 -0
  55. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/solution_component.py +0 -0
  56. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/streamlit_utils.py +0 -0
  57. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/template.py +0 -0
  58. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/template_config.py +0 -0
  59. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/trigger.py +0 -0
  60. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/twin.py +0 -0
  61. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/twinregistration.py +0 -0
  62. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/user.py +0 -0
  63. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/wizard_function.py +0 -0
  64. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/wizard_request.py +0 -0
  65. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/wizata_dsapi_client.py +0 -0
  66. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi/words.py +0 -0
  67. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi.egg-info/SOURCES.txt +0 -0
  68. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi.egg-info/dependency_links.txt +0 -0
  69. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi.egg-info/requires.txt +0 -0
  70. {wizata_dsapi-2.0.0.dev23 → wizata_dsapi-2.0.0.dev25}/wizata_dsapi.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wizata_dsapi
3
- Version: 2.0.0.dev23
3
+ Version: 2.0.0.dev25
4
4
  Summary: Wizata Data Science Toolkit
5
5
  Author: Wizata S.A.
6
6
  Author-email: info@wizata.com
@@ -0,0 +1 @@
1
+ from .common import linear_regression, logistic_regression, isolation_forest, gradiant_boost_classifier, setpoint_optimizer, SetpointOptimizer
@@ -0,0 +1,272 @@
1
+ import wizata_dsapi
2
+
3
+ import pandas
4
+ import numpy
5
+
6
+ import sklearn
7
+ import sklearn.linear_model
8
+ import sklearn.ensemble
9
+ import sklearn.neighbors
10
+ import sklearn.pipeline
11
+ import sklearn.preprocessing
12
+
13
+
14
def extract_target_feat(context: wizata_dsapi.Context, single: bool = True):
    """
    Resolve the ``target_feat`` property of the context into target column name(s).

    :param context: execution context whose ``properties`` holds ``target_feat`` (a str or a list of str).
    :param single: when True return one column name; when False return a list of column names.
    :return: the single target column name, or a flat list of target column names.
    :raises ValueError: if ``target_feat`` is missing, or ``single`` is True and the list
        does not contain exactly one name.
    :raises TypeError: if ``target_feat`` is neither a str nor a list.
    """
    if "target_feat" not in context.properties:
        raise ValueError("training script requires a proper target_feat")

    target_feat = context.properties["target_feat"]

    if isinstance(target_feat, str):
        return target_feat if single else [target_feat]

    if isinstance(target_feat, list):
        if not single:
            # Return a flat copy: wrapping the list a second time would produce a
            # nested list that cannot be used as a column selector.
            return list(target_feat)
        if len(target_feat) == 1:
            return target_feat[0]
        raise ValueError(f"expecting only one target_feat but found {len(target_feat)}")

    raise TypeError(f'target_feat must be a str or a list of str but found {target_feat.__class__.__name__}')
39
+
40
+
41
def linear_regression(context: wizata_dsapi.Context):
    """
    Train a linear regression model on all features to predict a single target column.

    The target column is every-row ``y``; all remaining dataframe columns are used as
    features. The fitted model is registered on the context through ``set_model``.

    :param context: execution context providing ``dataframe``, ``properties`` and the model config.
    :raises ValueError: if the model config has no target feat, or ``target_feat`` does not
        resolve to exactly one column name.
    """
    df = context.dataframe

    model_config = context.get_model_config()
    if not model_config.has_target_feat():
        raise ValueError('linear_regression requires a target feat')

    # target_feat may be configured as a str or as a one-element list; the shared
    # helper normalises both forms to a single column name.
    target_feat_name = extract_target_feat(context, single=True)

    x = df.drop(columns=[target_feat_name])
    y = df[target_feat_name]

    model = sklearn.linear_model.LinearRegression()
    model.fit(x, y)

    context.set_model(model, features=x.columns)
57
+
58
+
59
def logistic_regression(context: wizata_dsapi.Context):
    """
    Train a logistic regression classifier on all features to predict a binary target column.

    The target column is cast to int before fitting; all remaining dataframe columns are
    used as features. The fitted model is registered on the context through ``set_model``.

    :param context: execution context providing ``dataframe``, ``properties`` and the model config.
    :raises ValueError: if the model config has no target feat, or ``target_feat`` does not
        resolve to exactly one column name.
    """
    df = context.dataframe

    model_config = context.get_model_config()
    if not model_config.has_target_feat():
        raise ValueError('logistic_regression requires a target feat')

    # target_feat may be configured as a str or as a one-element list; the shared
    # helper normalises both forms to a single column name.
    target_feat_name = extract_target_feat(context, single=True)

    x = df.drop(columns=[target_feat_name])
    y = df[target_feat_name]

    model = sklearn.linear_model.LogisticRegression()
    model.fit(x, y.astype(int))

    context.set_model(model, features=x.columns)
75
+
76
+
77
def isolation_forest(context: wizata_dsapi.Context):
    """
    Train an Isolation Forest for unsupervised anomaly detection.

    The ``sensitivity`` property (integer 1-5) selects the contamination ratio passed to
    the model: 1 -> 0.05, 2 -> 0.15, 3 -> 0.25, 4 -> 0.35, 5 -> 0.4.

    :param context: execution context providing ``dataframe``, ``properties`` and the model config.
    :return: a copy of the dataframe with an extra ``isolation_forest_predict`` column
        holding the result of ``fit_predict``.
    :raises ValueError: if a target feat is configured, or ``sensitivity`` is missing,
        not an integer, or outside 1-5.
    """
    model_config = context.get_model_config()
    if model_config.has_target_feat():
        raise ValueError(f'isolation_forest does not requires a target feat')

    sensitivities = [0.05, 0.15, 0.25, 0.35, 0.4]
    try:
        if context.properties['sensitivity'] is None:
            raise KeyError("sensitivity is none")
        sensitivity = int(context.properties['sensitivity'])
        # Explicit range check: without it 0 or negative values would silently wrap
        # around through negative list indexing.
        if not 1 <= sensitivity <= len(sensitivities):
            raise IndexError(f"sensitivity {sensitivity} is out of range")
        contamination = sensitivities[sensitivity - 1]
    except (KeyError, TypeError, ValueError, IndexError) as e:
        raise ValueError(f'cannot extract sensitivity integer from 1 to 5 due to {e}') from e

    df = context.dataframe.copy()
    # Capture the training columns before the prediction column is appended, so the
    # registered features are exactly what the model was fitted on.
    feature_columns = df.columns
    model = sklearn.ensemble.IsolationForest(contamination=contamination)
    df['isolation_forest_predict'] = model.fit_predict(df)
    context.set_model(model, features=feature_columns)
    return df
98
+
99
+
100
def gradiant_boost_classifier(context: wizata_dsapi.Context):
    """
    Train a Gradient Boosting classifier on all features to predict a target column.

    All dataframe columns except the target are used as features. The fitted model is
    registered on the context through ``set_model``.

    :param context: execution context providing ``dataframe``, ``properties`` and the model config.
    :raises ValueError: if the model config has no target feat, or ``target_feat`` does not
        resolve to exactly one column name.
    """
    df = context.dataframe

    model_config = context.get_model_config()
    if not model_config.has_target_feat():
        raise ValueError('gradiant_boost_classifier requires a target feat')

    # target_feat may be configured as a str or as a one-element list; the shared
    # helper normalises both forms to a single column name.
    target_feat_name = extract_target_feat(context, single=True)

    x = df.drop(columns=[target_feat_name])
    y = df[target_feat_name]

    model = sklearn.ensemble.GradientBoostingClassifier(random_state=0).fit(x, y)
    # Register only the columns the model was fitted on; the target must not be
    # listed as an input feature.
    context.set_model(model, features=x.columns)
114
+
115
+
116
class SetpointOptimizer:
    """
    Wraps a fitted quality forecaster with a grid-search setpoint recommendation method.

    At training time, the underlying pipeline learns quality = f(telemetry + setpoints) from
    historical data. At inference, recommend(X) keeps each row's telemetry fixed and grid-searches
    over the stored setpoint bounds to find the combination that minimizes (or maximizes)
    the predicted quality.

    :ivar pipeline: fitted estimator exposing ``predict`` (built by ``setpoint_optimizer`` as
        StandardScaler -> KNeighborsRegressor).
    :ivar setpoint_cols: ordered list of setpoint column names.
    :ivar feature_cols: ordered list of all feature columns used at training (telemetry + setpoints).
    :ivar bounds: dict mapping each setpoint column name to a (low, high) tuple.
    :ivar direction: 'maximize' to maximize the target quality; any other value minimizes it.
    :ivar grid_size: number of points per setpoint axis in the grid search.
    """

    def __init__(self, pipeline, setpoint_cols, feature_cols, bounds, direction, grid_size):
        self.pipeline = pipeline
        self.setpoint_cols = list(setpoint_cols)
        self.feature_cols = list(feature_cols)
        self.bounds = dict(bounds)
        self.direction = direction
        self.grid_size = int(grid_size)

    def predict(self, X):
        """Return predicted quality for each row — dual-use for validation or regular predict-mode pipelines."""
        return self.pipeline.predict(X)

    def recommend(self, X):
        """
        For each row of X, return the grid-search best setpoint combination.

        :param X: features matrix. A DataFrame is aligned by column name to feature_cols
            (extra columns are ignored); any other array-like is interpreted positionally
            in feature_cols order.
        :return: ndarray of shape [n_rows, n_setpoints] in the order of self.setpoint_cols.
        :raises KeyError: if X is a DataFrame missing one of feature_cols.
        """
        if isinstance(X, pandas.DataFrame):
            # Align by name: the candidate frames below relabel raw row values with
            # feature_cols, so a different column order would silently mix features up.
            X = X[self.feature_cols]
        else:
            X = pandas.DataFrame(X, columns=self.feature_cols)

        grids = [
            numpy.linspace(self.bounds[sp][0], self.bounds[sp][1], self.grid_size)
            for sp in self.setpoint_cols
        ]
        # One row per setpoint combination, columns ordered like setpoint_cols.
        mesh = numpy.array(numpy.meshgrid(*grids)).reshape(len(self.setpoint_cols), -1).T

        pick_best = numpy.argmax if self.direction == "maximize" else numpy.argmin

        recs = numpy.zeros((len(X), len(self.setpoint_cols)))
        for i, row_values in enumerate(X.to_numpy()):
            # Repeat the row once per grid point, then overwrite only the setpoint columns.
            candidates = pandas.DataFrame(
                numpy.tile(row_values, (len(mesh), 1)),
                columns=self.feature_cols
            )
            for j, sp in enumerate(self.setpoint_cols):
                candidates[sp] = mesh[:, j]
            preds = self.pipeline.predict(candidates)
            recs[i] = mesh[int(pick_best(preds))]

        return recs

    def output_names(self, suffix: str = "_recommended"):
        """Suggest MLModelConfig.output_columns_names matching the setpoint order (e.g. for UI prefill)."""
        return [f"{sp}{suffix}" for sp in self.setpoint_cols]

    def get_inference_contract(self):
        """Self-describing inference contract — the platform reads this to override MLModelConfig at runtime.
        Returns the function to invoke (.recommend) and the output column names that will be produced, in order."""
        return {
            "function": "recommend",
            "output_columns_names": self.output_names(),
        }
189
+
190
+
191
def setpoint_optimizer(context: wizata_dsapi.Context):
    """Train a KNN-based setpoint optimizer that learns quality = f(telemetry + setpoints) and at
    inference recommends optimal setpoint values. Setpoint columns are auto-detected via
    BusinessType.SET_POINTS on context.datapoints; bounds are the 5th-95th percentile of train data.

    Required MLModelConfig:
    - train_script = 'wizata.models.setpoint_optimizer'
    - target_feat = '<quality column name>'
    - function = 'recommend' (or 'predict' for quality forecasting only)
    - output_columns_names = ['<sp1>_recommended', '<sp2>_recommended', ...] in the order setpoints appear.
    The trained model exposes `.output_names()` to suggest a matching default.

    Properties:
    - k: KNN neighbors (default 5, must be >= 1)
    - grid_size: points per setpoint axis (default 10, must be >= 1 — total cost is
      grid_size^n_setpoints per row)
    - direction: 'minimize' (default) or 'maximize' the target

    :raises ValueError: if the target feat is missing, ambiguous or absent from the dataframe,
        if k / grid_size / direction are invalid, or if no setpoint column is found.
    """
    df = context.dataframe

    model_config = context.get_model_config()
    if not model_config.has_target_feat():
        raise ValueError('setpoint_optimizer requires a target_feat (the quality column to optimize)')

    target = context.properties["target_feat"]
    if isinstance(target, list):
        if len(target) != 1:
            raise ValueError('setpoint_optimizer requires exactly one target_feat column')
        target = target[0]

    if target not in df.columns:
        raise ValueError(f"target_feat '{target}' not found in dataframe columns")

    # Validate hyper-parameters up front so a bad configuration fails at training
    # time rather than later at inference on an empty search grid.
    k = int(context.properties.get("k", 5))
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")
    grid_size = int(context.properties.get("grid_size", 10))
    if grid_size < 1:
        raise ValueError(f"grid_size must be a positive integer, got {grid_size}")
    direction = context.properties.get("direction", "minimize")
    if direction not in ("minimize", "maximize"):
        raise ValueError(f"direction must be 'minimize' or 'maximize', got '{direction}'")

    datapoints = context.datapoints or {}
    setpoint_cols = [
        col for col, dp in datapoints.items()
        if col in df.columns
        and col != target
        and dp.business_type == wizata_dsapi.BusinessType.SET_POINTS
    ]

    if not setpoint_cols:
        raise ValueError(
            "no setpoint datapoints found in context (BusinessType.SET_POINTS) — "
            "the optimizer needs at least one setpoint column to optimize"
        )

    x = df.drop(columns=[target])
    y = df[target]

    pipeline = sklearn.pipeline.Pipeline([
        ("scaler", sklearn.preprocessing.StandardScaler()),
        ("knn", sklearn.neighbors.KNeighborsRegressor(n_neighbors=k)),
    ])
    pipeline.fit(x, y)

    # Search range per setpoint: 5th to 95th percentile of the training values.
    bounds = {
        sp: (float(x[sp].quantile(0.05)), float(x[sp].quantile(0.95)))
        for sp in setpoint_cols
    }

    optimizer = SetpointOptimizer(
        pipeline=pipeline,
        setpoint_cols=setpoint_cols,
        feature_cols=list(x.columns),
        bounds=bounds,
        direction=direction,
        grid_size=grid_size,
    )

    context.set_model(optimizer, features=x.columns)
267
+
268
+
269
+
270
+
271
+
272
+
@@ -0,0 +1 @@
1
+ from .common import filter_df, fillna, clustering, merge, normalize, interpolate, remove_outliers, resample, rolling, diff, lag, clip, steady_state_filter, pca, setpoint_deviation, formula, target_feat_to_binary
@@ -0,0 +1,372 @@
1
+ import re
2
+
3
+ import wizata_dsapi
4
+
5
+ import pandas
6
+ import numpy
7
+
8
+ import sklearn
9
+ import sklearn.cluster
10
+ import sklearn.metrics
11
+ import sklearn.ensemble
12
+ import sklearn.preprocessing
13
+ import sklearn.decomposition
14
+
15
+
16
# Whitelist of function names accepted inside a `formula` expression, each mapped to
# the numpy callable handed to the expression evaluator under that name.
# Note: "min" / "max" map to numpy.minimum / numpy.maximum, which compare two
# operands element-wise; they are not reductions over a single column.
_FORMULA_SAFE_FUNCTIONS = {
    "abs": numpy.abs,
    "sqrt": numpy.sqrt,
    "log": numpy.log,
    "log10": numpy.log10,
    "exp": numpy.exp,
    "clip": numpy.clip,
    "round": numpy.round,
    "min": numpy.minimum,
    "max": numpy.maximum,
}
27
+
28
+
29
def filter_df(context: wizata_dsapi.Context):
    """
    Filter dataframe rows using pandas query strings from the 'filters' property list.

    Filters are applied one after another on a copy of the context dataframe, so the
    result only keeps rows matching every filter.

    :param context: execution context providing ``dataframe`` and ``properties['filters']``.
    :return: the filtered copy of the dataframe.
    :raises ValueError: if 'filters' is missing or not a list, or a filter expression
        cannot be parsed or references an unknown name.
    """
    if "filters" not in context.properties or not isinstance(context.properties['filters'], list):
        raise ValueError('there is no list *filters* in properties - please set them on context or config')

    df = context.dataframe.copy()

    for filter_row in context.properties['filters']:
        try:
            df = df.query(filter_row)
        # DataFrame.query reports malformed expressions as SyntaxError and unknown
        # names as a NameError subclass; ParserError is kept for backward compatibility.
        except (SyntaxError, NameError, pandas.errors.ParserError) as e:
            raise ValueError(f"error parsing filter string '{filter_row}': {e}") from e

    return df
45
+
46
+
47
def clustering(context: wizata_dsapi.Context):
    """
    K-means clustering with automatic cluster count selection via silhouette score.

    The dataframe is standardised, K-means is fitted for every cluster count from 2 up to
    min(10, n_rows) (exclusive), and the labels of the fit with the highest silhouette
    score are kept. When no candidate yields at least two distinct clusters, every row is
    assigned to a single cluster.

    :param context: execution context providing ``dataframe``.
    :return: a copy of the dataframe with a 1-based integer ``cluster_labels`` column.
    """
    df = context.dataframe.copy()
    scaled = sklearn.preprocessing.StandardScaler().fit_transform(df)

    best_score = None
    best_labels = None
    for num_clusters in range(2, min(10, scaled.shape[0])):
        labels = sklearn.cluster.KMeans(n_clusters=num_clusters).fit(scaled).labels_
        if len(numpy.unique(labels)) < 2:
            # Silhouette score is undefined for a single cluster; skip this candidate.
            continue
        score = sklearn.metrics.silhouette_score(scaled, labels)
        # Strict '>' keeps the smallest cluster count on ties and never selects NaN.
        if best_score is None or score > best_score:
            best_score, best_labels = score, labels

    if best_labels is None:
        df['cluster_labels'] = 0
    else:
        # Reuse the labels that were actually scored: refitting an unseeded KMeans
        # could return a different partition than the one evaluated.
        df['cluster_labels'] = best_labels

    # Shift to 1-based cluster identifiers.
    df['cluster_labels'] = df['cluster_labels'].apply(lambda label: int(label + 1))

    return df
77
+
78
+
79
def merge(context: wizata_dsapi.Context):
    """
    Join every dataframe currently held by the context on their index.

    The join type comes from the optional 'how' property and defaults to "outer".

    :param context: execution context exposing ``current_dataframes()`` and ``properties``.
    :return: the dataframe obtained by merging all frames in iteration order.
    :raises ValueError: if fewer than two dataframes are available.
    """
    frames = context.current_dataframes()
    if not len(frames) > 1:
        raise ValueError(f'there is not enough dataframes to concat')

    join_type = context.properties["how"] if "how" in context.properties else "outer"

    merged = None
    for name in frames:
        frame = frames[name]
        # The first frame seeds the result; later ones are joined onto it by index.
        merged = frame if merged is None else merged.merge(
            frame, how=join_type, left_index=True, right_index=True
        )
    return merged
96
+
97
+
98
def fillna(context: wizata_dsapi.Context):
    """
    Fill missing values per column using the 'fillna' property.

    :param context: execution context providing ``dataframe`` and ``properties['fillna']``,
        a dict mapping column names to the value used to replace missing entries.
    :return: a copy of the dataframe with the listed columns filled; the context
        dataframe itself is left untouched.
    :raises KeyError: if the 'fillna' property is missing or names a column that does
        not exist in the dataframe.
    """
    if "fillna" not in context.properties:
        raise KeyError('please set a property dict fillna')

    # Work on a copy so the step does not mutate the dataframe held by the context.
    df = context.dataframe.copy()

    fill_values = context.properties["fillna"]
    for key in fill_values:
        df[key] = df[key].fillna(value=fill_values[key])

    return df
109
+
110
+
111
def normalize(context: wizata_dsapi.Context):
    """
    Rescale every numeric column of the dataframe.

    The 'method' property picks the scaler: 'minmax' (default) or 'zscore'.

    :param context: execution context providing ``dataframe`` and ``properties``.
    :return: a copy of the dataframe whose numeric columns have been rescaled.
    :raises ValueError: if there is no numeric column or the method is unknown.
    """
    frame = context.dataframe.copy()

    scaling = context.properties.get("method", "minmax")
    numeric_cols = frame.select_dtypes(include="number").columns.tolist()

    if len(numeric_cols) == 0:
        raise ValueError(f'no numeric columns to normalize')

    # Reject unsupported methods before building any scaler.
    if scaling != "minmax" and scaling != "zscore":
        raise ValueError(f"unknown normalize method '{scaling}', use 'minmax' or 'zscore'")

    if scaling == "minmax":
        scaler = sklearn.preprocessing.MinMaxScaler()
    else:
        scaler = sklearn.preprocessing.StandardScaler()

    rescaled = scaler.fit_transform(frame[numeric_cols])
    frame[numeric_cols] = rescaled
    return frame
130
+
131
+
132
def interpolate(context: wizata_dsapi.Context):
    """
    Interpolate missing values of the dataframe with ``DataFrame.interpolate``.

    Properties:
    - method: pandas interpolation method — 'time' (default, respects timestamp spacing),
      'linear', 'nearest', 'pad', 'polynomial', or 'spline'.
    - order: optional integer forwarded to pandas; pandas requires it for the
      'polynomial' and 'spline' methods.

    :param context: execution context providing ``dataframe`` and ``properties``.
    :return: an interpolated copy of the dataframe.
    """
    df = context.dataframe.copy()

    method = context.properties.get("method", "time")

    # Only forward 'order' when configured so the other methods are called exactly
    # as before.
    options = {}
    order = context.properties.get("order")
    if order is not None:
        options["order"] = order

    return df.interpolate(method=method, **options)
139
+
140
+
141
def remove_outliers(context: wizata_dsapi.Context):
    """
    Drop rows containing outliers in any numeric column.

    The 'method' property selects 'iqr' (default, Tukey 1.5*IQR rule) or 'zscore'
    (drops rows beyond ±3 sigma). Missing values are never treated as outliers: with
    either method a row is only dropped because of an actual out-of-range value.

    :param context: execution context providing ``dataframe`` and ``properties``.
    :return: a filtered copy of the dataframe (unfiltered when it has no numeric column).
    :raises ValueError: if the method is unknown.
    """
    df = context.dataframe.copy()

    method = context.properties.get("method", "iqr")
    columns = df.select_dtypes(include="number").columns.tolist()

    if not columns:
        return df

    numeric = df[columns]

    if method == "iqr":
        q1 = numeric.quantile(0.25)
        q3 = numeric.quantile(0.75)
        iqr = q3 - q1
        lower = q1 - 1.5 * iqr
        upper = q3 + 1.5 * iqr
        in_range = (numeric >= lower) & (numeric <= upper)
        # Comparisons against NaN are False; accept NaN cells explicitly so both
        # methods treat missing values the same way.
        mask = (in_range | numeric.isna()).all(axis=1)
    elif method == "zscore":
        mean = numeric.mean()
        std = numeric.std()
        z = (numeric - mean).abs().divide(std)
        # NaN z-scores (missing values or zero variance) count as inliers.
        mask = (z.fillna(0) <= 3).all(axis=1)
    else:
        raise ValueError(f"unknown remove_outliers method '{method}', use 'iqr' or 'zscore'")

    return df[mask]
167
+
168
+
169
def resample(context: wizata_dsapi.Context):
    """
    Re-bucket the dataframe on a new time frequency and aggregate each bucket.

    Properties:
    - freq (required): pandas offset alias handed to ``DataFrame.resample``.
    - agg: aggregation handed to ``.agg`` (defaults to 'mean').

    :param context: execution context providing ``dataframe`` and ``properties``.
    :return: the resampled, aggregated dataframe.
    :raises KeyError: if the 'freq' property is missing.
    """
    frame = context.dataframe
    props = context.properties

    if "freq" not in props:
        raise KeyError(f"please set a 'freq' property (pandas offset alias, e.g. '1min', '1H')")

    frequency = props["freq"]
    aggregation = props.get("agg", "mean")

    buckets = frame.resample(frequency)
    return buckets.agg(aggregation)
180
+
181
+
182
def rolling(context: wizata_dsapi.Context):
    """
    Aggregate the dataframe over a sliding window of rows.

    Properties:
    - window (required): window size handed to ``DataFrame.rolling``.
    - agg: aggregation handed to ``.agg`` (defaults to 'mean').

    :param context: execution context providing ``dataframe`` and ``properties``.
    :return: the rolling-aggregated dataframe.
    :raises KeyError: if the 'window' property is missing.
    """
    frame = context.dataframe
    props = context.properties

    if "window" not in props:
        raise KeyError(f"please set a 'window' property (integer number of rows)")

    size = props["window"]
    aggregation = props.get("agg", "mean")

    windows = frame.rolling(window=size)
    return windows.agg(aggregation)
193
+
194
+
195
def diff(context: wizata_dsapi.Context):
    """
    Return the row-to-row difference of the dataframe.

    The optional 'periods' property (default 1) is the number of rows to shift before
    subtracting, as understood by ``DataFrame.diff``.

    :param context: execution context providing ``dataframe`` and ``properties``.
    :return: the differenced dataframe.
    """
    shift_by = context.properties.get("periods", 1)
    frame = context.dataframe
    return frame.diff(periods=shift_by)
202
+
203
+
204
def lag(context: wizata_dsapi.Context):
    """
    Add lagged versions of all numeric columns as new '<col>_lag<N>' columns.

    :param context: execution context providing ``dataframe`` and ``properties['periods']``
        (number of rows to shift, as understood by ``Series.shift``).
    :return: a copy of the dataframe with the original columns plus one lag column per
        numeric column; the context dataframe itself is left untouched.
    :raises KeyError: if the 'periods' property is missing.
    """
    if "periods" not in context.properties:
        raise KeyError("please set a 'periods' property (integer number of rows to lag)")

    # Work on a copy so the step does not mutate the dataframe held by the context.
    df = context.dataframe.copy()

    periods = context.properties["periods"]
    # Resolve the source columns once, before any lag column is added.
    columns = df.select_dtypes(include="number").columns.tolist()

    for col in columns:
        df[f"{col}_lag{periods}"] = df[col].shift(periods)

    return df
218
+
219
+
220
def clip(context: wizata_dsapi.Context):
    """
    Clip all numeric columns to a range.

    Properties 'min' and/or 'max' bound the values; at least one of the two must be set.

    :param context: execution context providing ``dataframe`` and ``properties``.
    :return: a copy of the dataframe with numeric columns clipped; the context
        dataframe itself is left untouched.
    :raises KeyError: if neither 'min' nor 'max' is set.
    """
    lower = context.properties.get("min")
    upper = context.properties.get("max")

    if lower is None and upper is None:
        raise KeyError("please set at least one of 'min' or 'max' properties")

    # Work on a copy so the step does not mutate the dataframe held by the context.
    df = context.dataframe.copy()

    columns = df.select_dtypes(include="number").columns.tolist()
    df[columns] = df[columns].clip(lower=lower, upper=upper)

    return df
234
+
235
+
236
def steady_state_filter(context: wizata_dsapi.Context):
    """
    Keep the rows where every numeric column is steady.

    A row is kept when, for all numeric columns, the rolling standard deviation over
    'window' rows is at most 'tolerance'. Rows whose rolling std is undefined (such as
    the first window - 1 rows) do not satisfy the comparison and are dropped.

    :param context: execution context providing ``dataframe`` and the 'window' and
        'tolerance' properties.
    :return: the filtered dataframe, or the dataframe unchanged when it has no numeric column.
    :raises KeyError: if 'window' or 'tolerance' is missing.
    """
    frame = context.dataframe
    props = context.properties

    if "window" not in props:
        raise KeyError(f"please set a 'window' property (integer number of rows)")
    if "tolerance" not in props:
        raise KeyError(f"please set a 'tolerance' property (max allowed rolling std)")

    size = props["window"]
    max_std = props["tolerance"]

    numeric_cols = frame.select_dtypes(include="number").columns.tolist()
    if len(numeric_cols) == 0:
        return frame

    spread = frame[numeric_cols].rolling(window=size).std()
    is_steady = (spread <= max_std).all(axis=1)
    return frame[is_steady]
256
+
257
+
258
def pca(context: wizata_dsapi.Context):
    """
    Project the numeric columns onto principal components.

    Property 'n_components' is required and is passed straight to
    ``sklearn.decomposition.PCA``. The returned frame keeps the original index and
    contains only the component columns, named 'PC1', 'PC2', ...

    :param context: execution context providing ``dataframe`` and ``properties``.
    :return: a new dataframe holding the principal components.
    :raises KeyError: if 'n_components' is missing.
    :raises ValueError: if there is no numeric column, or a numeric column contains NaN.
    """
    frame = context.dataframe
    props = context.properties

    if "n_components" not in props:
        raise KeyError(f"please set an 'n_components' property (int or float in (0,1])")

    wanted = props["n_components"]
    numeric_cols = frame.select_dtypes(include="number").columns.tolist()

    if len(numeric_cols) == 0:
        raise ValueError(f"no numeric columns to reduce")

    numeric = frame[numeric_cols]
    has_missing = numeric.isna().any().any()
    if has_missing:
        raise ValueError(f"PCA cannot handle NaN values — run interpolate or fillna upstream")

    reducer = sklearn.decomposition.PCA(n_components=wanted)
    components = reducer.fit_transform(numeric)

    names = [f"PC{i + 1}" for i in range(components.shape[1])]
    return pandas.DataFrame(components, index=frame.index, columns=names)
279
+
280
+
281
def setpoint_deviation(context: wizata_dsapi.Context):
    """
    Add '<measurement>_deviation' columns computed as (measurement - setpoint).

    Setpoints are the context datapoints present in the dataframe whose business type is
    BusinessType.SET_POINTS. Each one is paired with the non-setpoint datapoint sharing
    the same (non-null) category_id and the same twin_id. Setpoints with no candidate or
    with several candidates are skipped.

    :param context: execution context providing ``dataframe`` and ``datapoints``
        (mapping of column name to datapoint).
    :return: a copy of the dataframe with the deviation columns appended; the context
        dataframe itself is left untouched.
    :raises ValueError: if no setpoint datapoint is found, or no pair could be resolved.
    """
    # Work on a copy so the step does not mutate the dataframe held by the context.
    df = context.dataframe.copy()
    datapoints = context.datapoints or {}
    set_points_type = wizata_dsapi.BusinessType.SET_POINTS

    # Restrict to datapoints backed by an input column; resolving this once keeps
    # deviation columns added below from being considered as measurements.
    present = {col: dp for col, dp in datapoints.items() if col in df.columns}

    setpoints = {
        col: dp for col, dp in present.items()
        if dp.business_type == set_points_type
    }

    if not setpoints:
        raise ValueError("no setpoint datapoints found in context (BusinessType.SET_POINTS)")

    measurements = {
        col: dp for col, dp in present.items()
        if dp.business_type != set_points_type and dp.category_id is not None
    }

    paired = 0
    for sp_col, sp_dp in setpoints.items():
        candidates = [
            col for col, dp in measurements.items()
            if dp.category_id == sp_dp.category_id
            and dp.twin_id == sp_dp.twin_id
        ]
        # Only an unambiguous match is paired.
        if len(candidates) != 1:
            continue
        meas_col = candidates[0]
        df[f"{meas_col}_deviation"] = df[meas_col] - df[sp_col]
        paired += 1

    if paired == 0:
        raise ValueError("no setpoint/measurement pairs could be resolved — check that paired datapoints share category_id and twin_id")

    return df
314
+
315
+
316
def formula(context: wizata_dsapi.Context):
    """
    Add a new column computed from a user-defined math expression over existing columns.

    Properties:
    - expression (required): math expression referencing column names, e.g.
      'temp_1 + temp_2' or '(p_in - p_out) / p_in * 100'. Only column names, numbers,
      the characters + - * / % ( ) , ^ . and the names listed in _FORMULA_SAFE_FUNCTIONS
      pass validation. Column names with spaces must be wrapped in backticks
      (e.g. '`motor temp` * 2').
    - result: name of the output column (default 'result').

    :param context: execution context providing ``dataframe`` and ``properties``.
    :return: a copy of the dataframe with the computed column added; the context
        dataframe itself is left untouched.
    :raises KeyError: if the 'expression' property is missing.
    :raises ValueError: if the expression contains a token outside the whitelist.
    """
    if "expression" not in context.properties:
        raise KeyError("please set an 'expression' property (e.g. 'col_a + col_b')")

    expression = context.properties["expression"]
    result = context.properties.get("result", "result")

    # Work on a copy so the step does not mutate the dataframe held by the context.
    df = context.dataframe.copy()

    # Whitelist validation: strip everything we allow, flag any leftover token.
    sanitized = expression
    # Longest names first so a column name that contains another is removed whole;
    # labels are stringified because column labels are not guaranteed to be str.
    column_names = sorted((str(col) for col in df.columns), key=len, reverse=True)
    for col in column_names:
        sanitized = sanitized.replace(f"`{col}`", "").replace(col, "")
    for fn in _FORMULA_SAFE_FUNCTIONS:
        sanitized = sanitized.replace(fn, "")
    sanitized = re.sub(r"[\d\.\+\-\*/\%\(\)\s,\^]", "", sanitized)
    if sanitized.strip():
        raise ValueError(
            f"expression contains disallowed tokens: '{sanitized.strip()}' — "
            f"only column names, numbers, arithmetic operators, and these functions are allowed: "
            f"{list(_FORMULA_SAFE_FUNCTIONS.keys())}"
        )

    # NOTE(review): this evaluates a user-supplied expression; the substring whitelist
    # above is the only guard, so keep the two in sync when extending either.
    df[result] = df.eval(expression, local_dict=_FORMULA_SAFE_FUNCTIONS, engine="python")
    return df
342
+
343
+
344
def target_feat_to_binary(context: wizata_dsapi.Context):
    """
    Convert a target feature column to binary (0/1) using a threshold comparison.

    The 'target_feat' property is a dict with keys:
    - sensor: name of the column to binarise
    - operator: one of 'lt', 'lte', 'gt', 'gte'
    - threshold: value the column is compared against

    Rows satisfying the comparison become 1, all others 0.

    :param context: execution context providing ``dataframe`` and ``properties``.
    :return: a copy of the dataframe with the target column binarised; the context
        dataframe itself is left untouched.
    :raises KeyError: if 'target_feat' is missing, the operator is unknown, or the
        binarised column contains a single class.
    """
    if "target_feat" not in context.properties:
        raise KeyError('please set a target feature to transform to binary class')

    target_feat = context.properties["target_feat"]["sensor"]
    operator = context.properties["target_feat"]["operator"]
    threshold = context.properties["target_feat"]["threshold"]

    # Work on a copy so the step does not mutate the dataframe held by the context.
    df = context.dataframe.copy()
    values = df[target_feat]

    if operator == 'lt':
        matches = values < threshold
    elif operator == 'lte':
        matches = values <= threshold
    elif operator == 'gt':
        matches = values > threshold
    elif operator == 'gte':
        matches = values >= threshold
    else:
        raise KeyError('operator type for binarisation not know')

    df[target_feat] = numpy.where(matches, 1, 0)

    # The column now only holds 0/1, so the only degenerate case left to detect is
    # a threshold that puts every row in the same class.
    if df[target_feat].nunique() == 1:
        raise KeyError('classification model requires 2 classes, only one was detected')

    return df
@@ -0,0 +1 @@
1
+ __version__ = "2.0.0.dev25"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wizata_dsapi
3
- Version: 2.0.0.dev23
3
+ Version: 2.0.0.dev25
4
4
  Summary: Wizata Data Science Toolkit
5
5
  Author: Wizata S.A.
6
6
  Author-email: info@wizata.com
@@ -1 +0,0 @@
1
- from .common import linear_regression, logistic_regression, isolation_forest, gradiant_boost_classifier
@@ -1,116 +0,0 @@
1
- import wizata_dsapi
2
-
3
- import pandas
4
- import numpy
5
-
6
- import sklearn
7
- import sklearn.linear_model
8
- import sklearn.ensemble
9
-
10
-
11
def extract_target_feat(context: wizata_dsapi.Context, single: bool = True):
    """
    Resolve the ``target_feat`` property of the context into column name(s).

    :param context: execution context whose ``properties`` holds ``target_feat``
        as either a str or a list of str.
    :param single: when True return exactly one column name; when False return
        a list of column names.
    :return: the target column name (``single=True``) or a flat list of target
        column names (``single=False``).
    :raises ValueError: if ``target_feat`` is missing, or ``single`` is True and
        the configured list does not contain exactly one name.
    :raises TypeError: if ``target_feat`` is neither a str nor a list.
    """
    if "target_feat" not in context.properties:
        raise ValueError("training script requires a proper target_feat")

    target_feat = context.properties["target_feat"]

    if isinstance(target_feat, str):
        return target_feat if single else [target_feat]

    if isinstance(target_feat, list):
        if not single:
            # Return a flat copy of the names. Wrapping the list again would
            # produce a nested list ([["a", "b"]]) instead of column names.
            return list(target_feat)
        if len(target_feat) != 1:
            raise ValueError(f"expecting only one target_feat but found {len(target_feat)}")
        return target_feat[0]

    raise TypeError(f'target_feat must be a str or a list of str but found {target_feat.__class__.__name__}')
36
-
37
-
38
def linear_regression(context: wizata_dsapi.Context):
    """
    Fit a linear regression that predicts the target column from every other column.

    The target column name is read from ``context.properties["target_feat"]``;
    the fitted model is stored on the context together with the names of the
    input columns.

    :param context: execution context providing the dataframe, properties and model config.
    :raises ValueError: if the model config declares no target feat.
    """
    frame = context.dataframe

    if not context.get_model_config().has_target_feat():
        raise ValueError(f'linear_regression requires a target feat')

    target_column = context.properties["target_feat"]

    # Everything except the target column is used as model input.
    inputs = frame.drop(columns=[target_column])
    targets = frame[target_column]

    regressor = sklearn.linear_model.LinearRegression().fit(inputs, targets)
    context.set_model(regressor, features=inputs.columns)
54
-
55
-
56
def logistic_regression(context: wizata_dsapi.Context):
    """
    Fit a logistic regression classifier that predicts the target column from every other column.

    The target column name is read from ``context.properties["target_feat"]``
    and its values are cast to int before fitting. The fitted model is stored
    on the context together with the names of the input columns.

    :param context: execution context providing the dataframe, properties and model config.
    :raises ValueError: if the model config declares no target feat.
    """
    frame = context.dataframe

    if not context.get_model_config().has_target_feat():
        raise ValueError(f'logistic_regression requires a target feat')

    target_column = context.properties["target_feat"]

    # Everything except the target column is used as model input.
    inputs = frame.drop(columns=[target_column])
    labels = frame[target_column]

    classifier = sklearn.linear_model.LogisticRegression()
    classifier.fit(inputs, labels.astype(int))
    context.set_model(classifier, features=inputs.columns)
72
-
73
-
74
def isolation_forest(context: wizata_dsapi.Context):
    """
    Train an Isolation Forest for unsupervised anomaly detection.

    The ``sensitivity`` property is an integer level from 1 to 5 that selects
    the contamination passed to the model (0.05, 0.15, 0.25, 0.35, 0.4).

    :param context: execution context providing the dataframe, properties and model config.
    :return: a copy of the context dataframe with an extra
        ``isolation_forest_predict`` column holding the ``fit_predict`` output.
    :raises ValueError: if the model config declares a target feat, or if
        ``sensitivity`` is missing, None, not convertible to int, or outside 1-5.
    """
    model_config = context.get_model_config()
    if model_config.has_target_feat():
        raise ValueError('isolation_forest does not requires a target feat')

    sensitivities = [0.05, 0.15, 0.25, 0.35, 0.4]
    try:
        raw_sensitivity = context.properties['sensitivity']
        if raw_sensitivity is None:
            raise KeyError("sensitivity is none")
        sensitivity = int(raw_sensitivity)
        # Reject out-of-range levels explicitly: 0 and negative values would
        # otherwise wrap around through negative list indexing.
        if not 1 <= sensitivity <= len(sensitivities):
            raise IndexError(f"sensitivity {sensitivity} is out of range")
        contamination = sensitivities[sensitivity - 1]
    except (KeyError, IndexError, TypeError, ValueError) as e:
        raise ValueError(f'cannot extract sensitivity integer from 1 to 5 due to {e}') from e

    df = context.dataframe.copy()
    # Capture the input columns before the prediction column is appended, so
    # the model is registered with the features it was actually fitted on.
    features = df.columns

    model = sklearn.ensemble.IsolationForest(contamination=contamination)
    df['isolation_forest_predict'] = model.fit_predict(df)
    context.set_model(model, features=features)
    return df
95
-
96
-
97
def gradiant_boost_classifier(context: wizata_dsapi.Context):
    """
    Train a Gradient Boosting classifier that predicts the target column from every other column.

    The target column name is read from ``context.properties["target_feat"]``.
    The model is fitted with ``random_state=0`` and stored on the context
    together with the names of the input columns.

    :param context: execution context providing the dataframe, properties and model config.
    :raises ValueError: if the model config declares no target feat.
    """
    df = context.dataframe

    model_config = context.get_model_config()
    if not model_config.has_target_feat():
        raise ValueError('gradiant_boost_classifier requires a target feat')
    target_feat_name = context.properties["target_feat"]

    x = df.drop(columns=[target_feat_name])
    y = df[target_feat_name]

    model = sklearn.ensemble.GradientBoostingClassifier(random_state=0).fit(x, y)
    # Register only the input columns: df.columns would also list the target
    # column, which the model was not fitted on as a feature.
    context.set_model(model, features=x.columns)
111
-
112
-
113
-
114
-
115
-
116
-
@@ -1 +0,0 @@
1
- from .common import filter_df, fillna, clustering, merge, target_feat_to_binary
@@ -1,122 +0,0 @@
1
- import wizata_dsapi
2
-
3
- import pandas
4
- import numpy
5
-
6
- import sklearn
7
- import sklearn.cluster
8
- import sklearn.metrics
9
- import sklearn.ensemble
10
-
11
-
12
def filter_df(context: wizata_dsapi.Context):
    """
    Apply every pandas query string of the ``filters`` property, in order, to a copy of the dataframe.

    :param context: execution context providing ``dataframe`` and ``properties``.
    :return: the filtered copy; the context dataframe itself is not modified.
    :raises ValueError: if ``filters`` is missing or not a list, or if pandas
        reports a ParserError for one of the filter strings.
    """
    props = context.properties
    if not ("filters" in props and isinstance(props['filters'], list)):
        raise ValueError(f'there is no list *filters* in properties - please set them on context or config')

    result = context.dataframe.copy()

    # Each query narrows the outcome of the previous one.
    for expression in props['filters']:
        try:
            result = result.query(expression)
        except pandas.errors.ParserError as e:
            raise ValueError(f"error parsing filter string '{expression}': {e}")

    return result
28
-
29
-
30
def clustering(context: wizata_dsapi.Context):
    """
    K-means clustering with automatic cluster count selection via silhouette score.

    The dataframe is standard-scaled, then K-means is fitted for every cluster
    count from 2 up to ``min(10, number of rows) - 1``. The labelling with the
    highest silhouette score is kept.

    :param context: execution context providing the dataframe.
    :return: a copy of the context dataframe with an integer ``cluster_labels``
        column. Labels are shifted by one, so they start at 1; when no
        candidate produced at least two clusters every row gets label 1.
    """
    # Imported locally because the scaler lives in sklearn.preprocessing, which
    # is not among the sklearn submodules this function otherwise references.
    import sklearn.preprocessing

    df = context.dataframe.copy()
    scaled = sklearn.preprocessing.StandardScaler().fit_transform(df)

    best_score = None
    best_labels = None
    for num_clusters in range(2, min(10, scaled.shape[0])):
        labels = sklearn.cluster.KMeans(n_clusters=num_clusters).fit(scaled).labels_

        # A silhouette score needs at least two distinct clusters; skip
        # degenerate fits instead of recording NaN, which would make a later
        # max() over the scores order-dependent.
        if len(numpy.unique(labels)) < 2:
            continue

        score = sklearn.metrics.silhouette_score(scaled, labels)
        if best_score is None or score > best_score:
            # Keep the labels that were scored: K-means is not seeded here, so
            # a refit with the same cluster count may not reproduce them.
            best_score, best_labels = score, labels

    df['cluster_labels'] = 0 if best_labels is None else best_labels
    df['cluster_labels'] = df['cluster_labels'].apply(lambda x: int(x + 1))

    return df
60
-
61
-
62
def merge(context: wizata_dsapi.Context):
    """
    Join all current dataframes of the context on their index.

    The join type comes from the ``how`` property and defaults to ``outer``.
    Dataframes are merged one after another in iteration order.

    :param context: execution context providing ``current_dataframes()`` and ``properties``.
    :return: the merged dataframe.
    :raises ValueError: if fewer than two dataframes are available.
    """
    frames = context.current_dataframes()
    if not len(frames) > 1:
        raise ValueError(f'there is not enough dataframes to concat')

    how = context.properties["how"] if "how" in context.properties else "outer"

    merged = None
    for frame in (frames[name] for name in frames):
        # The first frame seeds the result; every later one is joined onto it.
        merged = frame if merged is None else merged.merge(
            frame, how=how, left_index=True, right_index=True
        )
    return merged
79
-
80
-
81
def fillna(context: wizata_dsapi.Context):
    """
    Replace missing values column by column, in place.

    The ``fillna`` property maps each column name to the value that should
    replace its missing entries; columns not listed are left untouched.

    :param context: execution context providing ``dataframe`` and ``properties``.
    :return: the context dataframe after filling.
    :raises KeyError: if the ``fillna`` property is not set.
    """
    frame = context.dataframe

    if "fillna" not in context.properties:
        raise KeyError(f'please set a property dict fillna')

    for column, replacement in context.properties["fillna"].items():
        frame[column] = frame[column].fillna(value=replacement)

    return frame
92
-
93
-
94
def target_feat_to_binary(context: wizata_dsapi.Context):
    """
    Binarise the target column in place by comparing it against a threshold.

    Reads ``context.properties["target_feat"]``, a mapping with the keys
    ``sensor`` (column name), ``operator`` (one of lt, lte, gt, gte) and
    ``threshold``. Rows satisfying the comparison become 1, all others 0.

    :param context: execution context providing ``dataframe`` and ``properties``.
    :return: the context dataframe, with the target column overwritten by 0/1 values.
    :raises KeyError: if ``target_feat`` is not configured, the operator is not
        recognised, or the resulting column does not hold exactly two classes.
    """
    frame = context.dataframe

    if "target_feat" not in context.properties:
        raise KeyError(f'please set a target feature to transform to binary class')

    settings = context.properties["target_feat"]
    column = settings["sensor"]
    comparison = settings["operator"]
    limit = settings["threshold"]

    # Supported operator names paired with the comparison they stand for.
    comparators = (
        ('lt', lambda values: values < limit),
        ('lte', lambda values: values <= limit),
        ('gt', lambda values: values > limit),
        ('gte', lambda values: values >= limit),
    )
    for name, compare in comparators:
        if comparison == name:
            frame[column] = numpy.where(compare(frame[column]), 1, 0)
            break
    else:
        # No operator name matched.
        raise KeyError(f'operator type for binarisation not know')

    # Check if at least 1 value of each class
    class_count = frame[column].nunique()
    if class_count == 1:
        raise KeyError(f'classification model requires 2 classes, only one was detected')
    if class_count > 2:
        raise KeyError(f'classification model requires 2 classes, more than 2 were detected')

    return frame
@@ -1 +0,0 @@
1
- __version__ = "2.0.0.dev23"