wizata-dsapi 2.0.0.dev26__tar.gz → 2.0.0.dev28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. {wizata_dsapi-2.0.0.dev26/wizata_dsapi.egg-info → wizata_dsapi-2.0.0.dev28}/PKG-INFO +1 -1
  2. wizata_dsapi-2.0.0.dev28/wizata_dsapi/plots/__init__.py +2 -0
  3. wizata_dsapi-2.0.0.dev28/wizata_dsapi/plots/common.py +519 -0
  4. wizata_dsapi-2.0.0.dev28/wizata_dsapi/version.py +1 -0
  5. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28/wizata_dsapi.egg-info}/PKG-INFO +1 -1
  6. wizata_dsapi-2.0.0.dev26/wizata_dsapi/plots/__init__.py +0 -2
  7. wizata_dsapi-2.0.0.dev26/wizata_dsapi/plots/common.py +0 -270
  8. wizata_dsapi-2.0.0.dev26/wizata_dsapi/version.py +0 -1
  9. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/LICENSE.txt +0 -0
  10. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/README.rst +0 -0
  11. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/setup.cfg +0 -0
  12. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/setup.py +0 -0
  13. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/__init__.py +0 -0
  14. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/api_config.py +0 -0
  15. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/api_dto.py +0 -0
  16. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/api_interface.py +0 -0
  17. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/bucket.py +0 -0
  18. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/business_label.py +0 -0
  19. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/context.py +0 -0
  20. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/dashboard.py +0 -0
  21. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/dataframe_toolkit.py +0 -0
  22. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/datapoint.py +0 -0
  23. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/datastore.py +0 -0
  24. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/ds_dataframe.py +0 -0
  25. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/dsapi_json_encoder.py +0 -0
  26. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/edge_config.py +0 -0
  27. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/edge_device.py +0 -0
  28. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/edge_module.py +0 -0
  29. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/evaluation.py +0 -0
  30. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/execution.py +0 -0
  31. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/execution_log.py +0 -0
  32. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/experiment.py +0 -0
  33. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/graylog_log.py +0 -0
  34. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/group_system.py +0 -0
  35. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/ilogger.py +0 -0
  36. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/insight.py +0 -0
  37. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/mlmodel.py +0 -0
  38. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/mobile_asset.py +0 -0
  39. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/model_toolkit.py +0 -0
  40. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/models/__init__.py +0 -0
  41. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/models/common.py +0 -0
  42. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/notification.py +0 -0
  43. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/paged_query_result.py +0 -0
  44. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/pipeline.py +0 -0
  45. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/pipeline_image.py +0 -0
  46. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/plot.py +0 -0
  47. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/plots/theme.py +0 -0
  48. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/request.py +0 -0
  49. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/script.py +0 -0
  50. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/scripts/__init__.py +0 -0
  51. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/scripts/common.py +0 -0
  52. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/search.py +0 -0
  53. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/solution_component.py +0 -0
  54. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/streamlit_utils.py +0 -0
  55. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/template.py +0 -0
  56. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/template_config.py +0 -0
  57. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/trigger.py +0 -0
  58. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/twin.py +0 -0
  59. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/twinregistration.py +0 -0
  60. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/user.py +0 -0
  61. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/wizard_function.py +0 -0
  62. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/wizard_request.py +0 -0
  63. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/wizata_dsapi_client.py +0 -0
  64. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi/words.py +0 -0
  65. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi.egg-info/SOURCES.txt +0 -0
  66. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi.egg-info/dependency_links.txt +0 -0
  67. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi.egg-info/requires.txt +0 -0
  68. {wizata_dsapi-2.0.0.dev26 → wizata_dsapi-2.0.0.dev28}/wizata_dsapi.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wizata_dsapi
3
- Version: 2.0.0.dev26
3
+ Version: 2.0.0.dev28
4
4
  Summary: Wizata Data Science Toolkit
5
5
  Author: Wizata S.A.
6
6
  Author-email: info@wizata.com
@@ -0,0 +1,2 @@
1
+ from .common import ts_chart, confusion_matrix, r_squared, anomalies_chart, parallel_coordinates, data_table, setpoint_recommendation, feature_importance, process_variability
2
+ from . import theme
@@ -0,0 +1,519 @@
1
+ import pandas
2
+ import wizata_dsapi
3
+ import numpy as np
4
+ import pandas as pd
5
+ import plotly.graph_objects as go
6
+ import plotly.express as px
7
+ import sklearn.metrics
8
+
9
+ from plotly.subplots import make_subplots
10
+
11
+ from .theme import (
12
+ apply_theme, SERIES_COLORS, ANOMALY_COLOR, MAIN_COLOR, POSITIVE_COLOR,
13
+ TEXT_MUTED, WHITE,
14
+ TABLE_HEADER_BG, TABLE_HEADER_TEXT, TABLE_CELL_BG, TABLE_CELL_TEXT,
15
+ TABLE_LINE_COLOR, FONT_FAMILY, FONT_SIZE,
16
+ )
17
+
18
+
19
def check_single_column_and_target_feat(context: wizata_dsapi.Context):
    """
    Fetch the single predicted column and its reference for the step's unique input.

    The predicted dataframe is looked up in ``context.dataframes`` under the name of the
    step's unique input, the reference under that same name suffixed with '.reference'.
    The column to keep is read from ``context.properties['output_columns_names']`` (a str,
    or a list holding exactly one entry).

    :param context: execution context exposing ``step``, ``dataframes`` and ``properties``.
    :return: tuple (predicted dataframe restricted to the output column, reference); a
        reference Series is wrapped into a DataFrame built on the predicted index.
    :raises ValueError: if either dataframe is missing, if the reference is not
        one-dimensional, or if several output column names are configured.
    :raises RuntimeError: if 'output_columns_names' is absent from the properties.
    :raises TypeError: if the output column name is neither a str nor a list, or if the
        predicted object is not a pandas DataFrame.
    """
    frame_key = context.step.get_unique_input().dataframe
    reference_key = frame_key + '.reference'
    frames = context.dataframes
    if not (frame_key in frames and reference_key in frames):
        raise ValueError(f'impossible to find {frame_key} dataframe and reference inside the context ')

    predicted = frames[frame_key]
    reference = frames[reference_key]
    if reference.ndim != 1:
        raise ValueError('please use a model that predict only one serie/dimension.')

    if "output_columns_names" not in context.properties:
        raise RuntimeError('there is no output columns in properties, r squared cannot find results')

    target = context.properties["output_columns_names"]
    if isinstance(target, list):
        # A list is accepted only when it names exactly one column.
        if len(target) != 1:
            raise ValueError('please use a model that predict only one serie/dimension - mulitple column names')
        target = target[0]
    elif not isinstance(target, str):
        raise TypeError(f'column_name is not a str or a list {target.__class__.__name__}')

    if not isinstance(predicted, pandas.DataFrame):
        raise TypeError(f'predicted dataframe is not a dataframe {predicted.__class__.__name__}')
    predicted = predicted.copy()[[target]]

    if isinstance(reference, pandas.Series):
        reference = pandas.DataFrame(reference, index=predicted.index)
    return predicted, reference
54
+
55
+
56
def confusion_matrix(context: wizata_dsapi.Context):
    """Draw the confusion matrix of the prediction against its reference as a heatmap.

    The matrix is computed with ``sklearn.metrics.confusion_matrix(reference, prediction)``
    and its columns are mirrored so they follow the hard-coded 'Positive', 'Negative'
    x labels (the labels assume a binary problem). Every cell is annotated with its count.
    The themed figure is handed to ``context.set_plot`` under the name "confusion_matrix".

    :param context: execution context; see check_single_column_and_target_feat for the
        dataframes and properties it must provide.
    """
    predicted, actual = check_single_column_and_target_feat(context)
    matrix = np.flip(sklearn.metrics.confusion_matrix(actual, predicted), axis=1)

    heatmap = go.Heatmap(
        z=matrix,
        x=['Positive', 'Negative'],
        y=['Negative', 'Positive'],
        colorscale=[[0, '#062B56'], [1, '#E64600']],
        colorbar=dict(title='Count', tickfont=dict(color=TEXT_MUTED)),
    )
    fig = go.Figure(data=heatmap)

    # Counts above half of the largest cell get white text, the others the muted color.
    half_peak = np.max(matrix) / 2 if matrix.size else 0
    for row_pos, row in enumerate(matrix):
        for col_pos, count in enumerate(row):
            text_color = WHITE if count > half_peak else TEXT_MUTED
            fig.add_annotation(
                x=col_pos,
                y=row_pos,
                text=str(count),
                showarrow=False,
                font=dict(color=text_color),
            )

    fig.update_layout(xaxis=dict(title='Predicted'), yaxis=dict(title='Actual'))
    apply_theme(fig)
    context.set_plot(figure=fig, name="confusion_matrix")
91
+
92
+
93
def r_squared(context: wizata_dsapi.Context):
    """Scatter the predicted values against the reference values and display the R² score.

    The score comes from ``sklearn.metrics.r2_score(reference, prediction)``. A dashed
    diagonal spans the smallest to the largest value found in either series, and the score
    is shown in a boxed annotation placed at (min, max). The themed figure is handed to
    ``context.set_plot`` under the name "r_squared".

    :param context: execution context; see check_single_column_and_target_feat for the
        dataframes and properties it must provide.
    """
    predicted, actual = check_single_column_and_target_feat(context)
    score = sklearn.metrics.r2_score(actual, predicted)

    actual_values = actual.values
    predicted_values = predicted.values
    low = min(actual_values.min(), predicted_values.min())
    high = max(actual_values.max(), predicted_values.max())

    points = go.Scatter(
        x=actual_values.flatten(),
        y=predicted_values.flatten(),
        mode='markers',
        name='Data Points',
        marker=dict(color=SERIES_COLORS[0], size=5, opacity=0.7),
    )
    # Reference line on which a perfect prediction would fall.
    diagonal = go.Scatter(
        x=[low, high],
        y=[low, high],
        mode='lines',
        line=dict(color=TEXT_MUTED, dash='dash'),
        showlegend=False,
    )

    fig = go.Figure()
    fig.add_trace(points)
    fig.add_trace(diagonal)
    fig.add_annotation(
        x=low,
        y=high,
        text=f'R\u00b2 = {score:.4f}',
        showarrow=False,
        font=dict(color=WHITE, size=12),
        bgcolor='#184980',
        bordercolor='rgba(255,255,255,0.2)',
        borderwidth=1,
        borderpad=4,
        opacity=0.9,
    )
    fig.update_layout(xaxis=dict(title='Actual'), yaxis=dict(title='Predicted'))

    apply_theme(fig)
    context.set_plot(figure=fig, name="r_squared")
142
+
143
+
144
def ts_chart(context: wizata_dsapi.Context):
    """Draw one line per dataframe column against the dataframe index.

    Line colors cycle through SERIES_COLORS in column order. The themed figure is handed
    to ``context.set_plot`` under the name "ts_chart".

    :param context: execution context providing ``dataframe``.
    """
    frame = context.dataframe
    lines = [
        go.Scatter(
            x=frame.index,
            y=frame[label],
            mode='lines',
            name=label,
            line=dict(color=SERIES_COLORS[position % len(SERIES_COLORS)]),
        )
        for position, label in enumerate(frame.columns)
    ]

    fig = go.Figure(lines)
    apply_theme(fig)
    context.set_plot(figure=fig, name="ts_chart")
165
+
166
+
167
def anomalies_chart(context: wizata_dsapi.Context):
    """Plot time-series signals and shade every detected anomaly occurrence.

    Every column except 'anomalies_type' is drawn as a line against the dataframe index,
    cycling through SERIES_COLORS. Consecutive rows sharing the same non-zero
    'anomalies_type' value form one occurrence; each occurrence is shaded from its first
    to its last index value with ANOMALY_COLOR.

    Occurrence bounds are read directly from the dataframe index, so the index may carry
    any name (or none) instead of having to be called 'Timestamp'.

    :param context: execution context providing ``dataframe``; the themed figure is handed
        to ``context.set_plot`` under the name "anomalies_chart".
    :raises KeyError: if the dataframe has no 'anomalies_type' column.
    """
    df = context.dataframe
    if "anomalies_type" not in df.columns:
        raise KeyError("anomalies_chart: 'anomalies_type' column not found in dataframe")

    # Add signals
    signal_columns = [column for column in df.columns if column != "anomalies_type"]
    fig = go.Figure([
        go.Scatter(
            x=df.index,
            y=df[column],
            mode='lines',
            name=column,
            line=dict(color=SERIES_COLORS[color_idx % len(SERIES_COLORS)]),
        )
        for color_idx, column in enumerate(signal_columns)
    ])

    # Add anomalies as highlighted bands.
    anomaly_type = df["anomalies_type"]
    # A new occurrence starts on every row whose type differs from the previous row; this
    # also covers normal <-> anomalous transitions because "normal" is the value 0.
    occurrence_id = (anomaly_type != anomaly_type.shift(1)).cumsum()
    is_anomalous = (anomaly_type != 0).values
    occurrences = pd.DataFrame({"at": df.index, "occurrence": occurrence_id.values})[is_anomalous]
    bounds = occurrences.groupby("occurrence")["at"].agg(["first", "last"])

    # NOTE(review): opacity=1 relies on ANOMALY_COLOR carrying its own transparency,
    # otherwise the band would hide the lines beneath it - confirm against the theme.
    for start, end in zip(bounds["first"], bounds["last"]):
        fig.add_vrect(x0=start, x1=end, line_width=0, fillcolor=ANOMALY_COLOR, opacity=1)

    apply_theme(fig)
    context.set_plot(figure=fig, name="anomalies_chart")
208
+
209
+
210
def parallel_coordinates(context: wizata_dsapi.Context):
    """Draw a parallel-coordinates plot of the dataframe, colored by 'anomalies_type'.

    The dataframe itself is passed as ``dimensions`` and the Portland diverging scale is
    used for the line colors. The themed figure is handed to ``context.set_plot`` under the
    name "parallel_coordinates".

    :param context: execution context providing ``dataframe``.
    """
    frame = context.dataframe
    plot_options = dict(
        color='anomalies_type',
        dimensions=frame,
        color_continuous_scale=px.colors.diverging.Portland,
    )

    fig = px.parallel_coordinates(frame, **plot_options)
    apply_theme(fig)
    context.set_plot(figure=fig, name="parallel_coordinates")
224
+
225
+
226
def setpoint_recommendation(context: wizata_dsapi.Context):
    """Show current and recommended setpoint values side by side, with a summary table below.

    A column ``<name>_recommended`` is paired with the column ``<name>`` when both exist in
    the dataframe. When ``context.datapoints`` is non-empty, a pair is kept only if
    ``<name>`` is found there with business type ``BusinessType.SET_POINTS``. Values are
    read from the last row of the dataframe. The upper subplot is a grouped horizontal bar
    chart (Recommended / Current); the lower one is a table listing Setpoint, Current,
    Recommended, delta and delta in percent. The percentage is rendered as a dash when the
    current value is zero or missing.

    :param context: execution context providing ``dataframe`` and ``datapoints``; the themed
        figure is handed to ``context.set_plot`` under the name "setpoint_recommendation".
    :raises ValueError: if the dataframe is None or empty, or if no pair is found.
    """
    df = context.dataframe
    datapoints = context.datapoints or {}

    if df is None or len(df) == 0:
        raise ValueError("setpoint_recommendation: dataframe is empty")

    suffix = "_recommended"

    def is_setpoint(tag):
        # With no datapoints available there is nothing to validate against.
        if not datapoints:
            return True
        datapoint = datapoints.get(tag)
        return datapoint is not None and datapoint.business_type == wizata_dsapi.BusinessType.SET_POINTS

    pairs = []
    for column in df.columns:
        if isinstance(column, str) and column.endswith(suffix):
            base = column[: -len(suffix)]
            if base in df.columns and is_setpoint(base):
                pairs.append((base, column))

    if not pairs:
        raise ValueError(
            "setpoint_recommendation: no <col>_recommended / setpoint pairs found in the dataframe"
        )

    # One record per setpoint, taken from the most recent row.
    latest = df.iloc[-1]
    records = []
    for base, rec in pairs:
        current = float(latest[base])
        recommended = float(latest[rec])
        delta = recommended - current
        if current in (0, 0.0) or pd.isna(current):
            pct = float("nan")
        else:
            pct = delta / current * 100.0
        records.append((base, current, recommended, delta, pct))
    names, currents, recommendeds, deltas, pcts = (list(series) for series in zip(*records))

    # ── Build combined figure: bar chart (top) + table (bottom) ──────────
    fig = make_subplots(
        rows=2, cols=1,
        specs=[[{"type": "xy"}], [{"type": "domain"}]],
        row_heights=[0.55, 0.45],
        vertical_spacing=0.08,
    )

    def value_bar(values, label, color):
        # Horizontal bar series labelled with its values at two decimals.
        return go.Bar(
            y=names,
            x=values,
            orientation="h",
            name=label,
            marker_color=color,
            text=[f"{v:.2f}" for v in values],
            textposition="inside",
            insidetextanchor="start",
            textfont=dict(color=WHITE, size=FONT_SIZE),
        )

    fig.add_trace(value_bar(recommendeds, "Recommended Value", POSITIVE_COLOR), row=1, col=1)
    fig.add_trace(value_bar(currents, "Current Value", MAIN_COLOR), row=1, col=1)

    fig.update_layout(barmode="group")
    fig.update_xaxes(title_text="Values", row=1, col=1)
    fig.update_yaxes(title_text="Setpoints", row=1, col=1)

    # Summary table
    def as_text(value):
        return "—" if pd.isna(value) else f"{value:.3f}"

    numeric_cells = [[as_text(v) for v in series] for series in (currents, recommendeds, deltas, pcts)]
    row_colors = [TABLE_CELL_BG[row % 2] for row in range(len(names))]

    table = go.Table(
        header=dict(
            values=["<b>Setpoint</b>", "<b>Current</b>", "<b>Recommended</b>",
                    "<b>\u0394</b>", "<b>\u0394 (%)</b>"],
            fill_color=TABLE_HEADER_BG,
            font=dict(family=FONT_FAMILY, size=FONT_SIZE, color=TABLE_HEADER_TEXT),
            align="left",
            line_color=TABLE_LINE_COLOR,
            height=32,
        ),
        cells=dict(
            values=[names, *numeric_cells],
            fill_color=[row_colors],
            font=dict(family=FONT_FAMILY, size=FONT_SIZE, color=TABLE_CELL_TEXT),
            align="left",
            line_color=TABLE_LINE_COLOR,
            height=28,
        ),
    )
    fig.add_trace(table, row=2, col=1)

    fig.update_layout(
        title="Actual vs Recommended Values",
        legend=dict(orientation="h", y=1.02, x=0.5, xanchor="center"),
    )
    apply_theme(fig)

    context.set_plot(figure=fig, name="setpoint_recommendation")
347
+
348
+
349
def feature_importance(context: wizata_dsapi.Context):
    """Plot the top-N feature importances of a trained model as a horizontal bar chart.

    Uses the first entry of ``context.models`` whose ``trained_model`` exposes
    ``feature_importances_`` (RandomForest, GradientBoosting and similar tree-based models).
    Feature labels come from the model's ``input_columns``; when that is None, generated
    labels f0, f1, ... are used. Rows are sorted by ascending importance and the ``top_n``
    largest are kept, so the most important feature is the last bar of the trace.

    Property: top_n (default 15, also applied when the property is missing or None) —
    maximum number of features to display; must be at least 1.

    :param context: execution context providing ``properties`` and ``models``; the themed
        figure is handed to ``context.set_plot`` under the name "feature_importance".
    :raises ValueError: if top_n is lower than 1, if no model exposing
        ``feature_importances_`` is found, or if the number of importances differs from the
        number of input columns.
    """
    raw_top_n = context.properties.get("top_n")
    top_n = 15 if raw_top_n is None else int(raw_top_n)
    if top_n < 1:
        # tail(0) would give an empty chart and a negative value would silently drop rows.
        raise ValueError(f"feature_importance: top_n must be a positive integer, got {top_n}")

    selected = next(
        (
            model
            for model in (context.models or {}).values()
            if hasattr(getattr(model, "trained_model", None), "feature_importances_")
        ),
        None,
    )
    if selected is None:
        raise ValueError(
            "feature_importance: no trained model with feature_importances_ found in context.models "
            "(supported by RandomForest, GradientBoosting, and similar tree-based models)"
        )

    importances = np.asarray(selected.trained_model.feature_importances_)
    if selected.input_columns is not None:
        feature_names = list(selected.input_columns)
    else:
        feature_names = [f"f{i}" for i in range(len(importances))]

    if len(feature_names) != len(importances):
        raise ValueError(
            f"feature_importance: feature_importances_ length {len(importances)} does not match "
            f"input_columns length {len(feature_names)}"
        )

    fi = pd.DataFrame({"feature": feature_names, "importance": importances})
    fi = fi.sort_values("importance", ascending=True).tail(top_n)

    fig = go.Figure(go.Bar(
        x=fi["importance"],
        y=fi["feature"],
        orientation="h",
        marker_color=SERIES_COLORS[0],
    ))
    fig.update_layout(
        # len(fi) never exceeds top_n after tail(), so it is the number actually shown.
        title=f"Top-{len(fi)} Feature Importances",
        xaxis=dict(title="Importance"),
        yaxis=dict(title=""),
        margin=dict(l=200, r=20, t=40, b=40),
    )
    apply_theme(fig)

    context.set_plot(figure=fig, name="feature_importance")
402
+
403
+
404
def process_variability(context: wizata_dsapi.Context):
    """Parallel coordinates plot showing configurations variability across process features,
    colored by a quality/outcome column. Each line is one row of the dataframe; each vertical
    axis is a numeric column that holds at least one non-null value, ranged from its minimum
    to its maximum.

    Column selection:
    - color_by: property naming the column used for coloring (defaults to
      context.properties['target_feat'] if set, otherwise the last numeric column). The
      column must exist in the dataframe and be numeric.
    - color_reverse: true (default) when a LOWER color_by value is better
      (e.g. residual_co2) — low values use POSITIVE_COLOR. False when higher is better
      (e.g. 'good_bottles') — high values use POSITIVE_COLOR. The strings "false", "0"
      and "no" (any case) are read as false; any other string as true.

    :param context: execution context providing ``dataframe`` and ``properties``; the themed
        figure is handed to ``context.set_plot`` under the name "process_variability".
    :raises ValueError: if the dataframe is None or empty, has fewer than 2 numeric columns,
        or if the color_by column is missing or not numeric.
    """
    df = context.dataframe
    if df is None or len(df) == 0:
        raise ValueError("process_variability: dataframe is empty")

    numeric_cols = df.select_dtypes(include="number").columns.tolist()
    if len(numeric_cols) < 2:
        raise ValueError("process_variability: need at least 2 numeric columns")

    # Resolve color column: explicit property, then target feature, then last numeric column.
    color_by = context.properties.get("color_by")
    if color_by is None:
        color_by = context.properties.get("target_feat")
    if color_by is None:
        color_by = numeric_cols[-1]
    if color_by not in df.columns:
        raise ValueError(f"process_variability: color_by column '{color_by}' not found in dataframe")
    if not pd.api.types.is_numeric_dtype(df[color_by]):
        # Line colors are mapped onto a continuous colorscale, which needs numbers.
        raise ValueError(f"process_variability: color_by column '{color_by}' must be numeric")

    color_reverse = context.properties.get("color_reverse", True)
    if isinstance(color_reverse, str):
        color_reverse = color_reverse.lower() not in ("false", "0", "no")

    # Build dimensions for all numeric columns, skipping those without any value.
    dimensions = []
    for col in numeric_cols:
        col_vals = df[col].dropna()
        if col_vals.empty:
            continue
        dimensions.append(dict(
            label=col,
            values=df[col].values,
            range=[float(col_vals.min()), float(col_vals.max())],
        ))

    # Three-stop colorscale; POSITIVE_COLOR sits on the low end when lower is better.
    if color_reverse:
        colorscale = [[0, POSITIVE_COLOR], [0.5, "#FDDD60"], [1, "#FF6E76"]]
    else:
        colorscale = [[0, "#FF6E76"], [0.5, "#FDDD60"], [1, POSITIVE_COLOR]]

    fig = go.Figure(data=go.Parcoords(
        line=dict(
            color=df[color_by].values,
            colorscale=colorscale,
            showscale=True,
            colorbar=dict(title=color_by),
        ),
        dimensions=dimensions,
    ))

    fig.update_layout(title="Configurations Variability")
    apply_theme(fig)

    context.set_plot(figure=fig, name="process_variability")
470
+
471
+
472
def data_table(context: wizata_dsapi.Context):
    """Render a copy of the dataframe as a themed plotly table.

    A DatetimeIndex is turned into a leading text column formatted "%Y-%m-%d %H:%M:%S"
    (named after the index, or "Timestamp" when unnamed); any other named index becomes a
    regular column. Numeric columns are rounded to the ``precision`` property (default 4)
    and rows alternate between the two first TABLE_CELL_BG colors. The themed figure is
    handed to ``context.set_plot`` under the name "data_table".

    :param context: execution context providing ``dataframe`` and ``properties``.
    """
    table_df = context.dataframe.copy()

    # Expose the index as a column when it is a DatetimeIndex or carries a name.
    index = table_df.index
    if isinstance(index, pd.DatetimeIndex):
        table_df.insert(0, index.name or "Timestamp", index.strftime("%Y-%m-%d %H:%M:%S"))
        table_df = table_df.reset_index(drop=True)
    elif index.name is not None:
        table_df = table_df.reset_index()

    # Round numeric columns
    properties = context.properties
    precision = int(properties["precision"]) if "precision" in properties else 4
    for numeric_col in table_df.select_dtypes(include="number").columns:
        table_df[numeric_col] = table_df[numeric_col].round(precision)

    # Alternating row colors
    row_colors = [TABLE_CELL_BG[row % 2] for row in range(len(table_df))]

    header = dict(
        values=[f"<b>{label}</b>" for label in table_df.columns],
        fill_color=TABLE_HEADER_BG,
        font=dict(family=FONT_FAMILY, size=FONT_SIZE, color=TABLE_HEADER_TEXT),
        align="left",
        line_color=TABLE_LINE_COLOR,
        height=32,
    )
    cells = dict(
        values=[table_df[label].tolist() for label in table_df.columns],
        fill_color=[row_colors],
        font=dict(family=FONT_FAMILY, size=FONT_SIZE, color=TABLE_CELL_TEXT),
        align="left",
        line_color=TABLE_LINE_COLOR,
        height=28,
    )

    fig = go.Figure(data=[go.Table(header=header, cells=cells)])
    fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
    apply_theme(fig)

    context.set_plot(figure=fig, name="data_table")
@@ -0,0 +1 @@
1
+ __version__ = "2.0.0.dev28"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wizata_dsapi
3
- Version: 2.0.0.dev26
3
+ Version: 2.0.0.dev28
4
4
  Summary: Wizata Data Science Toolkit
5
5
  Author: Wizata S.A.
6
6
  Author-email: info@wizata.com
@@ -1,2 +0,0 @@
1
- from .common import ts_chart, confusion_matrix, r_squared, anomalies_chart, parallel_coordinates, data_table
2
- from . import theme
@@ -1,270 +0,0 @@
1
- import pandas
2
- import wizata_dsapi
3
- import numpy as np
4
- import pandas as pd
5
- import plotly.graph_objects as go
6
- import plotly.express as px
7
- import sklearn.metrics
8
-
9
- from .theme import (
10
- apply_theme, SERIES_COLORS, ANOMALY_COLOR, TEXT_MUTED, WHITE,
11
- TABLE_HEADER_BG, TABLE_HEADER_TEXT, TABLE_CELL_BG, TABLE_CELL_TEXT,
12
- TABLE_LINE_COLOR, FONT_FAMILY, FONT_SIZE,
13
- )
14
-
15
-
16
def check_single_column_and_target_feat(context: wizata_dsapi.Context):
    """
    Fetch the single predicted column and its reference for the step's unique input.

    The predicted dataframe is looked up in ``context.dataframes`` under the name of the
    step's unique input, the reference under that same name suffixed with '.reference'.
    The column to keep is read from ``context.properties['output_columns_names']`` (a str,
    or a list holding exactly one entry).

    :param context: execution context exposing ``step``, ``dataframes`` and ``properties``.
    :return: tuple (predicted dataframe restricted to the output column, reference); a
        reference Series is wrapped into a DataFrame built on the predicted index.
    :raises ValueError: if either dataframe is missing, if the reference is not
        one-dimensional, or if several output column names are configured.
    :raises RuntimeError: if 'output_columns_names' is absent from the properties.
    :raises TypeError: if the output column name is neither a str nor a list, or if the
        predicted object is not a pandas DataFrame.
    """
    frame_key = context.step.get_unique_input().dataframe
    reference_key = frame_key + '.reference'
    frames = context.dataframes
    if not (frame_key in frames and reference_key in frames):
        raise ValueError(f'impossible to find {frame_key} dataframe and reference inside the context ')

    predicted = frames[frame_key]
    reference = frames[reference_key]
    if reference.ndim != 1:
        raise ValueError('please use a model that predict only one serie/dimension.')

    if "output_columns_names" not in context.properties:
        raise RuntimeError('there is no output columns in properties, r squared cannot find results')

    target = context.properties["output_columns_names"]
    if isinstance(target, list):
        # A list is accepted only when it names exactly one column.
        if len(target) != 1:
            raise ValueError('please use a model that predict only one serie/dimension - mulitple column names')
        target = target[0]
    elif not isinstance(target, str):
        raise TypeError(f'column_name is not a str or a list {target.__class__.__name__}')

    if not isinstance(predicted, pandas.DataFrame):
        raise TypeError(f'predicted dataframe is not a dataframe {predicted.__class__.__name__}')
    predicted = predicted.copy()[[target]]

    if isinstance(reference, pandas.Series):
        reference = pandas.DataFrame(reference, index=predicted.index)
    return predicted, reference
51
-
52
-
53
def confusion_matrix(context: wizata_dsapi.Context):
    """Draw the confusion matrix of the prediction against its reference as a heatmap.

    The matrix is computed with ``sklearn.metrics.confusion_matrix(reference, prediction)``
    and its columns are mirrored so they follow the hard-coded 'Positive', 'Negative'
    x labels (the labels assume a binary problem). Every cell is annotated with its count.
    The themed figure is handed to ``context.set_plot`` under the name "confusion_matrix".

    :param context: execution context; see check_single_column_and_target_feat for the
        dataframes and properties it must provide.
    """
    predicted, actual = check_single_column_and_target_feat(context)
    matrix = np.flip(sklearn.metrics.confusion_matrix(actual, predicted), axis=1)

    heatmap = go.Heatmap(
        z=matrix,
        x=['Positive', 'Negative'],
        y=['Negative', 'Positive'],
        colorscale=[[0, '#062B56'], [1, '#E64600']],
        colorbar=dict(title='Count', tickfont=dict(color=TEXT_MUTED)),
    )
    fig = go.Figure(data=heatmap)

    # Counts above half of the largest cell get white text, the others the muted color.
    half_peak = np.max(matrix) / 2 if matrix.size else 0
    for row_pos, row in enumerate(matrix):
        for col_pos, count in enumerate(row):
            text_color = WHITE if count > half_peak else TEXT_MUTED
            fig.add_annotation(
                x=col_pos,
                y=row_pos,
                text=str(count),
                showarrow=False,
                font=dict(color=text_color),
            )

    fig.update_layout(xaxis=dict(title='Predicted'), yaxis=dict(title='Actual'))
    apply_theme(fig)
    context.set_plot(figure=fig, name="confusion_matrix")
88
-
89
-
90
def r_squared(context: wizata_dsapi.Context):
    """Scatter the predicted values against the reference values and display the R² score.

    The score comes from ``sklearn.metrics.r2_score(reference, prediction)``. A dashed
    diagonal spans the smallest to the largest value found in either series, and the score
    is shown in a boxed annotation placed at (min, max). The themed figure is handed to
    ``context.set_plot`` under the name "r_squared".

    :param context: execution context; see check_single_column_and_target_feat for the
        dataframes and properties it must provide.
    """
    predicted, actual = check_single_column_and_target_feat(context)
    score = sklearn.metrics.r2_score(actual, predicted)

    actual_values = actual.values
    predicted_values = predicted.values
    low = min(actual_values.min(), predicted_values.min())
    high = max(actual_values.max(), predicted_values.max())

    points = go.Scatter(
        x=actual_values.flatten(),
        y=predicted_values.flatten(),
        mode='markers',
        name='Data Points',
        marker=dict(color=SERIES_COLORS[0], size=5, opacity=0.7),
    )
    # Reference line on which a perfect prediction would fall.
    diagonal = go.Scatter(
        x=[low, high],
        y=[low, high],
        mode='lines',
        line=dict(color=TEXT_MUTED, dash='dash'),
        showlegend=False,
    )

    fig = go.Figure()
    fig.add_trace(points)
    fig.add_trace(diagonal)
    fig.add_annotation(
        x=low,
        y=high,
        text=f'R\u00b2 = {score:.4f}',
        showarrow=False,
        font=dict(color=WHITE, size=12),
        bgcolor='#184980',
        bordercolor='rgba(255,255,255,0.2)',
        borderwidth=1,
        borderpad=4,
        opacity=0.9,
    )
    fig.update_layout(xaxis=dict(title='Actual'), yaxis=dict(title='Predicted'))

    apply_theme(fig)
    context.set_plot(figure=fig, name="r_squared")
139
-
140
-
141
def ts_chart(context: wizata_dsapi.Context):
    """Draw one line per dataframe column against the dataframe index.

    Line colors cycle through SERIES_COLORS in column order. The themed figure is handed
    to ``context.set_plot`` under the name "ts_chart".

    :param context: execution context providing ``dataframe``.
    """
    frame = context.dataframe
    lines = [
        go.Scatter(
            x=frame.index,
            y=frame[label],
            mode='lines',
            name=label,
            line=dict(color=SERIES_COLORS[position % len(SERIES_COLORS)]),
        )
        for position, label in enumerate(frame.columns)
    ]

    fig = go.Figure(lines)
    apply_theme(fig)
    context.set_plot(figure=fig, name="ts_chart")
162
-
163
-
164
def anomalies_chart(context: wizata_dsapi.Context):
    """Plot time-series signals and shade every detected anomaly occurrence.

    Every column except 'anomalies_type' is drawn as a line against the dataframe index,
    cycling through SERIES_COLORS. Consecutive rows sharing the same non-zero
    'anomalies_type' value form one occurrence; each occurrence is shaded from its first
    to its last index value with ANOMALY_COLOR.

    Occurrence bounds are read directly from the dataframe index, so the index may carry
    any name (or none) instead of having to be called 'Timestamp'.

    :param context: execution context providing ``dataframe``; the themed figure is handed
        to ``context.set_plot`` under the name "anomalies_chart".
    :raises KeyError: if the dataframe has no 'anomalies_type' column.
    """
    df = context.dataframe
    if "anomalies_type" not in df.columns:
        raise KeyError("anomalies_chart: 'anomalies_type' column not found in dataframe")

    # Add signals
    signal_columns = [column for column in df.columns if column != "anomalies_type"]
    fig = go.Figure([
        go.Scatter(
            x=df.index,
            y=df[column],
            mode='lines',
            name=column,
            line=dict(color=SERIES_COLORS[color_idx % len(SERIES_COLORS)]),
        )
        for color_idx, column in enumerate(signal_columns)
    ])

    # Add anomalies as highlighted bands.
    anomaly_type = df["anomalies_type"]
    # A new occurrence starts on every row whose type differs from the previous row; this
    # also covers normal <-> anomalous transitions because "normal" is the value 0.
    occurrence_id = (anomaly_type != anomaly_type.shift(1)).cumsum()
    is_anomalous = (anomaly_type != 0).values
    occurrences = pd.DataFrame({"at": df.index, "occurrence": occurrence_id.values})[is_anomalous]
    bounds = occurrences.groupby("occurrence")["at"].agg(["first", "last"])

    # NOTE(review): opacity=1 relies on ANOMALY_COLOR carrying its own transparency,
    # otherwise the band would hide the lines beneath it - confirm against the theme.
    for start, end in zip(bounds["first"], bounds["last"]):
        fig.add_vrect(x0=start, x1=end, line_width=0, fillcolor=ANOMALY_COLOR, opacity=1)

    apply_theme(fig)
    context.set_plot(figure=fig, name="anomalies_chart")
205
-
206
-
207
def parallel_coordinates(context: wizata_dsapi.Context):
    """Draw a parallel-coordinates plot of the dataframe, colored by 'anomalies_type'.

    The dataframe itself is passed as ``dimensions`` and the Portland diverging scale is
    used for the line colors. The themed figure is handed to ``context.set_plot`` under the
    name "parallel_coordinates".

    :param context: execution context providing ``dataframe``.
    """
    frame = context.dataframe
    plot_options = dict(
        color='anomalies_type',
        dimensions=frame,
        color_continuous_scale=px.colors.diverging.Portland,
    )

    fig = px.parallel_coordinates(frame, **plot_options)
    apply_theme(fig)
    context.set_plot(figure=fig, name="parallel_coordinates")
221
-
222
-
223
def data_table(context: wizata_dsapi.Context):
    """Render a copy of the dataframe as a themed plotly table.

    A DatetimeIndex is turned into a leading text column formatted "%Y-%m-%d %H:%M:%S"
    (named after the index, or "Timestamp" when unnamed); any other named index becomes a
    regular column. Numeric columns are rounded to the ``precision`` property (default 4)
    and rows alternate between the two first TABLE_CELL_BG colors. The themed figure is
    handed to ``context.set_plot`` under the name "data_table".

    :param context: execution context providing ``dataframe`` and ``properties``.
    """
    table_df = context.dataframe.copy()

    # Expose the index as a column when it is a DatetimeIndex or carries a name.
    index = table_df.index
    if isinstance(index, pd.DatetimeIndex):
        table_df.insert(0, index.name or "Timestamp", index.strftime("%Y-%m-%d %H:%M:%S"))
        table_df = table_df.reset_index(drop=True)
    elif index.name is not None:
        table_df = table_df.reset_index()

    # Round numeric columns
    properties = context.properties
    precision = int(properties["precision"]) if "precision" in properties else 4
    for numeric_col in table_df.select_dtypes(include="number").columns:
        table_df[numeric_col] = table_df[numeric_col].round(precision)

    # Alternating row colors
    row_colors = [TABLE_CELL_BG[row % 2] for row in range(len(table_df))]

    header = dict(
        values=[f"<b>{label}</b>" for label in table_df.columns],
        fill_color=TABLE_HEADER_BG,
        font=dict(family=FONT_FAMILY, size=FONT_SIZE, color=TABLE_HEADER_TEXT),
        align="left",
        line_color=TABLE_LINE_COLOR,
        height=32,
    )
    cells = dict(
        values=[table_df[label].tolist() for label in table_df.columns],
        fill_color=[row_colors],
        font=dict(family=FONT_FAMILY, size=FONT_SIZE, color=TABLE_CELL_TEXT),
        align="left",
        line_color=TABLE_LINE_COLOR,
        height=28,
    )

    fig = go.Figure(data=[go.Table(header=header, cells=cells)])
    fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
    apply_theme(fig)

    context.set_plot(figure=fig, name="data_table")
@@ -1 +0,0 @@
1
- __version__ = "2.0.0.dev26"