syntaxmatrix 2.5.5.5__py3-none-any.whl → 2.5.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,550 @@
1
+ # === SMX Auto-Hardening Preface (do not edit) ===
2
+ import warnings, numpy as np, pandas as pd, matplotlib.pyplot as plt
3
+ warnings.filterwarnings('ignore')
4
+ from sklearn.preprocessing import OneHotEncoder
5
+ import inspect
6
+ import pandas as _pd
7
+ import numpy as _np # noqa: F811
8
+
9
+ from sklearn.metrics import (
10
+ r2_score,
11
+ mean_absolute_error,
12
+ accuracy_score,
13
+ precision_score,
14
+ recall_score,
15
+ f1_score,
16
+ roc_auc_score,
17
+ confusion_matrix,
18
+ ConfusionMatrixDisplay,
19
+ classification_report,
20
+ )
21
+
22
# Names exported by a star-import of this module. The underscore-prefixed
# helpers are listed explicitly because a star-import would skip them otherwise.
__all__ = [
    # plotting entrypoints
    "SB_histplot",
    "SB_barplot",
    "SB_boxplot",
    "SB_scatterplot",
    "SB_heatmap",

    # core helpers (all underscore-prefixed functions)
    "_SMX_caption_from_ctx",
    "_SMX_axes_have_titles",
    "_SMX_export_png",
    "_pick_df",
    "_pick_ax_slot",
    "_first_numeric",
    "_first_categorical",
    "_safe_plot",
    "_safe_concat",
    "_SMX_OHE",
    "_SMX_mm",
    "_SMX_call",
    "_SMX_rmse",
    "_SMX_autocoerce_dates",
    "_SMX_autocoerce_numeric",

    # display helper
    "smx_show",
    "show",

    # metrics (imported above from sklearn.metrics)
    "r2_score",
    "mean_absolute_error",
    "accuracy_score",
    "precision_score",
    "recall_score",
    "f1_score",
    "roc_auc_score",
    "confusion_matrix",
    "ConfusionMatrixDisplay",
    "classification_report",
]
63
+
64
+
65
# seaborn is optional. When it cannot be imported, `sns` becomes a stand-in
# whose attributes are plotting stubs, so a call such as `sns.histplot(...)`
# reports the problem through the display helper instead of raising.
try:
    import seaborn as sns
except Exception:
    class _Dummy:
        """Placeholder used in place of the seaborn module.

        The class name is significant: the SB_* wrappers in this module
        detect the placeholder by comparing the class name with '_Dummy'.
        """

        def __getattr__(self, name):
            # Dunder probes (__version__, __wrapped__, __getstate__, ...)
            # must fail like on any ordinary object; answering them with a
            # plot stub confuses introspection, copying and version checks.
            if name.startswith('__') and name.endswith('__'):
                raise AttributeError(name)

            def _f(*a, **k):
                # Imported lazily so the stub costs nothing until it is called.
                from syntaxmatrix.display import show
                show('⚠ seaborn not available; plot skipped.')
            return _f

    sns = _Dummy()
75
+
76
+ from syntaxmatrix.display import show as _SMX_base_show
77
+
78
+
79
# Bind the bare seaborn-style plot names to None at module level.
# NOTE(review): nothing in this file reads these names; presumably they are
# placeholders for generated code that references them -- confirm intent.
boxplot = barplot = histplot = distplot = lineplot = countplot = heatmap = pairplot = None
80
+
81
+
82
def smx_show(obj, title=None):
    """Display *obj* through the base ``show`` helper, adding a caption when possible.

    * A pandas DataFrame is shown as a Styler carrying the caption.
    * A dict whose values are all numbers is shown as a two-column
      ``metric`` / ``value`` table, captioned when styling works.
    * Anything else, or any failure along the way, falls back to showing
      *obj* unchanged.

    The caption is *title* when it is truthy, otherwise whatever
    ``_SMX_caption_from_ctx()`` returns.
    """
    try:
        import pandas as pd
        import numbers

        caption = title if title else _SMX_caption_from_ctx()

        if isinstance(obj, pd.DataFrame):
            try:
                styled = obj.style.set_caption(caption)
                return _SMX_base_show(styled)
            except Exception:
                # Styling is best-effort; fall through to the plain display.
                pass

        is_metric_dict = isinstance(obj, dict) and all(
            isinstance(value, numbers.Number) for value in obj.values()
        )
        if is_metric_dict:
            table = pd.DataFrame({'metric': list(obj.keys()), 'value': list(obj.values())})
            try:
                return _SMX_base_show(table.style.set_caption(caption))
            except Exception:
                return _SMX_base_show(table)
    except Exception:
        pass
    return _SMX_base_show(obj)
102
+
103
+
104
+ def _SMX_caption_from_ctx():
105
+ """
106
+ Look up refined_question / askai_question in caller frames,
107
+ falling back to 'Table' if not found.
108
+ """
109
+ import inspect
110
+
111
+ frame = inspect.currentframe()
112
+ while frame is not None:
113
+ g = frame.f_globals
114
+ t = g.get("refined_question") or g.get("askai_question")
115
+ if t:
116
+ return str(t).strip().splitlines()[0][:120]
117
+ frame = frame.f_back
118
+
119
+ return "Table"
120
+
121
+
122
+ def _SMX_axes_have_titles(fig=None):
123
+ import matplotlib.pyplot as _plt
124
+ fig = fig or _plt.gcf()
125
+ try:
126
+ for _ax in fig.get_axes():
127
+ if (_ax.get_title() or '').strip():
128
+ return True
129
+ except Exception:
130
+ pass
131
+ return False
132
+
133
+
134
def _SMX_export_png():
    """Show the current matplotlib figure inline as a base64 PNG, then close it.

    If no axes carries a title, the context caption is added as a figure
    suptitle first. The figure is rendered to an in-memory PNG, embedded in
    an ``<img>`` tag and handed to IPython's ``display``.

    The figure is closed even when rendering or displaying raises, so a
    failed export does not leave an open figure behind. Errors from
    rendering or display still propagate to the caller.
    """
    import base64
    import io

    fig = plt.gcf()
    try:
        try:
            if not _SMX_axes_have_titles(fig):
                fig.suptitle(_SMX_caption_from_ctx(), fontsize=10)
        except Exception:
            # Captioning is cosmetic; it must never block the export.
            pass

        buf = io.BytesIO()
        # Save the figure captured above rather than "whatever is current".
        fig.savefig(buf, format='png', bbox_inches='tight')

        from IPython.display import display, HTML
        _img = base64.b64encode(buf.getvalue()).decode('ascii')
        display(
            HTML(
                f"<img src='data:image/png;base64,{_img}' "
                "style='max-width:100%;height:auto;border:1px solid #ccc;border-radius:4px;'/>"
            )
        )
    finally:
        plt.close(fig)
154
+
155
+
156
+ def _pick_df():
157
+ """
158
+ Try to find `df` in the caller's context, not just this module.
159
+ This restores the behaviour we had when the preface lived inline.
160
+ """
161
+ import inspect
162
+
163
+ # 1) Check our own module globals first (just in case).
164
+ g = globals()
165
+ if "df" in g:
166
+ return g["df"]
167
+
168
+ # 2) Walk up the call stack looking for `df`
169
+ frame = inspect.currentframe().f_back
170
+ while frame is not None:
171
+ # locals of the frame
172
+ if "df" in frame.f_locals:
173
+ return frame.f_locals["df"]
174
+ # globals of the frame (e.g. the exec cell)
175
+ if "df" in frame.f_globals:
176
+ return frame.f_globals["df"]
177
+ frame = frame.f_back
178
+
179
+ return None
180
+
181
+
182
+ def _pick_ax_slot():
183
+ ax = None
184
+ try:
185
+ _axes = globals().get('axes', None)
186
+ import numpy as _np
187
+ if _axes is not None:
188
+ arr = _np.ravel(_axes)
189
+ for _a in arr:
190
+ try:
191
+ if hasattr(_a, 'has_data') and not _a.has_data():
192
+ ax = _a
193
+ break
194
+ except Exception:
195
+ continue
196
+ except Exception:
197
+ ax = None
198
+ return ax
199
+
200
+
201
+ def _first_numeric(_d):
202
+ import numpy as np, pandas as pd
203
+ try:
204
+ preferred = ["median_house_value", "price", "value", "target", "label", "y"]
205
+ for c in preferred:
206
+ if c in _d.columns and pd.api.types.is_numeric_dtype(_d[c]):
207
+ return c
208
+ cols = _d.select_dtypes(include=[np.number]).columns.tolist()
209
+ return cols[0] if cols else None
210
+ except Exception:
211
+ return None
212
+
213
+
214
+ def _first_categorical(_d):
215
+ import pandas as pd, numpy as np
216
+ try:
217
+ num = set(_d.select_dtypes(include=[np.number]).columns.tolist())
218
+ cand = [c for c in _d.columns if c not in num and _d[c].nunique(dropna=True) <= 50]
219
+ return cand[0] if cand else None
220
+ except Exception:
221
+ return None
222
+
223
+
224
+ def _safe_plot(func, *args, **kwargs):
225
+ try:
226
+ ax = func(*args, **kwargs)
227
+ if ax is None:
228
+ ax = plt.gca()
229
+ try:
230
+ if hasattr(ax, 'has_data') and not ax.has_data():
231
+ from syntaxmatrix.display import show as _show
232
+ _show('⚠ Empty plot: no data drawn.')
233
+ except Exception:
234
+ pass
235
+ try:
236
+ plt.tight_layout()
237
+ except Exception:
238
+ pass
239
+ return ax
240
+ except Exception as e:
241
+ from syntaxmatrix.display import show as _show
242
+ _show(f'⚠ Plot skipped: {type(e).__name__}: {e}')
243
+ return None
244
+
245
+
246
def SB_histplot(*a, **k):
    """Histogram wrapper with automatic column selection and a seaborn fallback.

    * Called with no arguments, or with a single leading None and no
      keywords: a ``df`` is looked up via ``_pick_df()`` and its first numeric
      column is plotted with ``plt.hist``. The axes is titled
      'Distribution of <column>' unless it already has a title.
    * Otherwise, if seaborn is unavailable, an empty histogram is drawn.
    * Otherwise the call is forwarded to ``sns.histplot``, with a leading
      None positional argument dropped.

    Drawing always goes through ``_safe_plot``, whose result is returned.
    """
    no_seaborn = sns.__class__.__name__ == '_Dummy'
    leading_none = bool(a) and a[0] is None

    if not k and (leading_none or not a):
        frame = _pick_df()
        column = _first_numeric(frame) if frame is not None else None
        if column is not None:
            def _auto_hist():
                plt.hist(frame[column].dropna())
                axis = plt.gca()
                if not (axis.get_title() or '').strip():
                    axis.set_title(f'Distribution of {column}')
                return axis

            return _safe_plot(_auto_hist)

    if no_seaborn:
        return _safe_plot(lambda: plt.hist([]))

    positional = a[1:] if leading_none else a
    return _safe_plot(getattr(sns, 'histplot', plt.hist), *positional, **k)
266
+
267
+
268
def SB_barplot(*a, **k):
    """Bar-plot wrapper with automatic column selection and a seaborn fallback.

    Args:
        *a: Positional arguments forwarded to ``sns.barplot``. A leading None
            is treated as a "pick for me" sentinel and dropped.
        **k: Keyword arguments forwarded to ``sns.barplot``. ``ax`` selects
            the target axes; when absent, ``_pick_ax_slot()`` may supply one.

    Behaviour:
        * Auto mode (no real positional argument and no keyword other than
          ``ax``): a ``df`` is looked up via ``_pick_df()`` and the mean of
          its first numeric column per value of its first categorical column
          is drawn, titled 'Mean <y> by <x>' unless already titled.
        * Otherwise, if seaborn is unavailable, an empty bar chart is drawn.
        * Otherwise the call is forwarded to ``sns.barplot``.

    Returns:
        Whatever ``_safe_plot`` returns: the axes, or None if drawing failed.
    """
    _missing = sns.__class__.__name__ == '_Dummy'
    _sentinel = bool(a) and a[0] is None

    # Decide on auto mode BEFORE an axes is injected into k. Testing `not k`
    # afterwards made this path unreachable whenever an axes was supplied or
    # found, which is the one case where `ax=k.get('ax')` below matters.
    _auto = (not a or _sentinel) and not (set(k) - {'ax'})

    _ax = k.get('ax') or _pick_ax_slot()
    if _ax is not None:
        try:
            plt.sca(_ax)
        except Exception:
            pass
        if k.get('ax') is None:
            k['ax'] = _ax

    if _auto:
        d = _pick_df()
        if d is not None:
            x = _first_categorical(d)
            y = _first_numeric(d)
            # Compare against None: a column may legitimately be named 0 or ''.
            if x is not None and y is not None:
                g = d.groupby(x)[y].mean().reset_index()

                def _draw():
                    if _missing:
                        plt.bar(g[x], g[y])
                    else:
                        sns.barplot(data=g, x=x, y=y, ax=k.get('ax'))
                    ax = plt.gca()
                    if not (ax.get_title() or '').strip():
                        ax.set_title(f'Mean {y} by {x}')
                    return ax

                return _safe_plot(_draw)

    if _missing:
        return _safe_plot(lambda: plt.bar([], []))
    if _sentinel:
        a = a[1:]
    return _safe_plot(sns.barplot, *a, **k)
303
+
304
+
305
def SB_boxplot(*a, **k):
    """Box-plot wrapper with automatic column selection and a seaborn fallback.

    Args:
        *a: Positional arguments forwarded to ``sns.boxplot``. A leading None
            is treated as a "pick for me" sentinel and dropped.
        **k: Keyword arguments forwarded to ``sns.boxplot``. ``ax`` selects
            the target axes; when absent, ``_pick_ax_slot()`` may supply one.

    Behaviour:
        * Auto mode (no real positional argument and no keyword other than
          ``ax``): a ``df`` is looked up via ``_pick_df()`` and its first
          numeric column is plotted per value of its first categorical
          column. Without seaborn, a single matplotlib box of the numeric
          column is drawn instead. The axes is titled
          'Distribution of <y> by <x>' unless already titled.
        * Otherwise, if seaborn is unavailable, an empty box plot is drawn.
        * Otherwise the call is forwarded to ``sns.boxplot``.

    Returns:
        Whatever ``_safe_plot`` returns: the axes, or None if drawing failed.
    """
    _missing = sns.__class__.__name__ == '_Dummy'
    _sentinel = bool(a) and a[0] is None

    # Decide on auto mode BEFORE an axes is injected into k. Testing `not k`
    # afterwards made this path unreachable whenever an axes was supplied or
    # found, which is the one case where `ax=k.get('ax')` below matters.
    _auto = (not a or _sentinel) and not (set(k) - {'ax'})

    _ax = k.get('ax') or _pick_ax_slot()
    if _ax is not None:
        try:
            plt.sca(_ax)
        except Exception:
            pass
        if k.get('ax') is None:
            k['ax'] = _ax

    if _auto:
        d = _pick_df()
        if d is not None:
            x = _first_categorical(d)
            y = _first_numeric(d)
            # Compare against None: a column may legitimately be named 0 or ''.
            if x is not None and y is not None:
                def _draw():
                    if _missing:
                        plt.boxplot(d[y].dropna())
                    else:
                        sns.boxplot(data=d, x=x, y=y, ax=k.get('ax'))
                    ax = plt.gca()
                    if not (ax.get_title() or '').strip():
                        ax.set_title(f'Distribution of {y} by {x}')
                    return ax

                return _safe_plot(_draw)

    if _missing:
        return _safe_plot(lambda: plt.boxplot([]))
    if _sentinel:
        a = a[1:]
    return _safe_plot(sns.boxplot, *a, **k)
337
+
338
+
339
def SB_scatterplot(*a, **k):
    """Scatter-plot wrapper that falls back to matplotlib without seaborn.

    With seaborn available, the call is forwarded to ``sns.scatterplot``.

    Without seaborn (or without ``sns.scatterplot``):
        * If ``data``, ``x`` and ``y`` keywords name two existing columns,
          both are coerced to numeric, rows where either is missing are
          dropped, and the rest is drawn with ``plt.scatter`` under the
          title '<y> vs <x>'.
        * Otherwise a ``df`` is looked up via ``_pick_df()`` and its first
          two numeric columns are plotted against each other.
        * If neither applies, an empty scatter is drawn.

    Titles are only set when the axes has none. Drawing goes through
    ``_safe_plot``, whose result is returned.
    """
    no_seaborn = sns.__class__.__name__ == '_Dummy'
    fn = getattr(sns, 'scatterplot', None)

    # seaborn path
    if not no_seaborn and fn is not None:
        return _safe_plot(fn, *a, **k)

    data = k.get('data')
    x = k.get('x')
    y = k.get('y')
    columns_named = (
        data is not None
        and isinstance(x, str)
        and isinstance(y, str)
        and x in data.columns
        and y in data.columns
    )
    if columns_named:
        xs = pd.to_numeric(data[x], errors='coerce')
        ys = pd.to_numeric(data[y], errors='coerce')
        both_present = xs.notna() & ys.notna()

        def _scatter_named():
            plt.scatter(xs[both_present], ys[both_present])
            axis = plt.gca()
            if not (axis.get_title() or '').strip():
                axis.set_title(f'{y} vs {x}')
            return axis

        return _safe_plot(_scatter_named)

    # No usable column names: auto-pick two numeric columns from `df`.
    d = _pick_df()
    if d is not None:
        num = d.select_dtypes(include=[np.number]).columns.tolist()
        if len(num) >= 2:
            def _scatter_auto():
                plt.scatter(d[num[0]], d[num[1]])
                axis = plt.gca()
                if not (axis.get_title() or '').strip():
                    axis.set_title(f'{num[1]} vs {num[0]}')
                return axis

            return _safe_plot(_scatter_auto)

    return _safe_plot(lambda: plt.scatter([], []))
383
+
384
+
385
def SB_heatmap(*a, **k):
    """Heatmap wrapper with an automatic correlation matrix and matplotlib fallback.

    The matrix comes from the first positional argument, else the ``data``
    keyword. When that yields None, a ``df`` is looked up via ``_pick_df()``
    and the correlation of its numeric columns is used. With no data at all,
    a warning is shown and None is returned.

    With seaborn available, ``sns.heatmap`` draws the matrix using the
    remaining keywords. Otherwise ``plt.imshow`` is used, with a colorbar and
    tick labels taken from the matrix's ``columns`` / ``index`` when present.
    In both cases an untitled axes is titled 'Correlation Heatmap'.

    Drawing goes through ``_safe_plot``, whose result is returned.
    """
    no_seaborn = sns.__class__.__name__ == '_Dummy'

    data = a[0] if a else k.get('data')

    if data is None:
        frame = _pick_df()
        try:
            if frame is not None:
                import numpy as _np
                data = frame.select_dtypes(include=[_np.number]).corr()
        except Exception:
            data = None

    if data is None:
        from syntaxmatrix.display import show as _show
        _show('⚠ Heatmap skipped: no data.')
        return None

    if not no_seaborn and hasattr(sns, 'heatmap'):
        # `data` is passed positionally, so it must not be repeated as a keyword.
        forwarded = {key: val for key, val in k.items() if key != 'data'}

        def _seaborn_heat():
            axis = sns.heatmap(data, **forwarded)
            try:
                axis = axis or plt.gca()
                if not (axis.get_title() or '').strip():
                    axis.set_title('Correlation Heatmap')
            except Exception:
                pass
            return axis

        return _safe_plot(_seaborn_heat)

    def _matplotlib_heat():
        plt.imshow(data, aspect='auto')
        try:
            plt.colorbar()
        except Exception:
            pass
        try:
            col_labels = list(getattr(data, 'columns', []))
            row_labels = list(getattr(data, 'index', []))
            if col_labels:
                plt.xticks(range(len(col_labels)), col_labels, rotation=90)
            if row_labels:
                plt.yticks(range(len(row_labels)), row_labels)
        except Exception:
            pass
        axis = plt.gca()
        try:
            if not (axis.get_title() or '').strip():
                axis.set_title('Correlation Heatmap')
        except Exception:
            pass
        return axis

    return _safe_plot(_matplotlib_heat)
443
+
444
+
445
+ def _safe_concat(objs, **kwargs):
446
+ import pandas as _pd
447
+ if objs is None:
448
+ return _pd.DataFrame()
449
+ if isinstance(objs, (list, tuple)) and len(objs) == 0:
450
+ return _pd.DataFrame()
451
+ try:
452
+ return _pd.concat(objs, **kwargs)
453
+ except Exception as e:
454
+ smx_show(f'⚠ concat skipped: {e}')
455
+ return _pd.DataFrame()
456
+
457
+
458
def _SMX_OHE(**k):
    """Build a OneHotEncoder whichever spelling of the sparse flag it accepts.

    A ``sparse`` keyword is renamed to ``sparse_output`` unless the latter is
    also given. Defaults ``handle_unknown='ignore'`` and
    ``sparse_output=False`` are filled in when not supplied. If the
    encoder's signature has no ``sparse_output`` parameter, or construction
    raises TypeError, the flag is passed as ``sparse`` instead.
    """
    def _use_legacy_name(opts):
        # Older encoders spell the flag `sparse`.
        if "sparse_output" in opts:
            opts["sparse"] = opts.pop("sparse_output")

    # normalise arg name across sklearn versions
    if "sparse_output" not in k and "sparse" in k:
        k["sparse_output"] = k.pop("sparse")
    k.setdefault("handle_unknown", "ignore")
    k.setdefault("sparse_output", False)

    try:
        accepted = inspect.signature(OneHotEncoder).parameters
        if "sparse_output" not in accepted:
            _use_legacy_name(k)
        return OneHotEncoder(**k)
    except TypeError:
        _use_legacy_name(k)
        return OneHotEncoder(**k)
473
+
474
+
475
+ def _SMX_mm(a, b):
476
+ try:
477
+ return a @ b # normal path
478
+ except Exception:
479
+ try:
480
+ A = _np.asarray(a)
481
+ B = _np.asarray(b)
482
+ # If same 2D shape (e.g. (n,k) & (n,k)), treat as row-wise dot
483
+ if A.ndim == 2 and B.ndim == 2 and A.shape == B.shape:
484
+ return (A * B).sum(axis=1)
485
+ # Otherwise try element-wise product (broadcast if possible)
486
+ return A * B
487
+ except Exception as e:
488
+ smx_show(f'⚠ Matmul relaxed: {type(e).__name__}: {e}')
489
+ return _np.nan
490
+
491
+
492
+ def _SMX_call(fn, *a, **k):
493
+ try:
494
+ return fn(*a, **k)
495
+ except TypeError as e:
496
+ msg = str(e)
497
+ if "unexpected keyword argument 'squared'" in msg:
498
+ k.pop('squared', None)
499
+ return fn(*a, **k)
500
+ raise
501
+
502
+
503
+ def _SMX_rmse(y_true, y_pred):
504
+ try:
505
+ from sklearn.metrics import mean_squared_error as _mse
506
+ try:
507
+ return _mse(y_true, y_pred, squared=False)
508
+ except TypeError:
509
+ return (_mse(y_true, y_pred)) ** 0.5
510
+ except Exception:
511
+ import numpy as _np
512
+ yt = _np.asarray(y_true, dtype=float)
513
+ yp = _np.asarray(y_pred, dtype=float)
514
+ diff = yt - yp
515
+ return float((_np.mean(diff * diff)) ** 0.5)
516
+
517
+
518
+ def _SMX_autocoerce_dates(_df):
519
+ if _df is None or not hasattr(_df, 'columns'):
520
+ return
521
+ for c in list(_df.columns):
522
+ s = _df[c]
523
+ n = str(c).lower()
524
+ if _pd.api.types.is_datetime64_any_dtype(s):
525
+ continue
526
+ if (
527
+ _pd.api.types.is_object_dtype(s)
528
+ or ('date' in n or 'time' in n or 'timestamp' in n or n.endswith('_dt'))
529
+ ):
530
+ try:
531
+ conv = _pd.to_datetime(s, errors='coerce', utc=True).dt.tz_localize(None)
532
+ # accept only if at least 10% (min 3) parse as dates
533
+ if getattr(conv, 'notna', lambda: _pd.Series([]))().sum() >= max(3, int(0.1 * len(_df))):
534
+ _df[c] = conv
535
+ except Exception:
536
+ pass
537
+
538
+
539
+ def _SMX_autocoerce_numeric(_df, cols):
540
+ if _df is None:
541
+ return
542
+ for c in cols:
543
+ if c in getattr(_df, 'columns', []):
544
+ try:
545
+ _df[c] = _pd.to_numeric(_df[c], errors='coerce')
546
+ except Exception:
547
+ pass
548
+
549
def show(*args, **kwargs):
    """Alias for ``smx_show``: forward every argument unchanged and return its result."""
    return smx_show(*args, **kwargs)