syntaxmatrix 2.5.5.5__py3-none-any.whl → 2.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- syntaxmatrix/__init__.py +3 -2
- syntaxmatrix/agentic/agents.py +1220 -169
- syntaxmatrix/agentic/agents_orchestrer.py +326 -0
- syntaxmatrix/agentic/code_tools_registry.py +27 -32
- syntaxmatrix/auth.py +142 -5
- syntaxmatrix/commentary.py +16 -16
- syntaxmatrix/core.py +192 -84
- syntaxmatrix/db.py +460 -4
- syntaxmatrix/{display.py → display_html.py} +2 -6
- syntaxmatrix/gpt_models_latest.py +1 -1
- syntaxmatrix/media/__init__.py +0 -0
- syntaxmatrix/media/media_pixabay.py +277 -0
- syntaxmatrix/models.py +1 -1
- syntaxmatrix/page_builder_defaults.py +183 -0
- syntaxmatrix/page_builder_generation.py +1122 -0
- syntaxmatrix/page_layout_contract.py +644 -0
- syntaxmatrix/page_patch_publish.py +1471 -0
- syntaxmatrix/preface.py +670 -0
- syntaxmatrix/profiles.py +28 -10
- syntaxmatrix/routes.py +1941 -593
- syntaxmatrix/selftest_page_templates.py +360 -0
- syntaxmatrix/settings/client_items.py +28 -0
- syntaxmatrix/settings/model_map.py +1022 -207
- syntaxmatrix/settings/prompts.py +328 -130
- syntaxmatrix/static/assets/hero-default.svg +22 -0
- syntaxmatrix/static/icons/bot-icon.png +0 -0
- syntaxmatrix/static/icons/favicon.png +0 -0
- syntaxmatrix/static/icons/logo.png +0 -0
- syntaxmatrix/static/icons/logo3.png +0 -0
- syntaxmatrix/templates/admin_branding.html +104 -0
- syntaxmatrix/templates/admin_features.html +63 -0
- syntaxmatrix/templates/admin_secretes.html +108 -0
- syntaxmatrix/templates/change_password.html +124 -0
- syntaxmatrix/templates/dashboard.html +296 -131
- syntaxmatrix/templates/dataset_resize.html +535 -0
- syntaxmatrix/templates/edit_page.html +2535 -0
- syntaxmatrix/utils.py +2728 -2835
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.6.2.dist-info}/METADATA +6 -2
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.6.2.dist-info}/RECORD +42 -25
- syntaxmatrix/generate_page.py +0 -634
- syntaxmatrix/static/icons/hero_bg.jpg +0 -0
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.6.2.dist-info}/WHEEL +0 -0
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.6.2.dist-info}/licenses/LICENSE.txt +0 -0
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.6.2.dist-info}/top_level.txt +0 -0
syntaxmatrix/preface.py
ADDED
|
@@ -0,0 +1,670 @@
|
|
|
1
|
+
# === SMX Auto-Hardening Preface (do not edit) ===
|
|
2
|
+
import warnings, numpy as np, pandas as pd, matplotlib.pyplot as plt
|
|
3
|
+
warnings.filterwarnings('ignore')
|
|
4
|
+
from sklearn.preprocessing import OneHotEncoder
|
|
5
|
+
import inspect
|
|
6
|
+
import pandas as _pd
|
|
7
|
+
import numpy as _np # noqa: F811
|
|
8
|
+
|
|
9
|
+
from sklearn.metrics import (
|
|
10
|
+
r2_score,
|
|
11
|
+
mean_absolute_error,
|
|
12
|
+
accuracy_score,
|
|
13
|
+
precision_score,
|
|
14
|
+
recall_score,
|
|
15
|
+
f1_score,
|
|
16
|
+
roc_auc_score,
|
|
17
|
+
confusion_matrix,
|
|
18
|
+
ConfusionMatrixDisplay,
|
|
19
|
+
classification_report,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Names exported by a star-import of this module. The underscore-prefixed
# helpers are listed explicitly because `import *` would otherwise skip them.
__all__ = [
    # plotting entrypoints
    "SB_histplot",
    "SB_barplot",
    "SB_boxplot",
    "SB_scatterplot",
    "SB_heatmap",
    "viz_stacked_bar",

    # core helpers (all underscore-prefixed functions)
    "_SMX_caption_from_ctx",
    "_SMX_axes_have_titles",
    "_SMX_export_png",
    "_pick_df",
    "_pick_ax_slot",
    "_first_numeric",
    "_first_categorical",
    "_safe_plot",
    "_safe_concat",
    "_SMX_OHE",
    "_SMX_mm",
    "_SMX_call",
    "_SMX_rmse",
    "_SMX_autocoerce_dates",
    "_SMX_autocoerce_numeric",

    # display helper
    "smx_show",
    "show",

    # metrics (re-exported from sklearn.metrics)
    "r2_score",
    "mean_absolute_error",
    "accuracy_score",
    "precision_score",
    "recall_score",
    "f1_score",
    "roc_auc_score",
    "confusion_matrix",
    "ConfusionMatrixDisplay",
    "classification_report",
]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# seaborn is optional. When importing it fails for any reason, `sns` is bound
# to a stand-in object: every attribute lookup on it yields a callable that
# shows a "seaborn not available" warning and returns None.
try:
    import seaborn as sns
except Exception:
    class _Dummy:
        # The SB_* wrappers in this file detect the fallback by comparing the
        # class name of `sns` with '_Dummy', so this name must stay as is.
        def __getattr__(self, name):
            def _f(*a, **k):
                # Imported lazily, at call time rather than at module import.
                from syntaxmatrix.display_html import show
                show('⚠ seaborn not available; plot skipped.')
            return _f
    sns = _Dummy()
|
|
76
|
+
|
|
77
|
+
from syntaxmatrix.display_html import show as _SMX_base_show
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# Bind the bare seaborn-style plot names to None at module level.
# NOTE(review): nothing in the visible file reads these names; presumably they
# are placeholders for code that refers to them unqualified — confirm.
boxplot = barplot = histplot = distplot = lineplot = countplot = heatmap = pairplot = None
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def smx_show(obj, title=None):
    """
    Display `obj` through the base `show`, adding a caption where possible.

    - A pandas DataFrame is shown as a Styler carrying the caption.
    - A dict whose values are all numbers is shown as a two-column
      ('metric', 'value') table, captioned when styling works and plain
      otherwise.
    - Anything else, or any failure along the way, falls back to showing
      `obj` unchanged.

    The caption is `title` when truthy, else _SMX_caption_from_ctx().
    """
    try:
        import pandas as pd
        import numbers

        caption = title if title else _SMX_caption_from_ctx()

        # 1) DataFrame -> captioned Styler
        if isinstance(obj, pd.DataFrame):
            try:
                styled = obj.style.set_caption(caption)
                return _SMX_base_show(styled)
            except Exception:
                pass  # styling/rendering failed; keep going

        # 2) dict of scalars -> small metric table
        is_metric_dict = isinstance(obj, dict) and all(
            isinstance(val, numbers.Number) for val in obj.values()
        )
        if is_metric_dict:
            table = pd.DataFrame(
                {'metric': list(obj.keys()), 'value': list(obj.values())}
            )
            try:
                return _SMX_base_show(table.style.set_caption(caption))
            except Exception:
                return _SMX_base_show(table)
    except Exception:
        pass
    return _SMX_base_show(obj)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _SMX_caption_from_ctx():
    """
    Derive a short caption from the question currently being answered.

    Walks the call stack, starting with this function's own frame, and looks
    in each frame's globals for `refined_question` and then `askai_question`.
    The first candidate containing visible text wins: its first line (after
    stripping the surrounding whitespace of the whole value), capped at 120
    characters, is returned.

    Returns 'Table' when no frame provides usable text.
    """
    import inspect

    frame = inspect.currentframe()
    try:
        while frame is not None:
            g = frame.f_globals
            for key in ("refined_question", "askai_question"):
                value = g.get(key)
                if not value:
                    continue
                # A whitespace-only value is truthy but yields no lines once
                # stripped; skip it rather than index into an empty list.
                lines = str(value).strip().splitlines()
                if lines:
                    return lines[0][:120]
            frame = frame.f_back
    finally:
        # Release the frame reference so it cannot keep a reference cycle alive.
        del frame

    return "Table"
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _SMX_axes_have_titles(fig=None):
    """
    Tell whether any axes of a figure carries a non-blank title.

    `fig` defaults to matplotlib's current figure when falsy. Any error raised
    while inspecting the axes is swallowed and reported as False.
    """
    import matplotlib.pyplot as _plt

    target = fig or _plt.gcf()
    try:
        return any(
            (axis.get_title() or '').strip() for axis in target.get_axes()
        )
    except Exception:
        return False
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _SMX_export_png():
    """
    Render matplotlib's current figure inline as a base64-encoded PNG.

    - If no axes of the figure reports data, a warning is shown and nothing
      is exported (avoids blank images). If the data check itself fails, the
      export proceeds anyway.
    - If no axes has a title, a figure-level title from
      _SMX_caption_from_ctx() is added (best effort).
    - The figure is saved to an in-memory PNG and displayed through
      IPython.display as an <img> tag with a data URI.

    The figure is closed on every exit path, including when saving or
    displaying raises, so failed exports do not leave figures open. Errors
    from saving or displaying still propagate to the caller.
    """
    import io
    import base64

    fig = plt.gcf()
    try:
        # If the figure has no real data, skip exporting to avoid blank images.
        try:
            has_data = any(
                getattr(ax, "has_data", lambda: False)()
                for ax in fig.get_axes()
            )
        except Exception:
            has_data = True  # fail open: better a plot than nothing if check breaks

        if not has_data:
            try:
                from syntaxmatrix.display_html import show as _show
                _show("⚠ Plot skipped: figure has no data to export.")
            except Exception:
                pass
            return

        try:
            if not _SMX_axes_have_titles(fig):
                fig.suptitle(_SMX_caption_from_ctx(), fontsize=10)
        except Exception:
            pass

        buf = io.BytesIO()
        # Save the figure captured above rather than whatever is "current".
        fig.savefig(buf, format='png', bbox_inches='tight')

        from IPython.display import display, HTML
        _img = base64.b64encode(buf.getvalue()).decode('ascii')
        display(
            HTML(
                f"<img src='data:image/png;base64,{_img}' "
                "style='max-width:100%;height:auto;border:1px solid #ccc;border-radius:4px;'/>"
            )
        )
    finally:
        plt.close(fig)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _pick_df():
    """
    Try to find `df` in the caller's context, not just this module.

    Lookup order:
    1. this module's own globals;
    2. every frame up the call stack, starting with the direct caller:
       the frame's locals first, then its globals (e.g. the exec cell).

    A binding whose value is None is skipped, so an intermediate function
    with a `df=None` parameter does not hide a real dataframe further up.

    Returns the first non-None object bound to the name `df`, or None when
    there is none.
    """
    import inspect

    # 1) Check our own module globals first (just in case).
    found = globals().get("df")
    if found is not None:
        return found

    # 2) Walk up the call stack looking for `df`.
    frame = inspect.currentframe().f_back
    try:
        while frame is not None:
            for scope in (frame.f_locals, frame.f_globals):
                found = scope.get("df")
                if found is not None:
                    return found
            frame = frame.f_back
    finally:
        # Release the frame reference so it cannot keep a reference cycle alive.
        del frame

    return None
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _pick_ax_slot():
    """
    Return the first still-empty axes from a module-level `axes`, or None.

    Reads `axes` from this module's globals, flattens it with numpy.ravel and
    returns the first element that exposes `has_data()` and reports no data.
    An element whose check raises is skipped; any other failure yields None.
    """
    try:
        grid = globals().get('axes', None)
        import numpy as _np

        if grid is None:
            return None
        for candidate in _np.ravel(grid):
            try:
                is_free = hasattr(candidate, 'has_data') and not candidate.has_data()
            except Exception:
                continue
            if is_free:
                return candidate
    except Exception:
        return None
    return None
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _first_numeric(_d):
    """
    Pick a numeric column name from dataframe `_d`.

    A short list of typical target names is tried first (in order); the first
    one that exists and has a numeric dtype wins. Otherwise the first column
    selected by `select_dtypes(include=[np.number])` is returned.

    Returns None when there is no numeric column or when anything fails.
    """
    import numpy as np, pandas as pd

    likely_targets = ("median_house_value", "price", "value", "target", "label", "y")
    try:
        hit = next(
            (
                name for name in likely_targets
                if name in _d.columns and pd.api.types.is_numeric_dtype(_d[name])
            ),
            None,
        )
        if hit is not None:
            return hit
        numeric_cols = _d.select_dtypes(include=[np.number]).columns.tolist()
        if numeric_cols:
            return numeric_cols[0]
        return None
    except Exception:
        return None
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _first_categorical(_d):
    """
    Pick a categorical-looking column name from dataframe `_d`.

    Returns the first column (in column order) that is not selected by
    `select_dtypes(include=[np.number])` and has at most 50 distinct non-null
    values. A column whose distinct count cannot be computed (for example
    because its cells are unhashable lists or dicts) is skipped instead of
    aborting the whole search.

    Returns None when no column qualifies or `_d` is not dataframe-like.
    """
    import numpy as np

    try:
        numeric = set(_d.select_dtypes(include=[np.number]).columns.tolist())
        columns = list(_d.columns)
    except Exception:
        return None

    for col in columns:
        if col in numeric:
            continue
        try:
            if _d[col].nunique(dropna=True) <= 50:
                return col
        except Exception:
            # This column cannot be counted; later columns may still qualify.
            continue
    return None
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _safe_plot(func, *args, **kwargs):
    """
    Call a plotting function without letting it raise.

    Runs `func(*args, **kwargs)`. On any exception a 'Plot skipped' warning
    with the exception type and message is shown and None is returned.
    Otherwise the function's result is used (matplotlib's current axes when
    the result is None); a warning is shown if that object reports no data,
    `plt.tight_layout()` is attempted, and the object is returned.
    """
    try:
        result = func(*args, **kwargs)
        drawn = plt.gca() if result is None else result
    except Exception as err:
        from syntaxmatrix.display_html import show as _show
        _show(f'⚠ Plot skipped: {type(err).__name__}: {err}')
        return None

    # Best-effort emptiness warning; never fatal.
    try:
        looks_empty = hasattr(drawn, 'has_data') and not drawn.has_data()
        if looks_empty:
            from syntaxmatrix.display_html import show as _show
            _show('⚠ Empty plot: no data drawn.')
    except Exception:
        pass

    # Best-effort layout; never fatal.
    try:
        plt.tight_layout()
    except Exception:
        pass

    return drawn
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def SB_histplot(*a, **k):
    """
    Histogram wrapper around seaborn's histplot with automatic fallbacks.

    - Called with no arguments (or only a leading None) and no keywords: the
      active dataframe is looked up with _pick_df() and its first numeric
      column is drawn with matplotlib, titled 'Distribution of <col>'.
    - Otherwise, when seaborn is unavailable, an empty matplotlib histogram
      is drawn.
    - Otherwise the call is forwarded to `sns.histplot` (or `plt.hist` if
      that attribute is missing), with a leading None argument dropped.

    Every drawing call goes through _safe_plot().
    """
    seaborn_absent = getattr(sns, '__class__', type(sns)).__name__ == '_Dummy'
    leading_none = bool(a) and a[0] is None
    nothing_requested = (not a or leading_none) and not k

    if nothing_requested:
        frame_data = _pick_df()
        column = _first_numeric(frame_data) if frame_data is not None else None
        if column is not None:
            def _auto_hist():
                plt.hist(frame_data[column].dropna())
                current = plt.gca()
                if not (current.get_title() or '').strip():
                    current.set_title(f'Distribution of {column}')
                return current

            return _safe_plot(lambda **kw: _auto_hist())

    if seaborn_absent:
        return _safe_plot(lambda **kw: plt.hist([]))

    rest = a[1:] if leading_none else a
    return _safe_plot(getattr(sns, 'histplot', plt.hist), *rest, **k)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def SB_barplot(*a, **k):
    """
    Bar-plot wrapper around seaborn's barplot with automatic fallbacks.

    The target axes is `k['ax']` when given, else an empty slot from
    _pick_ax_slot(); when one is found it is made current and stored in
    `k['ax']`.

    - Auto mode: called with no positional data (nothing, or only a leading
      None) and no keywords other than `ax`. The active dataframe from
      _pick_df() is grouped by its first categorical column and the mean of
      its first numeric column is drawn (seaborn if available, matplotlib
      otherwise), titled 'Mean <y> by <x>'.
    - Otherwise, when seaborn is unavailable, an empty matplotlib bar chart
      is drawn.
    - Otherwise the call is forwarded to `sns.barplot`, with a leading None
      argument dropped.

    Every drawing call goes through _safe_plot().
    """
    _missing = (getattr(sns, '__class__', type(sns)).__name__ == '_Dummy')
    _sentinel = (len(a) >= 1 and a[0] is None)

    # Decide on auto mode BEFORE 'ax' is injected into k, and ignore 'ax'
    # itself: choosing where to draw must not disable choosing what to draw.
    _auto = (not a or _sentinel) and not any(name != 'ax' for name in k)

    _ax = k.get('ax') or _pick_ax_slot()
    if _ax is not None:
        try:
            plt.sca(_ax)
        except Exception:
            pass
        k.setdefault('ax', _ax)

    if _auto:
        d = _pick_df()
        if d is not None:
            x = _first_categorical(d)
            y = _first_numeric(d)
            # Compare with None so falsy column labels (0, '') still count.
            if x is not None and y is not None:
                g = d.groupby(x)[y].mean().reset_index()

                def _draw():
                    target = k.get('ax')
                    if _missing:
                        (target if target is not None else plt.gca()).bar(g[x], g[y])
                    else:
                        sns.barplot(data=g, x=x, y=y, ax=target)
                    ax = target if target is not None else plt.gca()
                    if not (ax.get_title() or '').strip():
                        ax.set_title(f'Mean {y} by {x}')
                    return ax

                return _safe_plot(lambda **kw: _draw())

    if _missing:
        return _safe_plot(lambda **kw: plt.bar([], []))
    if _sentinel:
        a = a[1:]
    return _safe_plot(sns.barplot, *a, **k)
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def SB_boxplot(*a, **k):
    """
    Box-plot wrapper around seaborn's boxplot with automatic fallbacks.

    The target axes is `k['ax']` when given, else an empty slot from
    _pick_ax_slot(); when one is found it is made current and stored in
    `k['ax']`.

    - Auto mode: called with no positional data (nothing, or only a leading
      None) and no keywords other than `ax`. Using the active dataframe from
      _pick_df(), the first numeric column is drawn per level of the first
      categorical column with seaborn; without seaborn, matplotlib draws a
      single box of the numeric column. The title is
      'Distribution of <y> by <x>' in both cases.
    - Otherwise, when seaborn is unavailable, an empty matplotlib box plot
      is drawn.
    - Otherwise the call is forwarded to `sns.boxplot`, with a leading None
      argument dropped.

    Every drawing call goes through _safe_plot().
    """
    _missing = (getattr(sns, '__class__', type(sns)).__name__ == '_Dummy')
    _sentinel = (len(a) >= 1 and a[0] is None)

    # Decide on auto mode BEFORE 'ax' is injected into k, and ignore 'ax'
    # itself: choosing where to draw must not disable choosing what to draw.
    _auto = (not a or _sentinel) and not any(name != 'ax' for name in k)

    _ax = k.get('ax') or _pick_ax_slot()
    if _ax is not None:
        try:
            plt.sca(_ax)
        except Exception:
            pass
        k.setdefault('ax', _ax)

    if _auto:
        d = _pick_df()
        if d is not None:
            x = _first_categorical(d)
            y = _first_numeric(d)
            # Compare with None so falsy column labels (0, '') still count.
            if x is not None and y is not None:
                def _draw():
                    target = k.get('ax')
                    if _missing:
                        (target if target is not None else plt.gca()).boxplot(d[y].dropna())
                    else:
                        sns.boxplot(data=d, x=x, y=y, ax=target)
                    ax = target if target is not None else plt.gca()
                    if not (ax.get_title() or '').strip():
                        ax.set_title(f'Distribution of {y} by {x}')
                    return ax

                return _safe_plot(lambda **kw: _draw())

    if _missing:
        return _safe_plot(lambda **kw: plt.boxplot([]))
    if _sentinel:
        a = a[1:]
    return _safe_plot(sns.boxplot, *a, **k)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def SB_scatterplot(*a, **k):
    """
    Scatter-plot wrapper around seaborn's scatterplot with a matplotlib fallback.

    When seaborn is usable the call is forwarded to `sns.scatterplot`.
    Only when seaborn is unavailable (or has no `scatterplot`) is the
    matplotlib path taken:

    - with `data=`, `x=` and `y=` naming existing columns, both columns are
      coerced to numeric and rows where either is missing are dropped;
    - otherwise the first two numeric columns of the dataframe found by
      _pick_df() are plotted;
    - otherwise an empty scatter plot is drawn.

    Every drawing call goes through _safe_plot().
    """
    seaborn_absent = getattr(sns, '__class__', type(sns)).__name__ == '_Dummy'
    scatter = getattr(sns, 'scatterplot', None)

    # seaborn path
    if not seaborn_absent and scatter is not None:
        return _safe_plot(scatter, *a, **k)

    def _scatter_with_title(x_values, y_values, title):
        plt.scatter(x_values, y_values)
        current = plt.gca()
        if not (current.get_title() or '').strip():
            current.set_title(title)
        return current

    table = k.get('data')
    x_name = k.get('x')
    y_name = k.get('y')
    columns_named = (
        table is not None
        and isinstance(x_name, str)
        and isinstance(y_name, str)
        and x_name in table.columns
        and y_name in table.columns
    )
    if columns_named:
        xs = pd.to_numeric(table[x_name], errors='coerce')
        ys = pd.to_numeric(table[y_name], errors='coerce')
        both_present = xs.notna() & ys.notna()
        return _safe_plot(
            lambda **kw: _scatter_with_title(
                xs[both_present], ys[both_present], f'{y_name} vs {x_name}'
            )
        )

    # else: fall back to auto-pick two numeric columns
    picked = _pick_df()
    if picked is not None:
        numeric_cols = picked.select_dtypes(include=[np.number]).columns.tolist()
        if len(numeric_cols) >= 2:
            return _safe_plot(
                lambda **kw: _scatter_with_title(
                    picked[numeric_cols[0]],
                    picked[numeric_cols[1]],
                    f'{numeric_cols[1]} vs {numeric_cols[0]}',
                )
            )

    return _safe_plot(lambda **kw: plt.scatter([], []))
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def SB_heatmap(*a, **k):
    """
    Heatmap wrapper around seaborn's heatmap with a matplotlib fallback.

    The matrix is the first positional argument, else `data=`; when neither
    is given it is the correlation matrix of the numeric columns of the
    dataframe found by _pick_df(). With no matrix at all a warning is shown
    and None is returned.

    With seaborn available the matrix and the remaining keywords go to
    `sns.heatmap`; otherwise `plt.imshow` is used with a colorbar and tick
    labels taken from the matrix's columns/index when present. An untitled
    axes gets the title 'Correlation Heatmap'.

    Every drawing call goes through _safe_plot().
    """
    seaborn_absent = getattr(sns, '__class__', type(sns)).__name__ == '_Dummy'

    matrix = a[0] if a else k.get('data')
    if matrix is None:
        source = _pick_df()
        try:
            if source is not None:
                import numpy as _np
                matrix = source.select_dtypes(include=[_np.number]).corr()
        except Exception:
            matrix = None
    if matrix is None:
        from syntaxmatrix.display_html import show as _show
        _show('⚠ Heatmap skipped: no data.')
        return None

    def _title_if_blank(axis):
        # Best effort: a failure to read or set the title is ignored.
        try:
            if not (axis.get_title() or '').strip():
                axis.set_title('Correlation Heatmap')
        except Exception:
            pass

    if not seaborn_absent and hasattr(sns, 'heatmap'):
        passthrough = {name: val for name, val in k.items() if name != 'data'}

        def _seaborn_heat():
            axis = sns.heatmap(matrix, **passthrough)
            try:
                axis = axis or plt.gca()
            except Exception:
                return axis
            _title_if_blank(axis)
            return axis

        return _safe_plot(lambda **kw: _seaborn_heat())

    def _matplotlib_heat():
        plt.imshow(matrix, aspect='auto')
        try:
            plt.colorbar()
        except Exception:
            pass
        try:
            col_labels = list(getattr(matrix, 'columns', []))
            row_labels = list(getattr(matrix, 'index', []))
            if col_labels:
                plt.xticks(range(len(col_labels)), col_labels, rotation=90)
            if row_labels:
                plt.yticks(range(len(row_labels)), row_labels)
        except Exception:
            pass
        axis = plt.gca()
        _title_if_blank(axis)
        return axis

    return _safe_plot(lambda **kw: _matplotlib_heat())
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
def viz_stacked_bar(df=None, x=None, hue=None, normalise=True, top_k=8):
    """
    Stacked (optionally percentage-stacked) bar chart for two categorical columns.

    - df: optional dataframe. If None, falls back to the active `df` via _pick_df().
    - x: base categorical axis (e.g. 'state'). When missing or not a column,
      the first categorical candidate is used.
    - hue: second categorical (e.g. 'body'). When missing or not a column,
      the first remaining categorical candidate is used.
    - normalise: if True, show percentages by x; else raw counts.
    - top_k: number of most frequent hue values kept; the rest become "Other".

    Categorical candidates are object/category columns with 2 to 30 distinct
    non-null values. The cross-tabulation is also shown as a table.

    Returns the axes produced by the plot, or None when the chart is skipped
    (no dataframe, fewer than two usable columns, no rows, or a plot error).
    """
    from syntaxmatrix.display_html import show as _show

    d = df
    if d is None:
        # _pick_df() walks the caller frames and inspects locals named `df`.
        # This function's own `df` parameter (None here) is such a local, so
        # unbind it first so the lookup can reach the real caller's dataframe.
        del df
        d = _pick_df()
    if d is None:
        _show("⚠ Stacked bar skipped: no dataframe.")
        return None

    def _is_candidate(col):
        # Object/category dtype with a reasonable number of distinct values.
        series = d[col]
        if not (series.dtype == "object" or str(series.dtype).startswith("category")):
            return False
        distinct = series.nunique(dropna=True)
        return 1 < distinct <= 30

    # Choose categorical candidates with reasonable cardinality
    cat_cols = [c for c in d.columns if _is_candidate(c)]

    if x is None or x not in d.columns:
        x = cat_cols[0] if cat_cols else None
    if hue is None or hue not in d.columns:
        remaining = [c for c in cat_cols if c != x]
        hue = remaining[0] if remaining else None

    if x is None or hue is None:
        _show("⚠ Stacked bar skipped: need two categorical columns.")
        return None

    work = d[[x, hue]].dropna()
    if work.empty:
        _show("⚠ Stacked bar skipped: no data.")
        return None

    def _draw():
        _work = work.copy()

        # Compress minor hue categories into "Other" for readability
        hue_text = _work[hue].astype(str)
        keep_h = hue_text.value_counts().index[:top_k]
        _work[hue] = hue_text.where(hue_text.isin(keep_h), other="Other")

        tab = pd.crosstab(_work[x].astype(str), _work[hue].astype(str))

        # Showing the table is a bonus; never let it block the chart.
        try:
            _show(tab)
        except Exception:
            pass

        plot_tab = tab.copy()
        ylabel = "Count"
        if normalise:
            # Row-wise share of each hue value within its x category.
            plot_tab = plot_tab.div(plot_tab.sum(axis=1), axis=0) * 100
            ylabel = "Percentage"

        ax = plot_tab.plot(kind="bar", stacked=True, figsize=(8, 4))
        title = f"{hue} composition by {x}"
        if normalise:
            title += " (%)"

        if not (ax.get_title() or "").strip():
            ax.set_title(title)
        ax.set_xlabel(str(x))
        ax.set_ylabel(ylabel)
        plt.xticks(rotation=45, ha="right")
        plt.tight_layout()
        return ax

    # NOTE: _safe_plot handles empty plots and layout, but does NOT export PNGs.
    return _safe_plot(lambda **kw: _draw())
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def _safe_concat(objs, **kwargs):
    """
    `pandas.concat` that never raises.

    Returns an empty DataFrame when `objs` is None or an empty list/tuple.
    Otherwise returns `pandas.concat(objs, **kwargs)`; if that fails, a
    warning is shown via smx_show() and an empty DataFrame is returned.
    """
    import pandas as _pd

    nothing_to_join = objs is None or (
        isinstance(objs, (list, tuple)) and len(objs) == 0
    )
    if nothing_to_join:
        return _pd.DataFrame()

    try:
        joined = _pd.concat(objs, **kwargs)
    except Exception as err:
        smx_show(f'⚠ concat skipped: {err}')
        return _pd.DataFrame()
    return joined
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
def _SMX_OHE(**k):
    """
    Build a OneHotEncoder that works across sklearn versions.

    Accepts either `sparse` or `sparse_output` for the dense/sparse switch
    and passes whichever name the installed OneHotEncoder takes. Defaults:
    handle_unknown='ignore' and a dense result (sparse_output=False).
    A TypeError from the constructor triggers one retry with the old
    `sparse` name.
    """
    def _use_legacy_name(options):
        # Rename sparse_output -> sparse for older sklearn signatures.
        if "sparse_output" in options:
            options["sparse"] = options.pop("sparse_output")

    # normalise arg name across sklearn versions
    if "sparse_output" not in k and "sparse" in k:
        k["sparse_output"] = k.pop("sparse")
    for option, default in (("handle_unknown", "ignore"), ("sparse_output", False)):
        k.setdefault(option, default)

    try:
        accepted = inspect.signature(OneHotEncoder).parameters
        if "sparse_output" not in accepted:
            _use_legacy_name(k)
        return OneHotEncoder(**k)
    except TypeError:
        _use_legacy_name(k)
        return OneHotEncoder(**k)
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
def _SMX_mm(a, b):
    """
    Matrix-multiply `a @ b`, relaxing to element-wise forms when that fails.

    If `a @ b` raises, both operands are converted to numpy arrays:
    two 2-D arrays of identical shape give the row-wise dot product
    (sum of the element-wise product along axis 1); anything else gives the
    (broadcast) element-wise product. If that fails too, a warning is shown
    via smx_show() and NaN is returned.
    """
    try:
        return a @ b  # normal path
    except Exception:
        pass

    try:
        left, right = _np.asarray(a), _np.asarray(b)
        product = left * right
        # Same 2-D shape (e.g. (n, k) & (n, k)): treat as row-wise dot.
        if left.ndim == 2 and right.ndim == 2 and left.shape == right.shape:
            return product.sum(axis=1)
        return product
    except Exception as err:
        smx_show(f'⚠ Matmul relaxed: {type(err).__name__}: {err}')
        return _np.nan
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def _SMX_call(fn, *a, **k):
    """Safe metric invocation that can handle sklearn signatures without `squared`.

    - If the metric accepts the provided keywords, it just runs.
    - If we hit "unexpected keyword argument 'squared'", we drop that kw
      and retry. When the caller asked for squared=False with
      mean_squared_error, we emulate RMSE by taking the square root of
      the returned MSE: a float for a single value, an element-wise array
      for multi-output results (e.g. multioutput='raw_values').
    - Any other TypeError is re-raised unchanged.
    """
    squared_flag = k.get("squared", None)
    try:
        return fn(*a, **k)
    except TypeError as e:
        if "unexpected keyword argument 'squared'" not in str(e):
            raise

    # remove unsupported kw and retry
    k.pop("squared", None)
    result = fn(*a, **k)

    # emulate squared=False for a mean_squared_error without that keyword
    if squared_flag is False and getattr(fn, "__name__", "") == "mean_squared_error":
        try:
            import numpy as _np
            root = _np.sqrt(_np.asarray(result, dtype=float))
            # Scalars (and one-element arrays) come back as a plain float;
            # larger arrays keep their shape instead of failing in float().
            return float(root.item()) if root.size == 1 else root
        except Exception:
            # if anything goes wrong, just fall back to the raw result
            pass
    return result
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
def _SMX_rmse(y_true, y_pred):
    """
    Root-mean-squared error between `y_true` and `y_pred`.

    Prefers sklearn's mean_squared_error with squared=False; if that keyword
    is rejected with a TypeError, the square root of the plain MSE is taken.
    If sklearn cannot be used at all, the value is computed with numpy and
    returned as a float.
    """
    def _numpy_rmse():
        import numpy as _np
        residual = _np.asarray(y_true, dtype=float) - _np.asarray(y_pred, dtype=float)
        return float((_np.mean(residual * residual)) ** 0.5)

    try:
        from sklearn.metrics import mean_squared_error as _mse
        try:
            return _mse(y_true, y_pred, squared=False)
        except TypeError:
            return (_mse(y_true, y_pred)) ** 0.5
    except Exception:
        return _numpy_rmse()
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
def _SMX_autocoerce_dates(_df):
    """
    Convert date-like columns of `_df` to naive datetimes, in place.

    A column is tried when it is not already datetime and either has object
    dtype or its lower-cased name contains 'date', 'time' or 'timestamp' or
    ends with '_dt'. Values are parsed with errors coerced to NaT, as UTC,
    then the timezone is dropped. The result replaces the column only if at
    least max(3, 10% of the rows) values parsed. Parsing failures are ignored.

    Returns None; does nothing when `_df` is None or has no `columns`.

    NOTE(review): the name rule also applies to numeric columns (e.g. a
    numeric 'time_spent'); confirm that converting those is intended.
    """
    if _df is None or not hasattr(_df, 'columns'):
        return

    name_hints = ('date', 'time', 'timestamp')
    for col in list(_df.columns):
        series = _df[col]
        label = str(col).lower()
        if _pd.api.types.is_datetime64_any_dtype(series):
            continue

        wanted = _pd.api.types.is_object_dtype(series) or (
            any(hint in label for hint in name_hints) or label.endswith('_dt')
        )
        if not wanted:
            continue

        try:
            parsed = _pd.to_datetime(series, errors='coerce', utc=True).dt.tz_localize(None)
            parsed_count = getattr(parsed, 'notna', lambda: _pd.Series([]))().sum()
            # accept only if at least 10% (min 3) parse as dates
            if parsed_count >= max(3, int(0.1 * len(_df))):
                _df[col] = parsed
        except Exception:
            pass
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def _SMX_autocoerce_numeric(_df, cols):
    """
    Convert the listed columns of `_df` to numeric, in place.

    Each name in `cols` that is a column of `_df` is replaced by
    `pandas.to_numeric(..., errors='coerce')`, so unparseable values become
    NaN. Names that are not columns are skipped and conversion errors are
    ignored. Returns None; does nothing when `_df` is None.
    """
    if _df is None:
        return

    for name in cols:
        if name not in getattr(_df, 'columns', []):
            continue
        try:
            coerced = _pd.to_numeric(_df[name], errors='coerce')
            _df[name] = coerced
        except Exception:
            pass
|
|
668
|
+
|
|
669
|
+
def show(*args, **kwargs):
    """Alias for smx_show(); forwards all positional and keyword arguments unchanged."""
    return smx_show(*args, **kwargs)
|