syntaxmatrix-2.5.5.5-py3-none-any.whl → syntaxmatrix-2.5.6.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- syntaxmatrix/__init__.py +3 -2
- syntaxmatrix/agentic/agents.py +14 -23
- syntaxmatrix/auth.py +142 -5
- syntaxmatrix/core.py +34 -15
- syntaxmatrix/generate_page.py +17 -7
- syntaxmatrix/preface.py +550 -0
- syntaxmatrix/routes.py +238 -177
- syntaxmatrix/templates/change_password.html +124 -0
- syntaxmatrix/templates/dashboard.html +12 -10
- syntaxmatrix/utils.py +363 -481
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.5.6.1.dist-info}/METADATA +1 -1
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.5.6.1.dist-info}/RECORD +15 -13
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.5.6.1.dist-info}/WHEEL +0 -0
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.5.6.1.dist-info}/licenses/LICENSE.txt +0 -0
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.5.6.1.dist-info}/top_level.txt +0 -0
syntaxmatrix/utils.py
CHANGED
@@ -93,6 +93,55 @@ def classify_ml_job(prompt: str) -> str:
     return "eda"
 
 
+def _indent(code: str, spaces: int = 4) -> str:
+    """
+    Indent a block of code by `spaces` spaces, line by line.
+    Blank lines are preserved unchanged.
+    """
+    pad = " " * spaces
+    lines = code.splitlines()
+    return "\n".join((pad + line) if line.strip() else line for line in lines)
+
+
+def wrap_llm_code_safe(body: str) -> str:
+    """
+    Wrap arbitrary LLM code so that:
+    - Any exception is caught and shown.
+    - A minimal, useful EDA fallback still runs so the user sees *something*.
+    This happens once in the framework; you never touch the individual cells.
+    """
+    return textwrap.dedent(
+        "try:\n"
+        + _indent(body)
+        + "\n"
+        "except Exception as e:\n"
+        "    from syntaxmatrix.display import show\n"
+        "    msg = f\"⚠️ Skipped LLM block due to: {type(e).__name__}: {e}\"\n"
+        "    show(msg)\n"
+        "    # --- automatic EDA fallback ---\n"
+        "    try:\n"
+        "        df_local = globals().get('df')\n"
+        "        if df_local is not None:\n"
+        "            import pandas as pd\n"
+        "            from syntaxmatrix.preface import SB_histplot, _SMX_export_png\n"
+        "            num_cols = df_local.select_dtypes(include=['number', 'bool']).columns.tolist()\n"
+        "            cat_cols = [c for c in df_local.columns if c not in num_cols]\n"
+        "            info = {\n"
+        "                'rows': len(df_local),\n"
+        "                'cols': len(df_local.columns),\n"
+        "                'numeric_cols': len(num_cols),\n"
+        "                'categorical_cols': len(cat_cols),\n"
+        "            }\n"
+        "            show(df_local.head())\n"
+        "            show(info)\n"
+        "            if num_cols:\n"
+        "                SB_histplot()\n"
+        "                _SMX_export_png()\n"
+        "    except Exception as _f:\n"
+        "        show(f\"⚠️ Fallback EDA failed: {type(_f).__name__}: {_f}\")\n"
+    )
+
+
 def harden_ai_code(code: str) -> str:
     """
     Make any AI-generated cell resilient:
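Usage sketch for the new module-level wrapper above (illustrative only, assuming the syntaxmatrix package is installed; the failing body is made up for the example):

    # Hypothetical usage of wrap_llm_code_safe; not part of the package diff.
    from syntaxmatrix.utils import wrap_llm_code_safe

    llm_body = "result = 1 / 0  # deliberately broken LLM output"
    wrapped = wrap_llm_code_safe(llm_body)

    # The wrapped string is a try/except block: at exec time the ZeroDivisionError
    # would be caught, a warning shown, and the automatic EDA fallback run instead.
    print(wrapped.splitlines()[0])  # -> try: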
@@ -106,40 +155,54 @@ def harden_ai_code(code: str) -> str:
     # Remove any LLM-added try/except blocks (hardener adds its own)
     import re
 
-    def strip_placeholders(code: str) -> str:
-        code = re.sub(r"\bshow\(\s*\.\.\.\s*\)",
-                      "show('⚠ Block skipped due to an error.')",
-                      code)
-        code = re.sub(r"\breturn\s+\.\.\.", "return None", code)
-        return code
-
-    def _indent(code: str, spaces: int = 4) -> str:
-        pad = " " * spaces
-        return "\n".join(pad + line for line in code.splitlines())
-
-    def _SMX_OHE(**k):
-        # normalise arg name across sklearn versions
-        if "sparse" in k and "sparse_output" not in k:
-            k["sparse_output"] = k.pop("sparse")
-        # default behaviour we want
-        k.setdefault("handle_unknown", "ignore")
-        k.setdefault("sparse_output", False)
-        try:
-            # if running on old sklearn without sparse_output, translate back
-            if "sparse_output" not in inspect.signature(OneHotEncoder).parameters:
-                if "sparse_output" in k:
-                    k["sparse"] = k.pop("sparse_output")
-            return OneHotEncoder(**k)
-        except TypeError:
-            # final fallback: try legacy name
-            if "sparse_output" in k:
-                k["sparse"] = k.pop("sparse_output")
-            return OneHotEncoder(**k)
 
     def _strip_stray_backrefs(code: str) -> str:
         code = re.sub(r'(?m)^\s*\\\d+\s*', '', code)
         code = re.sub(r'(?m)[;]\s*\\\d+\s*', '; ', code)
         return code
+
+    def _patch_feature_coef_dataframe(code: str) -> str:
+        """
+        Harden patterns like:
+            coeffs_df = pd.DataFrame({'feature': num_features, 'coefficient': coef})
+        which can crash with:
+            ValueError: All arrays must be of the same length
+        We wrap them in a try/except and, on failure, rebuild the
+        DataFrame by zipping feature names with coefficients up to
+        the min length.
+        """
+        # Match single-line assignments of the form:
+        #     <var> = pd.DataFrame({'feature': <feat>, 'coefficient': <coef>})
+        import re
+
+        pattern = re.compile(
+            r"(?P<indent>^[ \t]*)"
+            r"(?P<var>\w+)\s*=\s*pd\.DataFrame\(\s*{\s*"
+            r"['\"]feature['\"]\s*:\s*(?P<feat_expr>.+?)\s*,\s*"
+            r"['\"]coefficient['\"]\s*:\s*(?P<coef_expr>.+?)\s*"
+            r"}\s*\)\s*$",
+            re.MULTILINE,
+        )
+
+        def repl(m: re.Match) -> str:
+            indent = m.group("indent")
+            var = m.group("var")
+            feat_expr = m.group("feat_expr").strip()
+            coef_expr = m.group("coef_expr").strip()
+
+            # Keep the original intent, but add a safe fallback.
+            return (
+                f"{indent}try:\n"
+                f"{indent}    {var} = pd.DataFrame({{'feature': {feat_expr}, 'coefficient': {coef_expr}}})\n"
+                f"{indent}except Exception as _e:\n"
+                f"{indent}    import numpy as _np\n"
+                f"{indent}    _feat = list({feat_expr})\n"
+                f"{indent}    _coef = _np.asarray({coef_expr}).ravel()\n"
+                f"{indent}    _k = min(len(_feat), len(_coef))\n"
+                f"{indent}    {var} = pd.DataFrame({{'feature': _feat[:_k], 'coefficient': _coef[:_k]}})\n"
+            )
+
+        return pattern.sub(repl, code)
 
     def _wrap_metric_calls(code: str) -> str:
         names = [
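The `_patch_feature_coef_dataframe` helper above is nested inside `harden_ai_code`, so it cannot be imported directly; the strings below sketch the before/after shape of the rewrite it performs (variable names taken from its docstring, output reconstructed from `repl`):

    # Illustrative input line that the regex above matches:
    before = "coeffs_df = pd.DataFrame({'feature': num_features, 'coefficient': coef})"

    # Approximate replacement emitted by repl(): the original assignment is kept,
    # with a length-aligning fallback added for the mismatched-lengths case.
    after = (
        "try:\n"
        "    coeffs_df = pd.DataFrame({'feature': num_features, 'coefficient': coef})\n"
        "except Exception as _e:\n"
        "    import numpy as _np\n"
        "    _feat = list(num_features)\n"
        "    _coef = _np.asarray(coef).ravel()\n"
        "    _k = min(len(_feat), len(_coef))\n"
        "    coeffs_df = pd.DataFrame({'feature': _feat[:_k], 'coefficient': _coef[:_k]})\n"
    )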
@@ -224,6 +287,252 @@ def harden_ai_code(code: str) -> str:
     except Exception:
         pass
 
+    def _ensure_metrics_imports(code: str) -> str:
+        needed = set()
+        if "r2_score" in code:
+            needed.add("r2_score")
+        if "mean_absolute_error" in code:
+            needed.add("mean_absolute_error")
+        # ... add others if you like ...
+
+        if not needed:
+            return code
+
+        if "from sklearn.metrics import" in code:
+            return code  # assume user/LLM handled it
+
+        import_line = "from sklearn.metrics import " + ", ".join(sorted(needed)) + "\n"
+        return import_line + code
+
+    def _fix_unexpected_indent(src: str) -> str:
+        """
+        Some LLM snippets jump indentation (e.g. extra 8 spaces on an 'import'
+        line) without a preceding block opener. That causes
+        `IndentationError: unexpected indent` when we wrap in our own `try:`.
+        This normalises those lines back to the previous indent level, but only
+        when we're not in a multi-line bracket/paren context.
+        """
+        lines = src.splitlines()
+        out = []
+        prev_indent = 0
+        prev_ends_colon = False
+        paren_depth = 0  # (), [], {} depth across lines (very approximate)
+
+        for raw in lines:
+            stripped = raw.lstrip()
+            if not stripped:  # blank / whitespace line
+                out.append(raw)
+                continue
+
+            indent = len(raw) - len(stripped)
+
+            # Only flatten if:
+            # - we're not inside a (...) / [...] / {...} block, and
+            # - previous logical line did NOT end with ':', and
+            # - this line is indented more than the previous indent.
+            if paren_depth == 0 and not prev_ends_colon and indent > prev_indent:
+                indent = prev_indent
+            new_line = " " * indent + stripped
+            out.append(new_line)
+
+            # Update simple state for next line
+            txt = stripped
+            paren_depth += txt.count("(") + txt.count("[") + txt.count("{")
+            paren_depth -= txt.count(")") + txt.count("]") + txt.count("}")
+            prev_ends_colon = txt.rstrip().endswith(":")
+            prev_indent = indent
+
+        return "\n".join(out)
+
+    def _fallback_snippet() -> str:
+        """
+        Final-resort snippet when the LLM code is syntactically broken.
+
+        It:
+        - attempts a simple automatic ML task (classification or regression)
+        - then falls back to generic but useful EDA.
+
+        It assumes `from syntaxmatrix.preface import *` has already been done,
+        so `_SMX_OHE`, `_SMX_call`, `SB_histplot`, `_SMX_export_png` and the
+        patched `show()` are available.
+        """
+        import textwrap
+
+        return textwrap.dedent(
+            """\
+            import pandas as pd
+            import numpy as np
+            import matplotlib.pyplot as plt
+            from sklearn.model_selection import train_test_split
+            from sklearn.compose import ColumnTransformer
+            from sklearn.preprocessing import StandardScaler
+            from sklearn.linear_model import LogisticRegression, LinearRegression
+            from sklearn.metrics import accuracy_score, r2_score, mean_absolute_error
+
+            df = df.copy()
+
+            # --- basic column introspection ---
+            num_cols = df.select_dtypes(include=['number', 'bool']).columns.tolist()
+            cat_cols = [c for c in df.columns if c not in num_cols]
+
+            # --- attempt an automatic ML task ---
+            target_col = None
+            task_type = None
+
+            # Prefer a low-cardinality target (classification)
+            for c in num_cols + cat_cols:
+                uniq = df[c].dropna().nunique()
+                if 2 <= uniq <= 10:
+                    target_col = c
+                    task_type = 'classification'
+                    break
+
+            # If none found, try a numeric regression target
+            if target_col is None and num_cols:
+                target_col = num_cols[-1]
+                task_type = 'regression'
+
+            if target_col is not None:
+                try:
+                    X = df.drop(columns=[target_col]).copy()
+                    y = df[target_col].copy()
+
+                    num_feats = X.select_dtypes(include=['number', 'bool']).columns.tolist()
+                    cat_feats = [c for c in X.columns if c not in num_feats]
+
+                    pre = ColumnTransformer(
+                        transformers=[
+                            ('num', StandardScaler(), num_feats),
+                            ('cat', _SMX_OHE(handle_unknown='ignore'), cat_feats),
+                        ],
+                        remainder='drop',
+                    )
+
+                    from sklearn.pipeline import Pipeline
+                    if task_type == 'classification':
+                        model = LogisticRegression(max_iter=1000)
+                    else:
+                        model = LinearRegression()
+
+                    pipe = Pipeline([('pre', pre), ('model', model)])
+
+                    X_train, X_test, y_train, y_test = train_test_split(
+                        X, y, test_size=0.25, random_state=42
+                    )
+
+                    pipe.fit(X_train, y_train)
+                    y_pred = pipe.predict(X_test)
+
+                    if task_type == 'classification':
+                        # If predictions look like probabilities, convert to labels
+                        if getattr(y_pred, 'ndim', 1) > 1 and y_pred.shape[1] > 1:
+                            y_pred_labels = y_pred.argmax(axis=1)
+                        else:
+                            try:
+                                y_pred_labels = (y_pred > 0.5).astype(y_test.dtype)
+                            except Exception:
+                                y_pred_labels = y_pred
+
+                        acc = _SMX_call(accuracy_score, y_test, y_pred_labels)
+                        show({
+                            'target': target_col,
+                            'task': 'classification',
+                            'accuracy': acc,
+                        })
+                    else:
+                        r2 = _SMX_call(r2_score, y_test, y_pred)
+                        mae = _SMX_call(mean_absolute_error, y_test, y_pred)
+                        show({
+                            'target': target_col,
+                            'task': 'regression',
+                            'r2': r2,
+                            'mae': mae,
+                        })
+
+                except Exception as _ml_e:
+                    show(f"⚠ ML fallback failed: {type(_ml_e).__name__}: {_ml_e}")
+
+            # --- EDA fallback that still helps answer the question ---
+            try:
+                info = {
+                    'rows': len(df),
+                    'cols': len(df.columns),
+                    'numeric_cols': len(num_cols),
+                    'categorical_cols': len(cat_cols),
+                }
+                show(df.head(), title='Sample of data')
+                show(info, title='Dataset summary')
+
+                # Quick univariate look if we have numeric columns
+                if num_cols:
+                    SB_histplot()
+                    _SMX_export_png()
+            except Exception as _eda_e:
+                show(f"⚠ EDA fallback failed: {type(_eda_e).__name__}: {_eda_e}")
+            """
+        )
+
+    def _strip_file_io_ops(code: str) -> str:
+        """
+        Remove obvious local file I/O operations in LLM code
+        so nothing writes to the container filesystem.
+        """
+        # 1) Methods like df.to_csv(...), df.to_excel(...), etc.
+        FILE_WRITE_METHODS = (
+            "to_csv", "to_excel", "to_pickle", "to_parquet",
+            "to_json", "to_hdf",
+        )
+
+        for mname in FILE_WRITE_METHODS:
+            pat = re.compile(
+                rf"(?m)^(\s*)([A-Za-z_][A-Za-z0-9_\.]*)\s*\.\s*{mname}\s*\([^)]*\)\s*$"
+            )
+
+            def _repl(match):
+                indent = match.group(1)
+                expr = match.group(2)
+                return f"{indent}# [SMX] stripped file write: {expr}.{mname}(...)"
+
+            code = pat.sub(_repl, code)
+
+        # 2) plt.savefig(...) calls
+        pat_savefig = re.compile(r"(?m)^(\s*)(plt\.savefig\s*\([^)]*\)\s*)$")
+        code = pat_savefig.sub(
+            lambda m: f"{m.group(1)}# [SMX] stripped savefig: {m.group(2).strip()}",
+            code,
+        )
+
+        # 3) with open(..., 'w'/'wb') as f:
+        pat_with_open = re.compile(
+            r"(?m)^(\s*)with\s+open\([^)]*['\"]w[b]?['\"][^)]*\)\s+as\s+([A-Za-z_][A-Za-z0-9_]*)\s*:\s*$"
+        )
+
+        def _with_open_repl(match):
+            indent = match.group(1)
+            var = match.group(2)
+            return f"{indent}if False:  # [SMX] file write stripped (was: with open(... as {var}))"
+
+        code = pat_with_open.sub(_with_open_repl, code)
+
+        # 4) joblib.dump(...), pickle.dump(...)
+        for mod in ("joblib", "pickle"):
+            pat = re.compile(rf"(?m)^(\s*){mod}\.dump\s*\([^)]*\)\s*$")
+            code = pat.sub(
+                lambda m: f"{m.group(1)}# [SMX] stripped {mod}.dump(...)",
+                code,
+            )
+
+        # 5) bare open(..., 'w'/'wb') calls
+        pat_open = re.compile(
+            r"(?m)^(\s*)open\([^)]*['\"]w[b]?['\"][^)]*\)\s*$"
+        )
+        code = pat_open.sub(
+            lambda m: f"{m.group(1)}# [SMX] stripped open(..., 'w'/'wb')",
+            code,
+        )
+
+        return code
+
     # Register and run patches once per execution
     for _patch in (
         _smx_patch_mean_squared_error_squared_kw,
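`_strip_file_io_ops` is likewise nested inside `harden_ai_code`, so the sketch below re-implements only its first rule as a standalone function to show the intent (regex copied from the diff; the function name and sample input are hypothetical):

    import re

    def strip_df_writes(code: str) -> str:
        # Comment out single-line DataFrame writes such as df.to_csv(...),
        # mirroring rule 1 of _strip_file_io_ops above.
        pat = re.compile(r"(?m)^(\s*)([A-Za-z_][A-Za-z0-9_\.]*)\s*\.\s*to_csv\s*\([^)]*\)\s*$")
        return pat.sub(
            lambda m: f"{m.group(1)}# [SMX] stripped file write: {m.group(2)}.to_csv(...)",
            code,
        )

    print(strip_df_writes("df.to_csv('out.csv')"))
    # -> # [SMX] stripped file write: df.to_csv(...)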
@@ -235,412 +544,6 @@ def harden_ai_code(code: str) -> str:
     except Exception:
         pass
 
-    PREFACE = (
-        "# === SMX Auto-Hardening Preface (do not edit) ===\n"
-        "import warnings, numpy as np, pandas as pd, matplotlib.pyplot as plt\n"
-        "warnings.filterwarnings('ignore')\n"
-        "try:\n"
-        "    import seaborn as sns\n"
-        "except Exception:\n"
-        "    class _Dummy:\n"
-        "        def __getattr__(self, name):\n"
-        "            def _f(*a, **k):\n"
-        "                from syntaxmatrix.display import show\n"
-        "                show('⚠ seaborn not available; plot skipped.')\n"
-        "            return _f\n"
-        "    sns = _Dummy()\n"
-        "\n"
-        "from syntaxmatrix.display import show as _SMX_base_show\n"
-        "def _SMX_caption_from_ctx():\n"
-        "    g = globals()\n"
-        "    t = g.get('refined_question') or g.get('askai_question') or 'Table'\n"
-        "    return str(t).strip().splitlines()[0][:120]\n"
-        "\n"
-        "def show(obj, title=None):\n"
-        "    try:\n"
-        "        import pandas as pd\n"
-        "        if isinstance(obj, pd.DataFrame):\n"
-        "            cap = (title or _SMX_caption_from_ctx())\n"
-        "            try:\n"
-        "                return _SMX_base_show(obj.style.set_caption(cap))\n"
-        "            except Exception:\n"
-        "                pass\n"
-        "    except Exception:\n"
-        "        pass\n"
-        "    return _SMX_base_show(obj)\n"
-        "\n"
-        "def _SMX_axes_have_titles(fig=None):\n"
-        "    import matplotlib.pyplot as _plt\n"
-        "    fig = fig or _plt.gcf()\n"
-        "    try:\n"
-        "        for _ax in fig.get_axes():\n"
-        "            if (_ax.get_title() or '').strip():\n"
-        "                return True\n"
-        "    except Exception:\n"
-        "        pass\n"
-        "    return False\n"
-        "\n"
-        "def _SMX_export_png():\n"
-        "    import io, base64\n"
-        "    fig = plt.gcf()\n"
-        "    try:\n"
-        "        if not _SMX_axes_have_titles(fig):\n"
-        "            fig.suptitle(_SMX_caption_from_ctx(), fontsize=10)\n"
-        "    except Exception:\n"
-        "        pass\n"
-        "    buf = io.BytesIO()\n"
-        "    plt.savefig(buf, format='png', bbox_inches='tight')\n"
-        "    buf.seek(0)\n"
-        "    from IPython.display import display, HTML\n"
-        "    _img = base64.b64encode(buf.read()).decode('ascii')\n"
-        "    display(HTML(f\"<img src='data:image/png;base64,{_img}' style='max-width:100%;height:auto;border:1px solid #ccc;border-radius:4px;'/>\"))\n"
-        "    plt.close()\n"
-        "\n"
-        "def _pick_df():\n"
-        "    return globals().get('df', None)\n"
-        "\n"
-        "def _pick_ax_slot():\n"
-        "    ax = None\n"
-        "    try:\n"
-        "        _axes = globals().get('axes', None)\n"
-        "        import numpy as _np\n"
-        "        if _axes is not None:\n"
-        "            arr = _np.ravel(_axes)\n"
-        "            for _a in arr:\n"
-        "                try:\n"
-        "                    if hasattr(_a,'has_data') and not _a.has_data():\n"
-        "                        ax = _a; break\n"
-        "                except Exception:\n"
-        "                    continue\n"
-        "    except Exception:\n"
-        "        ax = None\n"
-        "    return ax\n"
-        "\n"
-        "def _first_numeric(_d):\n"
-        "    import numpy as np, pandas as pd\n"
-        "    try:\n"
-        "        preferred = [\"median_house_value\", \"price\", \"value\", \"target\", \"label\", \"y\"]\n"
-        "        for c in preferred:\n"
-        "            if c in _d.columns and pd.api.types.is_numeric_dtype(_d[c]):\n"
-        "                return c\n"
-        "        cols = _d.select_dtypes(include=[np.number]).columns.tolist()\n"
-        "        return cols[0] if cols else None\n"
-        "    except Exception:\n"
-        "        return None\n"
-        "\n"
-        "def _first_categorical(_d):\n"
-        "    import pandas as pd, numpy as np\n"
-        "    try:\n"
-        "        num = set(_d.select_dtypes(include=[np.number]).columns.tolist())\n"
-        "        cand = [c for c in _d.columns if c not in num and _d[c].nunique(dropna=True) <= 50]\n"
-        "        return cand[0] if cand else None\n"
-        "    except Exception:\n"
-        "        return None\n"
-        "\n"
-        "boxplot = barplot = histplot = distplot = lineplot = countplot = heatmap = pairplot = None\n"
-        "\n"
-        "def _safe_plot(func, *args, **kwargs):\n"
-        "    try:\n"
-        "        ax = func(*args, **kwargs)\n"
-        "        if ax is None:\n"
-        "            ax = plt.gca()\n"
-        "        try:\n"
-        "            if hasattr(ax, 'has_data') and not ax.has_data():\n"
-        "                from syntaxmatrix.display import show as _show\n"
-        "                _show('⚠ Empty plot: no data drawn.')\n"
-        "        except Exception:\n"
-        "            pass\n"
-        "        try: plt.tight_layout()\n"
-        "        except Exception: pass\n"
-        "        return ax\n"
-        "    except Exception as e:\n"
-        "        from syntaxmatrix.display import show as _show\n"
-        "        _show(f'⚠ Plot skipped: {type(e).__name__}: {e}')\n"
-        "        return None\n"
-        "\n"
-        "def SB_histplot(*a, **k):\n"
-        "    _missing = (getattr(sns, '__class__', type(sns)).__name__ == '_Dummy')\n"
-        "    _sentinel = (len(a) >= 1 and a[0] is None)\n"
-        "    if (not a or _sentinel) and not k:\n"
-        "        d = _pick_df()\n"
-        "        if d is not None:\n"
-        "            x = _first_numeric(d)\n"
-        "            if x is not None:\n"
-        "                def _draw():\n"
-        "                    plt.hist(d[x].dropna())\n"
-        "                    ax = plt.gca()\n"
-        "                    if not (ax.get_title() or '').strip():\n"
-        "                        ax.set_title(f'Distribution of {x}')\n"
-        "                    return ax\n"
-        "                return _safe_plot(lambda **kw: _draw())\n"
-        "    if _missing:\n"
-        "        return _safe_plot(lambda **kw: plt.hist([]))\n"
-        "    if _sentinel:\n"
-        "        a = a[1:]\n"
-        "    return _safe_plot(getattr(sns,'histplot', plt.hist), *a, **k)\n"
-        "\n"
-        "def SB_barplot(*a, **k):\n"
-        "    _missing = (getattr(sns, '__class__', type(sns)).__name__ == '_Dummy')\n"
-        "    _sentinel = (len(a) >= 1 and a[0] is None)\n"
-        "    _ax = k.get('ax') or _pick_ax_slot()\n"
-        "    if _ax is not None:\n"
-        "        try: plt.sca(_ax)\n"
-        "        except Exception: pass\n"
-        "        k.setdefault('ax', _ax)\n"
-        "    if (not a or _sentinel) and not k:\n"
-        "        d = _pick_df()\n"
-        "        if d is not None:\n"
-        "            x = _first_categorical(d)\n"
-        "            y = _first_numeric(d)\n"
-        "            if x and y:\n"
-        "                import pandas as _pd\n"
-        "                g = d.groupby(x)[y].mean().reset_index()\n"
-        "                def _draw():\n"
-        "                    if _missing:\n"
-        "                        plt.bar(g[x], g[y])\n"
-        "                    else:\n"
-        "                        sns.barplot(data=g, x=x, y=y, ax=k.get('ax'))\n"
-        "                    ax = plt.gca()\n"
-        "                    if not (ax.get_title() or '').strip():\n"
-        "                        ax.set_title(f'Mean {y} by {x}')\n"
-        "                    return ax\n"
-        "                return _safe_plot(lambda **kw: _draw())\n"
-        "    if _missing:\n"
-        "        return _safe_plot(lambda **kw: plt.bar([], []))\n"
-        "    if _sentinel:\n"
-        "        a = a[1:]\n"
-        "    return _safe_plot(sns.barplot, *a, **k)\n"
-        "\n"
-        "def SB_boxplot(*a, **k):\n"
-        "    _missing = (getattr(sns, '__class__', type(sns)).__name__ == '_Dummy')\n"
-        "    _sentinel = (len(a) >= 1 and a[0] is None)\n"
-        "    _ax = k.get('ax') or _pick_ax_slot()\n"
-        "    if _ax is not None:\n"
-        "        try: plt.sca(_ax)\n"
-        "        except Exception: pass\n"
-        "        k.setdefault('ax', _ax)\n"
-        "    if (not a or _sentinel) and not k:\n"
-        "        d = _pick_df()\n"
-        "        if d is not None:\n"
-        "            x = _first_categorical(d)\n"
-        "            y = _first_numeric(d)\n"
-        "            if x and y:\n"
-        "                def _draw():\n"
-        "                    if _missing:\n"
-        "                        plt.boxplot(d[y].dropna())\n"
-        "                    else:\n"
-        "                        sns.boxplot(data=d, x=x, y=y, ax=k.get('ax'))\n"
-        "                    ax = plt.gca()\n"
-        "                    if not (ax.get_title() or '').strip():\n"
-        "                        ax.set_title(f'Distribution of {y} by {x}')\n"
-        "                    return ax\n"
-        "                return _safe_plot(lambda **kw: _draw())\n"
-        "    if _missing:\n"
-        "        return _safe_plot(lambda **kw: plt.boxplot([]))\n"
-        "    if _sentinel:\n"
-        "        a = a[1:]\n"
-        "    return _safe_plot(sns.boxplot, *a, **k)\n"
-        "\n"
-        "def SB_scatterplot(*a, **k):\n"
-        "    _missing = (getattr(sns, '__class__', type(sns)).__name__ == '_Dummy')\n"
-        "    fn = getattr(sns,'scatterplot', None)\n"
-        "    # If seaborn is unavailable OR the caller passed (data=..., x='col', y='col'),\n"
-        "    # use a robust matplotlib path that looks up data and coerces to numeric.\n"
-        "    if _missing or fn is None:\n"
-        "        data = k.get('data'); x = k.get('x'); y = k.get('y')\n"
-        "        if data is not None and isinstance(x, str) and isinstance(y, str) and x in data.columns and y in data.columns:\n"
-        "            xs = pd.to_numeric(data[x], errors='coerce')\n"
-        "            ys = pd.to_numeric(data[y], errors='coerce')\n"
-        "            m = xs.notna() & ys.notna()\n"
-        "            def _draw():\n"
-        "                plt.scatter(xs[m], ys[m])\n"
-        "                ax = plt.gca()\n"
-        "                if not (ax.get_title() or '').strip():\n"
-        "                    ax.set_title(f'{y} vs {x}')\n"
-        "                return ax\n"
-        "            return _safe_plot(lambda **kw: _draw())\n"
-        "        # else: fall back to auto-pick two numeric columns\n"
-        "        d = _pick_df()\n"
-        "        if d is not None:\n"
-        "            num = d.select_dtypes(include=[np.number]).columns.tolist()\n"
-        "            if len(num) >= 2:\n"
-        "                def _draw2():\n"
-        "                    plt.scatter(d[num[0]], d[num[1]])\n"
-        "                    ax = plt.gca()\n"
-        "                    if not (ax.get_title() or '').strip():\n"
-        "                        ax.set_title(f'{num[1]} vs {num[0]}')\n"
-        "                    return ax\n"
-        "                return _safe_plot(lambda **kw: _draw2())\n"
-        "        return _safe_plot(lambda **kw: plt.scatter([], []))\n"
-        "    # seaborn path\n"
-        "    return _safe_plot(fn, *a, **k)\n"
-        "\n"
-        "def SB_heatmap(*a, **k):\n"
-        "    _missing = (getattr(sns, '__class__', type(sns)).__name__ == '_Dummy')\n"
-        "    data = None\n"
-        "    if a:\n"
-        "        data = a[0]\n"
-        "    elif 'data' in k:\n"
-        "        data = k['data']\n"
-        "    if data is None:\n"
-        "        d = _pick_df()\n"
-        "        try:\n"
-        "            if d is not None:\n"
-        "                import numpy as _np\n"
-        "                data = d.select_dtypes(include=[_np.number]).corr()\n"
-        "        except Exception:\n"
-        "            data = None\n"
-        "    if data is None:\n"
-        "        from syntaxmatrix.display import show as _show\n"
-        "        _show('⚠ Heatmap skipped: no data.')\n"
-        "        return None\n"
-        "    if not _missing and hasattr(sns, 'heatmap'):\n"
-        "        _k = {kk: vv for kk, vv in k.items() if kk != 'data'}\n"
-        "        def _draw():\n"
-        "            ax = sns.heatmap(data, **_k)\n"
-        "            try:\n"
-        "                ax = ax or plt.gca()\n"
-        "                if not (ax.get_title() or '').strip():\n"
-        "                    ax.set_title('Correlation Heatmap')\n"
-        "            except Exception:\n"
-        "                pass\n"
-        "            return ax\n"
-        "        return _safe_plot(lambda **kw: _draw())\n"
-        "    def _mat_heat():\n"
-        "        im = plt.imshow(data, aspect='auto')\n"
-        "        try: plt.colorbar()\n"
-        "        except Exception: pass\n"
-        "        try:\n"
-        "            cols = list(getattr(data, 'columns', []))\n"
-        "            rows = list(getattr(data, 'index', []))\n"
-        "            if cols: plt.xticks(range(len(cols)), cols, rotation=90)\n"
-        "            if rows: plt.yticks(range(len(rows)), rows)\n"
-        "        except Exception:\n"
-        "            pass\n"
-        "        ax = plt.gca()\n"
-        "        try:\n"
-        "            if not (ax.get_title() or '').strip():\n"
-        "                ax.set_title('Correlation Heatmap')\n"
-        "        except Exception:\n"
-        "            pass\n"
-        "        return ax\n"
-        "    return _safe_plot(lambda **kw: _mat_heat())\n"
-        "\n"
-        "def _safe_concat(objs, **kwargs):\n"
-        "    import pandas as _pd\n"
-        "    if objs is None: return _pd.DataFrame()\n"
-        "    if isinstance(objs,(list,tuple)) and len(objs)==0: return _pd.DataFrame()\n"
-        "    try: return _pd.concat(objs, **kwargs)\n"
-        "    except Exception as e:\n"
-        "        show(f'⚠ concat skipped: {e}')\n"
-        "        return _pd.DataFrame()\n"
-        "\n"
-        "from sklearn.preprocessing import OneHotEncoder\n"
-        "import inspect\n"
-        "def _SMX_OHE(**k):\n"
-        "    # normalise arg name across sklearn versions\n"
-        "    if 'sparse' in k and 'sparse_output' not in k:\n"
-        "        k['sparse_output'] = k.pop('sparse')\n"
-        "    k.setdefault('handle_unknown','ignore')\n"
-        "    k.setdefault('sparse_output', False)\n"
-        "    try:\n"
-        "        sig = inspect.signature(OneHotEncoder)\n"
-        "        if 'sparse_output' not in sig.parameters and 'sparse_output' in k:\n"
-        "            k['sparse'] = k.pop('sparse_output')\n"
-        "    except Exception:\n"
-        "        if 'sparse_output' in k:\n"
-        "            k['sparse'] = k.pop('sparse_output')\n"
-        "    return OneHotEncoder(**k)\n"
-        "\n"
-        "import numpy as _np\n"
-        "def _SMX_mm(a, b):\n"
-        "    try:\n"
-        "        return a @ b  # normal path\n"
-        "    except Exception:\n"
-        "        try:\n"
-        "            A = _np.asarray(a); B = _np.asarray(b)\n"
-        "            # If same 2D shape (e.g. (n,k) & (n,k)), treat as row-wise dot\n"
-        "            if A.ndim==2 and B.ndim==2 and A.shape==B.shape:\n"
-        "                return (A * B).sum(axis=1)\n"
-        "            # Otherwise try element-wise product (broadcast if possible)\n"
-        "            return A * B\n"
-        "        except Exception as e:\n"
-        "            from syntaxmatrix.display import show\n"
-        "            show(f'⚠ Matmul relaxed: {type(e).__name__}: {e}'); return _np.nan\n"
-        "\n"
-        "def _SMX_call(fn, *a, **k):\n"
-        "    try:\n"
-        "        return fn(*a, **k)\n"
-        "    except TypeError as e:\n"
-        "        msg = str(e)\n"
-        "        if \"unexpected keyword argument 'squared'\" in msg:\n"
-        "            k.pop('squared', None)\n"
-        "            return fn(*a, **k)\n"
-        "        raise\n"
-        "\n"
-        "def _SMX_rmse(y_true, y_pred):\n"
-        "    try:\n"
-        "        from sklearn.metrics import mean_squared_error as _mse\n"
-        "        try:\n"
-        "            return _mse(y_true, y_pred, squared=False)\n"
-        "        except TypeError:\n"
-        "            return (_mse(y_true, y_pred)) ** 0.5\n"
-        "    except Exception:\n"
-        "        import numpy as _np\n"
-        "        yt = _np.asarray(y_true, dtype=float)\n"
-        "        yp = _np.asarray(y_pred, dtype=float)\n"
-        "        diff = yt - yp\n"
-        "        return float((_np.mean(diff * diff)) ** 0.5)\n"
-        "\n"
-        "import pandas as _pd\n"
-        "import numpy as _np\n"
-        "def _SMX_autocoerce_dates(_df):\n"
-        "    if _df is None or not hasattr(_df, 'columns'): return\n"
-        "    for c in list(_df.columns):\n"
-        "        s = _df[c]\n"
-        "        n = str(c).lower()\n"
-        "        if _pd.api.types.is_datetime64_any_dtype(s):\n"
-        "            continue\n"
-        "        if _pd.api.types.is_object_dtype(s) or ('date' in n or 'time' in n or 'timestamp' in n or n.endswith('_dt')):\n"
-        "            try:\n"
-        "                conv = _pd.to_datetime(s, errors='coerce', utc=True).dt.tz_localize(None)\n"
-        "                # accept only if at least 10% (min 3) parse as dates\n"
-        "                if getattr(conv, 'notna', lambda: _pd.Series([]))().sum() >= max(3, int(0.1*len(_df))):\n"
-        "                    _df[c] = conv\n"
-        "            except Exception:\n"
-        "                pass\n"
-        "\n"
-        "def _SMX_autocoerce_numeric(_df, cols):\n"
-        "    if _df is None: return\n"
-        "    for c in cols:\n"
-        "        if c in getattr(_df, 'columns', []):\n"
-        "            try:\n"
-        "                _df[c] = _pd.to_numeric(_df[c], errors='coerce')\n"
-        "            except Exception:\n"
-        "                pass\n"
-        "\n"
-        "def show(obj, title=None):\n"
-        "    try:\n"
-        "        import pandas as pd, numbers\n"
-        "        cap = (title or _SMX_caption_from_ctx())\n"
-        "        # 1) DataFrame → Styler with caption\n"
-        "        if isinstance(obj, pd.DataFrame):\n"
-        "            try: return _SMX_base_show(obj.style.set_caption(cap))\n"
-        "            except Exception: pass\n"
-        "        # 2) dict of scalars → DataFrame with caption\n"
-        "        if isinstance(obj, dict) and all(isinstance(v, numbers.Number) for v in obj.values()):\n"
-        "            df_ = pd.DataFrame({'metric': list(obj.keys()), 'value': list(obj.values())})\n"
-        "            try: return _SMX_base_show(df_.style.set_caption(cap))\n"
-        "            except Exception: return _SMX_base_show(df_)\n"
-        "    except Exception:\n"
-        "        pass\n"
-        "    return _SMX_base_show(obj)\n"
-    )
-
-    PREFACE_IMPORT = "from syntaxmatrix.smx_preface import *\n"
-    # if PREFACE not in code:
-    #     code = PREFACE_IMPORT + code
-
     fixed = code
 
     fixed = re.sub(
@@ -690,6 +593,24 @@ def harden_ai_code(code: str) -> str:
         fixed
     )
 
+    try:
+        ast.parse(fixed)
+    except (SyntaxError, IndentationError):
+        fixed = _fallback_snippet()
+
+    fixed = re.sub(
+        r"except\s+Exception\s+as\s+e:\s*\n\s*show\(\.\.\.\)",
+        "except Exception as e:\n    show(f\"⚠ Block skipped due to: {type(e).__name__}: {e}\")",
+        fixed,
+    )
+
+    # Fix placeholder Ellipsis handlers from LLM
+    fixed = re.sub(
+        r"except\s+Exception\s+as\s+e:\s*\n\s*show\(\.\.\.\)",
+        "except Exception as e:\n    show(f\"⚠ Block skipped due to: {type(e).__name__}: {e}\")",
+        fixed,
+    )
+
     try:
         class _SMXMatmulRewriter(ast.NodeTransformer):
             def visit_BinOp(self, node):
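A standalone sketch of the validate-then-fall-back step added above (names hypothetical; the real code calls the nested `_fallback_snippet()` instead of a caller-supplied string):

    import ast

    def parse_or_fallback(candidate: str, fallback: str) -> str:
        # Mirror of the new check: keep the LLM code only if it parses.
        try:
            ast.parse(candidate)
            return candidate
        except (SyntaxError, IndentationError):
            return fallback

    print(parse_or_fallback("def broken(:", "show(df.head())"))  # -> show(df.head())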
@@ -708,44 +629,16 @@ def harden_ai_code(code: str) -> str:
     # 6) Final safety wrapper
     fixed = fixed.replace("\t", "    ")
     fixed = textwrap.dedent(fixed).strip("\n")
-
+    fixed = _ensure_metrics_imports(fixed)
     fixed = _strip_stray_backrefs(fixed)
     fixed = _wrap_metric_calls(fixed)
+    fixed = _fix_unexpected_indent(fixed)
+    fixed = _patch_feature_coef_dataframe(fixed)
+    fixed = _strip_file_io_ops(fixed)
 
-    #
-
-
-        ast.parse(fixed)
-    except (SyntaxError, IndentationError):
-        fixed = (
-            "import pandas as pd\n"
-            "df = df.copy()\n"
-            "_info = {\n"
-            "    'rows': len(df),\n"
-            "    'cols': len(df.columns),\n"
-            "    'numeric_cols': len(df.select_dtypes(include=['number','bool']).columns),\n"
-            "    'categorical_cols': len(df.select_dtypes(exclude=['number','bool']).columns),\n"
-            "}\n"
-            "show(df.head(), title='Sample of data')\n"
-            "show(_info, title='Dataset summary')\n"
-            "try:\n"
-            "    _num = df.select_dtypes(include=['number','bool']).columns.tolist()\n"
-            "    if _num:\n"
-            "        SB_histplot()\n"
-            "        _SMX_export_png()\n"
-            "except Exception as e:\n"
-            "    show(f\"⚠ Fallback visualisation failed: {type(e).__name__}: {e}\")\n"
-        )
-
-    # Fix placeholder Ellipsis handlers from LLM
-    fixed = re.sub(
-        r"except\s+Exception\s+as\s+e:\s*\n\s*show\(\.\.\.\)",
-        "except Exception as e:\n    show(f\"⚠ Block skipped due to: {type(e).__name__}: {e}\")",
-        fixed,
-    )
-
-    wrapped = PREFACE + "try:\n" + _indent(fixed) + "\nexcept Exception as e:\n    show(...)\n"
-    wrapped = wrapped.lstrip()
+    # Import shared preface helpers once and wrap the LLM body safely
+    header = "from syntaxmatrix.preface import *\n\n"
+    wrapped = header + wrap_llm_code_safe(fixed)
     return wrapped
 
 
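A rough sketch of the final output shape after this change: one preface import followed by the safe wrapper, instead of concatenating the old inline PREFACE string (assumes syntaxmatrix is installed; the hardened body is illustrative):

    from syntaxmatrix.utils import wrap_llm_code_safe

    fixed = "show(df.describe())"  # hypothetical hardened LLM body
    wrapped = "from syntaxmatrix.preface import *\n\n" + wrap_llm_code_safe(fixed)
    print(wrapped.splitlines()[0])  # -> from syntaxmatrix.preface import *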
@@ -754,17 +647,6 @@ def indent_code(code: str, spaces: int = 4) -> str:
     return "\n".join(pad + line for line in code.splitlines())
 
 
-def wrap_llm_code_safe(code: str) -> str:
-    # Swallow any runtime error from the LLM block instead of crashing the run
-    return (
-        "# __SAFE_WRAPPED__\n"
-        "try:\n" + indent_code(code) + "\n"
-        "except Exception as e:\n"
-        "    from syntaxmatrix.display import show\n"
-        "    show(f\"⚠️ Skipped LLM block due to: {type(e).__name__}: {e}\")\n"
-    )
-
-
 def fix_boxplot_placeholder(code: str) -> str:
     # Replace invalid 'sns.boxplot(boxplot)' with a safe call using df/group_label/m
     return re.sub(