syntaxmatrix 2.5.5.5__py3-none-any.whl → 2.5.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- syntaxmatrix/__init__.py +3 -2
- syntaxmatrix/agentic/agents.py +14 -23
- syntaxmatrix/auth.py +142 -5
- syntaxmatrix/core.py +34 -15
- syntaxmatrix/generate_page.py +17 -7
- syntaxmatrix/preface.py +550 -0
- syntaxmatrix/routes.py +238 -177
- syntaxmatrix/templates/change_password.html +124 -0
- syntaxmatrix/templates/dashboard.html +12 -10
- syntaxmatrix/utils.py +363 -481
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.5.6.1.dist-info}/METADATA +1 -1
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.5.6.1.dist-info}/RECORD +15 -13
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.5.6.1.dist-info}/WHEEL +0 -0
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.5.6.1.dist-info}/licenses/LICENSE.txt +0 -0
- {syntaxmatrix-2.5.5.5.dist-info → syntaxmatrix-2.5.6.1.dist-info}/top_level.txt +0 -0
syntaxmatrix/__init__.py
CHANGED
|
@@ -44,8 +44,8 @@ get_widget_value = _app_instance.get_widget_value
|
|
|
44
44
|
save_embed_model = _app_instance.save_embed_model
|
|
45
45
|
load_embed_model = _app_instance.load_embed_model
|
|
46
46
|
delete_embed_key = _app_instance.delete_embed_key
|
|
47
|
-
|
|
48
|
-
|
|
47
|
+
set_smxai_identity = _app_instance.set_smxai_identity
|
|
48
|
+
set_smxai_instructions = _app_instance.set_smxai_instructions
|
|
49
49
|
set_website_description = _app_instance.set_website_description
|
|
50
50
|
smiv_index = _app_instance.smiv_index
|
|
51
51
|
smpv_search = _app_instance.smpv_search
|
|
@@ -54,6 +54,7 @@ process_query_stream = _app_instance.process_query_stream
|
|
|
54
54
|
process_query = _app_instance.process_query
|
|
55
55
|
embed_query = _app_instance.embed_query
|
|
56
56
|
enable_user_files = _app_instance.enable_user_files
|
|
57
|
+
enable_registration = _app_instance.enable_registration
|
|
57
58
|
stream_write = _app_instance.stream_write
|
|
58
59
|
enable_stream = _app_instance.enable_stream
|
|
59
60
|
stream = _app_instance.stream
|
syntaxmatrix/agentic/agents.py
CHANGED
|
@@ -425,11 +425,12 @@ def refine_question_agent(raw_question: str, dataset_context: str | None = None)
|
|
|
425
425
|
|
|
426
426
|
return "Configure LLM Profiles or contact your administrator."
|
|
427
427
|
|
|
428
|
-
system_prompt = (
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
428
|
+
system_prompt = ("""
|
|
429
|
+
- You are a Machine Learning (ML) and Data Science (DS) expert.
|
|
430
|
+
- You rewrite user questions into clear ML job specifications to help AI assistant generate Python code that provides solution to the user question when it is run. Most user questions are vague. So, your goal is to ensure that your output guards the assistant agains making potential errors that you anticipated could arise due to the nature of the question.
|
|
431
|
+
- If a dataset summary is provided, use it to respect column and help you rewrite the question properly.
|
|
432
|
+
- DO NOT write andy prelude or preamble"
|
|
433
|
+
""")
|
|
433
434
|
|
|
434
435
|
user_prompt = f"User question:\n{raw_question}\n\n"
|
|
435
436
|
if dataset_context:
|
|
@@ -446,20 +447,7 @@ def refine_question_agent(raw_question: str, dataset_context: str | None = None)
|
|
|
446
447
|
|
|
447
448
|
|
|
448
449
|
def classify_ml_job_agent(refined_question, dataset_profile):
|
|
449
|
-
|
|
450
|
-
Instructs an LLM (gemini-2.5-flash) to analyze a task description
|
|
451
|
-
and return a list of associated machine learning job/task types.
|
|
452
|
-
This version uses a highly extensive, generalized list of ML jobs
|
|
453
|
-
to ensure robustness across all domains (NLP, CV, RL, etc.).
|
|
454
|
-
|
|
455
|
-
Args:
|
|
456
|
-
task_description: The detailed description of the statistical/ML task.
|
|
457
|
-
|
|
458
|
-
Returns:
|
|
459
|
-
A list of strings identifying the relevant ML jobs. Returns an empty
|
|
460
|
-
list if the API call fails or the output cannot be parsed.
|
|
461
|
-
"""
|
|
462
|
-
|
|
450
|
+
|
|
463
451
|
def ml_response(user_prompt, system_prompt, profile):
|
|
464
452
|
_profile = profile # _prof.get_profile["admin"]
|
|
465
453
|
|
|
@@ -571,10 +559,13 @@ def classify_ml_job_agent(refined_question, dataset_profile):
|
|
|
571
559
|
|
|
572
560
|
return "Configure LLM Profiles or contact your administrator."
|
|
573
561
|
|
|
574
|
-
system_prompt = (
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
562
|
+
system_prompt = ("""
|
|
563
|
+
You are a strict machine learning task classifier for an ML workbench.
|
|
564
|
+
Your goal is to correctly label the user's task specifications with the most relevant tags from a fixed list.
|
|
565
|
+
You Must always have 'data_preprocessing' as the 1st tag. Then add up to 4 to make 5 max. Your list, therefore, should have 1-5 tags. If you think a task is too complext for the given context, even if relevant, exclude it.
|
|
566
|
+
If no relevant tag, default to "data_preprocessing" and return that alone.
|
|
567
|
+
You should return only your list of tags, no prelude or preamble.
|
|
568
|
+
""")
|
|
578
569
|
|
|
579
570
|
# --- 1. Define the Master List of ML Tasks (Generalized) ---
|
|
580
571
|
ml_task_list = [
|
syntaxmatrix/auth.py
CHANGED
|
@@ -41,6 +41,19 @@ def init_auth_db():
|
|
|
41
41
|
);
|
|
42
42
|
""")
|
|
43
43
|
|
|
44
|
+
# Ensure new must_reset_password flag exists for mandatory first-login reset
|
|
45
|
+
try:
|
|
46
|
+
cur = conn.execute("PRAGMA table_info(users)")
|
|
47
|
+
cols = [row[1] for row in cur.fetchall()]
|
|
48
|
+
if "must_reset_password" not in cols:
|
|
49
|
+
conn.execute(
|
|
50
|
+
"ALTER TABLE users "
|
|
51
|
+
"ADD COLUMN must_reset_password INTEGER NOT NULL DEFAULT 0"
|
|
52
|
+
)
|
|
53
|
+
except Exception:
|
|
54
|
+
# Best-effort migration; if this fails we still let the app start
|
|
55
|
+
pass
|
|
56
|
+
|
|
44
57
|
# --- Roles table ---
|
|
45
58
|
conn.execute("""
|
|
46
59
|
CREATE TABLE IF NOT EXISTS roles (
|
|
@@ -302,6 +315,60 @@ def register_user(email:str, username:str, password:str, role:str = "user") -> b
|
|
|
302
315
|
finally:
|
|
303
316
|
conn.close()
|
|
304
317
|
|
|
318
|
+
def set_must_reset_by_email(email: str, must_reset: bool = True) -> None:
|
|
319
|
+
"""
|
|
320
|
+
Mark a user account as requiring a password reset (or clear the flag) by email.
|
|
321
|
+
Used when an admin creates a user with a temporary password.
|
|
322
|
+
"""
|
|
323
|
+
if not email:
|
|
324
|
+
return
|
|
325
|
+
conn = _get_conn()
|
|
326
|
+
try:
|
|
327
|
+
conn.execute(
|
|
328
|
+
"UPDATE users SET must_reset_password = ? WHERE email = ?",
|
|
329
|
+
(1 if must_reset else 0, email),
|
|
330
|
+
)
|
|
331
|
+
conn.commit()
|
|
332
|
+
finally:
|
|
333
|
+
conn.close()
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def user_must_reset_password(user_id: int) -> bool:
|
|
337
|
+
"""
|
|
338
|
+
Check whether this user is currently forced to change their password.
|
|
339
|
+
"""
|
|
340
|
+
if not user_id:
|
|
341
|
+
return False
|
|
342
|
+
conn = _get_conn()
|
|
343
|
+
try:
|
|
344
|
+
cur = conn.execute(
|
|
345
|
+
"SELECT must_reset_password FROM users WHERE id = ?",
|
|
346
|
+
(user_id,),
|
|
347
|
+
)
|
|
348
|
+
row = cur.fetchone()
|
|
349
|
+
finally:
|
|
350
|
+
conn.close()
|
|
351
|
+
if not row:
|
|
352
|
+
return False
|
|
353
|
+
return bool(row[0])
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def clear_must_reset(user_id: int) -> None:
|
|
357
|
+
"""
|
|
358
|
+
Clear the mandatory-reset flag (called after the user has changed their password).
|
|
359
|
+
"""
|
|
360
|
+
if not user_id:
|
|
361
|
+
return
|
|
362
|
+
conn = _get_conn()
|
|
363
|
+
try:
|
|
364
|
+
conn.execute(
|
|
365
|
+
"UPDATE users SET must_reset_password = 0 WHERE id = ?",
|
|
366
|
+
(user_id,),
|
|
367
|
+
)
|
|
368
|
+
conn.commit()
|
|
369
|
+
finally:
|
|
370
|
+
conn.close()
|
|
371
|
+
|
|
305
372
|
def authenticate(email:str, password:str) -> Optional[Dict]:
|
|
306
373
|
"""Return user dict if creds match, else None."""
|
|
307
374
|
conn = _get_conn()
|
|
@@ -315,15 +382,85 @@ def authenticate(email:str, password:str) -> Optional[Dict]:
|
|
|
315
382
|
return {"id": row[0], "email":row[1], "username": row[2], "role": row[4]}
|
|
316
383
|
return None
|
|
317
384
|
|
|
385
|
+
def verify_password(user_id: int, candidate: str) -> bool:
|
|
386
|
+
"""
|
|
387
|
+
Check whether `candidate` matches the current password of the user.
|
|
388
|
+
Used by the change-password flow.
|
|
389
|
+
"""
|
|
390
|
+
if not user_id or not candidate:
|
|
391
|
+
return False
|
|
392
|
+
|
|
393
|
+
conn = _get_conn()
|
|
394
|
+
try:
|
|
395
|
+
cur = conn.execute(
|
|
396
|
+
"SELECT password FROM users WHERE id = ?",
|
|
397
|
+
(user_id,),
|
|
398
|
+
)
|
|
399
|
+
row = cur.fetchone()
|
|
400
|
+
finally:
|
|
401
|
+
conn.close()
|
|
402
|
+
|
|
403
|
+
if not row:
|
|
404
|
+
return False
|
|
405
|
+
return check_password_hash(row[0], candidate)
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def update_password(user_id: int, new_password: str) -> None:
|
|
409
|
+
"""
|
|
410
|
+
Overwrite the user's password with a new hash.
|
|
411
|
+
"""
|
|
412
|
+
if not user_id or not new_password:
|
|
413
|
+
return
|
|
414
|
+
|
|
415
|
+
hashed = generate_password_hash(new_password)
|
|
416
|
+
conn = _get_conn()
|
|
417
|
+
try:
|
|
418
|
+
conn.execute(
|
|
419
|
+
"UPDATE users SET password = ? WHERE id = ?",
|
|
420
|
+
(hashed, user_id),
|
|
421
|
+
)
|
|
422
|
+
conn.commit()
|
|
423
|
+
finally:
|
|
424
|
+
conn.close()
|
|
425
|
+
|
|
426
|
+
def update_password(user_id: int, new_password: str) -> bool:
|
|
427
|
+
"""
|
|
428
|
+
Update the stored password hash for a given user id.
|
|
429
|
+
Returns True on success, False if something goes wrong.
|
|
430
|
+
"""
|
|
431
|
+
hashed = generate_password_hash(new_password)
|
|
432
|
+
conn = _get_conn()
|
|
433
|
+
try:
|
|
434
|
+
conn.execute(
|
|
435
|
+
"UPDATE users SET password = ? WHERE id = ?",
|
|
436
|
+
(hashed, user_id),
|
|
437
|
+
)
|
|
438
|
+
conn.commit()
|
|
439
|
+
return True
|
|
440
|
+
except Exception:
|
|
441
|
+
# We do not raise inside auth; caller shows a friendly message instead.
|
|
442
|
+
return False
|
|
443
|
+
finally:
|
|
444
|
+
conn.close()
|
|
445
|
+
|
|
318
446
|
def login_required(f):
|
|
319
447
|
@wraps(f)
|
|
320
|
-
def
|
|
321
|
-
if not session
|
|
322
|
-
flash("Please log in
|
|
448
|
+
def wrapper(*args, **kwargs):
|
|
449
|
+
if "user_id" not in session:
|
|
450
|
+
flash("Please log in.")
|
|
323
451
|
return redirect(url_for("login", next=request.path))
|
|
324
|
-
return f(*args, **kwargs)
|
|
325
|
-
return decorated
|
|
326
452
|
|
|
453
|
+
# If the account is flagged for a mandatory reset, force the user
|
|
454
|
+
# to the change-password screen before allowing anything else.
|
|
455
|
+
if session.get("must_reset_password") and request.endpoint not in (
|
|
456
|
+
"change_password",
|
|
457
|
+
"logout",
|
|
458
|
+
):
|
|
459
|
+
flash("Please set a new password before continuing.")
|
|
460
|
+
return redirect(url_for("change_password"))
|
|
461
|
+
|
|
462
|
+
return f(*args, **kwargs)
|
|
463
|
+
return wrapper
|
|
327
464
|
|
|
328
465
|
def admin_required(view):
|
|
329
466
|
@wraps(view)
|
syntaxmatrix/core.py
CHANGED
|
@@ -54,7 +54,7 @@ class SyntaxMUI:
|
|
|
54
54
|
port="5080",
|
|
55
55
|
user_icon="👩🏿🦲",
|
|
56
56
|
bot_icon="<img src='/static/icons/favicon.png' width=20' alt='bot'/>",
|
|
57
|
-
favicon="/static/icons/favicon.png",
|
|
57
|
+
favicon="", # /static/icons/favicon.png",
|
|
58
58
|
site_logo="<img src='/static/icons/logo.png' width='30' alt='logo'/>",
|
|
59
59
|
site_title="SyntaxMatrix",
|
|
60
60
|
project_name="smxAI",
|
|
@@ -75,6 +75,7 @@ class SyntaxMUI:
|
|
|
75
75
|
self.ui_mode = ui_mode
|
|
76
76
|
self.theme_toggle_enabled = False
|
|
77
77
|
self.user_files_enabled = False
|
|
78
|
+
self.registration_enabled = False
|
|
78
79
|
self.smxai_identity = SMXAI_CHAT_ID
|
|
79
80
|
self.smxai_instructions = SMXAI_CHAT_INSTRUCTIONS
|
|
80
81
|
self.website_description = SMXAI_WEBSITE_DESCRIPTION
|
|
@@ -311,6 +312,9 @@ class SyntaxMUI:
|
|
|
311
312
|
|
|
312
313
|
def enable_user_files(self):
|
|
313
314
|
self.user_files_enabled = True
|
|
315
|
+
|
|
316
|
+
def enable_registration(self):
|
|
317
|
+
self.registration_enabled = True
|
|
314
318
|
|
|
315
319
|
@staticmethod
|
|
316
320
|
def columns(components):
|
|
@@ -514,12 +518,12 @@ class SyntaxMUI:
|
|
|
514
518
|
# ──────────────────────────────────────────────────────────────
|
|
515
519
|
# *********** LLM CLIENT HELPERS **********************
|
|
516
520
|
# ──────────────────────────────────────────────────────────────
|
|
517
|
-
def
|
|
518
|
-
self.
|
|
521
|
+
def set_smxai_identity(self, profile):
|
|
522
|
+
self.set_smxai_identity = profile
|
|
519
523
|
|
|
520
524
|
|
|
521
|
-
def
|
|
522
|
-
self.
|
|
525
|
+
def set_smxai_instructions(self, instructions):
|
|
526
|
+
self.set_smxai_instructions = instructions
|
|
523
527
|
|
|
524
528
|
|
|
525
529
|
def set_website_description(self, desc):
|
|
@@ -1137,21 +1141,23 @@ class SyntaxMUI:
|
|
|
1137
1141
|
tasks = [str(t).strip().lower() for t in tasks if str(t).strip()]
|
|
1138
1142
|
|
|
1139
1143
|
ai_profile = """
|
|
1140
|
-
- You are a Python expert specializing in
|
|
1144
|
+
- You are a Python expert specializing in Data Science (DS) and Machine Learning (ML).
|
|
1141
1145
|
- Your task is to generate a single, complete, production-quality, executable Python script for a Jupyter-like Python kernel, based on the given instructions.
|
|
1142
1146
|
- The dataset is already loaded as a pandas DataFrame named `df` (no file I/O or file uploads).
|
|
1143
|
-
- Make a copy of `df` and name it `df_copy`.
|
|
1144
|
-
-
|
|
1147
|
+
- Make a copy of `df` and name it `df_copy`.
|
|
1148
|
+
- Make sure `df_copy` is preprocessed and cleaned, and name it `df_cleaned`, if not already done so.
|
|
1149
|
+
- Work only with `df_cleaned` to perform the ML tasks described in the given context.
|
|
1150
|
+
- Select your features and targets, from `df_cleaned`, with care and name it `required_cols`
|
|
1145
1151
|
- Create your 'df_filtered by doing: df_filtered = df_cleaned[required_cols].
|
|
1146
|
-
- Use the {TEMPLATE_CATALOGUE} below to educate yourself on which visualizations you will implement in the code.
|
|
1147
|
-
- The final output MUST
|
|
1152
|
+
- Use the {TEMPLATE_CATALOGUE} below to educate yourself on which visualizations you will implement in the code, and ensure the implementations are in the code you generate.
|
|
1153
|
+
- The final output MUST BE the complete, executable Python code only, enclosed in a single markdown code block (```python ... ```), and MUST BE able to fulfill the user's request: {tasks}.
|
|
1148
1154
|
- Do not include any explanatory text or markdown outside the code block.
|
|
1149
1155
|
"""
|
|
1150
1156
|
|
|
1151
1157
|
TEMPLATE_CATALOGUE = """
|
|
1152
1158
|
### Available SyntaxMatrix templates (use these instead of inventing new helpers)
|
|
1153
1159
|
|
|
1154
|
-
Visualisation templates
|
|
1160
|
+
Visualisation templates:
|
|
1155
1161
|
- viz_pie(df, category_col=None, top_k=8): pie/donut shares within a category.
|
|
1156
1162
|
- viz_stacked_bar(df, x=None, hue=None, normalise=True): composition across groups.
|
|
1157
1163
|
- viz_count_bar(df, category_col=None, top_k=12): counts/denominators by category.
|
|
@@ -1191,9 +1197,9 @@ class SyntaxMUI:
|
|
|
1191
1197
|
|
|
1192
1198
|
"""
|
|
1193
1199
|
### Template rules
|
|
1194
|
-
- You MAY call
|
|
1200
|
+
- You MAY call 1 or more templates if they matche the task.
|
|
1195
1201
|
- Do NOT invent template names.
|
|
1196
|
-
- If no template fits, write minimal direct pandas/sklearn/seaborn code instead.
|
|
1202
|
+
- If no template fits, write minimal direct pandas/sklearn/seaborn code instead, for visualization.
|
|
1197
1203
|
- Keep the solution short: avoid writing wrappers/utilities already handled by SyntaxMatrix hardener.
|
|
1198
1204
|
|
|
1199
1205
|
#### Template selection hint examples:
|
|
@@ -1216,8 +1222,7 @@ class SyntaxMUI:
|
|
|
1216
1222
|
set `random_state=42` where relevant.
|
|
1217
1223
|
4) Be defensive, but avoid hard-failing on optional fields:
|
|
1218
1224
|
- If the primary column, needed to answer the question, is missing, review your copy of the `df` again.
|
|
1219
|
-
Make sure that you selected the proper column.
|
|
1220
|
-
Never use a column/variable which isn't available or defined.
|
|
1225
|
+
- Make sure that you selected the proper column. Never use a column/variable which isn't available or defined.
|
|
1221
1226
|
- If a secondary/extra column is missing, show a warning with `show(...)` and continue using available fields.
|
|
1222
1227
|
- Handle missing values sensibly (drop rows for simple EDA; use `ColumnTransformer` + `SimpleImputer` for modelling).
|
|
1223
1228
|
- For categorical features in ML, use `OneHotEncoder(handle_unknown="ignore")`
|
|
@@ -1249,6 +1254,20 @@ class SyntaxMUI:
|
|
|
1249
1254
|
11) You MUST NOT reference any column outside Available columns: {AVAILABLE_COLUMNS}.
|
|
1250
1255
|
12) If asked to predict/classify, choose the target by matching the task text to Allowed columns
|
|
1251
1256
|
and never invent a new name.
|
|
1257
|
+
13) Treat df as the primary dataset you must work with.
|
|
1258
|
+
14) The dataset is already loaded as df (no file I/O or file uploads).
|
|
1259
|
+
15) All outputs must be visible to the user via the provided show(...) helper.
|
|
1260
|
+
16) Never use print(...); use show(...) instead.
|
|
1261
|
+
17) You MUST NOT read from or write to local files, folders, or external storage.
|
|
1262
|
+
- Do not call open(...), Path(...).write_text/write_bytes, or similar file APIs.
|
|
1263
|
+
- Do not use df.to_csv(...), df.to_excel(...), df.to_parquet(...),
|
|
1264
|
+
df.to_pickle(...), df.to_json(...), df.to_hdf(...), or any other
|
|
1265
|
+
method that writes to disk.
|
|
1266
|
+
- Do not call joblib.dump(...), pickle.dump(...), torch.save(...),
|
|
1267
|
+
numpy.save(...), numpy.savetxt(...), or similar saving functions.
|
|
1268
|
+
- Do not call plt.savefig(..., 'somefile.png') or any variant that
|
|
1269
|
+
writes an image to a filename. Plots must be rendered in-memory only.
|
|
1270
|
+
18) Keep everything in memory and surface results via show(...) or plots.
|
|
1252
1271
|
|
|
1253
1272
|
#### Cohort rules
|
|
1254
1273
|
When you generate plots for cohorts or categories, you MUST obey these rules:
|
syntaxmatrix/generate_page.py
CHANGED
|
@@ -35,15 +35,25 @@ _SMX_FADEIN_CSS = '''
|
|
|
35
35
|
</style>
|
|
36
36
|
'''.strip()
|
|
37
37
|
|
|
38
|
-
_SMX_LAYOUT_CSS =
|
|
38
|
+
_SMX_LAYOUT_CSS = """
|
|
39
39
|
<style>
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
40
|
+
#smx-root,
|
|
41
|
+
[id^="smx-"]{
|
|
42
|
+
margin-top: 0;
|
|
43
|
+
margin-bottom: 40px;
|
|
44
|
+
/* No side padding on desktop */
|
|
45
|
+
padding-inline: 0;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/* Keep a bit of breathing room on small screens */
|
|
49
|
+
@media (max-width: 768px) {
|
|
50
|
+
#smx-root,
|
|
51
|
+
[id^="smx-"]{
|
|
52
|
+
padding-inline: 12px;
|
|
53
|
+
}
|
|
54
|
+
}
|
|
45
55
|
</style>
|
|
46
|
-
|
|
56
|
+
""".strip()
|
|
47
57
|
|
|
48
58
|
def smx_strip_fences(html: str) -> str:
|
|
49
59
|
s = (html or '').strip()
|