syntaxmatrix 2.5.7__tar.gz → 2.5.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/PKG-INFO +1 -1
  2. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/SyntaxMatrix.egg-info/PKG-INFO +1 -1
  3. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/SyntaxMatrix.egg-info/SOURCES.txt +1 -0
  4. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/setup.py +1 -1
  5. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/agentic/agents.py +17 -25
  6. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/core.py +1 -1
  7. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/routes.py +189 -2
  8. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/settings/model_map.py +1 -2
  9. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/templates/dashboard.html +148 -61
  10. syntaxmatrix-2.5.8.1/syntaxmatrix/templates/dataset_resize.html +535 -0
  11. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/utils.py +61 -4
  12. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/LICENSE.txt +0 -0
  13. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/README.md +0 -0
  14. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/SyntaxMatrix.egg-info/dependency_links.txt +0 -0
  15. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/SyntaxMatrix.egg-info/requires.txt +0 -0
  16. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/SyntaxMatrix.egg-info/top_level.txt +0 -0
  17. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/pyproject.toml +0 -0
  18. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/setup.cfg +0 -0
  19. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/__init__.py +0 -0
  20. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/agentic/__init__.py +0 -0
  21. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/agentic/agent_tools.py +0 -0
  22. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/agentic/code_tools_registry.py +0 -0
  23. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/agentic/model_templates.py +0 -0
  24. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/auth.py +0 -0
  25. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/bootstrap.py +0 -0
  26. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/commentary.py +0 -0
  27. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/dataset_preprocessing.py +0 -0
  28. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/db.py +0 -0
  29. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/display.py +0 -0
  30. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/emailer.py +0 -0
  31. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/file_processor.py +0 -0
  32. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/generate_page.py +0 -0
  33. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/gpt_models_latest.py +0 -0
  34. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/history_store.py +0 -0
  35. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/kernel_manager.py +0 -0
  36. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/llm_store.py +0 -0
  37. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/models.py +0 -0
  38. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/plottings.py +0 -0
  39. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/preface.py +0 -0
  40. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/profiles.py +0 -0
  41. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/project_root.py +0 -0
  42. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/session.py +0 -0
  43. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/settings/__init__.py +0 -0
  44. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/settings/default.yaml +0 -0
  45. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/settings/logging.py +0 -0
  46. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/settings/prompts.py +0 -0
  47. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/settings/string_navbar.py +0 -0
  48. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/smiv.py +0 -0
  49. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/smpv.py +0 -0
  50. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/static/css/style.css +0 -0
  51. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/static/docs.md +0 -0
  52. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/static/icons/favicon.png +0 -0
  53. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/static/icons/hero_bg.jpg +0 -0
  54. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/static/icons/logo.png +0 -0
  55. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/static/icons/svg_497526.svg +0 -0
  56. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/static/icons/svg_497528.svg +0 -0
  57. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/static/js/chat.js +0 -0
  58. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/static/js/sidebar.js +0 -0
  59. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/static/js/widgets.js +0 -0
  60. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/templates/change_password.html +0 -0
  61. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/templates/code_cell.html +0 -0
  62. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/templates/docs.html +0 -0
  63. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/templates/error.html +0 -0
  64. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/templates/login.html +0 -0
  65. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/templates/register.html +0 -0
  66. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/themes.py +0 -0
  67. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/ui_modes.py +0 -0
  68. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/vector_db.py +0 -0
  69. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/vectordb/__init__.py +0 -0
  70. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/vectordb/adapters/__init__.py +0 -0
  71. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/vectordb/adapters/milvus_adapter.py +0 -0
  72. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/vectordb/adapters/pgvector_adapter.py +0 -0
  73. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/vectordb/adapters/sqlite_adapter.py +0 -0
  74. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/vectordb/base.py +0 -0
  75. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/vectordb/registry.py +0 -0
  76. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/vectorizer.py +0 -0
  77. {syntaxmatrix-2.5.7 → syntaxmatrix-2.5.8.1}/syntaxmatrix/workspace_db.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: syntaxmatrix
3
- Version: 2.5.7
3
+ Version: 2.5.8.1
4
4
  Summary: SyntaxMUI: A customizable framework for Python AI Assistant Projects.
5
5
  Author: Bob Nti
6
6
  Author-email: bob.nti@syntaxmatrix.net
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: syntaxmatrix
3
- Version: 2.5.7
3
+ Version: 2.5.8.1
4
4
  Summary: SyntaxMUI: A customizable framework for Python AI Assistant Projects.
5
5
  Author: Bob Nti
6
6
  Author-email: bob.nti@syntaxmatrix.net
@@ -66,6 +66,7 @@ syntaxmatrix/static/js/widgets.js
66
66
  syntaxmatrix/templates/change_password.html
67
67
  syntaxmatrix/templates/code_cell.html
68
68
  syntaxmatrix/templates/dashboard.html
69
+ syntaxmatrix/templates/dataset_resize.html
69
70
  syntaxmatrix/templates/docs.html
70
71
  syntaxmatrix/templates/error.html
71
72
  syntaxmatrix/templates/login.html
@@ -8,7 +8,7 @@ with open(os.path.join(this_directory, "README.md"), encoding="utf-8") as f:
8
8
 
9
9
  setup(
10
10
  name="syntaxmatrix",
11
- version="2.5.7",
11
+ version="2.5.8.1",
12
12
  author="Bob Nti",
13
13
  author_email="bob.nti@syntaxmatrix.net",
14
14
  description="SyntaxMUI: A customizable framework for Python AI Assistant Projects.",
@@ -139,7 +139,7 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile):
139
139
  reasoning_effort, verbosity = "medium", "medium"
140
140
  if _model == "gpt-5-nano":
141
141
  reasoning_effort, verbosity = "low", "low"
142
- elif _model in ["gpt-5-mini", "gpt-5-codex-mini"]:
142
+ elif _model in ["gpt-5-mini", "gpt-5-mini-codex"]:
143
143
  reasoning_effort, verbosity = "medium", "medium"
144
144
  elif _model in ["gpt-5", "gpt-5-codex", "gpt-5-pro"]:
145
145
  reasoning_effort, verbosity = "high", "high"
@@ -163,19 +163,7 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile):
163
163
 
164
164
  code = _out(resp).strip()
165
165
  if code:
166
- return code
167
-
168
- # Try to surface any block reason (safety / policy / etc.)
169
- block_reason = None
170
- output = resp.get("output")
171
- for item in output:
172
- fr = getattr(item, "finish_reason", None)
173
- if fr and fr != "stop":
174
- block_reason = fr
175
- break
176
- if block_reason:
177
- raise RuntimeError(f"{_model} stopped with reason: {block_reason}")
178
- raise RuntimeError(f"{_model} returned an empty response in this section due to insufficient data.")
166
+ return code
179
167
 
180
168
  except APIError as e:
181
169
  # IMPORTANT: return VALID PYTHON so the dashboard can show the error
@@ -263,7 +251,6 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile):
263
251
  {"role": "system", "content": system_prompt},
264
252
  {"role": "user", "content": user_prompt},
265
253
  ],
266
- extra_body={"thinking": {"type": "enabled"}},
267
254
  temperature=0,
268
255
  stream=False
269
256
  )
@@ -301,6 +288,7 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile):
301
288
  else:
302
289
  code = openai_sdk_generate_code()
303
290
 
291
+ code = str(code or "")
304
292
  return code, usage
305
293
 
306
294
 
@@ -397,9 +385,11 @@ def refine_question_agent(raw_question: str, dataset_context: str | None = None)
397
385
 
398
386
  system_prompt = ("""
399
387
  - You are a Machine Learning (ML) and Data Science (DS) expert.
400
- - You rewrite user questions into clear ML job specifications to help AI assistant generate Python code that provides solution to the user question when it is run. Most user questions are vague. So, your goal is to ensure that your output guards the assistant agains making potential errors that you anticipated could arise due to the nature of the question.
401
- - If a dataset summary is provided, use it to respect column and help you rewrite the question properly.
402
- - DO NOT write andy prelude or preamble"
388
+ - Your goal is to use the provided dataset summary to convert given question into clear ML job specifications.
389
+ - Use the provided dataset summary to respect columns and aid you in properly refining the user question.
390
+ - Include chronological outline in order to guide a code generator to avoid falling off tracks.
391
+ - DO NOT include any prelude or preamble. Just the refined tasks.
392
+ - If and only if the dataset summary columns are not relevant to your desired columns that you deduced by analysing the question, and you suspect that the wrong dataset was used in the dataset summary, stop and just say: 'incompatible'.
403
393
  """)
404
394
 
405
395
  user_prompt = f"User question:\n{raw_question}\n\n"
@@ -532,9 +522,7 @@ def classify_ml_job_agent(refined_question, dataset_profile):
532
522
  system_prompt = ("""
533
523
  You are a strict machine learning task classifier for an ML workbench.
534
524
  Your goal is to correctly label the user's task specifications with the most relevant tags from a fixed list.
535
- You Must always have 'data_preprocessing' as the 1st tag. Then add up to 4 more, as needed, to make 5 max.
536
- Your list should be 2-5 tags long. If no relevant tag, default to ["data_preprocessing"]
537
- If tasks specs and `df` don't match (of different industries, return ['context mismatch']
525
+ You Must always have 'data_preprocessing' as the 1st tag. Then add all other relevant tags.
538
526
  You should return only your list of tags, no prelude or preamble.
539
527
  """)
540
528
 
@@ -554,8 +542,7 @@ def classify_ml_job_agent(refined_question, dataset_profile):
554
542
  "generative_modeling", "causal_inference", "risk_modeling", "graph_analysis",
555
543
 
556
544
  # Foundational/Pipeline Steps
557
- "data_preprocessing", "feature_engineering", "statistical_inference",
558
- "model_validation", "hyperparameter_tuning"
545
+ "data_preprocessing", "feature_engineering", "statistical_inference", "clustering", "hyperparameter_tuning"
559
546
  ]
560
547
 
561
548
  # --- 2. Construct the Generalized Prompt for the LLM ---
@@ -575,7 +562,7 @@ def classify_ml_job_agent(refined_question, dataset_profile):
575
562
  ML Jobs List: {', '.join(ml_task_list)}
576
563
 
577
564
  Respond ONLY with a valid JSON array of strings containing the selected ML job names.
578
- Example Response: ["natural_language_processing", "classification", "feature_engineering"]
565
+ Example Response: ["data_preprocessing", "regression", "classification", "feature_engineering"]
579
566
  """
580
567
 
581
568
  if dataset_profile:
@@ -583,7 +570,12 @@ def classify_ml_job_agent(refined_question, dataset_profile):
583
570
 
584
571
  llm_profile = _prof.get_profile("classification") or _prof.get_profile("admin")
585
572
  if not llm_profile:
586
- return "ERROR"
573
+ return (
574
+ "<div class='smx-alert smx-alert-warn'>"
575
+ "No LLM profile is configured for Classification. Please, do that in the Admin panel or contact your Administrator."
576
+ "</div>"
577
+ )
578
+
587
579
 
588
580
  llm_profile['client'] = _prof.get_client(llm_profile)
589
581
 
@@ -54,7 +54,7 @@ class SyntaxMUI:
54
54
  port="5080",
55
55
  user_icon="👩🏿‍🦲",
56
56
  bot_icon="<img src='/static/icons/favicon.png' width=20' alt='bot'/>",
57
- favicon="", # /static/icons/favicon.png",
57
+ favicon="/static/icons/favicon.png",
58
58
  site_logo="<img src='/static/icons/logo.png' width='30' alt='logo'/>",
59
59
  site_title="SyntaxMatrix",
60
60
  project_name="smxAI",
@@ -65,6 +65,7 @@ _CLIENT_DIR = detect_project_root()
65
65
  _stream_q = queue.Queue()
66
66
  _stream_cancelled = {}
67
67
  _last_result_html = {} # { session_id: html_doc }
68
+ _last_resized_csv = {} # { resize_id: bytes for last resized CSV per browser session }
68
69
 
69
70
  # single, reused formatter: inline styles, padding, rounded corners, scroll
70
71
  _FMT = _HtmlFmt(
@@ -5610,8 +5611,19 @@ def setup_routes(smx):
5610
5611
  dataset_profile = f"modality: tabular; columns: {columns_summary}"
5611
5612
 
5612
5613
  refined_question = refine_question_agent(askai_question, dataset_context)
5613
- tags = classify_ml_job_agent(refined_question, dataset_profile)
5614
-
5614
+ tags = []
5615
+ if refined_question.lower() == "incompatible" or refined_question.lower() == "mismatch":
5616
+ return ("""
5617
+ <div style="position: fixed; top: 50%; left: 50%; transform: translate(-50%, -50%); text-align: center;">
5618
+ <h1 style="margin: 0 0 10px 0;">Oops: Context mismatch</h1>
5619
+ <p style="margin: 0;">Please, upload the proper dataset for solution to your query.</p>
5620
+ <br>
5621
+ <a class='button' href='/dashboard' style='text-decoration:none;'>Return</a>
5622
+ </div>
5623
+ """)
5624
+ else:
5625
+ tags = classify_ml_job_agent(refined_question, dataset_profile)
5626
+
5615
5627
  ai_code = smx.ai_generate_code(refined_question, tags, df)
5616
5628
  llm_usage = smx.get_last_llm_usage()
5617
5629
  ai_code = auto_inject_template(ai_code, tags, df)
@@ -6513,6 +6525,7 @@ def setup_routes(smx):
6513
6525
  cell["highlighted_code"] = Markup(_pygmentize(cell["code"]))
6514
6526
 
6515
6527
  highlighted_ai_code = _pygmentize(ai_code)
6528
+ smxAI = "smxAI"
6516
6529
 
6517
6530
  return render_template(
6518
6531
  "dashboard.html",
@@ -6525,6 +6538,7 @@ def setup_routes(smx):
6525
6538
  askai_question=smx.sanitize_rough_to_markdown_task(askai_question),
6526
6539
  refined_question=refined_question,
6527
6540
  tasks=tags,
6541
+ smxAI=smxAI,
6528
6542
  data_cells=data_cells,
6529
6543
  session_id=session_id,
6530
6544
  llm_usage=llm_usage
@@ -6588,6 +6602,179 @@ def setup_routes(smx):
6588
6602
  # go back to the dashboard; dashboard() will auto-select the next file
6589
6603
  return redirect(url_for("dashboard"))
6590
6604
 
6605
+ # ── DATASET RESIZE (independent helper page) -------------------------
6606
+
6607
+
6608
def _stratified_allocations(sizes, frac, n_target):
    """Decide how many rows to draw from each stratum.

    Args:
        sizes: Row count of every group, in group iteration order.
        frac: Target fraction of rows to keep (0 < frac <= 1).
        n_target: Overall number of rows the sample should contain.

    Returns:
        A list aligned with ``sizes`` holding the per-group sample counts.
        Every non-empty group receives at least one row, so the total can
        exceed ``n_target`` when there are more groups than target rows.
    """
    # First pass: proportional share, at least 1 and at most the group size.
    allocs = []
    for size in sizes:
        if size <= 0:
            allocs.append(0)
            continue
        allocs.append(min(size, max(1, int(round(size * frac)))))

    total = sum(allocs)
    n_groups = len(allocs)

    # Overshoot: shave rows round-robin, never dropping a group below 1.
    idx = 0
    while total > n_target and any(a > 1 for a in allocs):
        pos = idx % n_groups
        if allocs[pos] > 1:
            allocs[pos] -= 1
            total -= 1
        idx += 1

    # Undershoot: top up round-robin while groups still have spare rows.
    # The three-pass cap guarantees termination when every group is full.
    if total < n_target and allocs:
        idx = 0
        while total < n_target:
            pos = idx % n_groups
            if allocs[pos] < sizes[pos]:
                allocs[pos] += 1
                total += 1
            idx += 1
            if idx > n_groups * 3:
                break

    return allocs


@smx.app.route("/dataset/resize", methods=["GET", "POST"])
def dataset_resize():
    """
    User uploads any CSV and picks a target size (percentage of rows).
    We keep the last resized CSV in memory and expose a download link.

    GET renders the empty form. POST validates the upload and the target
    percentage, samples the rows (stratified by ``strat_col`` when that
    column exists, otherwise simple random sampling), stores the resulting
    CSV bytes in ``_last_resized_csv`` under this session's resize id and
    renders the form again with summary statistics.
    """
    # One id per browser session to index _last_resized_csv
    resize_id = session.get("dataset_resize_id")
    if not resize_id:
        resize_id = str(uuid.uuid4())
        session["dataset_resize_id"] = resize_id

    resize_info = None  # stats we pass down to the template

    if request.method == "POST":
        file = request.files.get("dataset_file")
        target_pct_raw = (request.form.get("target_pct") or "").strip()
        strat_col = (request.form.get("strat_col") or "").strip()

        error_msg = None
        df = None

        # --- Basic validation ---
        if not file or file.filename == "":
            error_msg = "Please choose a CSV file."
        elif not file.filename.lower().endswith(".csv"):
            error_msg = "Only CSV files are supported."

        # --- Read CSV into a DataFrame ---
        if not error_msg:
            try:
                df = pd.read_csv(file)
            except Exception as e:
                error_msg = f"Could not read CSV: {e}"

        # --- Parse target percentage ---
        pct = None
        if not error_msg:
            try:
                pct = float(target_pct_raw)
            except Exception:
                error_msg = "Target size must be a number between 1 and 100."

        # Written as a positive range test so that NaN (which float() accepts
        # and which fails every comparison) is rejected here instead of
        # crashing later in int(round(...)).
        if not error_msg and not (0 < pct <= 100):
            error_msg = "Target size must be between 1 and 100."

        if error_msg:
            flash(error_msg, "error")
        else:
            frac = pct / 100.0
            n_orig = len(df)
            n_target = max(1, int(round(n_orig * frac)))

            df_resized = None
            used_strat = False

            # --- Advanced: stratified sampling by a column (behind 'Show advanced options') ---
            if strat_col and strat_col in df.columns and n_orig > 0:
                # NOTE(review): groupby uses pandas' default NaN handling, so
                # rows whose strat_col is missing are presumably left out of
                # every group - confirm this is intended.
                groups = df.groupby(strat_col, sort=False)

                # Sizes are computed once, in iteration order, instead of
                # calling get_group() repeatedly while adjusting.
                sizes = [len(group) for _, group in groups]
                allocations = _stratified_allocations(sizes, frac, n_target)

                sampled_parts = [
                    group.sample(n=n_g, random_state=0)
                    for (_, group), n_g in zip(groups, allocations)
                    if n_g > 0
                ]

                if sampled_parts:
                    # Shuffle so the output is not ordered by stratum.
                    df_resized = (
                        pd.concat(sampled_parts, axis=0)
                        .sample(frac=1.0, random_state=0)
                        .reset_index(drop=True)
                    )
                    # Only report stratification when it actually produced rows.
                    used_strat = True

            # --- Default: simple random sample over all rows ---
            if df_resized is None:
                if n_target >= n_orig:
                    df_resized = df.copy()
                else:
                    df_resized = df.sample(n=n_target, random_state=0).reset_index(drop=True)
                if strat_col and strat_col not in df.columns:
                    flash(
                        f"Column '{strat_col}' not found. Used simple random sampling instead.",
                        "warning",
                    )

            # --- Serialise to CSV in memory and stash in _last_resized_csv ---
            # NOTE(review): entries are only removed on download, so sessions
            # that never download keep their CSV in memory - consider a cap.
            buf = _std_io.BytesIO()
            df_resized.to_csv(buf, index=False)
            buf.seek(0)
            _last_resized_csv[resize_id] = buf.getvalue()

            resize_info = {
                "rows_in": n_orig,
                "rows_out": len(df_resized),
                "pct": pct,
                "used_strat": used_strat,
                "strat_col": strat_col if used_strat else "",
            }
            flash("Dataset resized successfully. Use the download link below.", "success")

    return render_template("dataset_resize.html", resize_info=resize_info)
6751
+
6752
@smx.app.route("/dataset/resize/download", methods=["GET"])
def download_resized_dataset():
    """Serve this browser session's most recent resized CSV as an attachment.

    Responds with 404 when the session has no resize id or nothing is stored
    for it. The stored bytes are discarded as soon as a download is served.
    """
    session_key = session.get("dataset_resize_id")
    payload = _last_resized_csv.get(session_key) if session_key else None
    if not payload:
        return ("No resized dataset available.", 404)

    # Microsecond-resolution timestamp keeps successive download names distinct.
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S-%f")

    # Drop it from memory once downloaded
    _last_resized_csv.pop(session_key, None)

    stream = _std_io.BytesIO(payload)
    stream.seek(0)
    return send_file(
        stream,
        mimetype="text/csv; charset=utf-8",
        as_attachment=True,
        download_name=f"resized_dataset_{timestamp}.csv",
    )
6777
+
6591
6778
 
6592
6779
  def _pdf_fallback_reportlab(full_html: str):
6593
6780
  """ReportLab fallback: extract text + base64 <img> and lay them out."""
@@ -43,7 +43,7 @@ PROVIDERS_MODELS = {
43
43
  #5
44
44
  "moonshot": [
45
45
  "kimi-k2-thinking",
46
- "kimi-k2",
46
+ "kimi-k2-instruct",
47
47
  ],
48
48
  #6
49
49
  "alibaba": [
@@ -59,7 +59,6 @@ PROVIDERS_MODELS = {
59
59
  "claude-sonnet-4-5",
60
60
  "claude-sonnet-4-0",
61
61
  "claude-3-5-haiku-latest",
62
- "claude-3-haiku-20240307",
63
62
  ]
64
63
  }
65
64
 
@@ -160,31 +160,18 @@
160
160
  padding:0.2rem;
161
161
  }
162
162
  .del-btn:hover { opacity:0.8; background:red; }
163
-
164
- /* full-screen overlay */
165
- #loader-overlay {
166
- position: fixed;
167
- top: 0; left: 0;
168
- width: 100%; height: 100%;
169
- background: rgba(241, 235, 235, 0);
170
- display: none;
163
+
164
+ /* Make the Explore Data submit button compact instead of full-width */
165
+ .eda-submit-btn {
166
+ align-self: flex-start; /* stop flex from stretching it to 100% */
167
+ width: auto; /* shrink to content */
168
+ min-width: 7.5rem; /* tweak this if you want it smaller/larger */
169
+ padding: 6px 16px; /* a bit tighter than the global button */
170
+ display: inline-flex;
171
171
  align-items: center;
172
172
  justify-content: center;
173
- z-index: 9999;
174
- }
175
- /* simple spinner */
176
- .loader {
177
- border: 8px solid #eee;
178
- border-top: 8px solid #333;
179
- border-radius: 50%;
180
- width: 60px; height: 60px;
181
- animation: spin 1s linear infinite;
182
173
  }
183
- @keyframes spin {
184
- 0% { transform: rotate(0deg); }
185
- 100% { transform: rotate(360deg); }
186
- }
187
-
174
+
188
175
  /* --- Mobile fixes --- */
189
176
  .dashboard-content img,
190
177
  .dashboard-content canvas,
@@ -618,24 +605,92 @@
618
605
  details > summary::-webkit-details-marker {
619
606
  display: none;
620
607
  }
608
+
609
+ /* Spinner that sits inside the Explore Data submit button */
610
+ .eda-btn-spinner {
611
+ display: none;
612
+ width: 1.1rem;
613
+ height: 1.1rem;
614
+ border-radius: 999px;
615
+ border: 2px solid currentColor;
616
+ border-top-color: transparent;
617
+ border-right-color: transparent;
618
+ animation: edaBtnSpin 0.7s linear infinite;
619
+ margin-left: 0; /* was 0.5rem */
620
+ vertical-align: middle;
621
+ box-sizing: border-box;
622
+ }
623
+ .eda-btn-label {
624
+ display: inline-block;
625
+ margin-right: 0.5rem; /* space between text and spinner when both are visible */
626
+ }
627
+
628
+ /* While loading: hide the label, show the spinner */
629
+ .eda-btn-loading .eda-btn-label {
630
+ display: none; /* keeps button width stable */
631
+ }
632
+
633
+ .eda-btn-loading .eda-btn-spinner {
634
+ display: inline-block;
635
+ }
636
+
637
+ @keyframes edaBtnSpin {
638
+ to {
639
+ transform: rotate(360deg);
640
+ }
641
+ }
642
+ .sidebar-links {
643
+ margin-top: 22px;
644
+ }
645
+ .sidebar-links a {
646
+ display: block;
647
+ padding: 12px 10px 12px 0;
648
+ color: #333;
649
+ text-decoration: none;
650
+ font-size: clamp(0.93rem, 2vw, 1.08rem);
651
+ border-radius: 6px;
652
+ margin-bottom: 6px;
653
+ transition: background 0.2s, color 0.2s;
654
+ }
655
+ .sidebar-links a.active,
656
+ .sidebar-links a:hover {
657
+ background: #e0e5ee;
658
+ color: #007acc;
659
+ font-weight: bold;
660
+ }
661
+ /* Sub-links under a main section (e.g. Explore → Resize dataset) */
662
+ .sidebar-links a.sidebar-sub-link {
663
+ font-size: clamp(0.80rem, 1.4vw, 0.95rem); /* smaller than main items */
664
+ padding: 6px 10px 6px 14px; /* slight indent to show hierarchy */
665
+ margin-bottom: 4px;
666
+ opacity: 0.95;
667
+ }
668
+
669
+ /* Optional: visual cue arrow for sub-items */
670
+ .sidebar-links a.sidebar-sub-link::before {
671
+ content: "↳ ";
672
+ font-size: 0.8em;
673
+ opacity: 0.7;
674
+ }
621
675
  </style>
622
676
 
623
677
  <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
624
678
  </head>
625
679
  <body>
626
680
  <div id="sidebarScrim" class="sidebar-scrim" aria-hidden="true"></div>
627
- <div id="loader-overlay">
628
- <div class="loader"></div>
629
- </div>
630
681
  <div class="dashboard-sidebar">
631
682
  <button class="sidebar-close" aria-label="Close menu">✕</button>
632
683
  <h2>ML&nbsp;Lab</h2>
633
684
  <a href="/">return to home</a>
634
685
  <div class="sidebar-links">
635
686
  <a href="/dashboard?section=explore"{% if section == 'explore' %} class="active"{% endif %}>Explore</a>
636
-
687
+
688
+ <!-- Explore subsets -->
689
+ <a href="{{ url_for('dataset_resize') }}" class="sidebar-sub-link">Resize dataset</a>
690
+
637
691
  <!-- Future: more links here -->
638
692
  </div>
693
+
639
694
  </div>
640
695
  <div class="dashboard-main">
641
696
  <button id="sidebarToggle" class="sidebar-toggle" aria-label="Open menu"></button>
@@ -691,11 +746,18 @@
691
746
  <h2>Explore Data</h2>
692
747
  <form id="form-askai" method="post" action="/dashboard?section=explore" style="margin-top:5px;padding:12px; border:1px solid grey;border-radius:5px;width:70vw;">
693
748
  <input type="hidden" name="dataset" value="{{ selected_dataset }}">
694
- <label for="askai"><strong>Ask smxAI:</strong></label>
695
- <textarea id="askai" name="askai_question" type="text" rows="4"
696
- style="position:relative; width:90%; padding:8px; font-size:0.8em; border-radius:8px;"
697
- placeholder="Ask me about {{ (selected_dataset or 'your dataset. Upload it first.').replace('_', ' ').replace('.csv', '') }}" required></textarea>
698
- <button type="submit" style="font-size:1.2rem; width:8rem; padding:4px;">Submit</button>
749
+ <label for="askai"><strong>Ask {{ smxAI }}:</strong></label>
750
+ <textarea id="askai" name="askai_question" type="text" rows="5"
751
+ style="position:relative; width:90%; padding:16px; font-size:0.8em; border-radius:8px;"
752
+ placeholder="Ask me about {{ (selected_dataset or 'your dataset\n. But upload it first.').replace('_', ' ').replace('.csv', '') }}" required></textarea>
753
+ <!-- <button type="submit" style="font-size:1.2rem; width:8rem; padding:4px;">Submit</button> -->
754
+ <button
755
+ type="submit"
756
+ class="btn btn-primary eda-submit-btn"
757
+ >
758
+ <span class="eda-btn-label">Submit</span>
759
+ <span class="eda-btn-spinner" aria-hidden="true"></span>
760
+ </button>
699
761
  </form>
700
762
  <div style="margin-bottom: 36px;">
701
763
  {% if askai_question %}
@@ -735,6 +797,7 @@
735
797
  {% endif %}
736
798
  {% if ai_outputs %}
737
799
  <div class="d-flex align-items-center justify-content-between" style="margin: 12px;">
800
+ <br>
738
801
  <h3 class="m-0">Result</h3>
739
802
  {% for html_block in ai_outputs %}
740
803
  <div class="ai-output" style="margin-bottom:18px;overflow-x:auto; max-width:100%;">
@@ -787,31 +850,31 @@
787
850
  </div>
788
851
  </div>
789
852
  <script>
790
- async function copyCode(btn){
791
- const pre = btn.parentElement.querySelector('pre');
792
- if(!pre) return;
793
- const text = pre.innerText;
794
-
795
- try {
796
- if (navigator.clipboard && navigator.clipboard.writeText) {
797
- await navigator.clipboard.writeText(text);
798
- } else {
799
- // fallback
800
- const range = document.createRange();
801
- range.selectNodeContents(pre);
802
- const sel = window.getSelection();
803
- sel.removeAllRanges(); sel.addRange(range);
804
- document.execCommand('copy');
805
- sel.removeAllRanges();
806
- }
807
- btn.textContent = 'Copied!';
808
- setTimeout(()=>btn.textContent='Copy', 1200);
809
- } catch(e){
810
- btn.textContent = 'Failed';
811
- setTimeout(()=>btn.textContent='Copy', 1200);
853
/**
 * Copy the text of the <pre> that shares a parent with `btn` to the
 * clipboard, then show "Copied!" or "Failed" on the button for 1.2 s.
 */
async function copyCode(btn) {
  const codeEl = btn.parentElement.querySelector('pre');
  if (!codeEl) return;
  const snippet = codeEl.innerText;

  // Restore the default caption after the short feedback period.
  const restoreLabel = () => setTimeout(() => { btn.textContent = 'Copy'; }, 1200);

  try {
    const canUseClipboardApi = navigator.clipboard && navigator.clipboard.writeText;
    if (canUseClipboardApi) {
      await navigator.clipboard.writeText(snippet);
    } else {
      // fallback: select the node contents and use the legacy copy command
      const selection = window.getSelection();
      const nodeRange = document.createRange();
      nodeRange.selectNodeContents(codeEl);
      selection.removeAllRanges();
      selection.addRange(nodeRange);
      document.execCommand('copy');
      selection.removeAllRanges();
    }
    btn.textContent = 'Copied!';
    restoreLabel();
  } catch (e) {
    btn.textContent = 'Failed';
    restoreLabel();
  }
}
813
- }
814
- </script>
877
+ </script>
815
878
  <script>
816
879
  function toggleCodeCell(link) {
817
880
  var cell = link.nextElementSibling;
@@ -824,6 +887,7 @@
824
887
  link.querySelector("span").innerText = "Show Code";
825
888
  }
826
889
  }
890
+
827
891
  function copyCodeToClipboard(btn) {
828
892
  var pre = btn.parentElement.querySelector("pre");
829
893
  if (!pre) return;
@@ -842,13 +906,36 @@
842
906
  }
843
907
  sel.removeAllRanges();
844
908
  }
845
-
846
- document.addEventListener("DOMContentLoaded", () => {
847
- const form = document.getElementById("form-askai");
848
- const overlay = document.getElementById("loader-overlay");
849
- form.addEventListener("submit", () => {
850
- overlay.style.display = "flex";
909
+
910
// Wire the Explore Data form so its submit button shows an inline spinner.
document.addEventListener("DOMContentLoaded", function () {
  var askForm = document.getElementById("form-askai");
  if (!askForm) return;

  var submitBtn = askForm.querySelector(".eda-submit-btn");
  if (!submitBtn) return;

  // Single switch for the button's busy state (spinner class + disabled flag).
  function setBusy(isBusy) {
    if (isBusy) {
      submitBtn.classList.add("eda-btn-loading");
    } else {
      submitBtn.classList.remove("eda-btn-loading");
    }
    submitBtn.disabled = isBusy;
  }

  // When the form actually submits, show the spinner and disable the button.
  // IMPORTANT: no preventDefault here – the browser/htmx still submits normally
  askForm.addEventListener("submit", function () {
    setBusy(true);
  });

  // If htmx is used on this form, reset the spinner once the request completes or errors
  if (window.htmx) {
    ["htmx:afterRequest", "htmx:responseError", "htmx:sendError"].forEach(function (eventName) {
      askForm.addEventListener(eventName, function () {
        setBusy(false);
      });
    });
  }
});
853
940
  </script>
854
941
  <script>