syntaxmatrix 2.5.6.1__py3-none-any.whl → 2.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,7 +42,7 @@ def token_calculator(total_input_content, llm_profile):
  input_prompt_tokens = len(enc.encode(total_input_content))
  return input_prompt_tokens

- def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1, max_tokens=4096):
+ def mlearning_agent(user_prompt, system_prompt, coding_profile):
  """
  Returns:
  (text, usage_dict)
@@ -95,72 +95,41 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,
  # Google
  def google_generate_code():
  nonlocal usage
- """
- Generates content using the Gemini API and calculates token usage
- including Context Overhead for consistency.
- """
-
- try:
- # 1. Client Initialization
- config = types.GenerateContentConfig(
- system_instruction=system_prompt,
- temperature=temperature,
- max_output_tokens=max_tokens,
- )
+ config = types.GenerateContentConfig(
+ system_instruction=system_prompt,
+ # Optional: Force the model to generate a Python code block as JSON
+ response_mime_type="application/json",
+ response_schema=types.Schema(
+ type=types.Type.OBJECT,
+ properties={
+ "code": types.Schema(type=types.Type.STRING, description="The runnable Python code."),
+ "explanation": types.Schema(type=types.Type.STRING, description="A brief explanation of the code."),
+ },
+ required=["code"]
+ ),
+ )

- # 2. API Call
- resp = _client.models.generate_content(
+ try:
+ response = _client.models.generate_content(
  model=_model,
- contents=[user_prompt],
+ contents=user_prompt,
  config=config,
  )
+ except Exception as e:
+ return f"An error occurred during API call: {e}"

- # 3. Token Usage Capture and Context Overhead Calculation
- um = resp.usage_metadata
- usage["input_tokens"] = um.prompt_token_count
- usage["output_tokens"] = um.thoughts_token_count
- usage["total_tokens"] = um.total_token_count
-
- # 4. Response Extraction (same robust logic as before)
- text = getattr(resp, "text", None)
- if isinstance(text, str) and text.strip():
- return text.strip()
-
- chunks = []
- candidates = getattr(resp, "candidates", None) or []
- for cand in candidates:
- content = getattr(cand, "content", None)
- if content:
- parts = getattr(content, "parts", None) or []
- for part in parts:
- t = getattr(part, "text", None)
- if t:
- chunks.append(str(t))
-
- text = "\n".join(chunks).strip()
- if text:
- return text
-
- # 5. Handle blocked response
- fb = getattr(resp, "prompt_feedback", None)
- block_reason = getattr(fb, "block_reason", None) if fb else None
- if block_reason and block_reason != types.BlockedReason.REASON_UNSPECIFIED:
- raise RuntimeError(f"{_model} blocked the response. Reason: {block_reason.name}")
- raise RuntimeError(f"{_model} failed to return content due to insufficient data.")
+ # 3. Token Usage Capture and Context Overhead Calculation
+ um = response.usage_metadata
+ usage["input_tokens"] = um.prompt_token_count
+ usage["output_tokens"] = um.candidates_token_count + um.thoughts_token_count
+ usage["total_tokens"] = um.total_token_count

- except APIError as e:
- error_msg = f"Gemini API Error: {e}"
-
+ try:
+ # The response text will be a JSON string due to the config.
+ response_json = json.loads(response.text)
+ return response_json.get("code", "Error: Code field not found in response.")
  except Exception as e:
- error_msg = f"An unexpected error occurred during API call or processing: {e}"
-
- # --- Return the error message wrapped in the required output code structure ---
- msg = f"I smxAI have instructed {error_msg}\n"
- return (
- f"# {msg}\n"
- "from syntaxmatrix.display import show\n"
- f"show({msg!r})\n"
- )
+ return f"Error parsing response as JSON: {e}\nRaw Response: {response.text}"

  # OpenAI Responses API
  def gpt_models_latest_generate_code():
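The hunk above swaps Gemini's free-form text generation for a JSON-schema-constrained response and then reads the "code" field out of the parsed JSON. A minimal standalone sketch of that structured-output pattern, assuming the google-genai SDK and a GEMINI_API_KEY in the environment (the model name and prompts below are illustrative, not values from the package):

```python
# Sketch only: structured JSON output from Gemini via google-genai.
# Assumes GEMINI_API_KEY is set; model name and prompts are placeholders.
import json
from google import genai
from google.genai import types

client = genai.Client()  # reads the API key from the environment

config = types.GenerateContentConfig(
    system_instruction="Return runnable Python code only.",
    response_mime_type="application/json",
    response_schema=types.Schema(
        type=types.Type.OBJECT,
        properties={
            "code": types.Schema(type=types.Type.STRING),
            "explanation": types.Schema(type=types.Type.STRING),
        },
        required=["code"],
    ),
)

resp = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Plot a histogram of the 'age' column of df.",
    config=config,
)

payload = json.loads(resp.text)          # a single JSON document, per response_mime_type
print(payload.get("code", ""))           # the generated code field
print(resp.usage_metadata.total_token_count)
```

Because response_mime_type forces the body to be one JSON document, the json.loads(response.text) step in the new code no longer has to scrape code fences out of prose.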
@@ -170,7 +139,7 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,
  reasoning_effort, verbosity = "medium", "medium"
  if _model == "gpt-5-nano":
  reasoning_effort, verbosity = "low", "low"
- elif _model in ["gpt-5-mini", "gpt-5-codex-mini"]:
+ elif _model in ["gpt-5-mini", "gpt-5-mini-codex"]:
  reasoning_effort, verbosity = "medium", "medium"
  elif _model in ["gpt-5", "gpt-5-codex", "gpt-5-pro"]:
  reasoning_effort, verbosity = "high", "high"
@@ -194,19 +163,7 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,

  code = _out(resp).strip()
  if code:
- return code
-
- # Try to surface any block reason (safety / policy / etc.)
- block_reason = None
- output = resp.get("output")
- for item in output:
- fr = getattr(item, "finish_reason", None)
- if fr and fr != "stop":
- block_reason = fr
- break
- if block_reason:
- raise RuntimeError(f"{_model} stopped with reason: {block_reason}")
- raise RuntimeError(f"{_model} returned an empty response in this section due to insufficient data.")
+ return code

  except APIError as e:
  # IMPORTANT: return VALID PYTHON so the dashboard can show the error
@@ -225,15 +182,14 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,
  "from syntaxmatrix.display import show\n"
  f"show({msg!r})\n"
  )
-
+
  # Anthropic
  def anthropic_generate_code():
  nonlocal usage
  try:
  resp = _client.messages.create(
  model=_model,
- max_tokens=max_tokens,
- temperature=temperature,
+ temperature=0,
  system=system_prompt,
  messages=[
  {"role": "user", "content": user_prompt}
@@ -276,40 +232,43 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,
  def openai_sdk_generate_code():
  nonlocal usage
  try:
- resp = _client.chat.completions.create(
+ response = None
+ if _model == "deepseek-reasoner":
+ response = _client.chat.completions.create(
+ model=_model,
+ messages=[
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": user_prompt},
+ ],
+ extra_body={"thinking": {"type": "enabled"}},
+ temperature=0,
+ stream=False
+ )
+ else:
+ response = _client.chat.completions.create(
  model=_model,
  messages=[
  {"role": "system", "content": system_prompt},
  {"role": "user", "content": user_prompt},
  ],
- temperature=temperature,
- max_tokens=max_tokens,
+ temperature=0,
+ stream=False
  )
+ content = response.choices[0].message.content

-
-
- um = resp.usage
+ um = response.usage
  usage["input_tokens"] = um.prompt_tokens
  usage["output_tokens"] = um.completion_tokens
  usage["total_tokens"] = um.total_tokens

- text = resp.choices[0].message.content
- if text:
- return text
-
- # Try to surface any block reason (safety / policy / etc.)
- block_reason = None
- choices = getattr(resp, "choices", None) or []
- if choices:
- first = choices[0]
- fr = getattr(first, "finish_reason", None)
- if fr and fr != "stop":
- block_reason = fr
-
- if block_reason:
- raise RuntimeError(f"{_model} stopped with reason: {block_reason}")
- # Fallback: nothing useful came back
- raise RuntimeError(f"{_model} returned nothing in this section due to insufficient data.")
+ code_match = re.search(r"```(?:python)?\n(.*?)```", content, re.DOTALL)
+
+ if code_match:
+ return code_match.group(1).strip()
+ else:
+ # If no markdown blocks are found, return the raw content
+ # (assuming the model obeyed instructions to output only code)
+ return content.strip()

  except Exception as e:
  # IMPORTANT: return VALID PYTHON so the dashboard can show the error
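The rewritten openai_sdk_generate_code() now strips markdown fences from chat-completions output with a regex instead of raising on empty or blocked replies. A self-contained sketch of that extraction step (the helper name and sample strings are illustrative):

```python
# Sketch of the fence-extraction fallback used above: pull the body of the
# first fenced block if present, otherwise assume the reply is already code.
import re

def extract_code(content: str) -> str:
    match = re.search(r"```(?:python)?\n(.*?)```", content, re.DOTALL)
    return match.group(1).strip() if match else content.strip()

reply = "Here you go:\n```python\nprint('hello')\n```"
print(extract_code(reply))    # -> print('hello')
print(extract_code("x = 1"))  # -> x = 1 (no fences, passed through untouched)
```

With re.DOTALL the captured group spans newlines, so the first fenced block is returned whether or not it carries a python language tag.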
@@ -318,9 +277,7 @@ def mlearning_agent(user_prompt, system_prompt, coding_profile, temperature=0.1,
  f"# {msg}\n"
  "from syntaxmatrix.display import show\n"
  f"show({msg!r})\n"
- )
-
- # print("TTOOKKEENN: ", token_calculator(system_prompt + user_prompt, coding_profile))
+ )

  if _provider == "google":
  code = google_generate_code()
@@ -427,9 +384,11 @@ def refine_question_agent(raw_question: str, dataset_context: str | None = None)

  system_prompt = ("""
  - You are a Machine Learning (ML) and Data Science (DS) expert.
- - You rewrite user questions into clear ML job specifications to help AI assistant generate Python code that provides solution to the user question when it is run. Most user questions are vague. So, your goal is to ensure that your output guards the assistant agains making potential errors that you anticipated could arise due to the nature of the question.
- - If a dataset summary is provided, use it to respect column and help you rewrite the question properly.
- - DO NOT write andy prelude or preamble"
+ - Your goal is to use the provided dataset summary to convert given question into clear ML job specifications.
+ - Use the provided dataset summary to respect columns and aid you in properly refining the user question.
+ - Include chronological outline in order to guide a code generator to avoid falling off tracks.
+ - DO NOT include any prelude or preamble. Just the refined tasks.
+ - If and only if the dataset summary columns are not relevant to your desired columns that you deduced by analysing the question, and you suspect that the wrong dataset was used in the dataset summary, stop and just say: 'incompatible'.
  """)

  user_prompt = f"User question:\n{raw_question}\n\n"
@@ -447,7 +406,7 @@ def refine_question_agent(raw_question: str, dataset_context: str | None = None)


  def classify_ml_job_agent(refined_question, dataset_profile):
-
+ import ast
  def ml_response(user_prompt, system_prompt, profile):
  _profile = profile # _prof.get_profile["admin"]

@@ -562,8 +521,7 @@ def classify_ml_job_agent(refined_question, dataset_profile):
  system_prompt = ("""
  You are a strict machine learning task classifier for an ML workbench.
  Your goal is to correctly label the user's task specifications with the most relevant tags from a fixed list.
- You Must always have 'data_preprocessing' as the 1st tag. Then add up to 4 to make 5 max. Your list, therefore, should have 1-5 tags. If you think a task is too complext for the given context, even if relevant, exclude it.
- If no relevant tag, default to "data_preprocessing" and return that alone.
+ You Must always have 'data_preprocessing' as the 1st tag. Then add all other relevant tags.
  You should return only your list of tags, no prelude or preamble.
  """)

@@ -583,15 +541,14 @@ def classify_ml_job_agent(refined_question, dataset_profile):
  "generative_modeling", "causal_inference", "risk_modeling", "graph_analysis",

  # Foundational/Pipeline Steps
- "feature_engineering", "statistical_inference", "data_preprocessing",
- "model_validation", "hyperparameter_tuning"
+ "data_preprocessing", "feature_engineering", "statistical_inference", "clustering", "hyperparameter_tuning"
  ]

  # --- 2. Construct the Generalized Prompt for the LLM ---
  task_description = refined_question

  user_prompt = f"""
- Analyze the following task description:
+ Analyze and classify the following task description:
  ---
  {task_description}
  ---
@@ -604,7 +561,7 @@ def classify_ml_job_agent(refined_question, dataset_profile):
  ML Jobs List: {', '.join(ml_task_list)}

  Respond ONLY with a valid JSON array of strings containing the selected ML job names.
- Example Response: ["natural_language_processing", "classification", "feature_engineering"]
+ Example Response: ["data_preprocessing", "regression", "classification", "feature_engineering"]
  """

  if dataset_profile:
@@ -612,13 +569,20 @@ def classify_ml_job_agent(refined_question, dataset_profile):

  llm_profile = _prof.get_profile("classification") or _prof.get_profile("admin")
  if not llm_profile:
- return "ERROR"
+ return (
+ "<div class='smx-alert smx-alert-warn'>"
+ "No LLM profile is configured for Classification. Please, do that in the Admin panel or contact your Administrator."
+ "</div>"
+ )
+

  llm_profile['client'] = _prof.get_client(llm_profile)

- # Extract raw content
  tasks = ml_response(user_prompt, system_prompt, llm_profile)
- return tasks
+ try:
+ return ast.literal_eval(tasks)
+ except Exception:
+ return tasks


  def text_formatter_agent(text):
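Since the classifier prompt asks for a JSON array of strings, the new ast.literal_eval(tasks) call in classify_ml_job_agent can usually turn the raw reply into a Python list, with the raw text kept as a fallback. A small sketch of that parsing step (the helper name is illustrative, not from the package):

```python
# Sketch: parse a JSON-array-of-strings reply into a Python list, falling
# back to the raw text when the model returns anything else.
import ast

def parse_tags(raw: str):
    try:
        return ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        return raw

print(parse_tags('["data_preprocessing", "regression", "feature_engineering"]'))
# -> ['data_preprocessing', 'regression', 'feature_engineering']
print(parse_tags("data_preprocessing"))  # not a literal -> returned unchanged
```

A JSON array of double-quoted strings is also a valid Python literal, which is why literal_eval works here without a JSON parser.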
syntaxmatrix/core.py CHANGED
@@ -599,7 +599,7 @@ class SyntaxMUI:
  from syntaxmatrix.gpt_models_latest import extract_output_text as _out, set_args

  if not self._classification_profile:
- classification_profile = _prof.get_profile('classification') or _prof.get_profile('chat') or _prof.get_profile('admin')
+ classification_profile = _prof.get_profile('classification') or _prof.get_profile('admin')
  if not classification_profile:
  return {"Error": "Set a profile for Classification"}
  self._classification_profile = classification_profile
@@ -1317,11 +1317,11 @@ class SyntaxMUI:
  """)

  if not self._coding_profile:
- coding_profile = _prof.get_profile("coding") or _prof.get_profile("admin")
+ coding_profile = _prof.get_profile("coding") # or _prof.get_profile("admin")
  if not coding_profile:
  return (
  '<div class="smx-alert smx-alert-warn">'
- 'No LLM profile configured for <code>coding</code> (or <code>admin</code>). <br>'
+ 'No LLM profile configured for <code>coding</code> <br>'
  'Please, add the LLM profile inside the admin panel or contact your Administrator.'
  '</div>'
  )
syntaxmatrix/routes.py CHANGED
@@ -65,6 +65,7 @@ _CLIENT_DIR = detect_project_root()
  _stream_q = queue.Queue()
  _stream_cancelled = {}
  _last_result_html = {} # { session_id: html_doc }
+ _last_resized_csv = {} # { resize_id: bytes for last resized CSV per browser session }

  # single, reused formatter: inline styles, padding, rounded corners, scroll
  _FMT = _HtmlFmt(
@@ -3047,7 +3048,7 @@ def setup_routes(smx):
  }) + "\n\n"

  except GeneratorExit:
- smx.info("Client aborted the stream.")
+ return "Client aborted the stream."
  except Exception as e:
  smx.error(f"Stream error: {e}")
  yield "data: " + json.dumps({"event": "error", "error": str(e)}) + "\n\n"
@@ -5610,8 +5611,19 @@ def setup_routes(smx):
  dataset_profile = f"modality: tabular; columns: {columns_summary}"

  refined_question = refine_question_agent(askai_question, dataset_context)
- tags = classify_ml_job_agent(refined_question, dataset_profile)
-
+ tags = []
+ if refined_question.lower() == "incompatible" or refined_question.lower() == "mismatch":
+ return ("""
+ <div style="position: fixed; top: 50%; left: 50%; transform: translate(-50%, -50%); text-align: center;">
+ <h1 style="margin: 0 0 10px 0;">Oops: Context mismatch</h1>
+ <p style="margin: 0;">Please, upload the proper dataset for solution to your query.</p>
+ <br>
+ <a class='button' href='/dashboard' style='text-decoration:none;'>Return</a>
+ </div>
+ """)
+ else:
+ tags = classify_ml_job_agent(refined_question, dataset_profile)
+
  ai_code = smx.ai_generate_code(refined_question, tags, df)
  llm_usage = smx.get_last_llm_usage()
  ai_code = auto_inject_template(ai_code, tags, df)
@@ -6513,8 +6525,8 @@ def setup_routes(smx):
  cell["highlighted_code"] = Markup(_pygmentize(cell["code"]))

  highlighted_ai_code = _pygmentize(ai_code)
- tasks = [tag.replace("_", " ").replace('"', '').capitalize() for tag in tags]
-
+ smxAI = "Orion"
+
  return render_template(
  "dashboard.html",
  section=section,
@@ -6525,7 +6537,8 @@ def setup_routes(smx):
  highlighted_ai_code=highlighted_ai_code if ai_code else None,
  askai_question=smx.sanitize_rough_to_markdown_task(askai_question),
  refined_question=refined_question,
- tasks=tasks,
+ tasks=tags,
+ smxAI=smxAI,
  data_cells=data_cells,
  session_id=session_id,
  llm_usage=llm_usage
@@ -6589,6 +6602,179 @@ def setup_routes(smx):
  # go back to the dashboard; dashboard() will auto-select the next file
  return redirect(url_for("dashboard"))

+ # ── DATASET RESIZE (independent helper page) -------------------------
+
+
+ @smx.app.route("/dataset/resize", methods=["GET", "POST"])
+ def dataset_resize():
+ """
+ User uploads any CSV and picks a target size (percentage of rows).
+ We keep the last resized CSV in memory and expose a download link.
+ """
+ # One id per browser session to index _last_resized_csv
+ resize_id = session.get("dataset_resize_id")
+ if not resize_id:
+ resize_id = str(uuid.uuid4())
+ session["dataset_resize_id"] = resize_id
+
+ resize_info = None # stats we pass down to the template
+
+ if request.method == "POST":
+ file = request.files.get("dataset_file")
+ target_pct_raw = (request.form.get("target_pct") or "").strip()
+ strat_col = (request.form.get("strat_col") or "").strip()
+
+ error_msg = None
+ df = None
+
+ # --- Basic validation ---
+ if not file or file.filename == "":
+ error_msg = "Please choose a CSV file."
+ elif not file.filename.lower().endswith(".csv"):
+ error_msg = "Only CSV files are supported."
+
+ # --- Read CSV into a DataFrame ---
+ if not error_msg:
+ try:
+ df = pd.read_csv(file)
+ except Exception as e:
+ error_msg = f"Could not read CSV: {e}"
+
+ # --- Parse target percentage ---
+ pct = None
+ if not error_msg:
+ try:
+ pct = float(target_pct_raw)
+ except Exception:
+ error_msg = "Target size must be a number between 1 and 100."
+
+ if not error_msg and (pct <= 0 or pct > 100):
+ error_msg = "Target size must be between 1 and 100."
+
+ if error_msg:
+ flash(error_msg, "error")
+ else:
+ frac = pct / 100.0
+ n_orig = len(df)
+ n_target = max(1, int(round(n_orig * frac)))
+
+ df_resized = None
+ used_strat = False
+
+ # --- Advanced: stratified sampling by a column (behind 'Show advanced options') ---
+ if strat_col and strat_col in df.columns and n_orig > 0:
+ used_strat = True
+ groups = df.groupby(strat_col, sort=False)
+
+ # First pass: proportional allocation with rounding and minimum 1 per non-empty group
+ allocations = {}
+ total_alloc = 0
+ for key, group in groups:
+ size = len(group)
+ if size <= 0:
+ allocations[key] = 0
+ continue
+ alloc = int(round(size * frac))
+ if alloc == 0 and size > 0:
+ alloc = 1
+ if alloc > size:
+ alloc = size
+ allocations[key] = alloc
+ total_alloc += alloc
+
+ keys = list(allocations.keys())
+
+ # Adjust downwards if we overshot
+ if total_alloc > n_target:
+ idx = 0
+ while total_alloc > n_target and any(v > 1 for v in allocations.values()):
+ k = keys[idx % len(keys)]
+ if allocations[k] > 1:
+ allocations[k] -= 1
+ total_alloc -= 1
+ idx += 1
+
+ # Adjust upwards if we undershot and we still have room in groups
+ if total_alloc < n_target and keys:
+ idx = 0
+ while total_alloc < n_target:
+ k = keys[idx % len(keys)]
+ group_size = len(groups.get_group(k))
+ if allocations[k] < group_size:
+ allocations[k] += 1
+ total_alloc += 1
+ idx += 1
+ if idx > len(keys) * 3:
+ break
+
+ sampled_parts = []
+ for key, group in groups:
+ n_g = allocations.get(key, 0)
+ if n_g > 0:
+ sampled_parts.append(group.sample(n=n_g, random_state=0))
+
+ if sampled_parts:
+ df_resized = (
+ pd.concat(sampled_parts, axis=0)
+ .sample(frac=1.0, random_state=0)
+ .reset_index(drop=True)
+ )
+
+ # --- Default: simple random sample over all rows ---
+ if df_resized is None:
+ if n_target >= n_orig:
+ df_resized = df.copy()
+ else:
+ df_resized = df.sample(n=n_target, random_state=0).reset_index(drop=True)
+ if strat_col and strat_col not in df.columns:
+ flash(
+ f"Column '{strat_col}' not found. Used simple random sampling instead.",
+ "warning",
+ )
+
+ # --- Serialise to CSV in memory and stash in _last_resized_csv ---
+ buf = _std_io.BytesIO()
+ df_resized.to_csv(buf, index=False)
+ buf.seek(0)
+ _last_resized_csv[resize_id] = buf.getvalue()
+
+ resize_info = {
+ "rows_in": n_orig,
+ "rows_out": len(df_resized),
+ "pct": pct,
+ "used_strat": used_strat,
+ "strat_col": strat_col if used_strat else "",
+ }
+ flash("Dataset resized successfully. Use the download link below.", "success")
+
+ return render_template("dataset_resize.html", resize_info=resize_info)
+
+ @smx.app.route("/dataset/resize/download", methods=["GET"])
+ def download_resized_dataset():
+ """Download the last resized dataset for this browser session as a CSV."""
+ resize_id = session.get("dataset_resize_id")
+ if not resize_id:
+ return ("No resized dataset available.", 404)
+
+ data = _last_resized_csv.get(resize_id)
+ if not data:
+ return ("No resized dataset available.", 404)
+
+ buf = _std_io.BytesIO(data)
+ buf.seek(0)
+ stamp = datetime.now().strftime("%Y%m%d-%H%M%S-%f")
+ filename = f"resized_dataset_{stamp}.csv"
+
+ # Drop it from memory once downloaded
+ _last_resized_csv.pop(resize_id, None)
+
+ return send_file(
+ buf,
+ mimetype="text/csv; charset=utf-8",
+ as_attachment=True,
+ download_name=filename,
+ )
+

  def _pdf_fallback_reportlab(full_html: str):
  """ReportLab fallback: extract text + base64 <img> and lay them out."""
@@ -36,12 +36,14 @@ PROVIDERS_MODELS = {

  ],
  #4
- "deepseek": [
+ "deepseek": [
+ "deepseek-reasoner",
  "deepseek-chat",
  ],
  #5
  "moonshot": [
- "kimi-k2-0905-preview",
+ "kimi-k2-thinking",
+ "kimi-k2-instruct",
  ],
  #6
  "alibaba": [
@@ -57,7 +59,6 @@ PROVIDERS_MODELS = {
  "claude-sonnet-4-5",
  "claude-sonnet-4-0",
  "claude-3-5-haiku-latest",
- "claude-3-haiku-20240307",
  ]
  }