syntaxmatrix 1.4.6-py3-none-any.whl → 2.5.5.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. syntaxmatrix/__init__.py +13 -8
  2. syntaxmatrix/agentic/__init__.py +0 -0
  3. syntaxmatrix/agentic/agent_tools.py +24 -0
  4. syntaxmatrix/agentic/agents.py +810 -0
  5. syntaxmatrix/agentic/code_tools_registry.py +37 -0
  6. syntaxmatrix/agentic/model_templates.py +1790 -0
  7. syntaxmatrix/auth.py +308 -14
  8. syntaxmatrix/commentary.py +328 -0
  9. syntaxmatrix/core.py +993 -375
  10. syntaxmatrix/dataset_preprocessing.py +218 -0
  11. syntaxmatrix/db.py +92 -95
  12. syntaxmatrix/display.py +95 -121
  13. syntaxmatrix/generate_page.py +634 -0
  14. syntaxmatrix/gpt_models_latest.py +46 -0
  15. syntaxmatrix/history_store.py +26 -29
  16. syntaxmatrix/kernel_manager.py +96 -17
  17. syntaxmatrix/llm_store.py +1 -1
  18. syntaxmatrix/plottings.py +6 -0
  19. syntaxmatrix/profiles.py +64 -8
  20. syntaxmatrix/project_root.py +55 -43
  21. syntaxmatrix/routes.py +5072 -1398
  22. syntaxmatrix/session.py +19 -0
  23. syntaxmatrix/settings/logging.py +40 -0
  24. syntaxmatrix/settings/model_map.py +300 -33
  25. syntaxmatrix/settings/prompts.py +273 -62
  26. syntaxmatrix/settings/string_navbar.py +3 -3
  27. syntaxmatrix/static/docs.md +272 -0
  28. syntaxmatrix/static/icons/favicon.png +0 -0
  29. syntaxmatrix/static/icons/hero_bg.jpg +0 -0
  30. syntaxmatrix/templates/dashboard.html +608 -147
  31. syntaxmatrix/templates/docs.html +71 -0
  32. syntaxmatrix/templates/error.html +2 -3
  33. syntaxmatrix/templates/login.html +1 -0
  34. syntaxmatrix/templates/register.html +1 -0
  35. syntaxmatrix/ui_modes.py +14 -0
  36. syntaxmatrix/utils.py +2482 -159
  37. syntaxmatrix/vectorizer.py +16 -12
  38. {syntaxmatrix-1.4.6.dist-info → syntaxmatrix-2.5.5.4.dist-info}/METADATA +20 -17
  39. syntaxmatrix-2.5.5.4.dist-info/RECORD +68 -0
  40. syntaxmatrix/model_templates.py +0 -30
  41. syntaxmatrix/static/icons/favicon.ico +0 -0
  42. syntaxmatrix-1.4.6.dist-info/RECORD +0 -54
  43. {syntaxmatrix-1.4.6.dist-info → syntaxmatrix-2.5.5.4.dist-info}/WHEEL +0 -0
  44. {syntaxmatrix-1.4.6.dist-info → syntaxmatrix-2.5.5.4.dist-info}/licenses/LICENSE.txt +0 -0
  45. {syntaxmatrix-1.4.6.dist-info → syntaxmatrix-2.5.5.4.dist-info}/top_level.txt +0 -0
syntaxmatrix/core.py CHANGED
@@ -1,102 +1,134 @@
  from __future__ import annotations
- import os, webbrowser, uuid, secrets
+ import ast
+ import textwrap
+ import os, webbrowser, uuid, secrets, re

- from flask import Flask, session, request, has_request_context
- from .history_store import SQLHistoryStore as Store, PersistentHistoryStore as _Store
+ from flask import Flask, Response, session, request, has_request_context
+ from syntaxmatrix.agentic.agents import mlearning_agent
+ from syntaxmatrix.history_store import SQLHistoryStore as Store, PersistentHistoryStore as _Store
  from collections import OrderedDict
  from syntaxmatrix.llm_store import save_embed_model, load_embed_model, delete_embed_key
  from . import db, routes
  from .themes import DEFAULT_THEMES
- from .plottings import render_plotly, pyplot
+ from .ui_modes import UI_MODES
+ from .plottings import render_plotly, pyplot, describe_plotly
  from .file_processor import process_admin_pdf_files
- from google import genai
- from openai import OpenAI
+ from google.genai import types
  from .vector_db import query_embeddings
  from .vectorizer import embed_text
- from syntaxmatrix.settings.prompts import SMX_PROMPT_PROFILE, SMX_PROMPT_INSTRUCTIONS
- from typing import List
+ from syntaxmatrix.settings.prompts import SMXAI_CHAT_ID, SMXAI_CHAT_INSTRUCTIONS, SMXAI_WEBSITE_DESCRIPTION
+ from typing import List, Generator
  from .auth import init_auth_db
- from . import profiles as prof
- from syntaxmatrix.utils import strip_describe_slice, drop_bad_classification_metrics
+ from . import profiles as _prof
  from syntaxmatrix.smiv import SMIV
  from .project_root import detect_project_root
+ from syntaxmatrix.gpt_models_latest import extract_output_text as _out, set_args
  from dotenv import load_dotenv
+ from html import unescape
+ from .plottings import render_plotly, pyplot, describe_plotly, describe_matplotlib
+ from threading import RLock
+ from syntaxmatrix.settings.model_map import GPT_MODELS_LATEST


  # ──────── framework‐local storage paths ────────
  # this ensures the key & data always live under the package dir,
  # regardless of where the developer `cd` into before launching.
  _CLIENT_DIR = detect_project_root()
- _HISTORY_DIR = os.path.join(_CLIENT_DIR, "data", "smx_history")
+ _HISTORY_DIR = os.path.join(_CLIENT_DIR, "smx_history")
  os.makedirs(_HISTORY_DIR, exist_ok=True)
- _SECRET_PATH = os.path.join(_CLIENT_DIR, "data", ".smx_secret_key")

- dotenv_path = os.path.join(str(_CLIENT_DIR.parent), ".env")
+ _SECRET_PATH = os.path.join(_CLIENT_DIR, ".smx_secret_key")

- if os.path.isfile(dotenv_path):
-     load_dotenv(dotenv_path, override=True)
+ _CLIENT_DOTENV_PATH = os.path.join(str(_CLIENT_DIR.parent), ".env")
+ if os.path.isfile(_CLIENT_DOTENV_PATH):
+     load_dotenv(_CLIENT_DOTENV_PATH, override=True)
+
+ _ICONS_PATH = os.path.join(_CLIENT_DIR, "static", "icons")
+ os.makedirs(_ICONS_PATH, exist_ok=True)

  EDA_OUTPUT = {} # global buffer for EDA output by session

  class SyntaxMUI:
-     def __init__(
-         self,
-         host="127.0.0.1",
-         port="5050",
+     def __init__(self,
+         host="127.0.0.1",
+         port="5080",
          user_icon="👩🏿‍🦲",
-         bot_icon='<img src="../static/icons/favicon.ico" alt="bot icon" width="20"/>',
-         favicon='<img src="../static/icons/favicon.ico" width="15"/>',
-         site_logo='<img src="../static/icons/logo.png" width="30" alt="SMX Logo"/>',
+         bot_icon="<img src='/static/icons/favicon.png' width=20' alt='bot'/>",
+         favicon="/static/icons/favicon.png",
+         site_logo="<img src='/static/icons/logo.png' width='30' alt='logo'/>",
          site_title="SyntaxMatrix",
-         project_title="smxAI Engine",
-         theme_name="light"
+         project_name="smxAI",
+         theme_name="light",
+         ui_mode = "default"
      ):
-         self.app = Flask(__name__)
-         self.get_app_secrete()
+         self.app = Flask(__name__)
          self.host = host
          self.port = port
+
+         self.get_app_secrete()
          self.user_icon = user_icon
          self.bot_icon = bot_icon
+         self.site_logo = site_logo
          self.favicon = favicon
          self.site_title = site_title
-         self.site_logo = site_logo
-         self.project_title = project_title
-         self.ui_mode = "default"
+         self.project_name = project_name
+         self.ui_mode = ui_mode
          self.theme_toggle_enabled = False
-         self.prompt_profile = SMX_PROMPT_PROFILE
-         self.prompt_instructions = SMX_PROMPT_INSTRUCTIONS
+         self.user_files_enabled = False
+         self.smxai_identity = SMXAI_CHAT_ID
+         self.smxai_instructions = SMXAI_CHAT_INSTRUCTIONS
+         self.website_description = SMXAI_WEBSITE_DESCRIPTION
+         self._eda_output = {} # {chat_id: html}
+         self._eda_lock = RLock()
+
          db.init_db()
          self.page = ""
         self.pages = db.get_pages()
          init_auth_db()
+
          self.widgets = OrderedDict()
          self.theme = DEFAULT_THEMES.get(theme_name, DEFAULT_THEMES["light"])
          self.system_output_buffer = "" # Ephemeral buffer initialized
          self.app_token = str(uuid.uuid4()) # NEW: Unique token for each app launch.
          self.admin_pdf_chunks = {} # In-memory store for admin PDF chunks
          self.user_file_chunks = {} # In-memory store of user‑uploaded chunks, scoped per chat session
+
+         self._last_llm_usage = None
          routes.setup_routes(self)

-         self.chat_profile = None
-         self.labeller_profile = None
-         self.classifier_profile = None
-         self.coder_profile = None
-         self.summarizer_profile = None
+         self._admin_profile = {}
+         self._chat_profile = {}
+         self._coding_profile = {}
+         self._classification_profile = {}
+         self._summarization_profile = {}
+
+         self._gpt_models_latest_prev_resp_ids = {}
+         self.is_streaming = False
+         self.stream_args = {}
+
+         self._recent_visual_summaries = []
+
+         self.placeholder = ""

+     @staticmethod
      def init_app(app):
-         import os, secrets
+         import secrets
          if not app.secret_key:
-             app.secret_key = secrets.token_urlsafe(32)
-
+             app.secret_key = secrets.token_urlsafe(32)

-     def get_app_secrete(self):
+     def get_app_secrete(self):
          if os.path.exists(_SECRET_PATH):
              self.app.secret_key = open(_SECRET_PATH, "r", encoding="utf-8").read().strip()
          else:
              new_key = secrets.token_urlsafe(32)
-             open(_SECRET_PATH, "w", encoding="utf-8").write(new_key)
+             with open(_SECRET_PATH, "w", encoding="utf-8") as f:
+                 f.write(new_key)
+             try:
+                 os.chmod(_SECRET_PATH, 0o600)
+             except Exception:
+                 pass
              self.app.secret_key = new_key
-
+

      def _get_visual_context(self):
          """Return the concatenated summaries for prompt injection."""
@@ -105,15 +137,75 @@ class SyntaxMUI:
          joined = "\n• " + "\n• ".join(self._recent_visual_summaries)
          return f"\n\nRecent visualizations:{joined}"

+     # add to class
+     def _add_visual_summary(self, summary: str) -> None:
+         if not summary:
+             return
+         if not hasattr(self, "_recent_visual_summaries"):
+             self._recent_visual_summaries = []
+         # keep last 6
+         self._recent_visual_summaries = (self._recent_visual_summaries + [summary])[-6:]

      def set_plottings(self, fig_or_html, note=None):
-         sid = session.get("current_session", {}).get("id", "default")
+         # prefer current chat id; fall back to per-browser sid; finally "default"
+         sid = self.get_session_id() or self._sid() or "default"
+
+         # Clear for this session if empty/falsy
          if not fig_or_html or (isinstance(fig_or_html, str) and fig_or_html.strip() == ""):
-             EDA_OUTPUT[sid] = ""
+             with self._eda_lock:
+                 self._eda_output.pop(sid, None)
              return

          html = None

+         # ---- Plotly Figure support ----
+         try:
+             import plotly.graph_objs as go
+             if isinstance(fig_or_html, go.Figure):
+                 html = fig_or_html.to_html(full_html=False)
+         except ImportError:
+             pass
+
+         # ---- Matplotlib Figure support ----
+         if html is None and hasattr(fig_or_html, "savefig"):
+             html = pyplot(fig_or_html)
+
+         # ---- Bytes (PNG etc.) support ----
+         if html is None and isinstance(fig_or_html, bytes):
+             import base64
+             img_b64 = base64.b64encode(fig_or_html).decode()
+             html = f"<img src='data:image/png;base64,{img_b64}'/>"
+
+         # ---- HTML string support ----
+         if html is None and isinstance(fig_or_html, str):
+             html = fig_or_html
+
+         if html is None:
+             raise TypeError("Unsupported object type for plotting.")
+
+         if note:
+             html += f"<div style='margin-top:10px; text-align:center; color:#888;'><strong>{note}</strong></div>"
+
+         wrapper = f'''
+         <div style="
+             position:relative; max-width:650px; margin:30px auto 20px auto;
+             padding:20px 28px 10px 28px; background:#fffefc;
+             border:2px solid #2da1da38; border-radius:16px;
+             box-shadow:0 3px 18px rgba(90,130,230,0.06); min-height:40px;">
+             <button id="eda-close-btn" onclick="closeEdaPanel()" style="
+                 position: absolute; top: 20px; right: 12px;
+                 font-size: 1.25em; background: transparent;
+                 border: none; color: #888; cursor: pointer;
+                 z-index: 2; transition: color 0.2s;">&times;</button>
+             {html}
+         </div>
+         '''
+
+         with self._eda_lock:
+             self._eda_output[sid] = wrapper
+
+         html = None
+
          # ---- Plotly Figure support ----
          try:
              import plotly.graph_objs as go
@@ -160,8 +252,9 @@ class SyntaxMUI:


      def get_plottings(self):
-         sid = session.get("current_session", {}).get("id", "default")
-         return EDA_OUTPUT.get(sid, "")
+         sid = self.get_session_id() or self._sid() or "default"
+         with self._eda_lock:
+             return self._eda_output.get(sid, "")


      def load_sys_chunks(self, directory: str = "uploads/sys"):
@@ -188,109 +281,90 @@ class SyntaxMUI:


      def set_ui_mode(self, mode):
-         if mode not in ["default", "card", "bubble", "smx"]:
+         if mode not in self.get_ui_modes(): # ["default", "card", "bubble", "smx"]:
              raise ValueError("UI mode must be one of: 'default', 'card', 'bubble', 'smx'.")
          self.ui_mode = mode

-
      @staticmethod
-     def list_ui_modes():
-         return "default", "card", "bubble", "smx"
+     def get_ui_modes():
+         return list(UI_MODES.keys())
+         # return "default", "card", "bubble", "smx"

-
      @staticmethod
-     def list_themes():
+     def get_themes():
          return list(DEFAULT_THEMES.keys())
-

-     def set_theme(self, theme_name, theme):
+
+     def set_theme(self, theme_name, theme=None):
          if theme_name in DEFAULT_THEMES:
              self.theme = DEFAULT_THEMES[theme_name]
          elif isinstance(theme, dict):
-             self.theme["custom"] = theme
              DEFAULT_THEMES[theme_name] = theme
+             self.theme = DEFAULT_THEMES[theme_name]
          else:
              self.theme = DEFAULT_THEMES["light"]
-             raise ValueError("Theme must be 'light', 'dark', or a custom dict.")
-
+             self.error("Theme must be 'light', 'dark', or a custom dict.")

+
      def enable_theme_toggle(self):
-         self.theme_toggle_enabled = True
+         self.theme_toggle_enabled = True

-
-     def disable_theme_toggle(self):
-         self.theme_toggle_enabled = False
+     def enable_user_files(self):
+         self.user_files_enabled = True

-
-     def columns(self, components):
+     @staticmethod
+     def columns(components):
          col_html = "<div style='display:flex; gap:10px;'>"
          for comp in components:
              col_html += f"<div style='flex:1;'>{comp}</div>"
          col_html += "</div>"
          return col_html
-
-
-     def set_favicon(self, icon):
-         self.favicon = icon
-

      def set_site_title(self, title):
          self.site_title = title

+     def set_project_name(self, project_name):
+         self.project_name = project_name
+
+     def set_favicon(self, icon):
+         self.favicon = icon

      def set_site_logo(self, logo):
          self.site_logo = logo

-
-     def set_project_title(self, project_title):
-         self.project_title = project_title
-
-
      def set_user_icon(self, icon):
          self.user_icon = icon

-
      def set_bot_icon(self, icon):
          self.bot_icon = icon

-
-     def text_input(self, key, label, placeholder="Ask me anything"):
+     def text_input(self, key, id, label, placeholder=""):
+         if not placeholder:
+             placeholder = f"Ask {self.project_name} anything"
          if key not in self.widgets:
-             self.widgets[key] = {"type": "text_input", "key": key, "label": label, "placeholder": placeholder}
-
-
-     def get_text_input_value(self, key, default=""):
-         q = session.get(key, default)
-
-         classifier_profile = prof.get_profile("classifier") or prof.get_profile("chat")
-         if not classifier_profile:
-             self.error("ERROR: There is no LLM profile set yet.")
-             return q, None
-
-         intent = self._classify_query(q)
-         return q, intent
-
+             self.widgets[key] = {
+                 "type": "text_input", "key": key, "id": id,
+                 "label": label, "placeholder": placeholder
+             }

      def clear_text_input_value(self, key):
          session[key] = ""
          session.modified = True


-     def button(self, key, label, callback=None, stream=False):
+     def button(self, key, id, label, callback, stream=False):
+         if stream == True:
+             self.is_streaming = True
          self.widgets[key] = {
-             "type": "button", "key": key,
-             "label": label, "callback": callback,
-             "stream": stream
+             "type": "button", "key": key, "id": id, "label": label, "callback": callback, "stream":stream
          }

-
-     def file_uploader(self, key, label, accept_multiple_files=False, callback=None):
+     def file_uploader(self, key, id, label, accept_multiple_files):
          if key not in self.widgets:
              self.widgets[key] = {
                  "type": "file_upload",
-                 "key": key, "label": label,
+                 "key": key, "id":id, "label": label,
                  "accept_multiple": accept_multiple_files,
-                 "callback": callback
              }


@@ -326,57 +400,44 @@ class SyntaxMUI:
          session.modified = True
          return sid

-
      def get_chat_history(self) -> list[tuple[str, str]]:
-         # now load the history for the _current_ chat session
+         # Load the history for the _current_ chat session
          sid = self._sid()
          cid = self.get_session_id()
+         if session.get("user_id"):
+             # Logged-in: use SQLHistoryStore (Store). Locking handled inside history_store.py
+             return Store.load(str(session["user_id"]), cid)
+         # Anonymous: use PersistentHistoryStore (_Store) JSON files
          return _Store.load(sid, cid)
-
+

      def set_chat_history(self, history: list[tuple[str, str]], *, max_items: int | None = None) -> list[tuple[str, str]]:
          sid = self._sid()
          cid = self.get_session_id()
-         _Store.save(sid, cid, history)
-         session["chat_history"] = history[-30:] # still mirror a thin copy into Flask’s session cookie for the UI
-         session.modified = True
-
          if session.get("user_id"):
-             user_id = session["user_id"]
-             cid = session["current_session"]["id"]
-             title = session["current_session"]["title"]
-             # persist both title + history
-             Store.save(user_id, cid, session["chat_history"], title)
-
-         return history if max_items is None else history[-max_items:]
+             # Logged-in: chats.db via Store (SQLHistoryStore)
+             Store.save(str(session["user_id"]), cid, history)
+         else:
+             # Anonymous: file-backed via _Store (PersistentHistoryStore)
+             _Store.save(sid, cid, history)


      def clear_chat_history(self):
-         """
-         Clear both the UI slice *and* the server-side history bucket
-         for this session_id + chat_id.
-         """
          if has_request_context():
-             # 1) Clear the in-memory store
-             from .history_store import PersistentHistoryStore as _Store
-             sid = self._sid() # your per-browser session ID
-             cid = self.get_session_id() # current chat UUID
-             _Store.save(sid, cid, []) # wipe server history
-
-             # 2) Clear the cookie slice shown in the UI
-             session["chat_history"] = []
-             # 3) Also clear out the “current_session” and past_sessions histories
-             if "current_session" in session:
-                 session["current_session"]["history"] = []
-             if "past_sessions" in session:
-                 session["past_sessions"] = [
-                     {**s, "history": []} if s.get("id") == cid else s
-                     for s in session["past_sessions"]
-                 ]
+             sid = self._sid()
+             cid = self.get_session_id()
+
+             # delete the chat from the correct backend (DB for logged-in, file for anonymous)
+             if session.get("user_id"):
+                 Store.delete(session["user_id"], cid)
+             else:
+                 _Store.delete(sid, cid)
+
+             # rotate to a fresh empty chat (session remains metadata-only)
+             new_cid = str(uuid.uuid4())
+             session["current_session"] = {"id": new_cid, "title": "Current"}
+             session["active_chat_id"] = new_cid
              session.modified = True
-         else:
-             self._fallback_chat_history = []
-

      def bot_message(self, content, max_length=20):
          history = self.get_chat_history()
@@ -403,19 +464,14 @@ class SyntaxMUI:
      def write(self, content):
          self.bot_message(content)

+     def stream_write(self, chunk: str, end=False):
+         """Push a token to the SSE queue and, when end=True,
+         persist the whole thing to chat_history."""
+         from .routes import _stream_q
+         _stream_q.put(chunk) # live update
+         if end: # final flush → history
+             self.bot_message(chunk) # persists the final message

-     def markdown(self, md_text):
-         try:
-             import markdown
-             html = markdown.markdown(md_text)
-         except ImportError:
-             html = md_text
-         self.write(html)
-
-
-     def latex(self, math_text):
-         self.write(f"\\({math_text}\\)")
-

      def error(self, content):
          self.bot_message(f'<div style="color:red; font-weight:bold;">{content}</div>')
@@ -432,12 +488,15 @@ class SyntaxMUI:
      def info(self, content):
          self.bot_message(f'<div style="color:blue;">{content}</div>')

-
+
      def get_session_id(self):
-         """Return current chat’s UUID (so we can key uploaded chunks)."""
+         """Return the chat id that is currently *active* in the UI."""
+         # Prefer a sticky id set by /load_session or when a new chat is started.
+         sticky = session.get("active_chat_id")
+         if sticky:
+             return sticky
          return session.get("current_session", {}).get("id")

-
      def add_user_chunks(self, session_id, chunks):
          """Append these text‐chunks under that session’s key."""
          self.user_file_chunks.setdefault(session_id, []).extend(chunks)
@@ -451,32 +510,25 @@ class SyntaxMUI:
      def clear_user_chunks(self, session_id):
          """Remove all stored chunks for a session (on chat‑clear or delete)."""
          self.user_file_chunks.pop(session_id, None)
-
-
-     def stream_write(self, chunk: str, end=False):
-         """Push a token to the SSE queue and, when end=True,
-         persist the whole thing to chat_history."""
-         from .routes import _stream_q
-         _stream_q.put(chunk) # live update
-         if end: # final flush → history
-             self.bot_message(chunk) # persists the final message
-

      # ──────────────────────────────────────────────────────────────
      # *********** LLM CLIENT HELPERS **********************
      # ──────────────────────────────────────────────────────────────
      def set_prompt_profile(self, profile):
-         self.prompt_profile = profile
+         self.ai_chat_id = profile


      def set_prompt_instructions(self, instructions):
-         self.prompt_instructions = instructions
-
+         self.ai_chat_instructions = instructions
+
+
+     def set_website_description(self, desc):
+         self.website_description = desc
+

      def embed_query(self, q):
          return embed_text(q)

-
      def smiv_index(self, sid):
          chunks = self.get_user_chunks(sid) or []
          count = len(chunks)
@@ -502,7 +554,6 @@ class SyntaxMUI:
          self._user_index_counts[sid] = count
          return self._user_indices[sid]

-
      def load_embed_model(self):
          client = load_embed_model()
          os.environ["PROVIDER"] = client["provider"]
@@ -510,276 +561,843 @@ class SyntaxMUI:
510
561
  os.environ["OPENAI_API_KEY"] = client["api_key"]
511
562
  return client
512
563
 
513
-
514
564
  def save_embed_model(self, provider:str, model:str, api_key:str):
515
565
  return save_embed_model(provider, model, api_key)
516
566
 
517
-
518
567
  def delete_embed_key(self):
519
568
  return delete_embed_key()
520
569
 
521
570
 
522
- def get_client(self, profile):
523
- provider = profile["provider"].lower()
524
- api_key = profile["api_key"]
525
-
526
- if provider == "openai":
527
- return OpenAI(api_key=api_key)
528
- elif provider == "google":
529
- # return OpenAI(api_key=api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
530
- return genai.Client(api_key=api_key)
531
- elif provider == "xai":
532
- return OpenAI(api_key=api_key, base_url="https://api.x.ai/v1")
533
- elif provider == "deepseek":
534
- return OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
535
- elif provider == "moonshotai":
536
- return OpenAI(api_key=api_key, base_url="https://api.moonshot.ai/v1")
571
+ def get_gpt_models_latest(self):
572
+ return GPT_MODELS_LATEST
573
+
574
+ def get_text_input_value(self, key, default=""):
575
+ q = session.get(key, default)
537
576
 
577
+ intent = self.classify_query_intent(q)
578
+ intent = intent.strip().lower() if intent else ""
579
+ if intent not in {"none","user_docs","system_docs","hybrid"}:
580
+ self.error("Classify agency error")
581
+ return q, None
582
+ return q, intent
583
+
584
+ def enable_stream(self):
585
+ self.is_streaming = True
586
+
587
+ def stream(self):
588
+ return self.is_streaming
589
+
590
+ def get_stream_args(self):
591
+ return self.stream_args
592
+
593
+
594
+ def classify_query_intent(self, query: str) -> str:
595
+ from syntaxmatrix.gpt_models_latest import extract_output_text as _out, set_args
596
+
597
+ if not self._classification_profile:
598
+ classification_profile = _prof.get_profile('classification') or _prof.get_profile('chat') or _prof.get_profile('admin')
599
+ if not classification_profile:
600
+ return {"Error": "Set a profile for Classification"}
601
+ self._classification_profile = classification_profile
602
+ self._classification_profile['client'] = _prof.get_client(classification_profile)
603
+
604
+ _client = self._classification_profile['client']
605
+ _provider = self._classification_profile['provider']
606
+ _model = self._classification_profile['model']
607
+
608
+ # New instruction format with hybrid option
609
+ _intent_profile = "You are an intent classifier. Respond ONLY with the intent name."
610
+ _instructions = f"""
611
+ Classify the given query into ONE of these intents You must return ONLY the intent name with no comment or any preamble:
612
+ - "none": Casual chat/greetings
613
+ - "user_docs": Requires user-uploaded documents
614
+ - "system_docs": Requires company knowledge/docs
615
+ - "hybrid": Requires BOTH user docs AND company docs
616
+
617
+ Examples:
618
+ Query: "Hi there!" → none
619
+ Query: "Explain my uploaded contract" → user_docs
620
+ Query: "What's our refund policy?" → system_docs
621
+ Query: "How does my proposal align with company guidelines?" → hybrid
622
+ Query: "What is the weather today?" → none
623
+ Query: "Cross-reference the customer feedback from my uploaded survey results with our product's feature list in the official documentation." → hybrid
624
+
625
+ Now classify:
626
+ Query: "{query}"
627
+ Intent:
628
+ """
629
+ openai_sdk_messages = [
630
+ {"role": "system", "content": _intent_profile},
631
+ {"role": "user", "content": _instructions}
632
+ ]
633
+
634
+ def google_classify_query():
635
+ response = _client.models.generate_content(
636
+ model=_model,
637
+ contents=f"{_intent_profile}\n{_instructions}\n\n"
638
+ )
639
+ return response.text.strip().lower()
640
+
641
+ def gpt_models_latest_classify_query(reasoning_effort = "medium", verbosity = "low"):
642
+
643
+ args = set_args(
644
+ model=_model,
645
+ instructions=_intent_profile,
646
+ input=_instructions,
647
+ reasoning_effort=reasoning_effort,
648
+ verbosity=verbosity,
649
+ )
650
+ try:
651
+ resp = _client.responses.create(**args)
652
+ answer = _out(resp).strip().lower()
653
+ return answer if answer else ""
654
+ except Exception as e:
655
+ return f"Error!"
656
+
657
+ def anthropic_classify_query():
658
+ try:
659
+ response = _client.messages.create(
660
+ model=_model,
661
+ max_tokens=1024,
662
+ system=_intent_profile,
663
+ messages=[{"role": "user", "content":_instructions}],
664
+ stream=False,
665
+ )
666
+ return response.content[0].text.strip()
667
+
668
+ except Exception as e:
669
+ return f"Error: {str(e)}"
670
+
671
+ def openai_sdk_classify_query():
672
+ try:
673
+ response = _client.chat.completions.create(
674
+ model=_model,
675
+ messages=openai_sdk_messages,
676
+ temperature=0,
677
+ max_tokens=100
678
+ )
679
+ intent = response.choices[0].message.content.strip().lower()
680
+ return intent if intent else ""
681
+ except Exception as e:
682
+ return f"Error!"
683
+
684
+ if _provider == "google":
685
+ intent = google_classify_query()
686
+ return intent
687
+ if _model in self.get_gpt_models_latest():
688
+ intent = gpt_models_latest_classify_query()
689
+ return intent
690
+ if _provider == "anthropic":
691
+ intent = anthropic_classify_query()
692
+ return intent
693
+ else:
694
+ intent = openai_sdk_classify_query()
695
+ return intent
696
+
538
697
 
539
- # @staticmethod
540
698
  def generate_contextual_title(self, chat_history):
541
699
 
542
- if not self.labeller_profile:
543
- labeller_profile = prof.get_profile('labeller') or prof.get_profile('chat') or {}
544
- if not labeller_profile:
545
- return
700
+ if not self._summarization_profile:
701
+ summarization_profile = _prof.get_profile('summarization') or _prof.get_profile('chat') or _prof.get_profile('admin')
702
+ if not summarization_profile:
703
+ return {"Error": "Chat profile not set yet."}
546
704
 
547
- self.labeller_profile = labeller_profile
548
- self.labeller_profile['client'] = self.get_client(labeller_profile)
705
+ self._summarization_profile = summarization_profile
706
+ self._summarization_profile['client'] = _prof.get_client(summarization_profile)
549
707
 
550
708
  conversation = "\n".join([f"{role}: {msg}" for role, msg in chat_history])
551
- instructions = f"""
552
- PROMPT_PROFILE: You are a title generator.
553
- INSTRUCTIONS: Generate a contextual title (5 short words max) from the given Conversation History: \n{conversation}.\n\n
554
- The title should be concise, relevant, and capture the essence of the conversation, and with no preamble.
555
- return only the title.
556
- """
557
- client = self.labeller_profile['client']
558
- model = self.labeller_profile['model']
709
+ _title_profile = "You are a title generator that creates concise and relevant titles for the given conversations."
710
+ _instructions = f"""
711
+ Generate a contextual title (5 short words max) from the given Conversation History
712
+ The title should be concise - with no preamble, relevant, and capture the essence of this Conversation: \n{conversation}.\n\n
713
+ return only the title.
714
+ """
559
715
 
716
+ _client = self._summarization_profile['client']
717
+ _provider = self._summarization_profile['provider']
718
+ _model = self._summarization_profile['model']
719
+
560
720
  def google_generated_title():
561
- response = client.models.generate_content(
562
- model=model,
563
- contents=instructions
564
- )
565
- return response.text
721
+ try:
722
+ response = _client.models.generate_content(
723
+ model=_model,
724
+ contents=f"{_title_profile}\n{_instructions}"
725
+ )
726
+ return response.text.strip()
727
+ except Exception as e:
728
+ return f"Summary agent error!"
729
+
730
+ def gpt_models_latest_generated_title():
731
+ try:
732
+ args = set_args(
733
+ model=_model,
734
+ instructions=_title_profile,
735
+ input=_instructions,
736
+ # reasoning_effort=reasoning_effort,
737
+ # verbosity=verbosity,
738
+ )
739
+
740
+ resp = _client.responses.create(**args)
741
+ return _out(resp).strip()
742
+ except Exception as e:
743
+ return f"Summary agent error!"
566
744
 
745
+ def anthropic_generated_title():
746
+ try:
747
+ response = _client.messages.create(
748
+ model=_model,
749
+ max_tokens=50,
750
+ system=_title_profile,
751
+ messages=[{"role": "user", "content":_instructions}],
752
+ stream=False,
753
+ )
754
+ return response.content[0].text.strip()
755
+ except Exception as e:
756
+ return f"Summary agent error!"
757
+
567
758
  def openai_sdk_generated_title():
568
759
  prompt = [
569
- {
570
- "role": "system",
571
- "content": instructions
572
- },
760
+ { "role": "system", "content": _title_profile },
761
+ { "role": "user", "content": _instructions },
573
762
  ]
574
-
575
- response = client.chat.completions.create(
576
- model=model,
577
- messages=prompt,
578
- temperature=0,
579
- max_tokens=50
580
- )
581
-
582
- title = response.choices[0].message.content.strip().lower()
583
- return title
763
+ try:
764
+ response = _client.chat.completions.create(
765
+ model=_model,
766
+ messages=prompt,
767
+ temperature=0.3,
768
+ max_tokens=50
769
+ )
770
+ title = response.choices[0].message.content.strip().lower()
771
+ return title if title else ""
772
+ except Exception as e:
773
+ return f"Summary agent error!"
584
774
 
585
- if self.labeller_profile['provider'] == "google":
775
+ if _provider == "google":
586
776
  title = google_generated_title()
777
+ elif _model in self.get_gpt_models_latest():
778
+ title = gpt_models_latest_generated_title()
779
+ elif _provider == "anthropic":
780
+ title = anthropic_generated_title()
587
781
  else:
588
782
  title = openai_sdk_generated_title()
589
783
  return title
784
+
590
785
 
786
+ def stream_process_query(self, query, context, conversations, sources):
787
+ self.stream_args['query'] = query
788
+ self.stream_args['context'] = context
789
+ self.stream_args['conversations'] = conversations
790
+ self.stream_args['sources'] = sources
791
+
591
792
 
592
- def _classify_query(self, query: str) -> str:
793
+ def process_query_stream(self, query: str, context: str, history: list, stream=True) -> Generator[str, None, None]:
794
+
795
+ if not self._chat_profile:
796
+ chat_profile = _prof.get_profile("chat") or _prof.get_profile("admin")
797
+ if not chat_profile:
798
+ yield """<p style='color:red;'>Error: Chat profile is not configured. Add a chat profile inside the admin panel or contact your administrator.</p>
799
+ """
800
+ return None
801
+ self._chat_profile = chat_profile
802
+ self._chat_profile['client'] = _prof.get_client(chat_profile)
803
+
804
+ _provider = self._chat_profile['provider']
805
+ _client = self._chat_profile['client']
806
+ _model = self._chat_profile['model']
807
+
808
+ _contents = f"""
809
+ {self.smxai_instructions}\n\n
810
+ Question: {query}\n
811
+ Context: {context}\n\n
812
+ History: {history}\n\n
813
+ Use conversation continuity if available.
814
+ """
593
815
 
594
- if not self.classifier_profile:
595
- classifier_profile = prof.get_profile('classifier') or prof.get_profile('chat') or {}
596
- if not classifier_profile:
597
- return
598
-
599
- self.classifier_profile = classifier_profile
600
- self.classifier_profile['client'] = self.get_client(classifier_profile)
601
-
602
- exp = [
603
- { "query":"Hi there!", "intent": "none" },
604
- { "query": "Summarize my uploaded marketing deck.", "intent": "user_doc" },
605
- { "query": "What’s the SLA for our email-delivery service?", "intent": "system_docs" },
606
- { "query": "What are my colleaues' surnames, in the contact list I sent you?", "intent": "hybrid" }
607
- ]
816
+ try:
817
+ if _provider == "google": # Google, non openai skd series
818
+
819
+ for chunk in _client.models.generate_content_stream(
820
+ model=_model,
821
+ contents=_contents,
822
+ config=types.GenerateContentConfig(
823
+ system_instruction=self.smxai_identity,
824
+ temperature=0.3,
825
+ max_output_tokens=1024,
826
+ ),
827
+ ):
828
+
829
+ yield chunk.text
608
830
 
609
- instructions = f"""
610
- You are an intent router. Classify questions into exactly one of the following intents:
611
- i. `base`
612
- ii. `user_docs`
613
- iii. `system_docs`
614
-
615
- 1. Return `base` if the query is a greeting or an opening to a casual chat.
616
-
617
- 2. Return `user_docs` if the user is asking about content the user personally uploaded.
618
-
619
- 3. Return `system_docs` if the user is asking about factual or technical details
620
- about your company and requires that you to look into the system or company files.
621
-
622
- Follow the above instructions and criteria and determine the intent of the following Query:\n{query}\n\n
623
- See the Few-shot exmples below and learn from them.
624
-
625
- Few-shot £xamples below.\n\n{exp}
626
- """
627
-
628
- prompt = {
629
- "role": "system",
630
- "content": instructions
631
- },
632
-
633
- def google_classify_query():
634
- response = self.classifier_profile['client'].models.generate_content(
635
- model=self.classifier_profile['model'],
636
- contents=instructions
637
- )
638
- return response.text
639
-
640
- def openai_sdk_classify_query():
641
- response = self.classifier_profile['client'].chat.completions.create(
642
- model=self.classifier_profile['model'],
643
- messages=prompt,
644
- temperature=0,
645
- max_tokens=100
646
- )
647
- intent = response.choices[0].message.content.strip().lower()
648
- return intent
649
-
650
- if self.classifier_profile['provider'] == "google":
651
- intent = google_classify_query()
652
- return intent
653
- else:
654
- intent = openai_sdk_classify_query()
655
- return intent
656
-
657
-
831
+ elif _provider == "openai" and _model in self.get_gpt_models_latest(): # GPt 5 series
832
+ input_prompt = (
833
+ f"{self.smxai_instructions}\n\n"
834
+ f"Generate a response to this query:\n{query}\n"
835
+ f"based on this given context:\n{context}\n\n"
836
+ f"(Use conversation continuity if available.)"
837
+ )
838
+ sid = self.get_session_id()
839
+ prev_id = self._gpt_models_latest_prev_resp_ids.get(sid)
840
+ args = set_args(model=_model, instructions=self.smxai_identity, input=input_prompt, previous_id=prev_id, store=True)
841
+
842
+ with _client.responses.stream(**args) as s:
843
+ for event in s:
844
+ if event.type == "response.output_text.delta" and event.delta:
845
+ yield event.delta
846
+ elif event.type == "response.error":
847
+ raise RuntimeError(str(event.error))
848
+ final = s.get_final_response()
849
+ if getattr(final, "id", None):
850
+ self._gpt_models_latest_prev_resp_ids[sid] = final.id
851
+
852
+ elif _provider == "anthropic":
853
+ with _client.messages.stream(
854
+ max_tokens=1024,
855
+ messages=[{"role": "user", "content":f"{self.smxai_identity}\n\n {_contents}"},],
856
+ model=_model,
857
+ ) as stream:
858
+ for text in stream.text_stream:
859
+ yield text # end="", flush=True
860
+
861
+ else: # Assumes standard openai_sdk
862
+ openai_sdk_prompt = [
863
+ {"role": "system", "content": self.smxai_identity},
864
+ {"role": "user", "content": f"{self.smxai_instructions}\n\nGenerate response to this query: {query}\nbased on this context:\n{context}\nand history:\n{history}\n\nUse conversation continuity if available.)"},
865
+ ]
866
+ response = _client.chat.completions.create(
867
+ model=_model,
868
+ messages=openai_sdk_prompt,
869
+ stream=True,
870
+ )
871
+ for chunk in response:
872
+ token = getattr(chunk.choices[0].delta, "content", "")
873
+ if token:
874
+ yield token
875
+ except Exception as e:
876
+ yield f"Error during streaming: {type(e).__name__}: {e}"
877
+
658
878
  def process_query(self, query, context, history, stream=False):
659
-
660
- if not self.chat_profile:
661
- chat_profile = prof.get_profile("chat") or {}
879
+
880
+ if not self._chat_profile:
881
+ chat_profile = _prof.get_profile("chat") or _prof.get_profile("admin")
662
882
  if not chat_profile:
663
- self.error("Error: setup a chat profile")
883
+ return """<p style='color:red;'>Error: Chat profile is not configured. Add a chat profile inside the admin panel or contact your administrator.</p>
884
+ """
664
885
  return
665
-
666
- client = self.get_client(chat_profile)
667
- self.chat_profile = chat_profile
668
- self.chat_profile['client'] = client
669
-
670
- google_prompt = f"""
671
- {self.prompt_profile}\n\n
672
- {self.prompt_instructions}\n\n
886
+
887
+ self._chat_profile = chat_profile
888
+ self._chat_profile['client'] = _prof.get_client(chat_profile)
889
+ _provider = self._chat_profile['provider']
890
+ _client = self._chat_profile['client']
891
+ _model = self._chat_profile['model']
892
+ _contents = f"""
893
+ {self.smxai_instructions}\n\n
673
894
  Question: {query}\n
674
- Context: {context}\n
675
- History: {history}
895
+ Context: {context}\n\n
896
+ History: {history}\n\n
897
+ Use conversation continuity if available.
676
898
  """
677
899
 
678
900
  openai_sdk_prompt = [
679
- {"role": "system", "content": self.prompt_profile},
680
- {"role": "user", "content": self.prompt_instructions},
681
- {"role": "assistant", "content": f"Query: {query}\n\nContext1: {context}\n\n"
682
- f"History: {history}\n\nAnswer: "}
901
+ {"role": "system", "content": self.smxai_identity},
902
+ {"role": "user", "content": f"""{self.smxai_instructions}\n\n
903
+ Generate response to this query: {query}\n
904
+ based on this context:\n{context}\n
905
+ and history:\n{history}\n\n
906
+ Use conversation continuity if available.)
907
+ """
908
+ },
683
909
  ]
684
910
 
685
911
  def google_process_query():
686
- response = self.chat_profile['client'].models.generate_content(
687
- model=self.chat_profile['model'],
688
- contents=google_prompt
912
+ try:
913
+ response = _client.models.generate_content(
914
+ model=_model,
915
+ contents=_contents,
916
+ config=types.GenerateContentConfig(
917
+ system_instruction=self.smxai_identity,
918
+ temperature=0.3,
919
+ max_output_tokens=1024,
920
+ ),
921
+ )
922
+ answer = response.text
923
+
924
+ # answer = strip_html(answer)
925
+ return answer
926
+ except Exception as e:
927
+ return f"Error: {str(e)}"
928
+
929
+ def gpt_models_latest_process_query(previous_id: str | None, reasoning_effort = "minimal", verbosity = "low"):
930
+ """
931
+ Returns (answer_text, new_response_id)
932
+ """
933
+ # Prepare the prompt with conversation history and context
934
+ input = (
935
+ f"{self.smxai_instructions}\n\n"
936
+ f"Generate a response to this query:\n{query}\n"
937
+ f"based on this given context:\n{context}\n\n"
938
+ f"(Use conversation continuity if available.)"
689
939
  )
690
- answer = response.text
691
- return answer
692
-
940
+
941
+ sid = self.get_session_id()
942
+ prev_id = self._gpt_models_latest_prev_resp_ids.get(sid)
943
+
944
+ args = set_args(
945
+ model=_model,
946
+ instructions=self.smxai_identity,
947
+ input=input,
948
+ previous_id=prev_id,
949
+ store=True,
950
+ reasoning_effort=reasoning_effort,
951
+ verbosity=verbosity
952
+ )
953
+ try:
954
+ # Non-stream path
955
+ resp = _client.responses.create(**args)
956
+ answer = _out(resp)
957
+ if getattr(resp, "id", None):
958
+ self._gpt_models_latest_prev_resp_ids[sid] = resp.id
959
+
960
+ # answer = strip_html(answer)
961
+ return answer
962
+
963
+ except Exception as e:
964
+ return f"Error: {type(e).__name__}: {e}"
965
+
966
+ def anthropic_process_query():
967
+ try:
968
+ response = _client.messages.create(
969
+ model=_model,
970
+ max_tokens=1024,
971
+ system=self.self.smxai_identity,
972
+ messages=[{"role": "user", "content":_contents}],
973
+ stream=False,
974
+ )
975
+ return response.content[0].text.strip()
976
+
977
+ except Exception as e:
978
+ return f"Error: {str(e)}"
979
+
693
980
  def openai_sdk_process_query():
694
981
 
695
982
  try:
696
- response = self.chat_profile['client'].chat.completions.create(
697
- model=self.chat_profile['model'],
983
+ response = _client.chat.completions.create(
984
+ model=_model,
698
985
  messages=openai_sdk_prompt,
699
- temperature=0.1,
700
- max_tokens=1024,
701
- stream=stream
986
+ stream=False,
702
987
  )
703
988
 
704
- if stream:
705
- # -------- token streaming --------
706
- parts = []
707
- for chunk in response:
708
- token = getattr(chunk.choices[0].delta, "content", "")
709
- if not token:
710
- continue
711
- parts.append(token)
712
- self.stream_write(token)
713
-
714
- self.stream_write("[END]") # close the SSE bubble
715
- answer = "".join(parts)
716
- return answer
717
- else:
718
- # -------- one-shot buffered --------
719
- answer = response.choices[0].message.content
720
- return answer
989
+ # -------- one-shot buffered --------
990
+ answer = response.choices[0].message.content .strip()
991
+ return answer
721
992
  except Exception as e:
722
993
  return f"Error: {str(e)}"
723
-
724
- if self.chat_profile['provider'] == "google":
994
+
995
+ if _provider == "google":
725
996
  return google_process_query()
726
- else:
727
- return openai_sdk_process_query()
728
-
997
+ if _provider == "openai" and _model in self.get_gpt_models_latest():
998
+ return gpt_models_latest_process_query(self._gpt_models_latest_prev_resp_ids.get(self.get_session_id()))
999
+ if _provider == "anthropic":
1000
+ return anthropic_process_query()
1001
+ return openai_sdk_process_query()
729
1002
 
730
- def ai_generate_code(self, question, df):
731
-
732
- if not self.coder_profile:
733
- coder_profile = prof.get_profile('coder') or prof.get_profile('chat') or {}
734
- if not coder_profile:
735
- return
1003
+
1004
+ def repair_python_cell(self, py_code: str) -> str:
736
1005
 
737
- self.coder_profile = coder_profile
738
- self.coder_profile['client'] = self.get_client(coder_profile)
739
-
740
- context = f"Columns: {list(df.columns)}\n\nDtypes: {df.dtypes.astype(str).to_dict()}\n\n"
741
- instructions = f"""
742
- You are an expert Python data analyst. Given the dataframe `df` with the following Context:\n{context}\n\n
743
- Write clean, working Python code that answers the question below.
744
- DO NOT explain, just output the code only (Add overview comment or text at the bottom)
745
- Question: {question}\n
746
- Output only the working code needed. Assume df is already defined.
747
- Produce at least one visible result: (syntaxmatrix.display.show(), display(), plt.show()).
1006
+ _CELL_REPAIR_RULES = """
1007
+ Fix the Python cell to satisfy:
1008
+ - Single valid cell; imports at the top.
1009
+ - Do not import or invoke or use 'python-dotenv' or 'dotenv' because it's not needed.
1010
+ - No top-level statements between if/elif/else branches.
1011
+ - Regression must use either sklearn with train_test_split (then X_test exists) and R^2/MAE/RMSE,
1012
+ or statsmodels OLS. No accuracy_score in regression.
1013
+ - Keep all plotting + savefig + BytesIO + display inside the branch that created the figure.
1014
+ - Return ONLY the corrected cell.
748
1015
  """
749
-
750
- def google_generate_code():
751
- response = self.coder_profile['client'].models.generate_content(
752
- model=self.coder_profile['model'],
753
- contents=instructions
1016
+ code = textwrap.dedent(py_code or "").strip()
1017
+ needs_fix = False
1018
+ if re.search(r"\baccuracy_score\b", code) and re.search(r"\bLinearRegression\b|\bOLS\b", code):
1019
+ needs_fix = True
1020
+ if re.search(r"\bX_test\b", code) and not re.search(r"\bX_test\s*=", code):
1021
+ needs_fix = True
1022
+ try:
1023
+ ast.parse(code)
1024
+ except SyntaxError:
1025
+ needs_fix = True
1026
+ if not needs_fix:
1027
+ return code
1028
+ _prompt = f"```python\n{code}\n```"
1029
+
1030
+ repair_profile = _prof.get_profile("vision2text") or _prof.get_profile("admin")
1031
+ if not repair_profile:
1032
+ return (
1033
+ '<div class="smx-alert smx-alert-warn">'
1034
+ 'No LLM profile configured for <code>coding</code> (or <code>admin</code>). <br>'
1035
+ 'Please, add the LLM profile inside the admin panel or contact your Administrator.'
1036
+ '</div>'
754
1037
  )
755
- return response.text
756
1038
 
757
- def others_generate_code():
758
- response = self.coder_profile['client'].chat.completions.create(
759
- model=self.coder_profile['model'],
760
- messages=[{"role": "user", "content": instructions}],
761
- temperature=0.0,
762
- max_tokens=2048,
1039
+ _client = _prof.get_client(repair_profile)
1040
+ _provider = repair_profile['provider'].lower()
1041
+ _model = repair_profile['model']
1042
+
1043
+ #1 Google
1044
+ if _provider == "google":
1045
+ from google.genai import types
1046
+
1047
+ fixed = _client.models.generate_content(
1048
+ model=_model,
1049
+ contents=_prompt,
1050
+ config=types.GenerateContentConfig(
1051
+ system_instruction=_CELL_REPAIR_RULES,
1052
+ temperature=0.8,
1053
+ max_output_tokens=1024,
1054
+ ),
1055
+ )
1056
+
1057
+ #2 Openai
1058
+ elif _provider == "openai" and _model in GPT_MODELS_LATEST:
1059
+
1060
+ args = set_args(
1061
+ model=_model,
1062
+ instructions=_CELL_REPAIR_RULES,
1063
+ input=[{"role": "user", "content": _prompt}],
1064
+ previous_id=None,
1065
+ store=False,
1066
+ reasoning_effort="medium",
1067
+ verbosity="medium",
1068
+ )
1069
+ fixed = _out(_client.responses.create(**args))
1070
+
1071
+ # Anthropic
1072
+ elif _provider == "anthropic":
1073
+
1074
+ fixed = _client.messages.create(
1075
+ model=_model,
1076
+ max_tokens=1024,
1077
+ system=_CELL_REPAIR_RULES,
1078
+ messages=[{"role": "user", "content":_prompt}],
1079
+ stream=False,
1080
+ )
1081
+
1082
+ # OpenAI SDK
1083
+ else:
1084
+ fixed = _client.chat.completions.create(
1085
+ model=_model,
1086
+ messages=[
1087
+ {"role": "system", "content":_CELL_REPAIR_RULES},
1088
+ {"role": "user", "content":_prompt},
1089
+ ],
1090
+ max_tokens=1024,
763
1091
  )
764
- return response.choices[0].message.content
1092
+
1093
+ try:
1094
+ ast.parse(fixed);
1095
+ return fixed
1096
+ except Exception:
1097
+ return code
1098
+
1099
+ def get_last_llm_usage(self):
1100
+ return getattr(self, "_last_llm_usage", None)
1101
+
1102
+ def ai_generate_code(self, refined_question, tasks, df):
1103
+
1104
+ def normalise_llm_code(s: str) -> str:
1105
+ s = s.replace("\t", " ")
1106
+ s = textwrap.dedent(s)
1107
+ lines = s.splitlines()
1108
+
1109
+ # drop leading blank lines
1110
+ while lines and not lines[0].strip():
1111
+ lines.pop(0)
1112
+
1113
+ # if everything is still indented >=4 spaces, shift left
1114
+ indents = [len(l) - len(l.lstrip(" ")) for l in lines if l.strip()]
1115
+ if indents and min(indents) >= 4:
1116
+ m = min(indents)
1117
+ lines = [l[m:] if len(l) >= m else l for l in lines]
1118
+
1119
+ return "\n".join(lines)
765
1120
 
766
- if self.coder_profile['provider'] == 'google':
767
- code = google_generate_code()
768
- else:
769
- code = others_generate_code()
1121
+ CONTEXT = f"Columns: {list(df.columns)}\n\nDtypes: {df.dtypes.astype(str).to_dict()}\n\n"
1122
+ AVAILABLE_COLUMNS = list(df.columns)
770
1123
 
771
- if "```python" in code:
772
- code = code.split("```python")[1].split("```")[0].strip()
773
- elif "```" in code:
774
- code = code.split("```")[1].split("```")[0].strip()
1124
+ # --- SMX: normalise tasks coming from intent agent ---
1125
+ if isinstance(tasks, str):
1126
+ import json, ast, re
1127
+ try:
1128
+ tasks_parsed = json.loads(tasks)
1129
+ except Exception:
1130
+ try:
1131
+ tasks_parsed = ast.literal_eval(tasks)
1132
+ except Exception:
1133
+ tasks_parsed = re.findall(r"[A-Za-z_]+", tasks)
1134
+ tasks = tasks_parsed
1135
+ if not isinstance(tasks, list):
1136
+ tasks = [str(tasks)]
1137
+ tasks = [str(t).strip().lower() for t in tasks if str(t).strip()]
1138
+
1139
+ ai_profile = """
1140
+ - You are a Python expert specializing in data science and machine learning.
1141
+ - Your task is to generate a single, complete, production-quality, executable Python script for a Jupyter-like Python kernel, based on the given instructions.
1142
+ - The dataset is already loaded as a pandas DataFrame named `df` (no file I/O or file uploads).
1143
+ - Make a copy of `df` and name it `df_copy`. Make sure `df_copy` is preprocessed and cleaned, named `df_cleaned`, if not already done so. Then use `df_cleaned` to perform the ML tasks described in the given context.
1144
+ - Select your features and target, from `df_cleaned`, with care and name it `required_cols`
1145
+ - Create your 'df_filtered by doing: df_filtered = df_cleaned[required_cols].
1146
+ - Use the {TEMPLATE_CATALOGUE} below to educate yourself on which visualizations you will implement in the code.
1147
+ - The final output MUST be the complete, executable Python code only, enclosed in a single markdown code block (```python ... ```), which is required to fulfill the user's request. See the {tasks} below.
1148
+ - Do not include any explanatory text or markdown outside the code block.
1149
+ """
1150
+
1151
+ TEMPLATE_CATALOGUE = """
1152
+ ### Available SyntaxMatrix templates (use these instead of inventing new helpers)
1153
+
1154
+ Visualisation templates (dataset-agnostic):
1155
+ - viz_pie(df, category_col=None, top_k=8): pie/donut shares within a category.
1156
+ - viz_stacked_bar(df, x=None, hue=None, normalise=True): composition across groups.
1157
+ - viz_count_bar(df, category_col=None, top_k=12): counts/denominators by category.
1158
+ - viz_box(df, x=None, y=None): spread/outliers of numeric by category.
1159
+ - viz_scatter(df, x=None, y=None, hue=None): relationship between two numeric vars.
1160
+ - viz_distribution(df, col=None): histogram-style distribution for numeric.
1161
+ - viz_kde(df, col=None): density curve for numeric.
1162
+ - viz_area(df, time_col=None, y_col=None): area/trend over time.
1163
+ - viz_line(df, x=None, y=None, hue=None): line/trend plot.
1164
+
1165
+ ML/stat templates:
1166
+ - classification(df): standard classification pipeline + metrics + plots.
1167
+ - regression(df): standard regression pipeline + metrics + plots.
1168
+ - clustering(df): clustering workflow + cluster plots.
1169
+ - anomaly_detection(df)
1170
+ - ts_anomaly_detection(df)
1171
+ - time_series_forecasting(df)
1172
+ - time_series_classification(df, entity_col, time_col, target_col)
1173
+ - dimensionality_reduction(df)
1174
+ - feature_selection(df)
1175
+ - eda_overview(df)
1176
+ - eda_correlation(df)
1177
+ - multilabel_classification(df, label_cols)
1178
+ - recommendation(df)
1179
+ - topic_modelling(df)
1180
+ """
1181
+
1182
+ instructions = (
1183
+ "### Context"
1184
+ f"- DataFrame - (`df`): {df}"
1185
+ f"- Schema (names → dtypes): {CONTEXT}"
1186
+ f"- Row count: {len(df)}"
1187
+ f"- Task description: {refined_question}"
1188
+ f"- Tasks: {tasks}"
1189
+ f"- Available columns: {AVAILABLE_COLUMNS}"
1190
+ f"- Template catalogue: {TEMPLATE_CATALOGUE}"
1191
+
1192
+ """
1193
+ ### Template rules
1194
+ - You MAY call a template if it matches the task.
1195
+ - Do NOT invent template names.
1196
+ - If no template fits, write minimal direct pandas/sklearn/seaborn code instead.
1197
+ - Keep the solution short: avoid writing wrappers/utilities already handled by SyntaxMatrix hardener.
1198
+
1199
+ #### Template selection hint examples:
1200
+ - If the task asks for pie/donut/composition shares → use viz_pie.
1201
+ - If it asks for denominators/counts per category → viz_count_bar.
1202
+ - If it asks for spread/outliers/comparison across groups → viz_box.
1203
+ - If it asks for relationship / “X vs Y” → viz_scatter.
1204
+ - If it asks for trend over time → viz_line or viz_area.
1205
+
1206
+ ### Hard requirements
+ 1) Code only. No markdown, no comments, no explanations.
+ 2) Import everything you use explicitly.
+ - Use pandas/numpy/matplotlib by default.
+ - Seaborn may be unavailable at runtime; **do not import seaborn inside your code**.
+ - If you call sns.*, assume sns is already defined by the framework.
+ 3) Avoid deprecated / removed APIs, e.g.:
+ - pandas: do not use `.append`, `.ix`, `.as_matrix`; prefer current patterns.
+ - seaborn: do not use `distplot`; avoid `pairplot` on very large data unless sampling.
+ - scikit-learn: import from `sklearn.model_selection` (not `sklearn.cross_validation`);
+ set `random_state=42` where relevant.
+ 4) Be defensive, but avoid hard-failing on optional fields:
+ - If a primary column needed to answer the question is missing, re-check the schema of `df`
+ and make sure you selected the correct column.
+ Never use a column/variable that isn't available or defined.
+ - If a secondary/extra column is missing, show a warning with `show(...)` and continue using available fields.
+ - Handle missing values sensibly (drop rows for simple EDA; use `ColumnTransformer` + `SimpleImputer` for modelling).
+ - For categorical features in ML, use `OneHotEncoder(handle_unknown="ignore")`
+ inside a `Pipeline`/`ColumnTransformer` (no `LabelEncoder` on features).
+ 5) Keep it fast (kernel timeout ~8s):
+ - For plots on large frames (>20k rows), downsample to ~1,000 rows
+ (`df.sample(1000, random_state=42)`) unless aggregation is more appropriate.
+ - Prefer vectorised ops; avoid O(n²) Python loops.
+ 6) Keep the solution compact:
+ - Do not define large helper libraries or long “required column” sets.
+ - Aim for ≤120 lines excluding imports.
1232
+ 7) Always produce at least one visible result at the end:
+ - If plotting with matplotlib/seaborn: call `plt.tight_layout(); plt.show()`.
+ - If producing a table or metrics:
+ `from syntaxmatrix.display import show` then `show(object_or_dataframe)`.
+ 8) Follow task type conventions:
+ - **EDA/Stats**: compute the requested stat, then show a relevant table
+ (e.g., summary/crosstab) or plot.
+ - **Classification**: train/valid split (`train_test_split`), pipeline with scaling/encoding,
+ fit, show accuracy and a confusion matrix via
+ `ConfusionMatrixDisplay.from_estimator(...); plt.show()`.
+ Also show `classification_report` as a dataframe if short.
+ - **Regression**: train/valid split, pipeline as needed, fit, show R² and MAE;
+ plot predicted vs actual scatter.
+ - **Correlation/Chi-square/ANOVA**: compute the statistic + p-value and show a concise
+ result table (with `show(...)`) and, when sensible, a small plot (heatmap/bar).
+ 9) Don't mutate or recreate target columns if they already exist.
+ 10) Keep variable names short and clear; prefer `num_cols` / `cat_cols` discovery by dtype.
+ 11) You MUST NOT reference any column outside Available columns: {AVAILABLE_COLUMNS}.
+ 12) If asked to predict/classify, choose the target by matching the task text to Available columns
+ and never invent a new name.
+
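# A minimal sketch of the modelling conventions in rules 4 and 8, assuming a
# frame `df` with made-up columns age/bmi/smoker and a binary `target`.
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

num_cols, cat_cols = ["age", "bmi"], ["smoker"]
pre = ColumnTransformer([
    ("num", Pipeline([("imp", SimpleImputer()), ("sc", StandardScaler())]), num_cols),
    ("cat", Pipeline([("imp", SimpleImputer(strategy="most_frequent")),
                      ("oh", OneHotEncoder(handle_unknown="ignore"))]), cat_cols),
])
model = Pipeline([("pre", pre), ("clf", LogisticRegression(max_iter=1000))])

X_tr, X_va, y_tr, y_va = train_test_split(
    df[num_cols + cat_cols], df["target"], random_state=42
)
model.fit(X_tr, y_tr)
ConfusionMatrixDisplay.from_estimator(model, X_va, y_va)
plt.tight_layout(); plt.show()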
1253
+ #### Cohort rules
+ When you generate plots for cohorts or categories, you MUST obey these rules:
+ 1) ALWAYS guard cohort masks:
+ - After you define something like:
+ _mask_a = (df['BMI'] < 18.5) & df['BMI'].notna()
+ _mask_b = ~(df['BMI'] < 18.5) & df['BMI'].notna()
+ compute their sizes:
+ n_a = int(_mask_a.sum())
+ n_b = int(_mask_b.sum())
+ - If a mask has no rows (or almost none), do NOT draw an empty plot.
+ Instead call:
+ show(f"Skipping cohort '{label}': no rows after filtering.")
+ and return.
+
+ 2) Before any groupby / crosstab for a plot:
+ - Fill missing categories so groupby does not drop everything:
+ df[col] = df[col].fillna("Unknown")
+ - After building the table:
+ tab = tmp.groupby([...]).size().unstack(...).fillna(0)
+ ALWAYS check:
+ if tab.empty:
+ show(f"Skipping plot for {col}: no data after grouping.")
+ continue
+ Only call .plot(...) if the table is non-empty.
+
+ 3) For value_counts-based plots:
+ - If the Series is empty after filtering (len(s) == 0),
+ do NOT draw a figure. Just call:
+ show(f"No data available to plot for {col} in this cohort.")
+ and skip.
+
+ 4) Never try to “hide” an error with a blank plot.
+ A blank chart is treated as a bug. If there is no data, explain it
+ clearly using show(...), and avoid calling matplotlib/Seaborn.
+
+ 5) Never use print(...). All user-visible diagnostics go through show(...).
+
+
1291
+ ### Output
+ Return only runnable Python that:
+ - Imports what it needs,
+ - Validates columns,
+ - Visualises tables, charts, and graphs, each with an appropriate caption,
+ - Solves {tasks} to answer {refined_question},
+ - And ends with at least 3 visible outputs (`show(...)` and/or `plt.show()`).
+     """)
+
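# A minimal sketch of the cohort-guard pattern the prompt mandates, assuming a
# frame `df` with made-up 'BMI' and 'Outcome' columns; `show` is the display
# helper the prompt names in rule 7.
import matplotlib.pyplot as plt
from syntaxmatrix.display import show

mask = (df["BMI"] < 18.5) & df["BMI"].notna()
if int(mask.sum()) == 0:
    show("Skipping cohort 'underweight': no rows after filtering.")
else:
    tmp = df[mask].copy()
    tmp["Outcome"] = tmp["Outcome"].fillna("Unknown")
    tab = tmp.groupby("Outcome").size()
    if tab.empty:
        show("Skipping plot for Outcome: no data after grouping.")
    else:
        tab.plot(kind="bar", title="Outcome counts (BMI < 18.5)")
        plt.tight_layout(); plt.show()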
1300
+ if not self._coding_profile:
+     coding_profile = _prof.get_profile("coding") or _prof.get_profile("admin")
+     if not coding_profile:
+         return (
+             '<div class="smx-alert smx-alert-warn">'
+             'No LLM profile configured for <code>coding</code> (or <code>admin</code>). <br>'
+             'Please add an LLM profile in the admin panel, or contact your administrator.'
+             '</div>'
+         )
+
+     self._coding_profile = coding_profile
+     self._coding_profile['client'] = _prof.get_client(coding_profile)
+
+ code, usage = mlearning_agent(instructions, ai_profile, self._coding_profile)
+ self._last_llm_usage = usage
+
1317
+ if code:
+     code = normalise_llm_code(code)
+
+     # Strip an optional markdown code fence from the model reply.
+     m = re.search(r"```(?:python)?\s*(.*?)\s*```", code, re.DOTALL | re.IGNORECASE)
+     if m:
+         code = m.group(1).strip()
+
+     # Group imports at the top and add the `import io` that io.BytesIO needs.
+     if "import io" not in code and "io.BytesIO" in code:
+         lines = code.split('\n')
+         import_lines = [ln for ln in lines if ln.strip().startswith(('import ', 'from '))]
+         other_lines = [ln for ln in lines if not ln.strip().startswith(('import ', 'from '))]
+         import_lines.append('import io')
+         code = '\n'.join(import_lines + [''] + other_lines)
+
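# The hoisting above, restated as a standalone function so the intended
# transformation is concrete (illustrative only, not a package API):
def hoist_io(code: str) -> str:
    lines = code.split("\n")
    imps = [l for l in lines if l.strip().startswith(("import ", "from "))]
    rest = [l for l in lines if not l.strip().startswith(("import ", "from "))]
    if "import io" not in code and "io.BytesIO" in code:
        imps.append("import io")
    return "\n".join(imps + [""] + rest)

assert hoist_io("buf = io.BytesIO()") == "import io\n\nbuf = io.BytesIO()"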
1341
+     TEMPLATE_NAMES = [
+         "viz_pie","viz_stacked_bar","viz_count_bar","viz_box","viz_scatter",
+         "viz_distribution","viz_kde","viz_area","viz_line",
+         "classification","regression","clustering","anomaly_detection",
+         "ts_anomaly_detection","time_series_forecasting","time_series_classification",
+         "dimensionality_reduction","feature_selection","eda_overview","eda_correlation",
+         "multilabel_classification","recommendation","topic_modelling"
+     ]
+
+     # Detect bare template calls such as "viz_pie(" so their import can be injected.
+     used = [t for t in TEMPLATE_NAMES if re.search(rf"\b{t}\s*\(", code)]
+     if used:
+         import_line = (
+             "from syntaxmatrix.agentic.model_templates import " +
+             ", ".join(sorted(set(used)))
+         )
+         if import_line not in code:
+             code = import_line + "\n" + code
+
+     return code.strip()
+
+ return "Error: AI code generation failed."
+
+
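# An illustrative check of the fence-stripping regex used above, applied to a
# typical fenced LLM reply:
import re

reply = "```python\nprint('hi')\n```"
m = re.search(r"```(?:python)?\s*(.*?)\s*```", reply, re.DOTALL | re.IGNORECASE)
assert m and m.group(1) == "print('hi')"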
1364
+ def sanitize_rough_to_markdown_task(self, rough: str) -> str:
+     """
+     Return only the Task text (no tags).
+     Behaviour:
+       - If <Task>...</Task> exists: return its inner text.
+       - If not: return the input with the <rough> wrapper and any <Error> blocks removed.
+       - Never raises; always returns a string.
+     """
+     s = ("" if rough is None else str(rough)).strip()
+
+     def _find_ci(hay, needle, start=0):
+         return hay.lower().find(needle.lower(), start)
+
+     # Prefer explicit <Task>...</Task>
+     i = _find_ci(s, "<task")
+     if i != -1:
+         j = s.find(">", i)
+         k = _find_ci(s, "</task>", j + 1)
+         if j != -1 and k != -1:
+             return s[j + 1:k].strip()
+
+     # Otherwise strip any <Error>...</Error> blocks (if present)
+     out = s
+     while True:
+         e1 = _find_ci(out, "<error")
+         if e1 == -1:
+             break
+         e1_end = out.find(">", e1)
+         e2 = _find_ci(out, "</error>", (e1_end + 1) if e1_end != -1 else e1 + 1)
+         if e1_end == -1 or e2 == -1:
+             break
+         out = out[:e1] + out[e2 + len("</error>"):]
+
+     # Drop the optional <rough> wrapper
+     return out.replace("<rough>", "").replace("</rough>", "").strip()
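# Expected behaviour of sanitize_rough_to_markdown_task, per the branches
# above (the `smx` instance name is hypothetical):
rough = "<rough><Error>model timed out</Error><Task>Plot BMI by age group.</Task></rough>"
assert smx.sanitize_rough_to_markdown_task(rough) == "Plot BMI by age group."

no_task = "<rough><Error>bad tags</Error>Just describe the dataset.</rough>"
assert smx.sanitize_rough_to_markdown_task(no_task) == "Just describe the dataset."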
 
- code = strip_describe_slice(code)
- code = drop_bad_classification_metrics(code, df)
- return code.strip()
-
 
  def run(self):
      url = f"http://{self.host}:{self.port}/"
      webbrowser.open(url)
      self.app.run(host=self.host, port=self.port, debug=False)
-