syntaxmatrix-1.4.6-py3-none-any.whl → syntaxmatrix-2.5.5.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- syntaxmatrix/__init__.py +13 -8
- syntaxmatrix/agentic/__init__.py +0 -0
- syntaxmatrix/agentic/agent_tools.py +24 -0
- syntaxmatrix/agentic/agents.py +810 -0
- syntaxmatrix/agentic/code_tools_registry.py +37 -0
- syntaxmatrix/agentic/model_templates.py +1790 -0
- syntaxmatrix/auth.py +308 -14
- syntaxmatrix/commentary.py +328 -0
- syntaxmatrix/core.py +993 -375
- syntaxmatrix/dataset_preprocessing.py +218 -0
- syntaxmatrix/db.py +92 -95
- syntaxmatrix/display.py +95 -121
- syntaxmatrix/generate_page.py +634 -0
- syntaxmatrix/gpt_models_latest.py +46 -0
- syntaxmatrix/history_store.py +26 -29
- syntaxmatrix/kernel_manager.py +96 -17
- syntaxmatrix/llm_store.py +1 -1
- syntaxmatrix/plottings.py +6 -0
- syntaxmatrix/profiles.py +64 -8
- syntaxmatrix/project_root.py +55 -43
- syntaxmatrix/routes.py +5072 -1398
- syntaxmatrix/session.py +19 -0
- syntaxmatrix/settings/logging.py +40 -0
- syntaxmatrix/settings/model_map.py +300 -33
- syntaxmatrix/settings/prompts.py +273 -62
- syntaxmatrix/settings/string_navbar.py +3 -3
- syntaxmatrix/static/docs.md +272 -0
- syntaxmatrix/static/icons/favicon.png +0 -0
- syntaxmatrix/static/icons/hero_bg.jpg +0 -0
- syntaxmatrix/templates/dashboard.html +608 -147
- syntaxmatrix/templates/docs.html +71 -0
- syntaxmatrix/templates/error.html +2 -3
- syntaxmatrix/templates/login.html +1 -0
- syntaxmatrix/templates/register.html +1 -0
- syntaxmatrix/ui_modes.py +14 -0
- syntaxmatrix/utils.py +2482 -159
- syntaxmatrix/vectorizer.py +16 -12
- {syntaxmatrix-1.4.6.dist-info → syntaxmatrix-2.5.5.4.dist-info}/METADATA +20 -17
- syntaxmatrix-2.5.5.4.dist-info/RECORD +68 -0
- syntaxmatrix/model_templates.py +0 -30
- syntaxmatrix/static/icons/favicon.ico +0 -0
- syntaxmatrix-1.4.6.dist-info/RECORD +0 -54
- {syntaxmatrix-1.4.6.dist-info → syntaxmatrix-2.5.5.4.dist-info}/WHEEL +0 -0
- {syntaxmatrix-1.4.6.dist-info → syntaxmatrix-2.5.5.4.dist-info}/licenses/LICENSE.txt +0 -0
- {syntaxmatrix-1.4.6.dist-info → syntaxmatrix-2.5.5.4.dist-info}/top_level.txt +0 -0
syntaxmatrix/core.py
CHANGED
@@ -1,102 +1,134 @@
  from __future__ import annotations
- import
+ import ast
+ import textwrap
+ import os, webbrowser, uuid, secrets, re

- from flask import Flask, session, request, has_request_context
- from .
+ from flask import Flask, Response, session, request, has_request_context
+ from syntaxmatrix.agentic.agents import mlearning_agent
+ from syntaxmatrix.history_store import SQLHistoryStore as Store, PersistentHistoryStore as _Store
  from collections import OrderedDict
  from syntaxmatrix.llm_store import save_embed_model, load_embed_model, delete_embed_key
  from . import db, routes
  from .themes import DEFAULT_THEMES
- from .
+ from .ui_modes import UI_MODES
+ from .plottings import render_plotly, pyplot, describe_plotly
  from .file_processor import process_admin_pdf_files
- from google import
- from openai import OpenAI
+ from google.genai import types
  from .vector_db import query_embeddings
  from .vectorizer import embed_text
- from syntaxmatrix.settings.prompts import
- from typing import List
+ from syntaxmatrix.settings.prompts import SMXAI_CHAT_ID, SMXAI_CHAT_INSTRUCTIONS, SMXAI_WEBSITE_DESCRIPTION
+ from typing import List, Generator
  from .auth import init_auth_db
- from . import profiles as
- from syntaxmatrix.utils import strip_describe_slice, drop_bad_classification_metrics
+ from . import profiles as _prof
  from syntaxmatrix.smiv import SMIV
  from .project_root import detect_project_root
+ from syntaxmatrix.gpt_models_latest import extract_output_text as _out, set_args
  from dotenv import load_dotenv
+ from html import unescape
+ from .plottings import render_plotly, pyplot, describe_plotly, describe_matplotlib
+ from threading import RLock
+ from syntaxmatrix.settings.model_map import GPT_MODELS_LATEST


  # ──────── framework‐local storage paths ────────
  # this ensures the key & data always live under the package dir,
  # regardless of where the developer `cd` into before launching.
  _CLIENT_DIR = detect_project_root()
- _HISTORY_DIR = os.path.join(_CLIENT_DIR, "
+ _HISTORY_DIR = os.path.join(_CLIENT_DIR, "smx_history")
  os.makedirs(_HISTORY_DIR, exist_ok=True)
- _SECRET_PATH = os.path.join(_CLIENT_DIR, "data", ".smx_secret_key")

-
+ _SECRET_PATH = os.path.join(_CLIENT_DIR, ".smx_secret_key")

-
-
+ _CLIENT_DOTENV_PATH = os.path.join(str(_CLIENT_DIR.parent), ".env")
+ if os.path.isfile(_CLIENT_DOTENV_PATH):
+     load_dotenv(_CLIENT_DOTENV_PATH, override=True)
+
+ _ICONS_PATH = os.path.join(_CLIENT_DIR, "static", "icons")
+ os.makedirs(_ICONS_PATH, exist_ok=True)

  EDA_OUTPUT = {}  # global buffer for EDA output by session

  class SyntaxMUI:
-     def __init__(
-
-
-         port="5050",
+     def __init__(self,
+         host="127.0.0.1",
+         port="5080",
          user_icon="👩🏿🦲",
-         bot_icon=
-         favicon=
-         site_logo=
+         bot_icon="<img src='/static/icons/favicon.png' width=20' alt='bot'/>",
+         favicon="/static/icons/favicon.png",
+         site_logo="<img src='/static/icons/logo.png' width='30' alt='logo'/>",
          site_title="SyntaxMatrix",
-
-         theme_name="light"
+         project_name="smxAI",
+         theme_name="light",
+         ui_mode = "default"
      ):
-         self.app = Flask(__name__)
-         self.get_app_secrete()
+         self.app = Flask(__name__)
          self.host = host
          self.port = port
+
+         self.get_app_secrete()
          self.user_icon = user_icon
          self.bot_icon = bot_icon
+         self.site_logo = site_logo
          self.favicon = favicon
          self.site_title = site_title
-         self.
-         self.
-         self.ui_mode = "default"
+         self.project_name = project_name
+         self.ui_mode = ui_mode
          self.theme_toggle_enabled = False
-         self.
-         self.
+         self.user_files_enabled = False
+         self.smxai_identity = SMXAI_CHAT_ID
+         self.smxai_instructions = SMXAI_CHAT_INSTRUCTIONS
+         self.website_description = SMXAI_WEBSITE_DESCRIPTION
+         self._eda_output = {}  # {chat_id: html}
+         self._eda_lock = RLock()
+
          db.init_db()
          self.page = ""
          self.pages = db.get_pages()
          init_auth_db()
+
          self.widgets = OrderedDict()
          self.theme = DEFAULT_THEMES.get(theme_name, DEFAULT_THEMES["light"])
          self.system_output_buffer = ""  # Ephemeral buffer initialized
          self.app_token = str(uuid.uuid4())  # NEW: Unique token for each app launch.
          self.admin_pdf_chunks = {}  # In-memory store for admin PDF chunks
          self.user_file_chunks = {}  # In-memory store of user‑uploaded chunks, scoped per chat session
+
+         self._last_llm_usage = None
          routes.setup_routes(self)

-         self.
-         self.
-         self.
-         self.
-         self.
+         self._admin_profile = {}
+         self._chat_profile = {}
+         self._coding_profile = {}
+         self._classification_profile = {}
+         self._summarization_profile = {}
+
+         self._gpt_models_latest_prev_resp_ids = {}
+         self.is_streaming = False
+         self.stream_args = {}
+
+         self._recent_visual_summaries = []
+
+         self.placeholder = ""

+     @staticmethod
      def init_app(app):
-         import
+         import secrets
          if not app.secret_key:
-             app.secret_key = secrets.token_urlsafe(32)
-
+             app.secret_key = secrets.token_urlsafe(32)

-     def get_app_secrete(self):
+     def get_app_secrete(self):
          if os.path.exists(_SECRET_PATH):
              self.app.secret_key = open(_SECRET_PATH, "r", encoding="utf-8").read().strip()
          else:
              new_key = secrets.token_urlsafe(32)
-             open(_SECRET_PATH, "w", encoding="utf-8")
+             with open(_SECRET_PATH, "w", encoding="utf-8") as f:
+                 f.write(new_key)
+             try:
+                 os.chmod(_SECRET_PATH, 0o600)
+             except Exception:
+                 pass
              self.app.secret_key = new_key
-
+

      def _get_visual_context(self):
          """Return the concatenated summaries for prompt injection."""
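The hunk above reworks the `SyntaxMUI` constructor: new `host`, `project_name` and `ui_mode` parameters, the default port moving from "5050" to "5080", and the per-provider LLM profile caches. A minimal usage sketch based only on the signature shown in this diff; the import path (`syntaxmatrix.core`) and how the app is subsequently started are assumptions, not confirmed by the diff:

```python
# Hedged sketch: constructing the 2.5.x SyntaxMUI app with the new parameters.
# Defaults are copied from the __init__ signature in the hunk above;
# the import path is an assumption.
from syntaxmatrix.core import SyntaxMUI

smx = SyntaxMUI(
    host="127.0.0.1",      # new explicit host parameter
    port="5080",           # default changed from "5050" in 1.4.6
    project_name="smxAI",  # new in 2.x; e.g. used for the input placeholder text
    theme_name="light",
    ui_mode="default",     # now a constructor argument instead of a hard-coded value
)
```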
@@ -105,15 +137,75 @@ class SyntaxMUI:
          joined = "\n• " + "\n• ".join(self._recent_visual_summaries)
          return f"\n\nRecent visualizations:{joined}"

+     # add to class
+     def _add_visual_summary(self, summary: str) -> None:
+         if not summary:
+             return
+         if not hasattr(self, "_recent_visual_summaries"):
+             self._recent_visual_summaries = []
+         # keep last 6
+         self._recent_visual_summaries = (self._recent_visual_summaries + [summary])[-6:]

      def set_plottings(self, fig_or_html, note=None):
-
+         # prefer current chat id; fall back to per-browser sid; finally "default"
+         sid = self.get_session_id() or self._sid() or "default"
+
+         # Clear for this session if empty/falsy
          if not fig_or_html or (isinstance(fig_or_html, str) and fig_or_html.strip() == ""):
-
+             with self._eda_lock:
+                 self._eda_output.pop(sid, None)
              return

          html = None

+         # ---- Plotly Figure support ----
+         try:
+             import plotly.graph_objs as go
+             if isinstance(fig_or_html, go.Figure):
+                 html = fig_or_html.to_html(full_html=False)
+         except ImportError:
+             pass
+
+         # ---- Matplotlib Figure support ----
+         if html is None and hasattr(fig_or_html, "savefig"):
+             html = pyplot(fig_or_html)
+
+         # ---- Bytes (PNG etc.) support ----
+         if html is None and isinstance(fig_or_html, bytes):
+             import base64
+             img_b64 = base64.b64encode(fig_or_html).decode()
+             html = f"<img src='data:image/png;base64,{img_b64}'/>"
+
+         # ---- HTML string support ----
+         if html is None and isinstance(fig_or_html, str):
+             html = fig_or_html
+
+         if html is None:
+             raise TypeError("Unsupported object type for plotting.")
+
+         if note:
+             html += f"<div style='margin-top:10px; text-align:center; color:#888;'><strong>{note}</strong></div>"
+
+         wrapper = f'''
+             <div style="
+                 position:relative; max-width:650px; margin:30px auto 20px auto;
+                 padding:20px 28px 10px 28px; background:#fffefc;
+                 border:2px solid #2da1da38; border-radius:16px;
+                 box-shadow:0 3px 18px rgba(90,130,230,0.06); min-height:40px;">
+                 <button id="eda-close-btn" onclick="closeEdaPanel()" style="
+                     position: absolute; top: 20px; right: 12px;
+                     font-size: 1.25em; background: transparent;
+                     border: none; color: #888; cursor: pointer;
+                     z-index: 2; transition: color 0.2s;">×</button>
+                 {html}
+             </div>
+         '''
+
+         with self._eda_lock:
+             self._eda_output[sid] = wrapper
+
+         html = None
+
          # ---- Plotly Figure support ----
          try:
              import plotly.graph_objs as go
@@ -160,8 +252,9 @@ class SyntaxMUI:


      def get_plottings(self):
-         sid =
-
+         sid = self.get_session_id() or self._sid() or "default"
+         with self._eda_lock:
+             return self._eda_output.get(sid, "")


      def load_sys_chunks(self, directory: str = "uploads/sys"):
@@ -188,109 +281,90 @@ class SyntaxMUI:


      def set_ui_mode(self, mode):
-         if mode not in ["default", "card", "bubble", "smx"]:
+         if mode not in self.get_ui_modes():  # ["default", "card", "bubble", "smx"]:
              raise ValueError("UI mode must be one of: 'default', 'card', 'bubble', 'smx'.")
          self.ui_mode = mode

-
      @staticmethod
-     def
-         return
+     def get_ui_modes():
+         return list(UI_MODES.keys())
+         # return "default", "card", "bubble", "smx"

-
      @staticmethod
-     def
+     def get_themes():
          return list(DEFAULT_THEMES.keys())
-

-
+
+     def set_theme(self, theme_name, theme=None):
          if theme_name in DEFAULT_THEMES:
              self.theme = DEFAULT_THEMES[theme_name]
          elif isinstance(theme, dict):
-             self.theme["custom"] = theme
              DEFAULT_THEMES[theme_name] = theme
+             self.theme = DEFAULT_THEMES[theme_name]
          else:
              self.theme = DEFAULT_THEMES["light"]
-
-
+             self.error("Theme must be 'light', 'dark', or a custom dict.")

+
      def enable_theme_toggle(self):
-         self.theme_toggle_enabled = True
+         self.theme_toggle_enabled = True

-
-
-         self.theme_toggle_enabled = False
+     def enable_user_files(self):
+         self.user_files_enabled = True

-
-     def columns(
+     @staticmethod
+     def columns(components):
          col_html = "<div style='display:flex; gap:10px;'>"
          for comp in components:
              col_html += f"<div style='flex:1;'>{comp}</div>"
          col_html += "</div>"
          return col_html
-
-
-     def set_favicon(self, icon):
-         self.favicon = icon
-

      def set_site_title(self, title):
          self.site_title = title

+     def set_project_name(self, project_name):
+         self.project_name = project_name
+
+     def set_favicon(self, icon):
+         self.favicon = icon

      def set_site_logo(self, logo):
          self.site_logo = logo

-
-     def set_project_title(self, project_title):
-         self.project_title = project_title
-
-
      def set_user_icon(self, icon):
          self.user_icon = icon

-
      def set_bot_icon(self, icon):
          self.bot_icon = icon

-
-
+     def text_input(self, key, id, label, placeholder=""):
+         if not placeholder:
+             placeholder = f"Ask {self.project_name} anything"
          if key not in self.widgets:
-             self.widgets[key] = {
-
-
-
-         q = session.get(key, default)
-
-         classifier_profile = prof.get_profile("classifier") or prof.get_profile("chat")
-         if not classifier_profile:
-             self.error("ERROR: There is no LLM profile set yet.")
-             return q, None
-
-         intent = self._classify_query(q)
-         return q, intent
-
+             self.widgets[key] = {
+                 "type": "text_input", "key": key, "id": id,
+                 "label": label, "placeholder": placeholder
+             }

      def clear_text_input_value(self, key):
          session[key] = ""
          session.modified = True


-     def button(self, key, label, callback
+     def button(self, key, id, label, callback, stream=False):
+         if stream == True:
+             self.is_streaming = True
          self.widgets[key] = {
-             "type": "button", "key": key,
-             "label": label, "callback": callback,
-             "stream": stream
+             "type": "button", "key": key, "id": id, "label": label, "callback": callback, "stream":stream
          }

-
-     def file_uploader(self, key, label, accept_multiple_files=False, callback=None):
+     def file_uploader(self, key, id, label, accept_multiple_files):
          if key not in self.widgets:
              self.widgets[key] = {
                  "type": "file_upload",
-                 "key": key, "label": label,
+                 "key": key, "id":id, "label": label,
                  "accept_multiple": accept_multiple_files,
-                 "callback": callback
              }

@@ -326,57 +400,44 @@ class SyntaxMUI:
          session.modified = True
          return sid

-
      def get_chat_history(self) -> list[tuple[str, str]]:
-         #
+         # Load the history for the _current_ chat session
          sid = self._sid()
          cid = self.get_session_id()
+         if session.get("user_id"):
+             # Logged-in: use SQLHistoryStore (Store). Locking handled inside history_store.py
+             return Store.load(str(session["user_id"]), cid)
+         # Anonymous: use PersistentHistoryStore (_Store) JSON files
          return _Store.load(sid, cid)
-
+

      def set_chat_history(self, history: list[tuple[str, str]], *, max_items: int | None = None) -> list[tuple[str, str]]:
          sid = self._sid()
          cid = self.get_session_id()
-         _Store.save(sid, cid, history)
-         session["chat_history"] = history[-30:]  # still mirror a thin copy into Flask’s session cookie for the UI
-         session.modified = True
-
          if session.get("user_id"):
-
-
-
-         #
-
-
-         return history if max_items is None else history[-max_items:]
+             # Logged-in: chats.db via Store (SQLHistoryStore)
+             Store.save(str(session["user_id"]), cid, history)
+         else:
+             # Anonymous: file-backed via _Store (PersistentHistoryStore)
+             _Store.save(sid, cid, history)


      def clear_chat_history(self):
-         """
-         Clear both the UI slice *and* the server-side history bucket
-         for this session_id + chat_id.
-         """
          if has_request_context():
-
-
-
-
-
-
-
-
-
-
-
-
-
-             {**s, "history": []} if s.get("id") == cid else s
-             for s in session["past_sessions"]
-             ]
+             sid = self._sid()
+             cid = self.get_session_id()
+
+             # delete the chat from the correct backend (DB for logged-in, file for anonymous)
+             if session.get("user_id"):
+                 Store.delete(session["user_id"], cid)
+             else:
+                 _Store.delete(sid, cid)
+
+             # rotate to a fresh empty chat (session remains metadata-only)
+             new_cid = str(uuid.uuid4())
+             session["current_session"] = {"id": new_cid, "title": "Current"}
+             session["active_chat_id"] = new_cid
              session.modified = True
-         else:
-             self._fallback_chat_history = []
-

      def bot_message(self, content, max_length=20):
          history = self.get_chat_history()
@@ -403,19 +464,14 @@ class SyntaxMUI:
      def write(self, content):
          self.bot_message(content)

+     def stream_write(self, chunk: str, end=False):
+         """Push a token to the SSE queue and, when end=True,
+         persist the whole thing to chat_history."""
+         from .routes import _stream_q
+         _stream_q.put(chunk)         # live update
+         if end:                      # final flush → history
+             self.bot_message(chunk)  # persists the final message

-     def markdown(self, md_text):
-         try:
-             import markdown
-             html = markdown.markdown(md_text)
-         except ImportError:
-             html = md_text
-         self.write(html)
-
-
-     def latex(self, math_text):
-         self.write(f"\\({math_text}\\)")
-

      def error(self, content):
          self.bot_message(f'<div style="color:red; font-weight:bold;">{content}</div>')
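The relocated `stream_write` helper above pushes each chunk onto the shared SSE queue (`routes._stream_q`) and persists a message to chat history only when called with `end=True`. A hedged sketch of how a caller might drive it, assuming a `smx` instance and the `process_query_stream` generator added later in this diff; the wiring of the callback itself is an assumption, not something this diff shows:

```python
# Illustrative only: stream tokens into the UI, then flush the final
# assembled answer to chat history with end=True.
def answer_callback(smx, query, context, history):
    parts = []
    for token in smx.process_query_stream(query, context, history):
        parts.append(token)
        smx.stream_write(token)                   # live SSE update, not persisted
    smx.stream_write("".join(parts), end=True)    # final flush -> chat history
```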
@@ -432,12 +488,15 @@ class SyntaxMUI:
      def info(self, content):
          self.bot_message(f'<div style="color:blue;">{content}</div>')

-
+
      def get_session_id(self):
-         """Return
+         """Return the chat id that is currently *active* in the UI."""
+         # Prefer a sticky id set by /load_session or when a new chat is started.
+         sticky = session.get("active_chat_id")
+         if sticky:
+             return sticky
          return session.get("current_session", {}).get("id")

-
      def add_user_chunks(self, session_id, chunks):
          """Append these text‐chunks under that session’s key."""
          self.user_file_chunks.setdefault(session_id, []).extend(chunks)
@@ -451,32 +510,25 @@ class SyntaxMUI:
      def clear_user_chunks(self, session_id):
          """Remove all stored chunks for a session (on chat‑clear or delete)."""
          self.user_file_chunks.pop(session_id, None)
-
-
-     def stream_write(self, chunk: str, end=False):
-         """Push a token to the SSE queue and, when end=True,
-         persist the whole thing to chat_history."""
-         from .routes import _stream_q
-         _stream_q.put(chunk)         # live update
-         if end:                      # final flush → history
-             self.bot_message(chunk)  # persists the final message
-

      # ──────────────────────────────────────────────────────────────
      # *********** LLM CLIENT HELPERS **********************
      # ──────────────────────────────────────────────────────────────
      def set_prompt_profile(self, profile):
-         self.
+         self.ai_chat_id = profile


      def set_prompt_instructions(self, instructions):
-         self.
-
+         self.ai_chat_instructions = instructions
+
+
+     def set_website_description(self, desc):
+         self.website_description = desc
+

      def embed_query(self, q):
          return embed_text(q)

-
      def smiv_index(self, sid):
          chunks = self.get_user_chunks(sid) or []
          count = len(chunks)
@@ -502,7 +554,6 @@ class SyntaxMUI:
              self._user_index_counts[sid] = count
          return self._user_indices[sid]

-
      def load_embed_model(self):
          client = load_embed_model()
          os.environ["PROVIDER"] = client["provider"]
@@ -510,276 +561,843 @@ class SyntaxMUI:
|
|
|
510
561
|
os.environ["OPENAI_API_KEY"] = client["api_key"]
|
|
511
562
|
return client
|
|
512
563
|
|
|
513
|
-
|
|
514
564
|
def save_embed_model(self, provider:str, model:str, api_key:str):
|
|
515
565
|
return save_embed_model(provider, model, api_key)
|
|
516
566
|
|
|
517
|
-
|
|
518
567
|
def delete_embed_key(self):
|
|
519
568
|
return delete_embed_key()
|
|
520
569
|
|
|
521
570
|
|
|
522
|
-
def
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
return OpenAI(api_key=api_key)
|
|
528
|
-
elif provider == "google":
|
|
529
|
-
# return OpenAI(api_key=api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
|
|
530
|
-
return genai.Client(api_key=api_key)
|
|
531
|
-
elif provider == "xai":
|
|
532
|
-
return OpenAI(api_key=api_key, base_url="https://api.x.ai/v1")
|
|
533
|
-
elif provider == "deepseek":
|
|
534
|
-
return OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
|
|
535
|
-
elif provider == "moonshotai":
|
|
536
|
-
return OpenAI(api_key=api_key, base_url="https://api.moonshot.ai/v1")
|
|
571
|
+
def get_gpt_models_latest(self):
|
|
572
|
+
return GPT_MODELS_LATEST
|
|
573
|
+
|
|
574
|
+
def get_text_input_value(self, key, default=""):
|
|
575
|
+
q = session.get(key, default)
|
|
537
576
|
|
|
577
|
+
intent = self.classify_query_intent(q)
|
|
578
|
+
intent = intent.strip().lower() if intent else ""
|
|
579
|
+
if intent not in {"none","user_docs","system_docs","hybrid"}:
|
|
580
|
+
self.error("Classify agency error")
|
|
581
|
+
return q, None
|
|
582
|
+
return q, intent
|
|
583
|
+
|
|
584
|
+
def enable_stream(self):
|
|
585
|
+
self.is_streaming = True
|
|
586
|
+
|
|
587
|
+
def stream(self):
|
|
588
|
+
return self.is_streaming
|
|
589
|
+
|
|
590
|
+
def get_stream_args(self):
|
|
591
|
+
return self.stream_args
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
def classify_query_intent(self, query: str) -> str:
|
|
595
|
+
from syntaxmatrix.gpt_models_latest import extract_output_text as _out, set_args
|
|
596
|
+
|
|
597
|
+
if not self._classification_profile:
|
|
598
|
+
classification_profile = _prof.get_profile('classification') or _prof.get_profile('chat') or _prof.get_profile('admin')
|
|
599
|
+
if not classification_profile:
|
|
600
|
+
return {"Error": "Set a profile for Classification"}
|
|
601
|
+
self._classification_profile = classification_profile
|
|
602
|
+
self._classification_profile['client'] = _prof.get_client(classification_profile)
|
|
603
|
+
|
|
604
|
+
_client = self._classification_profile['client']
|
|
605
|
+
_provider = self._classification_profile['provider']
|
|
606
|
+
_model = self._classification_profile['model']
|
|
607
|
+
|
|
608
|
+
# New instruction format with hybrid option
|
|
609
|
+
_intent_profile = "You are an intent classifier. Respond ONLY with the intent name."
|
|
610
|
+
_instructions = f"""
|
|
611
|
+
Classify the given query into ONE of these intents You must return ONLY the intent name with no comment or any preamble:
|
|
612
|
+
- "none": Casual chat/greetings
|
|
613
|
+
- "user_docs": Requires user-uploaded documents
|
|
614
|
+
- "system_docs": Requires company knowledge/docs
|
|
615
|
+
- "hybrid": Requires BOTH user docs AND company docs
|
|
616
|
+
|
|
617
|
+
Examples:
|
|
618
|
+
Query: "Hi there!" → none
|
|
619
|
+
Query: "Explain my uploaded contract" → user_docs
|
|
620
|
+
Query: "What's our refund policy?" → system_docs
|
|
621
|
+
Query: "How does my proposal align with company guidelines?" → hybrid
|
|
622
|
+
Query: "What is the weather today?" → none
|
|
623
|
+
Query: "Cross-reference the customer feedback from my uploaded survey results with our product's feature list in the official documentation." → hybrid
|
|
624
|
+
|
|
625
|
+
Now classify:
|
|
626
|
+
Query: "{query}"
|
|
627
|
+
Intent:
|
|
628
|
+
"""
|
|
629
|
+
openai_sdk_messages = [
|
|
630
|
+
{"role": "system", "content": _intent_profile},
|
|
631
|
+
{"role": "user", "content": _instructions}
|
|
632
|
+
]
|
|
633
|
+
|
|
634
|
+
def google_classify_query():
|
|
635
|
+
response = _client.models.generate_content(
|
|
636
|
+
model=_model,
|
|
637
|
+
contents=f"{_intent_profile}\n{_instructions}\n\n"
|
|
638
|
+
)
|
|
639
|
+
return response.text.strip().lower()
|
|
640
|
+
|
|
641
|
+
def gpt_models_latest_classify_query(reasoning_effort = "medium", verbosity = "low"):
|
|
642
|
+
|
|
643
|
+
args = set_args(
|
|
644
|
+
model=_model,
|
|
645
|
+
instructions=_intent_profile,
|
|
646
|
+
input=_instructions,
|
|
647
|
+
reasoning_effort=reasoning_effort,
|
|
648
|
+
verbosity=verbosity,
|
|
649
|
+
)
|
|
650
|
+
try:
|
|
651
|
+
resp = _client.responses.create(**args)
|
|
652
|
+
answer = _out(resp).strip().lower()
|
|
653
|
+
return answer if answer else ""
|
|
654
|
+
except Exception as e:
|
|
655
|
+
return f"Error!"
|
|
656
|
+
|
|
657
|
+
def anthropic_classify_query():
|
|
658
|
+
try:
|
|
659
|
+
response = _client.messages.create(
|
|
660
|
+
model=_model,
|
|
661
|
+
max_tokens=1024,
|
|
662
|
+
system=_intent_profile,
|
|
663
|
+
messages=[{"role": "user", "content":_instructions}],
|
|
664
|
+
stream=False,
|
|
665
|
+
)
|
|
666
|
+
return response.content[0].text.strip()
|
|
667
|
+
|
|
668
|
+
except Exception as e:
|
|
669
|
+
return f"Error: {str(e)}"
|
|
670
|
+
|
|
671
|
+
def openai_sdk_classify_query():
|
|
672
|
+
try:
|
|
673
|
+
response = _client.chat.completions.create(
|
|
674
|
+
model=_model,
|
|
675
|
+
messages=openai_sdk_messages,
|
|
676
|
+
temperature=0,
|
|
677
|
+
max_tokens=100
|
|
678
|
+
)
|
|
679
|
+
intent = response.choices[0].message.content.strip().lower()
|
|
680
|
+
return intent if intent else ""
|
|
681
|
+
except Exception as e:
|
|
682
|
+
return f"Error!"
|
|
683
|
+
|
|
684
|
+
if _provider == "google":
|
|
685
|
+
intent = google_classify_query()
|
|
686
|
+
return intent
|
|
687
|
+
if _model in self.get_gpt_models_latest():
|
|
688
|
+
intent = gpt_models_latest_classify_query()
|
|
689
|
+
return intent
|
|
690
|
+
if _provider == "anthropic":
|
|
691
|
+
intent = anthropic_classify_query()
|
|
692
|
+
return intent
|
|
693
|
+
else:
|
|
694
|
+
intent = openai_sdk_classify_query()
|
|
695
|
+
return intent
|
|
696
|
+
|
|
538
697
|
|
|
539
|
-
# @staticmethod
|
|
540
698
|
def generate_contextual_title(self, chat_history):
|
|
541
699
|
|
|
542
|
-
if not self.
|
|
543
|
-
|
|
544
|
-
if not
|
|
545
|
-
return
|
|
700
|
+
if not self._summarization_profile:
|
|
701
|
+
summarization_profile = _prof.get_profile('summarization') or _prof.get_profile('chat') or _prof.get_profile('admin')
|
|
702
|
+
if not summarization_profile:
|
|
703
|
+
return {"Error": "Chat profile not set yet."}
|
|
546
704
|
|
|
547
|
-
self.
|
|
548
|
-
self.
|
|
705
|
+
self._summarization_profile = summarization_profile
|
|
706
|
+
self._summarization_profile['client'] = _prof.get_client(summarization_profile)
|
|
549
707
|
|
|
550
708
|
conversation = "\n".join([f"{role}: {msg}" for role, msg in chat_history])
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
client = self.labeller_profile['client']
|
|
558
|
-
model = self.labeller_profile['model']
|
|
709
|
+
_title_profile = "You are a title generator that creates concise and relevant titles for the given conversations."
|
|
710
|
+
_instructions = f"""
|
|
711
|
+
Generate a contextual title (5 short words max) from the given Conversation History
|
|
712
|
+
The title should be concise - with no preamble, relevant, and capture the essence of this Conversation: \n{conversation}.\n\n
|
|
713
|
+
return only the title.
|
|
714
|
+
"""
|
|
559
715
|
|
|
716
|
+
_client = self._summarization_profile['client']
|
|
717
|
+
_provider = self._summarization_profile['provider']
|
|
718
|
+
_model = self._summarization_profile['model']
|
|
719
|
+
|
|
560
720
|
def google_generated_title():
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
721
|
+
try:
|
|
722
|
+
response = _client.models.generate_content(
|
|
723
|
+
model=_model,
|
|
724
|
+
contents=f"{_title_profile}\n{_instructions}"
|
|
725
|
+
)
|
|
726
|
+
return response.text.strip()
|
|
727
|
+
except Exception as e:
|
|
728
|
+
return f"Summary agent error!"
|
|
729
|
+
|
|
730
|
+
def gpt_models_latest_generated_title():
|
|
731
|
+
try:
|
|
732
|
+
args = set_args(
|
|
733
|
+
model=_model,
|
|
734
|
+
instructions=_title_profile,
|
|
735
|
+
input=_instructions,
|
|
736
|
+
# reasoning_effort=reasoning_effort,
|
|
737
|
+
# verbosity=verbosity,
|
|
738
|
+
)
|
|
739
|
+
|
|
740
|
+
resp = _client.responses.create(**args)
|
|
741
|
+
return _out(resp).strip()
|
|
742
|
+
except Exception as e:
|
|
743
|
+
return f"Summary agent error!"
|
|
566
744
|
|
|
745
|
+
def anthropic_generated_title():
|
|
746
|
+
try:
|
|
747
|
+
response = _client.messages.create(
|
|
748
|
+
model=_model,
|
|
749
|
+
max_tokens=50,
|
|
750
|
+
system=_title_profile,
|
|
751
|
+
messages=[{"role": "user", "content":_instructions}],
|
|
752
|
+
stream=False,
|
|
753
|
+
)
|
|
754
|
+
return response.content[0].text.strip()
|
|
755
|
+
except Exception as e:
|
|
756
|
+
return f"Summary agent error!"
|
|
757
|
+
|
|
567
758
|
def openai_sdk_generated_title():
|
|
568
759
|
prompt = [
|
|
569
|
-
{
|
|
570
|
-
|
|
571
|
-
"content": instructions
|
|
572
|
-
},
|
|
760
|
+
{ "role": "system", "content": _title_profile },
|
|
761
|
+
{ "role": "user", "content": _instructions },
|
|
573
762
|
]
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
763
|
+
try:
|
|
764
|
+
response = _client.chat.completions.create(
|
|
765
|
+
model=_model,
|
|
766
|
+
messages=prompt,
|
|
767
|
+
temperature=0.3,
|
|
768
|
+
max_tokens=50
|
|
769
|
+
)
|
|
770
|
+
title = response.choices[0].message.content.strip().lower()
|
|
771
|
+
return title if title else ""
|
|
772
|
+
except Exception as e:
|
|
773
|
+
return f"Summary agent error!"
|
|
584
774
|
|
|
585
|
-
if
|
|
775
|
+
if _provider == "google":
|
|
586
776
|
title = google_generated_title()
|
|
777
|
+
elif _model in self.get_gpt_models_latest():
|
|
778
|
+
title = gpt_models_latest_generated_title()
|
|
779
|
+
elif _provider == "anthropic":
|
|
780
|
+
title = anthropic_generated_title()
|
|
587
781
|
else:
|
|
588
782
|
title = openai_sdk_generated_title()
|
|
589
783
|
return title
|
|
784
|
+
|
|
590
785
|
|
|
786
|
+
def stream_process_query(self, query, context, conversations, sources):
|
|
787
|
+
self.stream_args['query'] = query
|
|
788
|
+
self.stream_args['context'] = context
|
|
789
|
+
self.stream_args['conversations'] = conversations
|
|
790
|
+
self.stream_args['sources'] = sources
|
|
791
|
+
|
|
591
792
|
|
|
592
|
-
def
|
|
793
|
+
def process_query_stream(self, query: str, context: str, history: list, stream=True) -> Generator[str, None, None]:
|
|
794
|
+
|
|
795
|
+
if not self._chat_profile:
|
|
796
|
+
chat_profile = _prof.get_profile("chat") or _prof.get_profile("admin")
|
|
797
|
+
if not chat_profile:
|
|
798
|
+
yield """<p style='color:red;'>Error: Chat profile is not configured. Add a chat profile inside the admin panel or contact your administrator.</p>
|
|
799
|
+
"""
|
|
800
|
+
return None
|
|
801
|
+
self._chat_profile = chat_profile
|
|
802
|
+
self._chat_profile['client'] = _prof.get_client(chat_profile)
|
|
803
|
+
|
|
804
|
+
_provider = self._chat_profile['provider']
|
|
805
|
+
_client = self._chat_profile['client']
|
|
806
|
+
_model = self._chat_profile['model']
|
|
807
|
+
|
|
808
|
+
_contents = f"""
|
|
809
|
+
{self.smxai_instructions}\n\n
|
|
810
|
+
Question: {query}\n
|
|
811
|
+
Context: {context}\n\n
|
|
812
|
+
History: {history}\n\n
|
|
813
|
+
Use conversation continuity if available.
|
|
814
|
+
"""
|
|
593
815
|
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
816
|
+
try:
|
|
817
|
+
if _provider == "google": # Google, non openai skd series
|
|
818
|
+
|
|
819
|
+
for chunk in _client.models.generate_content_stream(
|
|
820
|
+
model=_model,
|
|
821
|
+
contents=_contents,
|
|
822
|
+
config=types.GenerateContentConfig(
|
|
823
|
+
system_instruction=self.smxai_identity,
|
|
824
|
+
temperature=0.3,
|
|
825
|
+
max_output_tokens=1024,
|
|
826
|
+
),
|
|
827
|
+
):
|
|
828
|
+
|
|
829
|
+
yield chunk.text
|
|
608
830
|
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
831
|
+
elif _provider == "openai" and _model in self.get_gpt_models_latest(): # GPt 5 series
|
|
832
|
+
input_prompt = (
|
|
833
|
+
f"{self.smxai_instructions}\n\n"
|
|
834
|
+
f"Generate a response to this query:\n{query}\n"
|
|
835
|
+
f"based on this given context:\n{context}\n\n"
|
|
836
|
+
f"(Use conversation continuity if available.)"
|
|
837
|
+
)
|
|
838
|
+
sid = self.get_session_id()
|
|
839
|
+
prev_id = self._gpt_models_latest_prev_resp_ids.get(sid)
|
|
840
|
+
args = set_args(model=_model, instructions=self.smxai_identity, input=input_prompt, previous_id=prev_id, store=True)
|
|
841
|
+
|
|
842
|
+
with _client.responses.stream(**args) as s:
|
|
843
|
+
for event in s:
|
|
844
|
+
if event.type == "response.output_text.delta" and event.delta:
|
|
845
|
+
yield event.delta
|
|
846
|
+
elif event.type == "response.error":
|
|
847
|
+
raise RuntimeError(str(event.error))
|
|
848
|
+
final = s.get_final_response()
|
|
849
|
+
if getattr(final, "id", None):
|
|
850
|
+
self._gpt_models_latest_prev_resp_ids[sid] = final.id
|
|
851
|
+
|
|
852
|
+
elif _provider == "anthropic":
|
|
853
|
+
with _client.messages.stream(
|
|
854
|
+
max_tokens=1024,
|
|
855
|
+
messages=[{"role": "user", "content":f"{self.smxai_identity}\n\n {_contents}"},],
|
|
856
|
+
model=_model,
|
|
857
|
+
) as stream:
|
|
858
|
+
for text in stream.text_stream:
|
|
859
|
+
yield text # end="", flush=True
|
|
860
|
+
|
|
861
|
+
else: # Assumes standard openai_sdk
|
|
862
|
+
openai_sdk_prompt = [
|
|
863
|
+
{"role": "system", "content": self.smxai_identity},
|
|
864
|
+
{"role": "user", "content": f"{self.smxai_instructions}\n\nGenerate response to this query: {query}\nbased on this context:\n{context}\nand history:\n{history}\n\nUse conversation continuity if available.)"},
|
|
865
|
+
]
|
|
866
|
+
response = _client.chat.completions.create(
|
|
867
|
+
model=_model,
|
|
868
|
+
messages=openai_sdk_prompt,
|
|
869
|
+
stream=True,
|
|
870
|
+
)
|
|
871
|
+
for chunk in response:
|
|
872
|
+
token = getattr(chunk.choices[0].delta, "content", "")
|
|
873
|
+
if token:
|
|
874
|
+
yield token
|
|
875
|
+
except Exception as e:
|
|
876
|
+
yield f"Error during streaming: {type(e).__name__}: {e}"
|
|
877
|
+
|
|
658
878
|
def process_query(self, query, context, history, stream=False):
|
|
659
|
-
|
|
660
|
-
if not self.
|
|
661
|
-
chat_profile =
|
|
879
|
+
|
|
880
|
+
if not self._chat_profile:
|
|
881
|
+
chat_profile = _prof.get_profile("chat") or _prof.get_profile("admin")
|
|
662
882
|
if not chat_profile:
|
|
663
|
-
|
|
883
|
+
return """<p style='color:red;'>Error: Chat profile is not configured. Add a chat profile inside the admin panel or contact your administrator.</p>
|
|
884
|
+
"""
|
|
664
885
|
return
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
self.
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
{self.
|
|
886
|
+
|
|
887
|
+
self._chat_profile = chat_profile
|
|
888
|
+
self._chat_profile['client'] = _prof.get_client(chat_profile)
|
|
889
|
+
_provider = self._chat_profile['provider']
|
|
890
|
+
_client = self._chat_profile['client']
|
|
891
|
+
_model = self._chat_profile['model']
|
|
892
|
+
_contents = f"""
|
|
893
|
+
{self.smxai_instructions}\n\n
|
|
673
894
|
Question: {query}\n
|
|
674
|
-
Context: {context}\n
|
|
675
|
-
History: {history}
|
|
895
|
+
Context: {context}\n\n
|
|
896
|
+
History: {history}\n\n
|
|
897
|
+
Use conversation continuity if available.
|
|
676
898
|
"""
|
|
677
899
|
|
|
678
900
|
openai_sdk_prompt = [
|
|
679
|
-
{"role": "system", "content": self.
|
|
680
|
-
{"role": "user",
|
|
681
|
-
|
|
682
|
-
|
|
901
|
+
{"role": "system", "content": self.smxai_identity},
|
|
902
|
+
{"role": "user", "content": f"""{self.smxai_instructions}\n\n
|
|
903
|
+
Generate response to this query: {query}\n
|
|
904
|
+
based on this context:\n{context}\n
|
|
905
|
+
and history:\n{history}\n\n
|
|
906
|
+
Use conversation continuity if available.)
|
|
907
|
+
"""
|
|
908
|
+
},
|
|
683
909
|
]
|
|
684
910
|
|
|
685
911
|
def google_process_query():
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
912
|
+
try:
|
|
913
|
+
response = _client.models.generate_content(
|
|
914
|
+
model=_model,
|
|
915
|
+
contents=_contents,
|
|
916
|
+
config=types.GenerateContentConfig(
|
|
917
|
+
system_instruction=self.smxai_identity,
|
|
918
|
+
temperature=0.3,
|
|
919
|
+
max_output_tokens=1024,
|
|
920
|
+
),
|
|
921
|
+
)
|
|
922
|
+
answer = response.text
|
|
923
|
+
|
|
924
|
+
# answer = strip_html(answer)
|
|
925
|
+
return answer
|
|
926
|
+
except Exception as e:
|
|
927
|
+
return f"Error: {str(e)}"
|
|
928
|
+
|
|
929
|
+
def gpt_models_latest_process_query(previous_id: str | None, reasoning_effort = "minimal", verbosity = "low"):
|
|
930
|
+
"""
|
|
931
|
+
Returns (answer_text, new_response_id)
|
|
932
|
+
"""
|
|
933
|
+
# Prepare the prompt with conversation history and context
|
|
934
|
+
input = (
|
|
935
|
+
f"{self.smxai_instructions}\n\n"
|
|
936
|
+
f"Generate a response to this query:\n{query}\n"
|
|
937
|
+
f"based on this given context:\n{context}\n\n"
|
|
938
|
+
f"(Use conversation continuity if available.)"
|
|
689
939
|
)
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
940
|
+
|
|
941
|
+
sid = self.get_session_id()
|
|
942
|
+
prev_id = self._gpt_models_latest_prev_resp_ids.get(sid)
|
|
943
|
+
|
|
944
|
+
args = set_args(
|
|
945
|
+
model=_model,
|
|
946
|
+
instructions=self.smxai_identity,
|
|
947
|
+
input=input,
|
|
948
|
+
previous_id=prev_id,
|
|
949
|
+
store=True,
|
|
950
|
+
reasoning_effort=reasoning_effort,
|
|
951
|
+
verbosity=verbosity
|
|
952
|
+
)
|
|
953
|
+
try:
|
|
954
|
+
# Non-stream path
|
|
955
|
+
resp = _client.responses.create(**args)
|
|
956
|
+
answer = _out(resp)
|
|
957
|
+
if getattr(resp, "id", None):
|
|
958
|
+
self._gpt_models_latest_prev_resp_ids[sid] = resp.id
|
|
959
|
+
|
|
960
|
+
# answer = strip_html(answer)
|
|
961
|
+
return answer
|
|
962
|
+
|
|
963
|
+
except Exception as e:
|
|
964
|
+
return f"Error: {type(e).__name__}: {e}"
|
|
965
|
+
|
|
966
|
+
def anthropic_process_query():
|
|
967
|
+
try:
|
|
968
|
+
response = _client.messages.create(
|
|
969
|
+
model=_model,
|
|
970
|
+
max_tokens=1024,
|
|
971
|
+
system=self.self.smxai_identity,
|
|
972
|
+
messages=[{"role": "user", "content":_contents}],
|
|
973
|
+
stream=False,
|
|
974
|
+
)
|
|
975
|
+
return response.content[0].text.strip()
|
|
976
|
+
|
|
977
|
+
except Exception as e:
|
|
978
|
+
return f"Error: {str(e)}"
|
|
979
|
+
|
|
693
980
|
def openai_sdk_process_query():
|
|
694
981
|
|
|
695
982
|
try:
|
|
696
|
-
response =
|
|
697
|
-
model=
|
|
983
|
+
response = _client.chat.completions.create(
|
|
984
|
+
model=_model,
|
|
698
985
|
messages=openai_sdk_prompt,
|
|
699
|
-
|
|
700
|
-
max_tokens=1024,
|
|
701
|
-
stream=stream
|
|
986
|
+
stream=False,
|
|
702
987
|
)
|
|
703
988
|
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
for chunk in response:
|
|
708
|
-
token = getattr(chunk.choices[0].delta, "content", "")
|
|
709
|
-
if not token:
|
|
710
|
-
continue
|
|
711
|
-
parts.append(token)
|
|
712
|
-
self.stream_write(token)
|
|
713
|
-
|
|
714
|
-
self.stream_write("[END]") # close the SSE bubble
|
|
715
|
-
answer = "".join(parts)
|
|
716
|
-
return answer
|
|
717
|
-
else:
|
|
718
|
-
# -------- one-shot buffered --------
|
|
719
|
-
answer = response.choices[0].message.content
|
|
720
|
-
return answer
|
|
989
|
+
# -------- one-shot buffered --------
|
|
990
|
+
answer = response.choices[0].message.content .strip()
|
|
991
|
+
return answer
|
|
721
992
|
except Exception as e:
|
|
722
993
|
return f"Error: {str(e)}"
|
|
723
|
-
|
|
724
|
-
if
|
|
994
|
+
|
|
995
|
+
if _provider == "google":
|
|
725
996
|
return google_process_query()
|
|
726
|
-
|
|
727
|
-
return
|
|
728
|
-
|
|
997
|
+
if _provider == "openai" and _model in self.get_gpt_models_latest():
|
|
998
|
+
return gpt_models_latest_process_query(self._gpt_models_latest_prev_resp_ids.get(self.get_session_id()))
|
|
999
|
+
if _provider == "anthropic":
|
|
1000
|
+
return anthropic_process_query()
|
|
1001
|
+
return openai_sdk_process_query()
|
|
729
1002
|
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
if not self.coder_profile:
|
|
733
|
-
coder_profile = prof.get_profile('coder') or prof.get_profile('chat') or {}
|
|
734
|
-
if not coder_profile:
|
|
735
|
-
return
|
|
1003
|
+
|
|
1004
|
+
def repair_python_cell(self, py_code: str) -> str:
|
|
736
1005
|
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
Output only the working code needed. Assume df is already defined.
|
|
747
|
-
Produce at least one visible result: (syntaxmatrix.display.show(), display(), plt.show()).
|
|
1006
|
+
_CELL_REPAIR_RULES = """
|
|
1007
|
+
Fix the Python cell to satisfy:
|
|
1008
|
+
- Single valid cell; imports at the top.
|
|
1009
|
+
- Do not import or invoke or use 'python-dotenv' or 'dotenv' because it's not needed.
|
|
1010
|
+
- No top-level statements between if/elif/else branches.
|
|
1011
|
+
- Regression must use either sklearn with train_test_split (then X_test exists) and R^2/MAE/RMSE,
|
|
1012
|
+
or statsmodels OLS. No accuracy_score in regression.
|
|
1013
|
+
- Keep all plotting + savefig + BytesIO + display inside the branch that created the figure.
|
|
1014
|
+
- Return ONLY the corrected cell.
|
|
748
1015
|
"""
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
1016
|
+
code = textwrap.dedent(py_code or "").strip()
|
|
1017
|
+
needs_fix = False
|
|
1018
|
+
if re.search(r"\baccuracy_score\b", code) and re.search(r"\bLinearRegression\b|\bOLS\b", code):
|
|
1019
|
+
needs_fix = True
|
|
1020
|
+
if re.search(r"\bX_test\b", code) and not re.search(r"\bX_test\s*=", code):
|
|
1021
|
+
needs_fix = True
|
|
1022
|
+
try:
|
|
1023
|
+
ast.parse(code)
|
|
1024
|
+
except SyntaxError:
|
|
1025
|
+
needs_fix = True
|
|
1026
|
+
if not needs_fix:
|
|
1027
|
+
return code
|
|
1028
|
+
_prompt = f"```python\n{code}\n```"
|
|
1029
|
+
|
|
1030
|
+
repair_profile = _prof.get_profile("vision2text") or _prof.get_profile("admin")
|
|
1031
|
+
if not repair_profile:
|
|
1032
|
+
return (
|
|
1033
|
+
'<div class="smx-alert smx-alert-warn">'
|
|
1034
|
+
'No LLM profile configured for <code>coding</code> (or <code>admin</code>). <br>'
|
|
1035
|
+
'Please, add the LLM profile inside the admin panel or contact your Administrator.'
|
|
1036
|
+
'</div>'
|
|
754
1037
|
)
|
|
755
|
-
return response.text
|
|
756
1038
|
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
1039
|
+
_client = _prof.get_client(repair_profile)
|
|
1040
|
+
_provider = repair_profile['provider'].lower()
|
|
1041
|
+
_model = repair_profile['model']
|
|
1042
|
+
|
|
1043
|
+
#1 Google
|
|
1044
|
+
if _provider == "google":
|
|
1045
|
+
from google.genai import types
|
|
1046
|
+
|
|
1047
|
+
fixed = _client.models.generate_content(
|
|
1048
|
+
model=_model,
|
|
1049
|
+
contents=_prompt,
|
|
1050
|
+
config=types.GenerateContentConfig(
|
|
1051
|
+
system_instruction=_CELL_REPAIR_RULES,
|
|
1052
|
+
temperature=0.8,
|
|
1053
|
+
max_output_tokens=1024,
|
|
1054
|
+
),
|
|
1055
|
+
)
|
|
1056
|
+
|
|
1057
|
+
#2 Openai
|
|
1058
|
+
elif _provider == "openai" and _model in GPT_MODELS_LATEST:
|
|
1059
|
+
|
|
1060
|
+
args = set_args(
|
|
1061
|
+
model=_model,
|
|
1062
|
+
instructions=_CELL_REPAIR_RULES,
|
|
1063
|
+
input=[{"role": "user", "content": _prompt}],
|
|
1064
|
+
previous_id=None,
|
|
1065
|
+
store=False,
|
|
1066
|
+
reasoning_effort="medium",
|
|
1067
|
+
verbosity="medium",
|
|
1068
|
+
)
|
|
1069
|
+
fixed = _out(_client.responses.create(**args))
|
|
1070
|
+
|
|
1071
|
+
# Anthropic
|
|
1072
|
+
elif _provider == "anthropic":
|
|
1073
|
+
|
|
1074
|
+
fixed = _client.messages.create(
|
|
1075
|
+
model=_model,
|
|
1076
|
+
max_tokens=1024,
|
|
1077
|
+
system=_CELL_REPAIR_RULES,
|
|
1078
|
+
messages=[{"role": "user", "content":_prompt}],
|
|
1079
|
+
stream=False,
|
|
1080
|
+
)
|
|
1081
|
+
|
|
1082
|
+
# OpenAI SDK
|
|
1083
|
+
else:
|
|
1084
|
+
fixed = _client.chat.completions.create(
|
|
1085
|
+
model=_model,
|
|
1086
|
+
messages=[
|
|
1087
|
+
{"role": "system", "content":_CELL_REPAIR_RULES},
|
|
1088
|
+
{"role": "user", "content":_prompt},
|
|
1089
|
+
],
|
|
1090
|
+
max_tokens=1024,
|
|
763
1091
|
)
|
|
764
|
-
|
|
1092
|
+
|
|
1093
|
+
try:
|
|
1094
|
+
ast.parse(fixed);
|
|
1095
|
+
return fixed
|
|
1096
|
+
except Exception:
|
|
1097
|
+
return code
|
|
1098
|
+
|
|
1099
|
+
def get_last_llm_usage(self):
|
|
1100
|
+
return getattr(self, "_last_llm_usage", None)
|
|
1101
|
+
|
|
1102
|
+
def ai_generate_code(self, refined_question, tasks, df):
|
|
1103
|
+
|
|
1104
|
+
def normalise_llm_code(s: str) -> str:
|
|
1105
|
+
s = s.replace("\t", " ")
|
|
1106
|
+
s = textwrap.dedent(s)
|
|
1107
|
+
lines = s.splitlines()
|
|
1108
|
+
|
|
1109
|
+
# drop leading blank lines
|
|
1110
|
+
while lines and not lines[0].strip():
|
|
1111
|
+
lines.pop(0)
|
|
1112
|
+
|
|
1113
|
+
# if everything is still indented >=4 spaces, shift left
|
|
1114
|
+
indents = [len(l) - len(l.lstrip(" ")) for l in lines if l.strip()]
|
|
1115
|
+
if indents and min(indents) >= 4:
|
|
1116
|
+
m = min(indents)
|
|
1117
|
+
lines = [l[m:] if len(l) >= m else l for l in lines]
|
|
1118
|
+
|
|
1119
|
+
return "\n".join(lines)
|
|
765
1120
|
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
else:
|
|
769
|
-
code = others_generate_code()
|
|
1121
|
+
CONTEXT = f"Columns: {list(df.columns)}\n\nDtypes: {df.dtypes.astype(str).to_dict()}\n\n"
|
|
1122
|
+
AVAILABLE_COLUMNS = list(df.columns)
|
|
770
1123
|
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
1124
|
+
# --- SMX: normalise tasks coming from intent agent ---
|
|
1125
|
+
if isinstance(tasks, str):
|
|
1126
|
+
import json, ast, re
|
|
1127
|
+
try:
|
|
1128
|
+
tasks_parsed = json.loads(tasks)
|
|
1129
|
+
except Exception:
|
|
1130
|
+
try:
|
|
1131
|
+
tasks_parsed = ast.literal_eval(tasks)
|
|
1132
|
+
except Exception:
|
|
1133
|
+
tasks_parsed = re.findall(r"[A-Za-z_]+", tasks)
|
|
1134
|
+
tasks = tasks_parsed
|
|
1135
|
+
if not isinstance(tasks, list):
|
|
1136
|
+
tasks = [str(tasks)]
|
|
1137
|
+
tasks = [str(t).strip().lower() for t in tasks if str(t).strip()]
|
|
1138
|
+
|
|
1139
|
+
ai_profile = """
|
|
1140
|
+
- You are a Python expert specializing in data science and machine learning.
|
|
1141
|
+
- Your task is to generate a single, complete, production-quality, executable Python script for a Jupyter-like Python kernel, based on the given instructions.
|
|
1142
|
+
- The dataset is already loaded as a pandas DataFrame named `df` (no file I/O or file uploads).
|
|
1143
|
+
- Make a copy of `df` and name it `df_copy`. Make sure `df_copy` is preprocessed and cleaned, named `df_cleaned`, if not already done so. Then use `df_cleaned` to perform the ML tasks described in the given context.
|
|
1144
|
+
- Select your features and target, from `df_cleaned`, with care and name it `required_cols`
|
|
1145
|
+
- Create your 'df_filtered by doing: df_filtered = df_cleaned[required_cols].
|
|
1146
|
+
- Use the {TEMPLATE_CATALOGUE} below to educate yourself on which visualizations you will implement in the code.
|
|
1147
|
+
- The final output MUST be the complete, executable Python code only, enclosed in a single markdown code block (```python ... ```), which is required to fulfill the user's request. See the {tasks} below.
|
|
1148
|
+
- Do not include any explanatory text or markdown outside the code block.
|
|
1149
|
+
"""
|
|
1150
|
+
|
|
1151
|
+
TEMPLATE_CATALOGUE = """
|
|
1152
|
+
### Available SyntaxMatrix templates (use these instead of inventing new helpers)
|
|
1153
|
+
|
|
1154
|
+
Visualisation templates (dataset-agnostic):
|
|
1155
|
+
- viz_pie(df, category_col=None, top_k=8): pie/donut shares within a category.
|
|
1156
|
+
- viz_stacked_bar(df, x=None, hue=None, normalise=True): composition across groups.
|
|
1157
|
+
- viz_count_bar(df, category_col=None, top_k=12): counts/denominators by category.
|
|
1158
|
+
- viz_box(df, x=None, y=None): spread/outliers of numeric by category.
|
|
1159
|
+
- viz_scatter(df, x=None, y=None, hue=None): relationship between two numeric vars.
|
|
1160
|
+
- viz_distribution(df, col=None): histogram-style distribution for numeric.
|
|
1161
|
+
- viz_kde(df, col=None): density curve for numeric.
|
|
1162
|
+
- viz_area(df, time_col=None, y_col=None): area/trend over time.
|
|
1163
|
+
- viz_line(df, x=None, y=None, hue=None): line/trend plot.
|
|
1164
|
+
|
|
1165
|
+
ML/stat templates:
|
|
1166
|
+
- classification(df): standard classification pipeline + metrics + plots.
|
|
1167
|
+
- regression(df): standard regression pipeline + metrics + plots.
|
|
1168
|
+
- clustering(df): clustering workflow + cluster plots.
|
|
1169
|
+
- anomaly_detection(df)
|
|
1170
|
+
- ts_anomaly_detection(df)
|
|
1171
|
+
- time_series_forecasting(df)
|
|
1172
|
+
- time_series_classification(df, entity_col, time_col, target_col)
|
|
1173
|
+
- dimensionality_reduction(df)
|
|
1174
|
+
- feature_selection(df)
|
|
1175
|
+
- eda_overview(df)
|
|
1176
|
+
- eda_correlation(df)
|
|
1177
|
+
- multilabel_classification(df, label_cols)
|
|
1178
|
+
- recommendation(df)
|
|
1179
|
+
- topic_modelling(df)
|
|
1180
|
+
"""
|
|
1181
|
+
|
|
1182
|
+
instructions = (
|
|
1183
|
+
"### Context"
|
|
1184
|
+
f"- DataFrame - (`df`): {df}"
|
|
1185
|
+
f"- Schema (names → dtypes): {CONTEXT}"
|
|
1186
|
+
f"- Row count: {len(df)}"
|
|
1187
|
+
f"- Task description: {refined_question}"
|
|
1188
|
+
f"- Tasks: {tasks}"
|
|
1189
|
+
f"- Available columns: {AVAILABLE_COLUMNS}"
|
|
1190
|
+
f"- Template catalogue: {TEMPLATE_CATALOGUE}"
|
|
1191
|
+
|
|
1192
|
+
"""
|
|
1193
|
+
### Template rules
|
|
1194
|
+
- You MAY call a template if it matches the task.
|
|
1195
|
+
- Do NOT invent template names.
|
|
1196
|
+
- If no template fits, write minimal direct pandas/sklearn/seaborn code instead.
|
|
1197
|
+
- Keep the solution short: avoid writing wrappers/utilities already handled by SyntaxMatrix hardener.
|
|
1198
|
+
|
|
1199
|
+
#### Template selection hint examples:
|
|
1200
|
+
- If the task asks for pie/donut/composition shares → use viz_pie.
|
|
1201
|
+
- If it asks for denominators/counts per category → viz_count_bar.
|
|
1202
|
+
- If it asks for spread/outliers/comparison across groups → viz_box.
|
|
1203
|
+
- If it asks for relationship / “X vs Y” → viz_scatter.
|
|
1204
|
+
- If it asks for trend over time → viz_line or viz_area.
|
|
1205
|
+
|
|
1206
|
+
### Hard requirements
|
|
1207
|
+
1) Code only. No markdown, no comments, no explanations.
|
|
1208
|
+
2) Import everything you use explicitly.
|
|
1209
|
+
- Use pandas/numpy/matplotlib by default.
|
|
1210
|
+
- Seaborn may be unavailable at runtime; **do not import seaborn inside your code**.
|
|
1211
|
+
- If you call sns.*, assume sns is already defined by the framework.
|
|
1212
|
+
3) Avoid deprecated / removed APIs**, e.g.:
|
|
1213
|
+
- pandas: do not use `.append`, `.ix`, `.as_matrix`; prefer current patterns.
|
|
1214
|
+
- seaborn: do not use `distplot`; avoid `pairplot` on very large data unless sampling.
|
|
1215
|
+
- scikit-learn: import from `sklearn.model_selection` (not `sklearn.cross_validation`);
|
|
1216
|
+
set `random_state=42` where relevant.
|
|
1217
|
+
4) Be defensive, but avoid hard-failing on optional fields:
|
|
1218
|
+
- If the primary column, needed to answer the question, is missing, review your copy of the `df` again.
|
|
1219
|
+
Make sure that you selected the proper column.
|
|
1220
|
+
Never use a column/variable which isn't available or defined.
|
|
1221
|
+
- If a secondary/extra column is missing, show a warning with `show(...)` and continue using available fields.
|
|
1222
|
+
- Handle missing values sensibly (drop rows for simple EDA; use `ColumnTransformer` + `SimpleImputer` for modelling).
|
|
1223
|
+
- For categorical features in ML, use `OneHotEncoder(handle_unknown="ignore")`
|
|
1224
|
+
inside a `Pipeline`/`ColumnTransformer` (no `LabelEncoder` on features).
|
|
1225
|
+
5) Keep it fast (kernel timeout ~8s):
|
|
1226
|
+
- For plots on large frames (>20k rows), downsample to ~1,000 rows
|
|
1227
|
+
(`df.sample(1000, random_state=42)`) unless aggregation is more appropriate.
|
|
1228
|
+
- Prefer vectorised ops; avoid O(n²) Python loops.
|
|
1229
|
+
6) Keep the solution compact:
|
|
1230
|
+
- Do not define large helper libraries or long “required column” sets.
|
|
1231
|
+
- Aim for ≤120 lines excluding imports.
|
|
1232
|
+
7) Always produce at least one visible result at the end:
|
|
1233
|
+
- If plotting with matplotlib/seaborn: call `plt.tight_layout(); plt.show()`.
|
|
1234
|
+
- If producing a table or metrics:
|
|
1235
|
+
`from syntaxmatrix.display import show` then `show(object_or_dataframe)`.
|
|
1236
|
+
8) Follow task type conventions:
|
|
1237
|
+
- **EDA/Stats**: compute the requested stat, then show a relevant table
|
|
1238
|
+
(e.g., summary/crosstab) or plot.
|
|
1239
|
+
- **Classification**: train/valid split (`train_test_split`), pipeline with scaling/encoding,
|
|
1240
|
+
fit, show accuracy and a confusion matrix via
|
|
1241
|
+
`ConfusionMatrixDisplay.from_estimator(...); plt.show()`.
|
|
1242
|
+
Also show `classification_report` as a dataframe if short.
|
|
1243
|
+
- **Regression**: train/valid split, pipeline as needed, fit, show R² and MAE;
|
|
1244
|
+
plot predicted vs actual scatter.
|
|
1245
|
+
- **Correlation/Chi-square/ANOVA**: compute the statistic + p-value and show a concise
|
|
1246
|
+
result table (with `show(...)`) and, when sensible, a small plot (heatmap/bar).
|
|
1247
|
+
9) Don't mutate or recreate target columns if they already exist.
|
|
1248
|
+
10) Keep variable names short and clear; prefer `num_cols` / `cat_cols` discovery by dtype.
|
|
1249
|
+
11) You MUST NOT reference any column outside Available columns: {AVAILABLE_COLUMNS}.
|
|
1250
|
+
12) If asked to predict/classify, choose the target by matching the task text to Allowed columns
|
|
1251
|
+
and never invent a new name.
|
|
1252
|
+
|
|
1253
|
+
+            #### Cohort rules
+            When you generate plots for cohorts or categories, you MUST obey these rules:
+            1) ALWAYS guard cohort masks:
+               - After you define something like:
+                     _mask_a = (df['BMI'] < 18.5) & df['BMI'].notna()
+                     _mask_b = ~(df['BMI'] < 18.5) & df['BMI'].notna()
+                 compute their sizes:
+                     n_a = int(_mask_a.sum())
+                     n_b = int(_mask_b.sum())
+               - If a mask has no rows (or almost none), do NOT draw an empty plot.
+                 Instead call:
+                     show(f"Skipping cohort '{label}': no rows after filtering.")
+                 and return.
+
+            2) Before any groupby / crosstab for a plot:
+               - Fill missing categories so groupby does not drop everything:
+                     df[col] = df[col].fillna("Unknown")
+               - After building the table:
+                     tab = tmp.groupby([...]).size().unstack(...).fillna(0)
+                 ALWAYS check:
+                     if tab.empty:
+                         show(f"Skipping plot for {col}: no data after grouping.")
+                         continue
+                 Only call .plot(...) if the table is non-empty.
+
+            3) For value_counts-based plots:
+               - If the Series is empty after filtering (len(s) == 0),
+                 do NOT draw a figure. Just call:
+                     show(f"No data available to plot for {col} in this cohort.")
+                 and skip.
+
+            4) Never try to “hide” an error with a blank plot.
+               A blank chart is treated as a bug. If there is no data, explain it
+               clearly using show(...), and avoid calling matplotlib/Seaborn.
+
+            5) Never use print(...). All user-visible diagnostics go through show(...).
+
+
+            ### Output
+            Return only runnable Python that:
+            - Imports what it needs,
+            - Validates columns,
+            - Visualises tables, charts, and graphs, each with an appropriate caption,
+            - Implements {tasks} to solve {refined_question},
+            - And ends with at least 3 visible outputs (`show(...)` and/or `plt.show()`).
+            """)
+
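The cohort rules embedded in the prompt above describe a guard-before-plot pattern. A minimal sketch of what compliant generated code could look like (column name, threshold, and data are hypothetical; `show` is assumed to come from `syntaxmatrix.display`, as requirement 7 states):

```python
# Minimal sketch of the guarded-cohort pattern the prompt asks for.
import pandas as pd
import matplotlib.pyplot as plt
from syntaxmatrix.display import show

df = pd.DataFrame({"BMI": [17.9, 22.5, 31.2, None, 19.0]})  # illustrative data

mask = (df["BMI"] < 18.5) & df["BMI"].notna()
n = int(mask.sum())

if n == 0:
    # No rows survived the filter: report it instead of drawing an empty figure.
    show("Skipping cohort 'Underweight': no rows after filtering.")
else:
    df.loc[mask, "BMI"].plot(kind="hist", title=f"Underweight cohort (n={n})")
    plt.tight_layout()
    plt.show()
```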
+        if not self._coding_profile:
+            coding_profile = _prof.get_profile("coding") or _prof.get_profile("admin")
+            if not coding_profile:
+                return (
+                    '<div class="smx-alert smx-alert-warn">'
+                    'No LLM profile configured for <code>coding</code> (or <code>admin</code>). <br>'
+                    'Please add an LLM profile in the admin panel or contact your administrator.'
+                    '</div>'
+                )
+
+            self._coding_profile = coding_profile
+            self._coding_profile['client'] = _prof.get_client(coding_profile)
+
+        # code = mlearning_agent(instructions, ai_profile, self._coding_profile)
+        code, usage = mlearning_agent(instructions, ai_profile, self._coding_profile)
+        self._last_llm_usage = usage
+
+        if code:
+            import re
+            code = normalise_llm_code(code)
+
+            # Strip a surrounding markdown code fence if the model returned one.
+            m = re.search(r"```(?:python)?\s*(.*?)\s*```", code, re.DOTALL | re.IGNORECASE)
+            if m:
+                code = m.group(1).strip()
+
+            # Ensure `import io` is present when the snippet uses io.BytesIO.
+            if "import io" not in code and "io.BytesIO" in code:
+                lines = code.split('\n')
+                import_lines = []
+                other_lines = []
+
+                for line in lines:
+                    if line.strip().startswith('import ') or line.strip().startswith('from '):
+                        import_lines.append(line)
+                    else:
+                        other_lines.append(line)
+
+                if "import io" not in '\n'.join(import_lines):
+                    import_lines.append('import io')
+
+                code = '\n'.join(import_lines + [''] + other_lines)
+
+            TEMPLATE_NAMES = [
+                "viz_pie", "viz_stacked_bar", "viz_count_bar", "viz_box", "viz_scatter",
+                "viz_distribution", "viz_kde", "viz_area", "viz_line",
+                "classification", "regression", "clustering", "anomaly_detection",
+                "ts_anomaly_detection", "time_series_forecasting", "time_series_classification",
+                "dimensionality_reduction", "feature_selection", "eda_overview", "eda_correlation",
+                "multilabel_classification", "recommendation", "topic_modelling"
+            ]
+
+            # Prepend an import for any catalogue templates the generated code calls.
+            used = [t for t in TEMPLATE_NAMES if re.search(rf"\b{t}\s*\(", code)]
+            if used:
+                import_line = (
+                    "from syntaxmatrix.agentic.model_templates import " +
+                    ", ".join(sorted(set(used)))
+                )
+                if import_line not in code:
+                    code = import_line + "\n" + code
+
+            return code.strip()
+
+        return "Error: AI code generation failed."
+
+
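As a rough illustration of the post-processing above, a fenced model reply that calls a catalogue template would be unwrapped and given the template import. The reply text and column name below are made up; the two steps mirror the fence-stripping and import-injection logic shown in the diff.

```python
import re

# Hypothetical model reply wrapped in a markdown fence.
raw = '```python\nviz_pie(df, category_col="Outcome", top_k=8)\nplt.show()\n```'

# Step 1: strip the fence, as in the method above.
m = re.search(r"```(?:python)?\s*(.*?)\s*```", raw, re.DOTALL | re.IGNORECASE)
code = m.group(1).strip() if m else raw

# Step 2: detect catalogue templates and prepend the import line.
used = [t for t in ("viz_pie", "viz_box") if re.search(rf"\b{t}\s*\(", code)]
if used:
    code = ("from syntaxmatrix.agentic.model_templates import "
            + ", ".join(sorted(set(used))) + "\n" + code)

print(code)
# from syntaxmatrix.agentic.model_templates import viz_pie
# viz_pie(df, category_col="Outcome", top_k=8)
# plt.show()
```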
+    def sanitize_rough_to_markdown_task(self, rough: str) -> str:
+        """
+        Return only the Task text (no tags).
+        Behaviour:
+        - If <Task>...</Task> exists: return its inner text.
+        - If not: return the input with <rough> wrapper and any <Error> blocks removed.
+        - Never raises; always returns a string.
+        """
+        s = ("" if rough is None else str(rough)).strip()
+
+        def _find_ci(hay, needle, start=0):
+            return hay.lower().find(needle.lower(), start)
+
+        # Prefer explicit <Task>...</Task>
+        i = _find_ci(s, "<task")
+        if i != -1:
+            j = s.find(">", i)
+            k = _find_ci(s, "</task>", j + 1)
+            if j != -1 and k != -1:
+                return s[j + 1:k].strip()
+        # Otherwise strip any <Error>...</Error> blocks (if present)
+        out = s
+        while True:
+            e1 = _find_ci(out, "<error")
+            if e1 == -1:
+                break
+            e1_end = out.find(">", e1)
+            e2 = _find_ci(out, "</error>", (e1_end + 1) if e1_end != -1 else e1 + 1)
+            if e1_end == -1 or e2 == -1:
+                break
+            out = out[:e1] + out[e2 + len("</error>"):]
+
+        # Drop optional <rough> wrapper
+        return out.replace("<rough>", "").replace("</rough>", "").strip()

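A quick, hypothetical illustration of the sanitiser's behaviour (the inputs and the `app` object holding the method are invented for the example):

```python
# Hypothetical inputs; the method lives on the application object shown in this diff.
rough_1 = "<rough><Task>Plot BMI by Outcome.</Task></rough>"
rough_2 = "<rough><Error>model timeout</Error>Summarise the dataset.</rough>"

app.sanitize_rough_to_markdown_task(rough_1)   # -> "Plot BMI by Outcome."
app.sanitize_rough_to_markdown_task(rough_2)   # -> "Summarise the dataset."
```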
-        code = strip_describe_slice(code)
-        code = drop_bad_classification_metrics(code, df)
-        return code.strip()
-

     def run(self):
         url = f"http://{self.host}:{self.port}/"
         webbrowser.open(url)
         self.app.run(host=self.host, port=self.port, debug=False)
-