syntaxmatrix-1.4.6-py3-none-any.whl → syntaxmatrix-2.5.5.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- syntaxmatrix/__init__.py +13 -8
- syntaxmatrix/agentic/__init__.py +0 -0
- syntaxmatrix/agentic/agent_tools.py +24 -0
- syntaxmatrix/agentic/agents.py +810 -0
- syntaxmatrix/agentic/code_tools_registry.py +37 -0
- syntaxmatrix/agentic/model_templates.py +1790 -0
- syntaxmatrix/auth.py +308 -14
- syntaxmatrix/commentary.py +328 -0
- syntaxmatrix/core.py +993 -375
- syntaxmatrix/dataset_preprocessing.py +218 -0
- syntaxmatrix/db.py +92 -95
- syntaxmatrix/display.py +95 -121
- syntaxmatrix/generate_page.py +634 -0
- syntaxmatrix/gpt_models_latest.py +46 -0
- syntaxmatrix/history_store.py +26 -29
- syntaxmatrix/kernel_manager.py +96 -17
- syntaxmatrix/llm_store.py +1 -1
- syntaxmatrix/plottings.py +6 -0
- syntaxmatrix/profiles.py +64 -8
- syntaxmatrix/project_root.py +55 -43
- syntaxmatrix/routes.py +5072 -1398
- syntaxmatrix/session.py +19 -0
- syntaxmatrix/settings/logging.py +40 -0
- syntaxmatrix/settings/model_map.py +300 -33
- syntaxmatrix/settings/prompts.py +273 -62
- syntaxmatrix/settings/string_navbar.py +3 -3
- syntaxmatrix/static/docs.md +272 -0
- syntaxmatrix/static/icons/favicon.png +0 -0
- syntaxmatrix/static/icons/hero_bg.jpg +0 -0
- syntaxmatrix/templates/dashboard.html +608 -147
- syntaxmatrix/templates/docs.html +71 -0
- syntaxmatrix/templates/error.html +2 -3
- syntaxmatrix/templates/login.html +1 -0
- syntaxmatrix/templates/register.html +1 -0
- syntaxmatrix/ui_modes.py +14 -0
- syntaxmatrix/utils.py +2482 -159
- syntaxmatrix/vectorizer.py +16 -12
- {syntaxmatrix-1.4.6.dist-info → syntaxmatrix-2.5.5.4.dist-info}/METADATA +20 -17
- syntaxmatrix-2.5.5.4.dist-info/RECORD +68 -0
- syntaxmatrix/model_templates.py +0 -30
- syntaxmatrix/static/icons/favicon.ico +0 -0
- syntaxmatrix-1.4.6.dist-info/RECORD +0 -54
- {syntaxmatrix-1.4.6.dist-info → syntaxmatrix-2.5.5.4.dist-info}/WHEEL +0 -0
- {syntaxmatrix-1.4.6.dist-info → syntaxmatrix-2.5.5.4.dist-info}/licenses/LICENSE.txt +0 -0
- {syntaxmatrix-1.4.6.dist-info → syntaxmatrix-2.5.5.4.dist-info}/top_level.txt +0 -0
syntaxmatrix/core.py
CHANGED
@@ -1,102 +1,134 @@
  from __future__ import annotations
- import
+ import ast
+ import textwrap
+ import os, webbrowser, uuid, secrets, re

- from flask import Flask, session, request, has_request_context
- from .
+ from flask import Flask, Response, session, request, has_request_context
+ from syntaxmatrix.agentic.agents import mlearning_agent
+ from syntaxmatrix.history_store import SQLHistoryStore as Store, PersistentHistoryStore as _Store
  from collections import OrderedDict
  from syntaxmatrix.llm_store import save_embed_model, load_embed_model, delete_embed_key
  from . import db, routes
  from .themes import DEFAULT_THEMES
- from .
+ from .ui_modes import UI_MODES
+ from .plottings import render_plotly, pyplot, describe_plotly
  from .file_processor import process_admin_pdf_files
- from google import
- from openai import OpenAI
+ from google.genai import types
  from .vector_db import query_embeddings
  from .vectorizer import embed_text
- from syntaxmatrix.settings.prompts import
- from typing import List
+ from syntaxmatrix.settings.prompts import SMXAI_CHAT_ID, SMXAI_CHAT_INSTRUCTIONS, SMXAI_WEBSITE_DESCRIPTION
+ from typing import List, Generator
  from .auth import init_auth_db
- from . import profiles as
- from syntaxmatrix.utils import strip_describe_slice, drop_bad_classification_metrics
+ from . import profiles as _prof
  from syntaxmatrix.smiv import SMIV
  from .project_root import detect_project_root
+ from syntaxmatrix.gpt_models_latest import extract_output_text as _out, set_args
  from dotenv import load_dotenv
+ from html import unescape
+ from .plottings import render_plotly, pyplot, describe_plotly, describe_matplotlib
+ from threading import RLock
+ from syntaxmatrix.settings.model_map import GPT_MODELS_LATEST


  # ──────── framework‐local storage paths ────────
  # this ensures the key & data always live under the package dir,
  # regardless of where the developer `cd` into before launching.
  _CLIENT_DIR = detect_project_root()
- _HISTORY_DIR = os.path.join(_CLIENT_DIR, "
+ _HISTORY_DIR = os.path.join(_CLIENT_DIR, "smx_history")
  os.makedirs(_HISTORY_DIR, exist_ok=True)
- _SECRET_PATH = os.path.join(_CLIENT_DIR, "data", ".smx_secret_key")

-
+ _SECRET_PATH = os.path.join(_CLIENT_DIR, ".smx_secret_key")

-
-
+ _CLIENT_DOTENV_PATH = os.path.join(str(_CLIENT_DIR.parent), ".env")
+ if os.path.isfile(_CLIENT_DOTENV_PATH):
+     load_dotenv(_CLIENT_DOTENV_PATH, override=True)
+
+ _ICONS_PATH = os.path.join(_CLIENT_DIR, "static", "icons")
+ os.makedirs(_ICONS_PATH, exist_ok=True)

  EDA_OUTPUT = {}  # global buffer for EDA output by session

  class SyntaxMUI:
-     def __init__(
-
-
-         port="5050",
+     def __init__(self,
+         host="127.0.0.1",
+         port="5080",
          user_icon="👩🏿🦲",
-         bot_icon=
-         favicon=
-         site_logo=
+         bot_icon="<img src='/static/icons/favicon.png' width=20' alt='bot'/>",
+         favicon="/static/icons/favicon.png",
+         site_logo="<img src='/static/icons/logo.png' width='30' alt='logo'/>",
          site_title="SyntaxMatrix",
-
-         theme_name="light"
+         project_name="smxAI",
+         theme_name="light",
+         ui_mode = "default"
      ):
-         self.app = Flask(__name__)
-         self.get_app_secrete()
+         self.app = Flask(__name__)
          self.host = host
          self.port = port
+
+         self.get_app_secrete()
          self.user_icon = user_icon
          self.bot_icon = bot_icon
+         self.site_logo = site_logo
          self.favicon = favicon
          self.site_title = site_title
-         self.
-         self.
-         self.ui_mode = "default"
+         self.project_name = project_name
+         self.ui_mode = ui_mode
          self.theme_toggle_enabled = False
-         self.
-         self.
+         self.user_files_enabled = False
+         self.smxai_identity = SMXAI_CHAT_ID
+         self.smxai_instructions = SMXAI_CHAT_INSTRUCTIONS
+         self.website_description = SMXAI_WEBSITE_DESCRIPTION
+         self._eda_output = {}  # {chat_id: html}
+         self._eda_lock = RLock()
+
          db.init_db()
          self.page = ""
          self.pages = db.get_pages()
          init_auth_db()
+
          self.widgets = OrderedDict()
          self.theme = DEFAULT_THEMES.get(theme_name, DEFAULT_THEMES["light"])
          self.system_output_buffer = ""  # Ephemeral buffer initialized
          self.app_token = str(uuid.uuid4())  # NEW: Unique token for each app launch.
          self.admin_pdf_chunks = {}  # In-memory store for admin PDF chunks
          self.user_file_chunks = {}  # In-memory store of user‑uploaded chunks, scoped per chat session
+
+         self._last_llm_usage = None
          routes.setup_routes(self)

-         self.
-         self.
-         self.
-         self.
-         self.
+         self._admin_profile = {}
+         self._chat_profile = {}
+         self._coding_profile = {}
+         self._classification_profile = {}
+         self._summarization_profile = {}
+
+         self._gpt_models_latest_prev_resp_ids = {}
+         self.is_streaming = False
+         self.stream_args = {}
+
+         self._recent_visual_summaries = []
+
+         self.placeholder = ""

+     @staticmethod
      def init_app(app):
-         import
+         import secrets
          if not app.secret_key:
-             app.secret_key = secrets.token_urlsafe(32)
-
+             app.secret_key = secrets.token_urlsafe(32)

-     def get_app_secrete(self):
+     def get_app_secrete(self):
          if os.path.exists(_SECRET_PATH):
              self.app.secret_key = open(_SECRET_PATH, "r", encoding="utf-8").read().strip()
          else:
              new_key = secrets.token_urlsafe(32)
-             open(_SECRET_PATH, "w", encoding="utf-8")
+             with open(_SECRET_PATH, "w", encoding="utf-8") as f:
+                 f.write(new_key)
+             try:
+                 os.chmod(_SECRET_PATH, 0o600)
+             except Exception:
+                 pass
              self.app.secret_key = new_key
-
+

      def _get_visual_context(self):
          """Return the concatenated summaries for prompt injection."""
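The hunk above reworks the `SyntaxMUI` constructor: new `host`, `project_name` and `ui_mode` parameters, the default port moving from "5050" to "5080", and the per-provider LLM profile caches. A minimal usage sketch based only on the signature shown in this diff; the import path (`syntaxmatrix.core`) and how the app is subsequently started are assumptions, not confirmed by the diff:

```python
# Hedged sketch: constructing the 2.5.x SyntaxMUI app with the new parameters.
# Defaults are copied from the __init__ signature in the hunk above;
# the import path is an assumption.
from syntaxmatrix.core import SyntaxMUI

smx = SyntaxMUI(
    host="127.0.0.1",      # new explicit host parameter
    port="5080",           # default changed from "5050" in 1.4.6
    project_name="smxAI",  # new in 2.x; e.g. used for the input placeholder text
    theme_name="light",
    ui_mode="default",     # now a constructor argument instead of a hard-coded value
)
```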
@@ -105,15 +137,75 @@ class SyntaxMUI:
          joined = "\n• " + "\n• ".join(self._recent_visual_summaries)
          return f"\n\nRecent visualizations:{joined}"

+     # add to class
+     def _add_visual_summary(self, summary: str) -> None:
+         if not summary:
+             return
+         if not hasattr(self, "_recent_visual_summaries"):
+             self._recent_visual_summaries = []
+         # keep last 6
+         self._recent_visual_summaries = (self._recent_visual_summaries + [summary])[-6:]

      def set_plottings(self, fig_or_html, note=None):
-
+         # prefer current chat id; fall back to per-browser sid; finally "default"
+         sid = self.get_session_id() or self._sid() or "default"
+
+         # Clear for this session if empty/falsy
          if not fig_or_html or (isinstance(fig_or_html, str) and fig_or_html.strip() == ""):
-
+             with self._eda_lock:
+                 self._eda_output.pop(sid, None)
              return

          html = None

+         # ---- Plotly Figure support ----
+         try:
+             import plotly.graph_objs as go
+             if isinstance(fig_or_html, go.Figure):
+                 html = fig_or_html.to_html(full_html=False)
+         except ImportError:
+             pass
+
+         # ---- Matplotlib Figure support ----
+         if html is None and hasattr(fig_or_html, "savefig"):
+             html = pyplot(fig_or_html)
+
+         # ---- Bytes (PNG etc.) support ----
+         if html is None and isinstance(fig_or_html, bytes):
+             import base64
+             img_b64 = base64.b64encode(fig_or_html).decode()
+             html = f"<img src='data:image/png;base64,{img_b64}'/>"
+
+         # ---- HTML string support ----
+         if html is None and isinstance(fig_or_html, str):
+             html = fig_or_html
+
+         if html is None:
+             raise TypeError("Unsupported object type for plotting.")
+
+         if note:
+             html += f"<div style='margin-top:10px; text-align:center; color:#888;'><strong>{note}</strong></div>"
+
+         wrapper = f'''
+             <div style="
+                 position:relative; max-width:650px; margin:30px auto 20px auto;
+                 padding:20px 28px 10px 28px; background:#fffefc;
+                 border:2px solid #2da1da38; border-radius:16px;
+                 box-shadow:0 3px 18px rgba(90,130,230,0.06); min-height:40px;">
+                 <button id="eda-close-btn" onclick="closeEdaPanel()" style="
+                     position: absolute; top: 20px; right: 12px;
+                     font-size: 1.25em; background: transparent;
+                     border: none; color: #888; cursor: pointer;
+                     z-index: 2; transition: color 0.2s;">×</button>
+                 {html}
+             </div>
+         '''
+
+         with self._eda_lock:
+             self._eda_output[sid] = wrapper
+
+         html = None
+
          # ---- Plotly Figure support ----
          try:
              import plotly.graph_objs as go
@@ -160,8 +252,9 @@ class SyntaxMUI:


      def get_plottings(self):
-         sid =
-
+         sid = self.get_session_id() or self._sid() or "default"
+         with self._eda_lock:
+             return self._eda_output.get(sid, "")


      def load_sys_chunks(self, directory: str = "uploads/sys"):
@@ -188,109 +281,90 @@ class SyntaxMUI:


      def set_ui_mode(self, mode):
-         if mode not in ["default", "card", "bubble", "smx"]:
+         if mode not in self.get_ui_modes():  # ["default", "card", "bubble", "smx"]:
              raise ValueError("UI mode must be one of: 'default', 'card', 'bubble', 'smx'.")
          self.ui_mode = mode

-
      @staticmethod
-     def
-         return
+     def get_ui_modes():
+         return list(UI_MODES.keys())
+         # return "default", "card", "bubble", "smx"

-
      @staticmethod
-     def
+     def get_themes():
          return list(DEFAULT_THEMES.keys())
-

-
+
+     def set_theme(self, theme_name, theme=None):
          if theme_name in DEFAULT_THEMES:
              self.theme = DEFAULT_THEMES[theme_name]
          elif isinstance(theme, dict):
-             self.theme["custom"] = theme
              DEFAULT_THEMES[theme_name] = theme
+             self.theme = DEFAULT_THEMES[theme_name]
          else:
              self.theme = DEFAULT_THEMES["light"]
-
-
+             self.error("Theme must be 'light', 'dark', or a custom dict.")

+
      def enable_theme_toggle(self):
-         self.theme_toggle_enabled = True
+         self.theme_toggle_enabled = True

-
-
-         self.theme_toggle_enabled = False
+     def enable_user_files(self):
+         self.user_files_enabled = True

-
-     def columns(
+     @staticmethod
+     def columns(components):
          col_html = "<div style='display:flex; gap:10px;'>"
          for comp in components:
              col_html += f"<div style='flex:1;'>{comp}</div>"
          col_html += "</div>"
          return col_html
-
-
-     def set_favicon(self, icon):
-         self.favicon = icon
-

      def set_site_title(self, title):
          self.site_title = title

+     def set_project_name(self, project_name):
+         self.project_name = project_name
+
+     def set_favicon(self, icon):
+         self.favicon = icon

      def set_site_logo(self, logo):
          self.site_logo = logo

-
-     def set_project_title(self, project_title):
-         self.project_title = project_title
-
-
      def set_user_icon(self, icon):
          self.user_icon = icon

-
      def set_bot_icon(self, icon):
          self.bot_icon = icon

-
-
+     def text_input(self, key, id, label, placeholder=""):
+         if not placeholder:
+             placeholder = f"Ask {self.project_name} anything"
          if key not in self.widgets:
-             self.widgets[key] = {
-
-
-
-         q = session.get(key, default)
-
-         classifier_profile = prof.get_profile("classifier") or prof.get_profile("chat")
-         if not classifier_profile:
-             self.error("ERROR: There is no LLM profile set yet.")
-             return q, None
-
-         intent = self._classify_query(q)
-         return q, intent
-
+             self.widgets[key] = {
+                 "type": "text_input", "key": key, "id": id,
+                 "label": label, "placeholder": placeholder
+             }

      def clear_text_input_value(self, key):
          session[key] = ""
          session.modified = True


-     def button(self, key, label, callback
+     def button(self, key, id, label, callback, stream=False):
+         if stream == True:
+             self.is_streaming = True
          self.widgets[key] = {
-             "type": "button", "key": key,
-             "label": label, "callback": callback,
-             "stream": stream
+             "type": "button", "key": key, "id": id, "label": label, "callback": callback, "stream":stream
          }

-
-     def file_uploader(self, key, label, accept_multiple_files=False, callback=None):
+     def file_uploader(self, key, id, label, accept_multiple_files):
          if key not in self.widgets:
              self.widgets[key] = {
                  "type": "file_upload",
-                 "key": key, "label": label,
+                 "key": key, "id":id, "label": label,
                  "accept_multiple": accept_multiple_files,
-                 "callback": callback
              }

@@ -326,57 +400,44 @@ class SyntaxMUI:
          session.modified = True
          return sid

-
      def get_chat_history(self) -> list[tuple[str, str]]:
-         #
+         # Load the history for the _current_ chat session
          sid = self._sid()
          cid = self.get_session_id()
+         if session.get("user_id"):
+             # Logged-in: use SQLHistoryStore (Store). Locking handled inside history_store.py
+             return Store.load(str(session["user_id"]), cid)
+         # Anonymous: use PersistentHistoryStore (_Store) JSON files
          return _Store.load(sid, cid)
-
+

      def set_chat_history(self, history: list[tuple[str, str]], *, max_items: int | None = None) -> list[tuple[str, str]]:
          sid = self._sid()
          cid = self.get_session_id()
-         _Store.save(sid, cid, history)
-         session["chat_history"] = history[-30:]  # still mirror a thin copy into Flask’s session cookie for the UI
-         session.modified = True
-
          if session.get("user_id"):
-
-
-
-         #
-
-
-         return history if max_items is None else history[-max_items:]
+             # Logged-in: chats.db via Store (SQLHistoryStore)
+             Store.save(str(session["user_id"]), cid, history)
+         else:
+             # Anonymous: file-backed via _Store (PersistentHistoryStore)
+             _Store.save(sid, cid, history)


      def clear_chat_history(self):
-         """
-         Clear both the UI slice *and* the server-side history bucket
-         for this session_id + chat_id.
-         """
          if has_request_context():
-
-
-
-
-
-
-
-
-
-
-
-
-
-             {**s, "history": []} if s.get("id") == cid else s
-             for s in session["past_sessions"]
-             ]
+             sid = self._sid()
+             cid = self.get_session_id()
+
+             # delete the chat from the correct backend (DB for logged-in, file for anonymous)
+             if session.get("user_id"):
+                 Store.delete(session["user_id"], cid)
+             else:
+                 _Store.delete(sid, cid)
+
+             # rotate to a fresh empty chat (session remains metadata-only)
+             new_cid = str(uuid.uuid4())
+             session["current_session"] = {"id": new_cid, "title": "Current"}
+             session["active_chat_id"] = new_cid
              session.modified = True
-         else:
-             self._fallback_chat_history = []
-

      def bot_message(self, content, max_length=20):
          history = self.get_chat_history()
@@ -403,19 +464,14 @@ class SyntaxMUI:
      def write(self, content):
          self.bot_message(content)

+     def stream_write(self, chunk: str, end=False):
+         """Push a token to the SSE queue and, when end=True,
+         persist the whole thing to chat_history."""
+         from .routes import _stream_q
+         _stream_q.put(chunk)         # live update
+         if end:                      # final flush → history
+             self.bot_message(chunk)  # persists the final message

-     def markdown(self, md_text):
-         try:
-             import markdown
-             html = markdown.markdown(md_text)
-         except ImportError:
-             html = md_text
-         self.write(html)
-
-
-     def latex(self, math_text):
-         self.write(f"\\({math_text}\\)")
-

      def error(self, content):
          self.bot_message(f'<div style="color:red; font-weight:bold;">{content}</div>')
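The relocated `stream_write` helper above pushes each chunk onto the shared SSE queue (`routes._stream_q`) and persists a message to chat history only when called with `end=True`. A hedged sketch of how a caller might drive it, assuming a `smx` instance and the `process_query_stream` generator added later in this diff; the wiring of the callback itself is an assumption, not something this diff shows:

```python
# Illustrative only: stream tokens into the UI, then flush the final
# assembled answer to chat history with end=True.
def answer_callback(smx, query, context, history):
    parts = []
    for token in smx.process_query_stream(query, context, history):
        parts.append(token)
        smx.stream_write(token)                   # live SSE update, not persisted
    smx.stream_write("".join(parts), end=True)    # final flush -> chat history
```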
@@ -432,12 +488,15 @@ class SyntaxMUI:
      def info(self, content):
          self.bot_message(f'<div style="color:blue;">{content}</div>')

-
+
      def get_session_id(self):
-         """Return
+         """Return the chat id that is currently *active* in the UI."""
+         # Prefer a sticky id set by /load_session or when a new chat is started.
+         sticky = session.get("active_chat_id")
+         if sticky:
+             return sticky
          return session.get("current_session", {}).get("id")

-
      def add_user_chunks(self, session_id, chunks):
          """Append these text‐chunks under that session’s key."""
          self.user_file_chunks.setdefault(session_id, []).extend(chunks)
@@ -451,32 +510,25 @@ class SyntaxMUI:
      def clear_user_chunks(self, session_id):
          """Remove all stored chunks for a session (on chat‑clear or delete)."""
          self.user_file_chunks.pop(session_id, None)
-
-
-     def stream_write(self, chunk: str, end=False):
-         """Push a token to the SSE queue and, when end=True,
-         persist the whole thing to chat_history."""
-         from .routes import _stream_q
-         _stream_q.put(chunk)         # live update
-         if end:                      # final flush → history
-             self.bot_message(chunk)  # persists the final message
-

      # ──────────────────────────────────────────────────────────────
      # *********** LLM CLIENT HELPERS **********************
      # ──────────────────────────────────────────────────────────────
      def set_prompt_profile(self, profile):
-         self.
+         self.ai_chat_id = profile


      def set_prompt_instructions(self, instructions):
-         self.
-
+         self.ai_chat_instructions = instructions
+
+
+     def set_website_description(self, desc):
+         self.website_description = desc
+

      def embed_query(self, q):
          return embed_text(q)

-
      def smiv_index(self, sid):
          chunks = self.get_user_chunks(sid) or []
          count = len(chunks)
@@ -502,7 +554,6 @@ class SyntaxMUI:
              self._user_index_counts[sid] = count
          return self._user_indices[sid]

-
      def load_embed_model(self):
          client = load_embed_model()
          os.environ["PROVIDER"] = client["provider"]
@@ -510,276 +561,843 @@ class SyntaxMUI:
|
|
|
510
561
|
os.environ["OPENAI_API_KEY"] = client["api_key"]
|
|
511
562
|
return client
|
|
512
563
|
|
|
513
|
-
|
|
514
564
|
def save_embed_model(self, provider:str, model:str, api_key:str):
|
|
515
565
|
return save_embed_model(provider, model, api_key)
|
|
516
566
|
|
|
517
|
-
|
|
518
567
|
def delete_embed_key(self):
|
|
519
568
|
return delete_embed_key()
|
|
520
569
|
|
|
521
570
|
|
|
522
|
-
def
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
return OpenAI(api_key=api_key)
|
|
528
|
-
elif provider == "google":
|
|
529
|
-
# return OpenAI(api_key=api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
|
|
530
|
-
return genai.Client(api_key=api_key)
|
|
531
|
-
elif provider == "xai":
|
|
532
|
-
return OpenAI(api_key=api_key, base_url="https://api.x.ai/v1")
|
|
533
|
-
elif provider == "deepseek":
|
|
534
|
-
return OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
|
|
535
|
-
elif provider == "moonshotai":
|
|
536
|
-
return OpenAI(api_key=api_key, base_url="https://api.moonshot.ai/v1")
|
|
571
|
+
def get_gpt_models_latest(self):
|
|
572
|
+
return GPT_MODELS_LATEST
|
|
573
|
+
|
|
574
|
+
def get_text_input_value(self, key, default=""):
|
|
575
|
+
q = session.get(key, default)
|
|
537
576
|
|
|
577
|
+
intent = self.classify_query_intent(q)
|
|
578
|
+
intent = intent.strip().lower() if intent else ""
|
|
579
|
+
if intent not in {"none","user_docs","system_docs","hybrid"}:
|
|
580
|
+
self.error("Classify agency error")
|
|
581
|
+
return q, None
|
|
582
|
+
return q, intent
|
|
583
|
+
|
|
584
|
+
def enable_stream(self):
|
|
585
|
+
self.is_streaming = True
|
|
586
|
+
|
|
587
|
+
def stream(self):
|
|
588
|
+
return self.is_streaming
|
|
589
|
+
|
|
590
|
+
def get_stream_args(self):
|
|
591
|
+
return self.stream_args
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
def classify_query_intent(self, query: str) -> str:
|
|
595
|
+
from syntaxmatrix.gpt_models_latest import extract_output_text as _out, set_args
|
|
596
|
+
|
|
597
|
+
if not self._classification_profile:
|
|
598
|
+
classification_profile = _prof.get_profile('classification') or _prof.get_profile('chat') or _prof.get_profile('admin')
|
|
599
|
+
if not classification_profile:
|
|
600
|
+
return {"Error": "Set a profile for Classification"}
|
|
601
|
+
self._classification_profile = classification_profile
|
|
602
|
+
self._classification_profile['client'] = _prof.get_client(classification_profile)
|
|
603
|
+
|
|
604
|
+
_client = self._classification_profile['client']
|
|
605
|
+
_provider = self._classification_profile['provider']
|
|
606
|
+
_model = self._classification_profile['model']
|
|
607
|
+
|
|
608
|
+
# New instruction format with hybrid option
|
|
609
|
+
_intent_profile = "You are an intent classifier. Respond ONLY with the intent name."
|
|
610
|
+
_instructions = f"""
|
|
611
|
+
Classify the given query into ONE of these intents You must return ONLY the intent name with no comment or any preamble:
|
|
612
|
+
- "none": Casual chat/greetings
|
|
613
|
+
- "user_docs": Requires user-uploaded documents
|
|
614
|
+
- "system_docs": Requires company knowledge/docs
|
|
615
|
+
- "hybrid": Requires BOTH user docs AND company docs
|
|
616
|
+
|
|
617
|
+
Examples:
|
|
618
|
+
Query: "Hi there!" → none
|
|
619
|
+
Query: "Explain my uploaded contract" → user_docs
|
|
620
|
+
Query: "What's our refund policy?" → system_docs
|
|
621
|
+
Query: "How does my proposal align with company guidelines?" → hybrid
|
|
622
|
+
Query: "What is the weather today?" → none
|
|
623
|
+
Query: "Cross-reference the customer feedback from my uploaded survey results with our product's feature list in the official documentation." → hybrid
|
|
624
|
+
|
|
625
|
+
Now classify:
|
|
626
|
+
Query: "{query}"
|
|
627
|
+
Intent:
|
|
628
|
+
"""
|
|
629
|
+
openai_sdk_messages = [
|
|
630
|
+
{"role": "system", "content": _intent_profile},
|
|
631
|
+
{"role": "user", "content": _instructions}
|
|
632
|
+
]
|
|
633
|
+
|
|
634
|
+
def google_classify_query():
|
|
635
|
+
response = _client.models.generate_content(
|
|
636
|
+
model=_model,
|
|
637
|
+
contents=f"{_intent_profile}\n{_instructions}\n\n"
|
|
638
|
+
)
|
|
639
|
+
return response.text.strip().lower()
|
|
640
|
+
|
|
641
|
+
def gpt_models_latest_classify_query(reasoning_effort = "medium", verbosity = "low"):
|
|
642
|
+
|
|
643
|
+
args = set_args(
|
|
644
|
+
model=_model,
|
|
645
|
+
instructions=_intent_profile,
|
|
646
|
+
input=_instructions,
|
|
647
|
+
reasoning_effort=reasoning_effort,
|
|
648
|
+
verbosity=verbosity,
|
|
649
|
+
)
|
|
650
|
+
try:
|
|
651
|
+
resp = _client.responses.create(**args)
|
|
652
|
+
answer = _out(resp).strip().lower()
|
|
653
|
+
return answer if answer else ""
|
|
654
|
+
except Exception as e:
|
|
655
|
+
return f"Error!"
|
|
656
|
+
|
|
657
|
+
def anthropic_classify_query():
|
|
658
|
+
try:
|
|
659
|
+
response = _client.messages.create(
|
|
660
|
+
model=_model,
|
|
661
|
+
max_tokens=1024,
|
|
662
|
+
system=_intent_profile,
|
|
663
|
+
messages=[{"role": "user", "content":_instructions}],
|
|
664
|
+
stream=False,
|
|
665
|
+
)
|
|
666
|
+
return response.content[0].text.strip()
|
|
667
|
+
|
|
668
|
+
except Exception as e:
|
|
669
|
+
return f"Error: {str(e)}"
|
|
670
|
+
|
|
671
|
+
def openai_sdk_classify_query():
|
|
672
|
+
try:
|
|
673
|
+
response = _client.chat.completions.create(
|
|
674
|
+
model=_model,
|
|
675
|
+
messages=openai_sdk_messages,
|
|
676
|
+
temperature=0,
|
|
677
|
+
max_tokens=100
|
|
678
|
+
)
|
|
679
|
+
intent = response.choices[0].message.content.strip().lower()
|
|
680
|
+
return intent if intent else ""
|
|
681
|
+
except Exception as e:
|
|
682
|
+
return f"Error!"
|
|
683
|
+
|
|
684
|
+
if _provider == "google":
|
|
685
|
+
intent = google_classify_query()
|
|
686
|
+
return intent
|
|
687
|
+
if _model in self.get_gpt_models_latest():
|
|
688
|
+
intent = gpt_models_latest_classify_query()
|
|
689
|
+
return intent
|
|
690
|
+
if _provider == "anthropic":
|
|
691
|
+
intent = anthropic_classify_query()
|
|
692
|
+
return intent
|
|
693
|
+
else:
|
|
694
|
+
intent = openai_sdk_classify_query()
|
|
695
|
+
return intent
|
|
696
|
+
|
|
538
697
|
|
|
539
|
-
# @staticmethod
|
|
540
698
|
def generate_contextual_title(self, chat_history):
|
|
541
699
|
|
|
542
|
-
if not self.
|
|
543
|
-
|
|
544
|
-
if not
|
|
545
|
-
return
|
|
700
|
+
if not self._summarization_profile:
|
|
701
|
+
summarization_profile = _prof.get_profile('summarization') or _prof.get_profile('chat') or _prof.get_profile('admin')
|
|
702
|
+
if not summarization_profile:
|
|
703
|
+
return {"Error": "Chat profile not set yet."}
|
|
546
704
|
|
|
547
|
-
self.
|
|
548
|
-
self.
|
|
705
|
+
self._summarization_profile = summarization_profile
|
|
706
|
+
self._summarization_profile['client'] = _prof.get_client(summarization_profile)
|
|
549
707
|
|
|
550
708
|
conversation = "\n".join([f"{role}: {msg}" for role, msg in chat_history])
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
client = self.labeller_profile['client']
|
|
558
|
-
model = self.labeller_profile['model']
|
|
709
|
+
_title_profile = "You are a title generator that creates concise and relevant titles for the given conversations."
|
|
710
|
+
_instructions = f"""
|
|
711
|
+
Generate a contextual title (5 short words max) from the given Conversation History
|
|
712
|
+
The title should be concise - with no preamble, relevant, and capture the essence of this Conversation: \n{conversation}.\n\n
|
|
713
|
+
return only the title.
|
|
714
|
+
"""
|
|
559
715
|
|
|
716
|
+
_client = self._summarization_profile['client']
|
|
717
|
+
_provider = self._summarization_profile['provider']
|
|
718
|
+
_model = self._summarization_profile['model']
|
|
719
|
+
|
|
560
720
|
def google_generated_title():
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
721
|
+
try:
|
|
722
|
+
response = _client.models.generate_content(
|
|
723
|
+
model=_model,
|
|
724
|
+
contents=f"{_title_profile}\n{_instructions}"
|
|
725
|
+
)
|
|
726
|
+
return response.text.strip()
|
|
727
|
+
except Exception as e:
|
|
728
|
+
return f"Summary agent error!"
|
|
729
|
+
|
|
730
|
+
def gpt_models_latest_generated_title():
|
|
731
|
+
try:
|
|
732
|
+
args = set_args(
|
|
733
|
+
model=_model,
|
|
734
|
+
instructions=_title_profile,
|
|
735
|
+
input=_instructions,
|
|
736
|
+
# reasoning_effort=reasoning_effort,
|
|
737
|
+
# verbosity=verbosity,
|
|
738
|
+
)
|
|
739
|
+
|
|
740
|
+
resp = _client.responses.create(**args)
|
|
741
|
+
return _out(resp).strip()
|
|
742
|
+
except Exception as e:
|
|
743
|
+
return f"Summary agent error!"
|
|
566
744
|
|
|
745
|
+
def anthropic_generated_title():
|
|
746
|
+
try:
|
|
747
|
+
response = _client.messages.create(
|
|
748
|
+
model=_model,
|
|
749
|
+
max_tokens=50,
|
|
750
|
+
system=_title_profile,
|
|
751
|
+
messages=[{"role": "user", "content":_instructions}],
|
|
752
|
+
stream=False,
|
|
753
|
+
)
|
|
754
|
+
return response.content[0].text.strip()
|
|
755
|
+
except Exception as e:
|
|
756
|
+
return f"Summary agent error!"
|
|
757
|
+
|
|
567
758
|
def openai_sdk_generated_title():
|
|
568
759
|
prompt = [
|
|
569
|
-
{
|
|
570
|
-
|
|
571
|
-
"content": instructions
|
|
572
|
-
},
|
|
760
|
+
{ "role": "system", "content": _title_profile },
|
|
761
|
+
{ "role": "user", "content": _instructions },
|
|
573
762
|
]
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
763
|
+
try:
|
|
764
|
+
response = _client.chat.completions.create(
|
|
765
|
+
model=_model,
|
|
766
|
+
messages=prompt,
|
|
767
|
+
temperature=0.3,
|
|
768
|
+
max_tokens=50
|
|
769
|
+
)
|
|
770
|
+
title = response.choices[0].message.content.strip().lower()
|
|
771
|
+
return title if title else ""
|
|
772
|
+
except Exception as e:
|
|
773
|
+
return f"Summary agent error!"
|
|
584
774
|
|
|
585
|
-
if
|
|
775
|
+
if _provider == "google":
|
|
586
776
|
title = google_generated_title()
|
|
777
|
+
elif _model in self.get_gpt_models_latest():
|
|
778
|
+
title = gpt_models_latest_generated_title()
|
|
779
|
+
elif _provider == "anthropic":
|
|
780
|
+
title = anthropic_generated_title()
|
|
587
781
|
else:
|
|
588
782
|
title = openai_sdk_generated_title()
|
|
589
783
|
return title
|
|
784
|
+
|
|
590
785
|
|
|
786
|
+
def stream_process_query(self, query, context, conversations, sources):
|
|
787
|
+
self.stream_args['query'] = query
|
|
788
|
+
self.stream_args['context'] = context
|
|
789
|
+
self.stream_args['conversations'] = conversations
|
|
790
|
+
self.stream_args['sources'] = sources
|
|
791
|
+
|
|
591
792
|
|
|
592
|
-
def
|
|
793
|
+
def process_query_stream(self, query: str, context: str, history: list, stream=True) -> Generator[str, None, None]:
|
|
794
|
+
|
|
795
|
+
if not self._chat_profile:
|
|
796
|
+
chat_profile = _prof.get_profile("chat") or _prof.get_profile("admin")
|
|
797
|
+
if not chat_profile:
|
|
798
|
+
yield """<p style='color:red;'>Error: Chat profile is not configured. Add a chat profile inside the admin panel or contact your administrator.</p>
|
|
799
|
+
"""
|
|
800
|
+
return None
|
|
801
|
+
self._chat_profile = chat_profile
|
|
802
|
+
self._chat_profile['client'] = _prof.get_client(chat_profile)
|
|
803
|
+
|
|
804
|
+
_provider = self._chat_profile['provider']
|
|
805
|
+
_client = self._chat_profile['client']
|
|
806
|
+
_model = self._chat_profile['model']
|
|
807
|
+
|
|
808
|
+
_contents = f"""
|
|
809
|
+
{self.smxai_instructions}\n\n
|
|
810
|
+
Question: {query}\n
|
|
811
|
+
Context: {context}\n\n
|
|
812
|
+
History: {history}\n\n
|
|
813
|
+
Use conversation continuity if available.
|
|
814
|
+
"""
|
|
593
815
|
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
816
|
+
try:
|
|
817
|
+
if _provider == "google": # Google, non openai skd series
|
|
818
|
+
|
|
819
|
+
for chunk in _client.models.generate_content_stream(
|
|
820
|
+
model=_model,
|
|
821
|
+
contents=_contents,
|
|
822
|
+
config=types.GenerateContentConfig(
|
|
823
|
+
system_instruction=self.smxai_identity,
|
|
824
|
+
temperature=0.3,
|
|
825
|
+
max_output_tokens=1024,
|
|
826
|
+
),
|
|
827
|
+
):
|
|
828
|
+
|
|
829
|
+
yield chunk.text
|
|
608
830
|
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
831
|
+
elif _provider == "openai" and _model in self.get_gpt_models_latest(): # GPt 5 series
|
|
832
|
+
input_prompt = (
|
|
833
|
+
f"{self.smxai_instructions}\n\n"
|
|
834
|
+
f"Generate a response to this query:\n{query}\n"
|
|
835
|
+
f"based on this given context:\n{context}\n\n"
|
|
836
|
+
f"(Use conversation continuity if available.)"
|
|
837
|
+
)
|
|
838
|
+
sid = self.get_session_id()
|
|
839
|
+
prev_id = self._gpt_models_latest_prev_resp_ids.get(sid)
|
|
840
|
+
args = set_args(model=_model, instructions=self.smxai_identity, input=input_prompt, previous_id=prev_id, store=True)
|
|
841
|
+
|
|
842
|
+
with _client.responses.stream(**args) as s:
|
|
843
|
+
for event in s:
|
|
844
|
+
if event.type == "response.output_text.delta" and event.delta:
|
|
845
|
+
yield event.delta
|
|
846
|
+
elif event.type == "response.error":
|
|
847
|
+
raise RuntimeError(str(event.error))
|
|
848
|
+
final = s.get_final_response()
|
|
849
|
+
if getattr(final, "id", None):
|
|
850
|
+
self._gpt_models_latest_prev_resp_ids[sid] = final.id
|
|
851
|
+
|
|
852
|
+
elif _provider == "anthropic":
|
|
853
|
+
with _client.messages.stream(
|
|
854
|
+
max_tokens=1024,
|
|
855
|
+
messages=[{"role": "user", "content":f"{self.smxai_identity}\n\n {_contents}"},],
|
|
856
|
+
model=_model,
|
|
857
|
+
) as stream:
|
|
858
|
+
for text in stream.text_stream:
|
|
859
|
+
yield text # end="", flush=True
|
|
860
|
+
|
|
861
|
+
else: # Assumes standard openai_sdk
|
|
862
|
+
openai_sdk_prompt = [
|
|
863
|
+
{"role": "system", "content": self.smxai_identity},
|
|
864
|
+
{"role": "user", "content": f"{self.smxai_instructions}\n\nGenerate response to this query: {query}\nbased on this context:\n{context}\nand history:\n{history}\n\nUse conversation continuity if available.)"},
|
|
865
|
+
]
|
|
866
|
+
response = _client.chat.completions.create(
|
|
867
|
+
model=_model,
|
|
868
|
+
messages=openai_sdk_prompt,
|
|
869
|
+
stream=True,
|
|
870
|
+
)
|
|
871
|
+
for chunk in response:
|
|
872
|
+
token = getattr(chunk.choices[0].delta, "content", "")
|
|
873
|
+
if token:
|
|
874
|
+
yield token
|
|
875
|
+
except Exception as e:
|
|
876
|
+
yield f"Error during streaming: {type(e).__name__}: {e}"
|
|
877
|
+
|
|
658
878
|
def process_query(self, query, context, history, stream=False):
|
|
659
|
-
|
|
660
|
-
if not self.
|
|
661
|
-
chat_profile =
|
|
879
|
+
|
|
880
|
+
if not self._chat_profile:
|
|
881
|
+
chat_profile = _prof.get_profile("chat") or _prof.get_profile("admin")
|
|
662
882
|
if not chat_profile:
|
|
663
|
-
|
|
883
|
+
return """<p style='color:red;'>Error: Chat profile is not configured. Add a chat profile inside the admin panel or contact your administrator.</p>
|
|
884
|
+
"""
|
|
664
885
|
return
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
self.
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
{self.
|
|
886
|
+
|
|
887
|
+
self._chat_profile = chat_profile
|
|
888
|
+
self._chat_profile['client'] = _prof.get_client(chat_profile)
|
|
889
|
+
_provider = self._chat_profile['provider']
|
|
890
|
+
_client = self._chat_profile['client']
|
|
891
|
+
_model = self._chat_profile['model']
|
|
892
|
+
_contents = f"""
|
|
893
|
+
{self.smxai_instructions}\n\n
|
|
673
894
|
Question: {query}\n
|
|
674
|
-
Context: {context}\n
|
|
675
|
-
History: {history}
|
|
895
|
+
Context: {context}\n\n
|
|
896
|
+
History: {history}\n\n
|
|
897
|
+
Use conversation continuity if available.
|
|
676
898
|
"""
|
|
677
899
|
|
|
678
900
|
openai_sdk_prompt = [
|
|
679
|
-
{"role": "system", "content": self.
|
|
680
|
-
{"role": "user",
|
|
681
|
-
|
|
682
|
-
|
|
901
|
+
{"role": "system", "content": self.smxai_identity},
|
|
902
|
+
{"role": "user", "content": f"""{self.smxai_instructions}\n\n
|
|
903
|
+
Generate response to this query: {query}\n
|
|
904
|
+
based on this context:\n{context}\n
|
|
905
|
+
and history:\n{history}\n\n
|
|
906
|
+
Use conversation continuity if available.)
|
|
907
|
+
"""
|
|
908
|
+
},
|
|
683
909
|
]
|
|
684
910
|
|
|
685
911
|
def google_process_query():
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
912
|
+
try:
|
|
913
|
+
response = _client.models.generate_content(
|
|
914
|
+
model=_model,
|
|
915
|
+
contents=_contents,
|
|
916
|
+
config=types.GenerateContentConfig(
|
|
917
|
+
system_instruction=self.smxai_identity,
|
|
918
|
+
temperature=0.3,
|
|
919
|
+
max_output_tokens=1024,
|
|
920
|
+
),
|
|
921
|
+
)
|
|
922
|
+
answer = response.text
|
|
923
|
+
|
|
924
|
+
# answer = strip_html(answer)
|
|
925
|
+
return answer
|
|
926
|
+
except Exception as e:
|
|
927
|
+
return f"Error: {str(e)}"
|
|
928
|
+
|
|
929
|
+
def gpt_models_latest_process_query(previous_id: str | None, reasoning_effort = "minimal", verbosity = "low"):
|
|
930
|
+
"""
|
|
931
|
+
Returns (answer_text, new_response_id)
|
|
932
|
+
"""
|
|
933
|
+
# Prepare the prompt with conversation history and context
|
|
934
|
+
input = (
|
|
935
|
+
f"{self.smxai_instructions}\n\n"
|
|
936
|
+
f"Generate a response to this query:\n{query}\n"
|
|
937
|
+
f"based on this given context:\n{context}\n\n"
|
|
938
|
+
f"(Use conversation continuity if available.)"
|
|
689
939
|
)
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
940
|
+
|
|
941
|
+
sid = self.get_session_id()
|
|
942
|
+
prev_id = self._gpt_models_latest_prev_resp_ids.get(sid)
|
|
943
|
+
|
|
944
|
+
args = set_args(
|
|
945
|
+
model=_model,
|
|
946
|
+
instructions=self.smxai_identity,
|
|
947
|
+
input=input,
|
|
948
|
+
previous_id=prev_id,
|
|
949
|
+
store=True,
|
|
950
|
+
reasoning_effort=reasoning_effort,
|
|
951
|
+
verbosity=verbosity
|
|
952
|
+
)
|
|
953
|
+
try:
|
|
954
|
+
# Non-stream path
|
|
955
|
+
resp = _client.responses.create(**args)
|
|
956
|
+
answer = _out(resp)
|
|
957
|
+
if getattr(resp, "id", None):
|
|
958
|
+
self._gpt_models_latest_prev_resp_ids[sid] = resp.id
|
|
959
|
+
|
|
960
|
+
# answer = strip_html(answer)
|
|
961
|
+
return answer
|
|
962
|
+
|
|
963
|
+
except Exception as e:
|
|
964
|
+
return f"Error: {type(e).__name__}: {e}"
|
|
965
|
+
|
|
966
|
+
def anthropic_process_query():
|
|
967
|
+
try:
|
|
968
|
+
response = _client.messages.create(
|
|
969
|
+
model=_model,
|
|
970
|
+
max_tokens=1024,
|
|
971
|
+
system=self.self.smxai_identity,
|
|
972
|
+
messages=[{"role": "user", "content":_contents}],
|
|
973
|
+
stream=False,
|
|
974
|
+
)
|
|
975
|
+
return response.content[0].text.strip()
|
|
976
|
+
|
|
977
|
+
except Exception as e:
|
|
978
|
+
return f"Error: {str(e)}"
|
|
979
|
+
|
|
693
980
|
def openai_sdk_process_query():
|
|
694
981
|
|
|
695
982
|
try:
|
|
696
|
-
response =
|
|
697
|
-
model=
|
|
983
|
+
response = _client.chat.completions.create(
|
|
984
|
+
model=_model,
|
|
698
985
|
messages=openai_sdk_prompt,
|
|
699
|
-
|
|
700
|
-
max_tokens=1024,
|
|
701
|
-
stream=stream
|
|
986
|
+
stream=False,
|
|
702
987
|
)
|
|
703
988
|
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
for chunk in response:
|
|
708
|
-
token = getattr(chunk.choices[0].delta, "content", "")
|
|
709
|
-
if not token:
|
|
710
|
-
continue
|
|
711
|
-
parts.append(token)
|
|
712
|
-
self.stream_write(token)
|
|
713
|
-
|
|
714
|
-
self.stream_write("[END]") # close the SSE bubble
|
|
715
|
-
answer = "".join(parts)
|
|
716
|
-
return answer
|
|
717
|
-
else:
|
|
718
|
-
# -------- one-shot buffered --------
|
|
719
|
-
answer = response.choices[0].message.content
|
|
720
|
-
return answer
|
|
989
|
+
# -------- one-shot buffered --------
|
|
990
|
+
answer = response.choices[0].message.content .strip()
|
|
991
|
+
return answer
|
|
721
992
|
except Exception as e:
|
|
722
993
|
return f"Error: {str(e)}"
|
|
723
|
-
|
|
724
|
-
if
|
|
994
|
+
|
|
995
|
+
if _provider == "google":
|
|
725
996
|
return google_process_query()
|
|
726
|
-
|
|
727
|
-
return
|
|
728
|
-
|
|
997
|
+
if _provider == "openai" and _model in self.get_gpt_models_latest():
|
|
998
|
+
return gpt_models_latest_process_query(self._gpt_models_latest_prev_resp_ids.get(self.get_session_id()))
|
|
999
|
+
if _provider == "anthropic":
|
|
1000
|
+
return anthropic_process_query()
|
|
1001
|
+
return openai_sdk_process_query()
|
|
729
1002
|
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
if not self.coder_profile:
|
|
733
|
-
coder_profile = prof.get_profile('coder') or prof.get_profile('chat') or {}
|
|
734
|
-
if not coder_profile:
|
|
735
|
-
return
|
|
1003
|
+
|
|
1004
|
+
def repair_python_cell(self, py_code: str) -> str:
|
|
736
1005
|
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
Output only the working code needed. Assume df is already defined.
|
|
747
|
-
Produce at least one visible result: (syntaxmatrix.display.show(), display(), plt.show()).
|
|
1006
|
+
_CELL_REPAIR_RULES = """
|
|
1007
|
+
Fix the Python cell to satisfy:
|
|
1008
|
+
- Single valid cell; imports at the top.
|
|
1009
|
+
- Do not import or invoke or use 'python-dotenv' or 'dotenv' because it's not needed.
|
|
1010
|
+
- No top-level statements between if/elif/else branches.
|
|
1011
|
+
- Regression must use either sklearn with train_test_split (then X_test exists) and R^2/MAE/RMSE,
|
|
1012
|
+
or statsmodels OLS. No accuracy_score in regression.
|
|
1013
|
+
- Keep all plotting + savefig + BytesIO + display inside the branch that created the figure.
|
|
1014
|
+
- Return ONLY the corrected cell.
|
|
748
1015
|
"""
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
1016
|
+
code = textwrap.dedent(py_code or "").strip()
|
|
1017
|
+
needs_fix = False
|
|
1018
|
+
if re.search(r"\baccuracy_score\b", code) and re.search(r"\bLinearRegression\b|\bOLS\b", code):
|
|
1019
|
+
needs_fix = True
|
|
1020
|
+
if re.search(r"\bX_test\b", code) and not re.search(r"\bX_test\s*=", code):
|
|
1021
|
+
needs_fix = True
|
|
1022
|
+
try:
|
|
1023
|
+
ast.parse(code)
|
|
1024
|
+
except SyntaxError:
|
|
1025
|
+
needs_fix = True
|
|
1026
|
+
if not needs_fix:
|
|
1027
|
+
return code
|
|
1028
|
+
_prompt = f"```python\n{code}\n```"
|
|
1029
|
+
|
|
1030
|
+
repair_profile = _prof.get_profile("vision2text") or _prof.get_profile("admin")
|
|
1031
|
+
if not repair_profile:
|
|
1032
|
+
return (
|
|
1033
|
+
'<div class="smx-alert smx-alert-warn">'
|
|
1034
|
+
'No LLM profile configured for <code>coding</code> (or <code>admin</code>). <br>'
|
|
1035
|
+
'Please, add the LLM profile inside the admin panel or contact your Administrator.'
|
|
1036
|
+
'</div>'
|
|
754
1037
|
)
|
|
755
|
-
return response.text
|
|
756
1038
|
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
1039
|
+
_client = _prof.get_client(repair_profile)
|
|
1040
|
+
_provider = repair_profile['provider'].lower()
|
|
1041
|
+
_model = repair_profile['model']
|
|
1042
|
+
|
|
1043
|
+
#1 Google
|
|
1044
|
+
if _provider == "google":
|
|
1045
|
+
from google.genai import types
|
|
1046
|
+
|
|
1047
|
+
fixed = _client.models.generate_content(
|
|
1048
|
+
model=_model,
|
|
1049
|
+
contents=_prompt,
|
|
1050
|
+
config=types.GenerateContentConfig(
|
|
1051
|
+
system_instruction=_CELL_REPAIR_RULES,
|
|
1052
|
+
temperature=0.8,
|
|
1053
|
+
max_output_tokens=1024,
|
|
1054
|
+
),
|
|
1055
|
+
)
|
|
1056
|
+
|
|
1057
|
+
#2 Openai
|
|
1058
|
+
elif _provider == "openai" and _model in GPT_MODELS_LATEST:
|
|
1059
|
+
|
|
1060
|
+
args = set_args(
|
|
1061
|
+
model=_model,
|
|
1062
|
+
instructions=_CELL_REPAIR_RULES,
|
|
1063
|
+
input=[{"role": "user", "content": _prompt}],
|
|
1064
|
+
previous_id=None,
|
|
1065
|
+
store=False,
|
|
1066
|
+
reasoning_effort="medium",
|
|
1067
|
+
verbosity="medium",
|
|
1068
|
+
)
|
|
1069
|
+
fixed = _out(_client.responses.create(**args))
|
|
1070
|
+
|
|
1071
|
+
# Anthropic
|
|
1072
|
+
elif _provider == "anthropic":
|
|
1073
|
+
|
|
1074
|
+
fixed = _client.messages.create(
|
|
1075
|
+
model=_model,
|
|
1076
|
+
max_tokens=1024,
|
|
1077
|
+
system=_CELL_REPAIR_RULES,
|
|
1078
|
+
messages=[{"role": "user", "content":_prompt}],
|
|
1079
|
+
stream=False,
|
|
1080
|
+
)
|
|
1081
|
+
|
|
1082
|
+
# OpenAI SDK
|
|
1083
|
+
else:
|
|
1084
|
+
fixed = _client.chat.completions.create(
|
|
1085
|
+
model=_model,
|
|
1086
|
+
messages=[
|
|
1087
|
+
{"role": "system", "content":_CELL_REPAIR_RULES},
|
|
1088
|
+
{"role": "user", "content":_prompt},
|
|
1089
|
+
],
|
|
1090
|
+
max_tokens=1024,
|
|
763
1091
|
)
|
|
764
|
-
|
|
1092
|
+
|
|
1093
|
+
try:
|
|
1094
|
+
ast.parse(fixed);
|
|
1095
|
+
return fixed
|
|
1096
|
+
except Exception:
|
|
1097
|
+
return code
|
|
1098
|
+
|
|
1099
|
+
def get_last_llm_usage(self):
|
|
1100
|
+
return getattr(self, "_last_llm_usage", None)
|
|
1101
|
+
|
|
1102
|
+
def ai_generate_code(self, refined_question, tasks, df):
|
|
1103
|
+
|
|
1104
|
+
def normalise_llm_code(s: str) -> str:
|
|
1105
|
+
s = s.replace("\t", " ")
|
|
1106
|
+
s = textwrap.dedent(s)
|
|
1107
|
+
lines = s.splitlines()
|
|
1108
|
+
|
|
1109
|
+
# drop leading blank lines
|
|
1110
|
+
while lines and not lines[0].strip():
|
|
1111
|
+
lines.pop(0)
|
|
1112
|
+
|
|
1113
|
+
# if everything is still indented >=4 spaces, shift left
|
|
1114
|
+
indents = [len(l) - len(l.lstrip(" ")) for l in lines if l.strip()]
|
|
1115
|
+
if indents and min(indents) >= 4:
|
|
1116
|
+
m = min(indents)
|
|
1117
|
+
lines = [l[m:] if len(l) >= m else l for l in lines]
|
|
1118
|
+
|
|
1119
|
+
return "\n".join(lines)
|
|
765
1120
|
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
else:
|
|
769
|
-
code = others_generate_code()
|
|
1121
|
+
CONTEXT = f"Columns: {list(df.columns)}\n\nDtypes: {df.dtypes.astype(str).to_dict()}\n\n"
|
|
1122
|
+
AVAILABLE_COLUMNS = list(df.columns)
|
|
770
1123
|
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
1124
|
+
# --- SMX: normalise tasks coming from intent agent ---
|
|
1125
|
+
if isinstance(tasks, str):
|
|
1126
|
+
import json, ast, re
|
|
1127
|
+
try:
|
|
1128
|
+
tasks_parsed = json.loads(tasks)
|
|
1129
|
+
except Exception:
|
|
1130
|
+
try:
|
|
1131
|
+
tasks_parsed = ast.literal_eval(tasks)
|
|
1132
|
+
except Exception:
|
|
1133
|
+
tasks_parsed = re.findall(r"[A-Za-z_]+", tasks)
|
|
1134
|
+
tasks = tasks_parsed
|
|
1135
|
+
if not isinstance(tasks, list):
|
|
1136
|
+
tasks = [str(tasks)]
|
|
1137
|
+
tasks = [str(t).strip().lower() for t in tasks if str(t).strip()]
|
|
1138
|
+
|
|
1139
|
+
ai_profile = """
|
|
1140
|
+
- You are a Python expert specializing in data science and machine learning.
|
|
1141
|
+
- Your task is to generate a single, complete, production-quality, executable Python script for a Jupyter-like Python kernel, based on the given instructions.
|
|
1142
|
+
- The dataset is already loaded as a pandas DataFrame named `df` (no file I/O or file uploads).
|
|
1143
|
+
- Make a copy of `df` and name it `df_copy`. Make sure `df_copy` is preprocessed and cleaned, named `df_cleaned`, if not already done so. Then use `df_cleaned` to perform the ML tasks described in the given context.
|
|
1144
|
+
- Select your features and target, from `df_cleaned`, with care and name it `required_cols`
|
|
1145
|
+
- Create your 'df_filtered by doing: df_filtered = df_cleaned[required_cols].
|
|
1146
|
+
- Use the {TEMPLATE_CATALOGUE} below to educate yourself on which visualizations you will implement in the code.
|
|
1147
|
+
- The final output MUST be the complete, executable Python code only, enclosed in a single markdown code block (```python ... ```), which is required to fulfill the user's request. See the {tasks} below.
|
|
1148
|
+
- Do not include any explanatory text or markdown outside the code block.
|
|
1149
|
+
"""
|
|
1150
|
+
|
|
1151
|
+
TEMPLATE_CATALOGUE = """
|
|
1152
|
+
### Available SyntaxMatrix templates (use these instead of inventing new helpers)
|
|
1153
|
+
|
|
1154
|
+
Visualisation templates (dataset-agnostic):
|
|
1155
|
+
- viz_pie(df, category_col=None, top_k=8): pie/donut shares within a category.
|
|
1156
|
+
- viz_stacked_bar(df, x=None, hue=None, normalise=True): composition across groups.
|
|
1157
|
+
- viz_count_bar(df, category_col=None, top_k=12): counts/denominators by category.
|
|
1158
|
+
- viz_box(df, x=None, y=None): spread/outliers of numeric by category.
|
|
1159
|
+
- viz_scatter(df, x=None, y=None, hue=None): relationship between two numeric vars.
|
|
1160
|
+
- viz_distribution(df, col=None): histogram-style distribution for numeric.
|
|
1161
|
+
- viz_kde(df, col=None): density curve for numeric.
|
|
1162
|
+
- viz_area(df, time_col=None, y_col=None): area/trend over time.
|
|
1163
|
+
- viz_line(df, x=None, y=None, hue=None): line/trend plot.
|
|
1164
|
+
|
|
1165
|
+
ML/stat templates:
|
|
1166
|
+
- classification(df): standard classification pipeline + metrics + plots.
|
|
1167
|
+
- regression(df): standard regression pipeline + metrics + plots.
|
|
1168
|
+
- clustering(df): clustering workflow + cluster plots.
|
|
1169
|
+
- anomaly_detection(df)
|
|
1170
|
+
- ts_anomaly_detection(df)
|
|
1171
|
+
- time_series_forecasting(df)
|
|
1172
|
+
- time_series_classification(df, entity_col, time_col, target_col)
|
|
1173
|
+
- dimensionality_reduction(df)
|
|
1174
|
+
- feature_selection(df)
|
|
1175
|
+
- eda_overview(df)
|
|
1176
|
+
- eda_correlation(df)
|
|
1177
|
+
- multilabel_classification(df, label_cols)
|
|
1178
|
+
- recommendation(df)
|
|
1179
|
+
- topic_modelling(df)
|
|
1180
|
+
"""
|
|
1181
|
+
|
|
1182
|
+
instructions = (
|
|
1183
|
+
"### Context"
|
|
1184
|
+
f"- DataFrame - (`df`): {df}"
|
|
1185
|
+
f"- Schema (names → dtypes): {CONTEXT}"
|
|
1186
|
+
f"- Row count: {len(df)}"
|
|
1187
|
+
f"- Task description: {refined_question}"
|
|
1188
|
+
f"- Tasks: {tasks}"
|
|
1189
|
+
f"- Available columns: {AVAILABLE_COLUMNS}"
|
|
1190
|
+
f"- Template catalogue: {TEMPLATE_CATALOGUE}"
|
|
1191
|
+
|
|
1192
|
+
"""
|
|
1193
|
+
### Template rules
|
|
1194
|
+
- You MAY call a template if it matches the task.
|
|
1195
|
+
- Do NOT invent template names.
|
|
1196
|
+
- If no template fits, write minimal direct pandas/sklearn/seaborn code instead.
|
|
1197
|
+
- Keep the solution short: avoid writing wrappers/utilities already handled by SyntaxMatrix hardener.
|
|
1198
|
+
|
|
1199
|
+
#### Template selection hint examples:
|
|
1200
|
+
- If the task asks for pie/donut/composition shares → use viz_pie.
|
|
1201
|
+
- If it asks for denominators/counts per category → viz_count_bar.
|
|
1202
|
+
- If it asks for spread/outliers/comparison across groups → viz_box.
|
|
1203
|
+
- If it asks for relationship / “X vs Y” → viz_scatter.
|
|
1204
|
+
- If it asks for trend over time → viz_line or viz_area.
|
|
1205
|
+
|
|
1206
|
+
### Hard requirements
|
|
1207
|
+
1) Code only. No markdown, no comments, no explanations.
|
|
1208
|
+
2) Import everything you use explicitly.
|
|
1209
|
+
- Use pandas/numpy/matplotlib by default.
|
|
1210
|
+
- Seaborn may be unavailable at runtime; **do not import seaborn inside your code**.
|
|
1211
|
+
- If you call sns.*, assume sns is already defined by the framework.
|
|
1212
|
+
3) Avoid deprecated / removed APIs**, e.g.:
|
|
1213
|
+
- pandas: do not use `.append`, `.ix`, `.as_matrix`; prefer current patterns.
|
|
1214
|
+
- seaborn: do not use `distplot`; avoid `pairplot` on very large data unless sampling.
|
|
1215
|
+
- scikit-learn: import from `sklearn.model_selection` (not `sklearn.cross_validation`);
|
|
1216
|
+
set `random_state=42` where relevant.
|
|
1217
|
+
4) Be defensive, but avoid hard-failing on optional fields:
|
|
1218
|
+
- If the primary column, needed to answer the question, is missing, review your copy of the `df` again.
|
|
1219
|
+
Make sure that you selected the proper column.
|
|
1220
|
+
Never use a column/variable which isn't available or defined.
|
|
1221
|
+
- If a secondary/extra column is missing, show a warning with `show(...)` and continue using available fields.
|
|
1222
|
+
- Handle missing values sensibly (drop rows for simple EDA; use `ColumnTransformer` + `SimpleImputer` for modelling).
|
|
1223
|
+
- For categorical features in ML, use `OneHotEncoder(handle_unknown="ignore")`
|
|
1224
|
+
inside a `Pipeline`/`ColumnTransformer` (no `LabelEncoder` on features).
|
|
1225
|
+
5) Keep it fast (kernel timeout ~8s):
|
|
1226
|
+
- For plots on large frames (>20k rows), downsample to ~1,000 rows
|
|
1227
|
+
(`df.sample(1000, random_state=42)`) unless aggregation is more appropriate.
|
|
1228
|
+
- Prefer vectorised ops; avoid O(n²) Python loops.
|
|
1229
|
+
6) Keep the solution compact:
|
|
1230
|
+
- Do not define large helper libraries or long “required column” sets.
|
|
1231
|
+
- Aim for ≤120 lines excluding imports.
|
|
1232
|
+
7) Always produce at least one visible result at the end:
|
|
1233
|
+
- If plotting with matplotlib/seaborn: call `plt.tight_layout(); plt.show()`.
|
|
1234
|
+
- If producing a table or metrics:
|
|
1235
|
+
`from syntaxmatrix.display import show` then `show(object_or_dataframe)`.
|
|
1236
|
+
8) Follow task type conventions:
|
|
1237
|
+
- **EDA/Stats**: compute the requested stat, then show a relevant table
|
|
1238
|
+
(e.g., summary/crosstab) or plot.
|
|
1239
|
+
- **Classification**: train/valid split (`train_test_split`), pipeline with scaling/encoding,
|
|
1240
|
+
fit, show accuracy and a confusion matrix via
|
|
1241
|
+
`ConfusionMatrixDisplay.from_estimator(...); plt.show()`.
|
|
1242
|
+
Also show `classification_report` as a dataframe if short.
|
|
1243
|
+
- **Regression**: train/valid split, pipeline as needed, fit, show R² and MAE;
|
|
1244
|
+
plot predicted vs actual scatter.
|
|
1245
|
+
- **Correlation/Chi-square/ANOVA**: compute the statistic + p-value and show a concise
|
|
1246
|
+
result table (with `show(...)`) and, when sensible, a small plot (heatmap/bar).
|
|
1247
|
+
9) Don't mutate or recreate target columns if they already exist.
|
|
1248
|
+
10) Keep variable names short and clear; prefer `num_cols` / `cat_cols` discovery by dtype.
|
|
1249
|
+
11) You MUST NOT reference any column outside Available columns: {AVAILABLE_COLUMNS}.
|
|
1250
|
+
12) If asked to predict/classify, choose the target by matching the task text to Allowed columns
|
|
1251
|
+
and never invent a new name.
|
|
1252
|
+
|
|
1253
|
+
+            #### Cohort rules
+            When you generate plots for cohorts or categories, you MUST obey these rules:
+            1) ALWAYS guard cohort masks:
+               - After you define something like:
+                     _mask_a = (df['BMI'] < 18.5) & df['BMI'].notna()
+                     _mask_b = ~(df['BMI'] < 18.5) & df['BMI'].notna()
+                 compute their sizes:
+                     n_a = int(_mask_a.sum())
+                     n_b = int(_mask_b.sum())
+               - If a mask has no rows (or almost none), do NOT draw an empty plot.
+                 Instead call:
+                     show(f"Skipping cohort '{label}': no rows after filtering.")
+                 and return.
+
+            2) Before any groupby / crosstab for a plot:
+               - Fill missing categories so groupby does not drop everything:
+                     df[col] = df[col].fillna("Unknown")
+               - After building the table:
+                     tab = tmp.groupby([...]).size().unstack(...).fillna(0)
+                 ALWAYS check:
+                     if tab.empty:
+                         show(f"Skipping plot for {col}: no data after grouping.")
+                         continue
+                 Only call .plot(...) if the table is non-empty.
+
+            3) For value_counts-based plots:
+               - If the Series is empty after filtering (len(s) == 0),
+                 do NOT draw a figure. Just call:
+                     show(f"No data available to plot for {col} in this cohort.")
+                 and skip.
+
+            4) Never try to “hide” an error with a blank plot.
+               A blank chart is treated as a bug. If there is no data, explain it
+               clearly using show(...), and avoid calling matplotlib/Seaborn.
+
+            5) Never use print(...). All user-visible diagnostics go through show(...).
+
+
+            ### Output
+            Return only runnable Python that:
+            - Imports what it needs,
+            - Validates columns,
+            - Visualises tables, charts, and graphs, each with an appropriate caption,
+            - Implements {tasks} to solve {refined_question},
+            - And ends with at least 3 visible outputs (`show(...)` and/or `plt.show()`).
+            """)
+
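The cohort rules embedded in the prompt above describe a guard-before-plot pattern. A minimal sketch of what compliant generated code could look like (column name, threshold, and data are hypothetical; `show` is assumed to come from `syntaxmatrix.display`, as requirement 7 states):

```python
# Minimal sketch of the guarded-cohort pattern the prompt asks for.
import pandas as pd
import matplotlib.pyplot as plt
from syntaxmatrix.display import show

df = pd.DataFrame({"BMI": [17.9, 22.5, 31.2, None, 19.0]})  # illustrative data

mask = (df["BMI"] < 18.5) & df["BMI"].notna()
n = int(mask.sum())

if n == 0:
    # No rows survived the filter: report it instead of drawing an empty figure.
    show("Skipping cohort 'Underweight': no rows after filtering.")
else:
    df.loc[mask, "BMI"].plot(kind="hist", title=f"Underweight cohort (n={n})")
    plt.tight_layout()
    plt.show()
```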
+        if not self._coding_profile:
+            coding_profile = _prof.get_profile("coding") or _prof.get_profile("admin")
+            if not coding_profile:
+                return (
+                    '<div class="smx-alert smx-alert-warn">'
+                    'No LLM profile configured for <code>coding</code> (or <code>admin</code>). <br>'
+                    'Please add an LLM profile in the admin panel or contact your administrator.'
+                    '</div>'
+                )
+
+            self._coding_profile = coding_profile
+            self._coding_profile['client'] = _prof.get_client(coding_profile)
+
+        # code = mlearning_agent(instructions, ai_profile, self._coding_profile)
+        code, usage = mlearning_agent(instructions, ai_profile, self._coding_profile)
+        self._last_llm_usage = usage
+
+        if code:
+            import re
+            code = normalise_llm_code(code)
+
+            # Strip a surrounding markdown code fence if the model returned one.
+            m = re.search(r"```(?:python)?\s*(.*?)\s*```", code, re.DOTALL | re.IGNORECASE)
+            if m:
+                code = m.group(1).strip()
+
+            # Ensure `import io` is present when the snippet uses io.BytesIO.
+            if "import io" not in code and "io.BytesIO" in code:
+                lines = code.split('\n')
+                import_lines = []
+                other_lines = []
+
+                for line in lines:
+                    if line.strip().startswith('import ') or line.strip().startswith('from '):
+                        import_lines.append(line)
+                    else:
+                        other_lines.append(line)
+
+                if "import io" not in '\n'.join(import_lines):
+                    import_lines.append('import io')
+
+                code = '\n'.join(import_lines + [''] + other_lines)
+
+            TEMPLATE_NAMES = [
+                "viz_pie", "viz_stacked_bar", "viz_count_bar", "viz_box", "viz_scatter",
+                "viz_distribution", "viz_kde", "viz_area", "viz_line",
+                "classification", "regression", "clustering", "anomaly_detection",
+                "ts_anomaly_detection", "time_series_forecasting", "time_series_classification",
+                "dimensionality_reduction", "feature_selection", "eda_overview", "eda_correlation",
+                "multilabel_classification", "recommendation", "topic_modelling"
+            ]
+
+            # Prepend an import for any catalogue templates the generated code calls.
+            used = [t for t in TEMPLATE_NAMES if re.search(rf"\b{t}\s*\(", code)]
+            if used:
+                import_line = (
+                    "from syntaxmatrix.agentic.model_templates import " +
+                    ", ".join(sorted(set(used)))
+                )
+                if import_line not in code:
+                    code = import_line + "\n" + code
+
+            return code.strip()
+
+        return "Error: AI code generation failed."
+
+
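As a rough illustration of the post-processing above, a fenced model reply that calls a catalogue template would be unwrapped and given the template import. The reply text and column name below are made up; the two steps mirror the fence-stripping and import-injection logic shown in the diff.

```python
import re

# Hypothetical model reply wrapped in a markdown fence.
raw = '```python\nviz_pie(df, category_col="Outcome", top_k=8)\nplt.show()\n```'

# Step 1: strip the fence, as in the method above.
m = re.search(r"```(?:python)?\s*(.*?)\s*```", raw, re.DOTALL | re.IGNORECASE)
code = m.group(1).strip() if m else raw

# Step 2: detect catalogue templates and prepend the import line.
used = [t for t in ("viz_pie", "viz_box") if re.search(rf"\b{t}\s*\(", code)]
if used:
    code = ("from syntaxmatrix.agentic.model_templates import "
            + ", ".join(sorted(set(used))) + "\n" + code)

print(code)
# from syntaxmatrix.agentic.model_templates import viz_pie
# viz_pie(df, category_col="Outcome", top_k=8)
# plt.show()
```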
+    def sanitize_rough_to_markdown_task(self, rough: str) -> str:
+        """
+        Return only the Task text (no tags).
+        Behaviour:
+        - If <Task>...</Task> exists: return its inner text.
+        - If not: return the input with <rough> wrapper and any <Error> blocks removed.
+        - Never raises; always returns a string.
+        """
+        s = ("" if rough is None else str(rough)).strip()
+
+        def _find_ci(hay, needle, start=0):
+            return hay.lower().find(needle.lower(), start)
+
+        # Prefer explicit <Task>...</Task>
+        i = _find_ci(s, "<task")
+        if i != -1:
+            j = s.find(">", i)
+            k = _find_ci(s, "</task>", j + 1)
+            if j != -1 and k != -1:
+                return s[j + 1:k].strip()
+        # Otherwise strip any <Error>...</Error> blocks (if present)
+        out = s
+        while True:
+            e1 = _find_ci(out, "<error")
+            if e1 == -1:
+                break
+            e1_end = out.find(">", e1)
+            e2 = _find_ci(out, "</error>", (e1_end + 1) if e1_end != -1 else e1 + 1)
+            if e1_end == -1 or e2 == -1:
+                break
+            out = out[:e1] + out[e2 + len("</error>"):]
+
+        # Drop optional <rough> wrapper
+        return out.replace("<rough>", "").replace("</rough>", "").strip()

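A quick, hypothetical illustration of the sanitiser's behaviour (the inputs and the `app` object holding the method are invented for the example):

```python
# Hypothetical inputs; the method lives on the application object shown in this diff.
rough_1 = "<rough><Task>Plot BMI by Outcome.</Task></rough>"
rough_2 = "<rough><Error>model timeout</Error>Summarise the dataset.</rough>"

app.sanitize_rough_to_markdown_task(rough_1)   # -> "Plot BMI by Outcome."
app.sanitize_rough_to_markdown_task(rough_2)   # -> "Summarise the dataset."
```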
-        code = strip_describe_slice(code)
-        code = drop_bad_classification_metrics(code, df)
-        return code.strip()
-

     def run(self):
         url = f"http://{self.host}:{self.port}/"
         webbrowser.open(url)
         self.app.run(host=self.host, port=self.port, debug=False)
-