solana-agent 31.1.4.tar.gz → 31.1.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {solana_agent-31.1.4 → solana_agent-31.1.6}/PKG-INFO +4 -5
  2. {solana_agent-31.1.4 → solana_agent-31.1.6}/README.md +1 -2
  3. {solana_agent-31.1.4 → solana_agent-31.1.6}/pyproject.toml +7 -7
  4. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/adapters/openai_adapter.py +71 -0
  5. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/factories/agent_factory.py +1 -12
  6. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/interfaces/providers/llm.py +17 -0
  7. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/repositories/memory.py +30 -51
  8. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/services/agent.py +168 -116
  9. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/services/query.py +208 -15
  10. {solana_agent-31.1.4 → solana_agent-31.1.6}/LICENSE +0 -0
  11. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/__init__.py +0 -0
  12. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/adapters/__init__.py +0 -0
  13. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/adapters/mongodb_adapter.py +0 -0
  14. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/adapters/pinecone_adapter.py +0 -0
  15. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/cli.py +0 -0
  16. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/client/__init__.py +0 -0
  17. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/client/solana_agent.py +0 -0
  18. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/domains/__init__.py +0 -0
  19. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/domains/agent.py +0 -0
  20. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/domains/routing.py +0 -0
  21. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/factories/__init__.py +0 -0
  22. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/guardrails/pii.py +0 -0
  23. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/interfaces/__init__.py +0 -0
  24. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/interfaces/client/client.py +0 -0
  25. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/interfaces/guardrails/guardrails.py +0 -0
  26. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/interfaces/plugins/plugins.py +0 -0
  27. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/interfaces/providers/data_storage.py +0 -0
  28. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/interfaces/providers/memory.py +0 -0
  29. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/interfaces/providers/vector_storage.py +0 -0
  30. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/interfaces/services/agent.py +0 -0
  31. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/interfaces/services/knowledge_base.py +0 -0
  32. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/interfaces/services/query.py +0 -0
  33. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/interfaces/services/routing.py +0 -0
  34. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/plugins/__init__.py +0 -0
  35. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/plugins/manager.py +0 -0
  36. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/plugins/registry.py +0 -0
  37. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/plugins/tools/__init__.py +0 -0
  38. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/plugins/tools/auto_tool.py +0 -0
  39. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/repositories/__init__.py +0 -0
  40. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/services/__init__.py +0 -0
  41. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/services/knowledge_base.py +0 -0
  42. {solana_agent-31.1.4 → solana_agent-31.1.6}/solana_agent/services/routing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: solana-agent
3
- Version: 31.1.4
3
+ Version: 31.1.6
4
4
  Summary: AI Agents for Solana
5
5
  License: MIT
6
6
  Keywords: solana,solana ai,solana agent,ai,ai agent,ai agents
@@ -15,10 +15,10 @@ Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
16
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
17
  Requires-Dist: instructor (==1.11.2)
18
- Requires-Dist: llama-index-core (==0.13.3)
18
+ Requires-Dist: llama-index-core (==0.13.5)
19
19
  Requires-Dist: llama-index-embeddings-openai (==0.5.0)
20
20
  Requires-Dist: logfire (==4.3.6)
21
- Requires-Dist: openai (==1.102.0)
21
+ Requires-Dist: openai (==1.106.1)
22
22
  Requires-Dist: pillow (==11.3.0)
23
23
  Requires-Dist: pinecone[asyncio] (==7.3.0)
24
24
  Requires-Dist: pydantic (>=2)
@@ -52,7 +52,7 @@ Build your AI agents in three lines of code!
52
52
  ## Why?
53
53
  * Three lines of code setup
54
54
  * Simple Agent Definition
55
- * Fast Responses
55
+ * Fast & Streaming Responses
56
56
  * Solana Integration
57
57
  * Multi-Agent Swarm
58
58
  * Multi-Modal (Images & Audio & Text)
@@ -361,7 +361,6 @@ config = {
361
361
  "instructions": "You provide friendly, helpful customer support responses.",
362
362
  "specialization": "Customer inquiries",
363
363
  "capture_name": "contact_info",
364
- "capture_mode": "once",
365
364
  "capture_schema": {
366
365
  "type": "object",
367
366
  "properties": {
@@ -17,7 +17,7 @@ Build your AI agents in three lines of code!
17
17
  ## Why?
18
18
  * Three lines of code setup
19
19
  * Simple Agent Definition
20
- * Fast Responses
20
+ * Fast & Streaming Responses
21
21
  * Solana Integration
22
22
  * Multi-Agent Swarm
23
23
  * Multi-Modal (Images & Audio & Text)
@@ -326,7 +326,6 @@ config = {
326
326
  "instructions": "You provide friendly, helpful customer support responses.",
327
327
  "specialization": "Customer inquiries",
328
328
  "capture_name": "contact_info",
329
- "capture_mode": "once",
330
329
  "capture_schema": {
331
330
  "type": "object",
332
331
  "properties": {
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "solana-agent"
3
- version = "31.1.4"
3
+ version = "31.1.6"
4
4
  description = "AI Agents for Solana"
5
5
  authors = ["Bevan Hunt <bevan@bevanhunt.com>"]
6
6
  license = "MIT"
@@ -24,13 +24,13 @@ testpaths = ["tests"]
24
24
 
25
25
  [tool.poetry.dependencies]
26
26
  python = ">=3.12,<4.0"
27
- openai = "1.102.0"
27
+ openai = "1.106.1"
28
28
  pydantic = ">=2"
29
29
  pymongo = "4.14.1"
30
30
  zep-cloud = "3.4.3"
31
31
  instructor = "1.11.2"
32
32
  pinecone = { version = "7.3.0", extras = ["asyncio"] }
33
- llama-index-core = "0.13.3"
33
+ llama-index-core = "0.13.5"
34
34
  llama-index-embeddings-openai = "0.5.0"
35
35
  pypdf = "6.0.0"
36
36
  scrubadub = "2.0.1"
@@ -40,17 +40,17 @@ rich = ">=13,<14.0"
40
40
  pillow = "11.3.0"
41
41
 
42
42
  [tool.poetry.group.dev.dependencies]
43
- pytest = "^8.4.0"
43
+ pytest = "^8.4.2"
44
44
  pytest-cov = "^6.1.1"
45
45
  pytest-asyncio = "^1.1.0"
46
- pytest-mock = "^3.14.0"
46
+ pytest-mock = "^3.15.0"
47
47
  pytest-github-actions-annotate-failures = "^0.3.0"
48
48
  sphinx = "^8.2.3"
49
49
  sphinx-rtd-theme = "^3.0.2"
50
50
  myst-parser = "^4.0.1"
51
- sphinx-autobuild = "^2024.10.3"
51
+ sphinx-autobuild = "^2025.08.25"
52
52
  mongomock = "^4.3.0"
53
- ruff = "^0.12.10"
53
+ ruff = "^0.12.12"
54
54
 
55
55
  [tool.poetry.scripts]
56
56
  solana-agent = "solana_agent.cli:app"
@@ -399,6 +399,77 @@ class OpenAIAdapter(LLMProvider):
399
399
  logger.exception(f"Error in generate_text_with_images: {e}")
400
400
  return f"I apologize, but I encountered an unexpected error: {e}"
401
401
 
402
+ async def chat_stream(
403
+ self,
404
+ messages: List[Dict[str, Any]],
405
+ model: Optional[str] = None,
406
+ tools: Optional[List[Dict[str, Any]]] = None,
407
+ ) -> AsyncGenerator[Dict[str, Any], None]: # pragma: no cover
408
+ """Stream chat completions with optional tool calls, yielding normalized events."""
409
+ try:
410
+ request_params: Dict[str, Any] = {
411
+ "messages": messages,
412
+ "model": model or self.text_model,
413
+ "stream": True,
414
+ }
415
+ if tools:
416
+ request_params["tools"] = tools
417
+
418
+ client = self.client
419
+ if self.logfire:
420
+ logfire.instrument_openai(client)
421
+
422
+ stream = await client.chat.completions.create(**request_params)
423
+ async for chunk in stream:
424
+ try:
425
+ if not chunk or not getattr(chunk, "choices", None):
426
+ continue
427
+ ch = chunk.choices[0]
428
+ delta = getattr(ch, "delta", None)
429
+ if delta is None:
430
+ # Some SDKs use 'message' instead of 'delta'
431
+ delta = getattr(ch, "message", None)
432
+ if delta is None:
433
+ # Finish event
434
+ finish = getattr(ch, "finish_reason", None)
435
+ if finish:
436
+ yield {"type": "message_end", "finish_reason": finish}
437
+ continue
438
+
439
+ # Content delta
440
+ content_piece = getattr(delta, "content", None)
441
+ if content_piece:
442
+ yield {"type": "content", "delta": content_piece}
443
+
444
+ # Tool call deltas
445
+ tool_calls = getattr(delta, "tool_calls", None)
446
+ if tool_calls:
447
+ for idx, tc in enumerate(tool_calls):
448
+ try:
449
+ tc_id = getattr(tc, "id", None)
450
+ func = getattr(tc, "function", None)
451
+ name = getattr(func, "name", None) if func else None
452
+ args_piece = (
453
+ getattr(func, "arguments", "") if func else ""
454
+ )
455
+ yield {
456
+ "type": "tool_call_delta",
457
+ "id": tc_id,
458
+ "index": getattr(tc, "index", idx),
459
+ "name": name,
460
+ "arguments_delta": args_piece or "",
461
+ }
462
+ except Exception:
463
+ continue
464
+ except Exception as parse_err:
465
+ logger.debug(f"Error parsing stream chunk: {parse_err}")
466
+ continue
467
+ # End of stream (SDK may not emit finish event in all cases)
468
+ yield {"type": "message_end", "finish_reason": "end_of_stream"}
469
+ except Exception as e:
470
+ logger.exception(f"Error in chat_stream: {e}")
471
+ yield {"type": "error", "error": str(e)}
472
+
402
473
  async def parse_structured_output(
403
474
  self,
404
475
  prompt: str,
@@ -133,12 +133,7 @@ class SolanaAgentFactory:
133
133
  voice=org_config.get("voice", ""),
134
134
  )
135
135
 
136
- # Build capture modes from agent config if provided
137
- capture_modes: Dict[str, str] = {}
138
- for agent in config.get("agents", []):
139
- mode = agent.get("capture_mode")
140
- if mode in {"once", "multiple"} and agent.get("name"):
141
- capture_modes[agent["name"]] = mode
136
+ # capture_mode removed: repository now always upserts/merges per capture
142
137
 
143
138
  # Create repositories
144
139
  memory_provider = None
@@ -148,22 +143,16 @@ class SolanaAgentFactory:
148
143
  "mongo_adapter": db_adapter,
149
144
  "zep_api_key": config["zep"].get("api_key"),
150
145
  }
151
- if capture_modes: # pragma: no cover
152
- mem_kwargs["capture_modes"] = capture_modes
153
146
  memory_provider = MemoryRepository(**mem_kwargs)
154
147
 
155
148
  if "mongo" in config and "zep" not in config:
156
149
  mem_kwargs = {"mongo_adapter": db_adapter}
157
- if capture_modes:
158
- mem_kwargs["capture_modes"] = capture_modes
159
150
  memory_provider = MemoryRepository(**mem_kwargs)
160
151
 
161
152
  if "zep" in config and "mongo" not in config:
162
153
  if "api_key" not in config["zep"]:
163
154
  raise ValueError("Zep API key is required.")
164
155
  mem_kwargs = {"zep_api_key": config["zep"].get("api_key")}
165
- if capture_modes: # pragma: no cover
166
- mem_kwargs["capture_modes"] = capture_modes
167
156
  memory_provider = MemoryRepository(**mem_kwargs)
168
157
 
169
158
  guardrail_config = config.get("guardrails", {})
@@ -33,6 +33,23 @@ class LLMProvider(ABC):
33
33
  """Generate text from the language model."""
34
34
  pass
35
35
 
36
+ @abstractmethod
37
+ async def chat_stream(
38
+ self,
39
+ messages: List[Dict[str, Any]],
40
+ model: Optional[str] = None,
41
+ tools: Optional[List[Dict[str, Any]]] = None,
42
+ ) -> AsyncGenerator[Dict[str, Any], None]:
43
+ """Stream chat completion deltas and tool call deltas.
44
+
45
+ Yields normalized events:
46
+ - {"type": "content", "delta": str}
47
+ - {"type": "tool_call_delta", "id": Optional[str], "index": Optional[int], "name": Optional[str], "arguments_delta": str}
48
+ - {"type": "message_end", "finish_reason": str}
49
+ - {"type": "error", "error": str}
50
+ """
51
+ pass
52
+
36
53
  @abstractmethod
37
54
  async def parse_structured_output(
38
55
  self,
@@ -19,10 +19,7 @@ class MemoryRepository(MemoryProvider):
19
19
  self,
20
20
  mongo_adapter: Optional[MongoDBAdapter] = None,
21
21
  zep_api_key: Optional[str] = None,
22
- capture_modes: Optional[Dict[str, str]] = None,
23
22
  ):
24
- self.capture_modes: Dict[str, str] = capture_modes or {}
25
-
26
23
  # Mongo setup
27
24
  if not mongo_adapter:
28
25
  self.mongo = None
@@ -46,18 +43,15 @@ class MemoryRepository(MemoryProvider):
46
43
  self.mongo.create_index(self.captures_collection, [("capture_name", 1)])
47
44
  self.mongo.create_index(self.captures_collection, [("agent_name", 1)])
48
45
  self.mongo.create_index(self.captures_collection, [("timestamp", 1)])
49
- # Unique only when mode == 'once'
46
+ # Unique per user/agent/capture combo
50
47
  try:
51
48
  self.mongo.create_index(
52
49
  self.captures_collection,
53
50
  [("user_id", 1), ("agent_name", 1), ("capture_name", 1)],
54
51
  unique=True,
55
- partialFilterExpression={"mode": "once"},
56
52
  )
57
53
  except Exception as e:
58
- logger.error(
59
- f"Error creating partial unique index for captures: {e}"
60
- )
54
+ logger.error(f"Error creating unique index for captures: {e}")
61
55
  except Exception as e:
62
56
  logger.error(f"Error initializing MongoDB captures collection: {e}")
63
57
  self.captures_collection = "captures"
@@ -223,54 +217,39 @@ class MemoryRepository(MemoryProvider):
223
217
  raise ValueError("data must be a dictionary")
224
218
 
225
219
  try:
226
- mode = self.capture_modes.get(agent_name, "once") if agent_name else "once"
227
220
  now = datetime.now(timezone.utc)
228
- if mode == "multiple":
229
- doc = {
221
+ key = {
222
+ "user_id": user_id,
223
+ "agent_name": agent_name,
224
+ "capture_name": capture_name,
225
+ }
226
+ existing = self.mongo.find_one(self.captures_collection, key)
227
+ merged_data: Dict[str, Any] = {}
228
+ if existing and isinstance(existing.get("data"), dict):
229
+ merged_data.update(existing.get("data", {}))
230
+ merged_data.update(data or {})
231
+ update_doc = {
232
+ "$set": {
230
233
  "user_id": user_id,
231
234
  "agent_name": agent_name,
232
235
  "capture_name": capture_name,
233
- "data": data or {},
234
- "schema": schema or {},
235
- "mode": "multiple",
236
+ "data": merged_data,
237
+ "schema": (
238
+ schema
239
+ if schema is not None
240
+ else existing.get("schema")
241
+ if existing
242
+ else {}
243
+ ),
236
244
  "timestamp": now,
237
- "created_at": now,
238
- }
239
- return self.mongo.insert_one(self.captures_collection, doc)
240
- else:
241
- key = {
242
- "user_id": user_id,
243
- "agent_name": agent_name,
244
- "capture_name": capture_name,
245
- }
246
- existing = self.mongo.find_one(self.captures_collection, key)
247
- merged_data: Dict[str, Any] = {}
248
- if existing and isinstance(existing.get("data"), dict):
249
- merged_data.update(existing.get("data", {}))
250
- merged_data.update(data or {})
251
- update_doc = {
252
- "$set": {
253
- "user_id": user_id,
254
- "agent_name": agent_name,
255
- "capture_name": capture_name,
256
- "data": merged_data,
257
- "schema": (
258
- schema
259
- if schema is not None
260
- else existing.get("schema")
261
- if existing
262
- else {}
263
- ),
264
- "mode": "once",
265
- "timestamp": now,
266
- },
267
- "$setOnInsert": {"created_at": now},
268
- }
269
- self.mongo.update_one(
270
- self.captures_collection, key, update_doc, upsert=True
271
- )
272
- doc = self.mongo.find_one(self.captures_collection, key)
273
- return str(doc.get("_id")) if doc and doc.get("_id") else None
245
+ },
246
+ "$setOnInsert": {"created_at": now},
247
+ }
248
+ self.mongo.update_one(
249
+ self.captures_collection, key, update_doc, upsert=True
250
+ )
251
+ doc = self.mongo.find_one(self.captures_collection, key)
252
+ return str(doc.get("_id")) if doc and doc.get("_id") else None
274
253
  except Exception as e: # pragma: no cover
275
254
  logger.error(f"MongoDB save_capture error: {e}")
276
255
  return None
@@ -265,56 +265,57 @@ class AgentService(AgentServiceInterface):
265
265
  prompt: Optional[str] = None,
266
266
  output_model: Optional[Type[BaseModel]] = None,
267
267
  ) -> AsyncGenerator[Union[str, bytes, BaseModel], None]: # pragma: no cover
268
- """Generate a response using OpenAI function calling (tools API) or structured output."""
269
-
270
- agent = next((a for a in self.agents if a.name == agent_name), None)
271
- if not agent:
272
- error_msg = f"Agent '{agent_name}' not found."
273
- logger.warning(error_msg)
274
- if output_format == "audio":
275
- async for chunk in self.llm_provider.tts(
276
- error_msg,
277
- instructions=audio_instructions,
278
- response_format=audio_output_format,
279
- voice=audio_voice,
280
- ):
281
- yield chunk
282
- else:
283
- yield error_msg
284
- return
285
-
286
- # Build system prompt and messages
287
- system_prompt = self.get_agent_system_prompt(agent_name)
288
- user_content = str(query)
289
- if images:
290
- user_content += "\n\n[Images attached]"
291
-
292
- # Compose the prompt for generate_text
293
- full_prompt = ""
294
- if memory_context:
295
- full_prompt += f"CONVERSATION HISTORY:\n{memory_context}\n\n Always use your tools to perform actions and don't rely on your memory!\n\n"
296
- if prompt:
297
- full_prompt += f"ADDITIONAL PROMPT:\n{prompt}\n\n"
298
- full_prompt += user_content
299
- full_prompt += f"USER IDENTIFIER: {user_id}"
300
-
301
- # Get OpenAI function schemas for this agent's tools
302
- tools = [
303
- {
304
- "type": "function",
305
- "function": {
306
- "name": tool["name"],
307
- "description": tool.get("description", ""),
308
- "parameters": tool.get("parameters", {}),
309
- "strict": True,
310
- },
311
- }
312
- for tool in self.get_agent_tools(agent_name)
313
- ]
268
+ """Generate a response using tool-calling with full streaming support."""
314
269
 
315
270
  try:
271
+ # Validate agent
272
+ agent = next((a for a in self.agents if a.name == agent_name), None)
273
+ if not agent:
274
+ error_msg = f"Agent '{agent_name}' not found."
275
+ logger.warning(error_msg)
276
+ if output_format == "audio":
277
+ async for chunk in self.llm_provider.tts(
278
+ error_msg,
279
+ instructions=audio_instructions,
280
+ response_format=audio_output_format,
281
+ voice=audio_voice,
282
+ ):
283
+ yield chunk
284
+ else:
285
+ yield error_msg
286
+ return
287
+
288
+ # Build system prompt and messages
289
+ system_prompt = self.get_agent_system_prompt(agent_name)
290
+ user_content = str(query)
291
+ if images:
292
+ user_content += "\n\n[Images attached]"
293
+
294
+ # Compose the prompt for generate_text
295
+ full_prompt = ""
296
+ if memory_context:
297
+ full_prompt += f"CONVERSATION HISTORY:\n{memory_context}\n\n Always use your tools to perform actions and don't rely on your memory!\n\n"
298
+ if prompt:
299
+ full_prompt += f"ADDITIONAL PROMPT:\n{prompt}\n\n"
300
+ full_prompt += user_content
301
+ full_prompt += f"USER IDENTIFIER: {user_id}"
302
+
303
+ # Get OpenAI function schemas for this agent's tools
304
+ tools = [
305
+ {
306
+ "type": "function",
307
+ "function": {
308
+ "name": tool["name"],
309
+ "description": tool.get("description", ""),
310
+ "parameters": tool.get("parameters", {}),
311
+ "strict": True,
312
+ },
313
+ }
314
+ for tool in self.get_agent_tools(agent_name)
315
+ ]
316
+
317
+ # Structured output path
316
318
  if output_model is not None:
317
- # --- Structured output with tool support ---
318
319
  model_instance = await self.llm_provider.parse_structured_output(
319
320
  prompt=full_prompt,
320
321
  system_prompt=system_prompt,
@@ -327,83 +328,131 @@ class AgentService(AgentServiceInterface):
327
328
  yield model_instance
328
329
  return
329
330
 
330
- # --- Streaming text/audio with tool support (as before) ---
331
- response_text = ""
332
- while True:
333
- if not images:
334
- response = await self.llm_provider.generate_text(
335
- prompt=full_prompt,
336
- system_prompt=system_prompt,
337
- api_key=self.api_key,
338
- base_url=self.base_url,
339
- model=self.model,
340
- tools=tools if tools else None,
341
- )
331
+ # Vision fallback (non-streaming for now)
332
+ if images:
333
+ vision_text = await self.llm_provider.generate_text_with_images(
334
+ prompt=full_prompt, images=images, system_prompt=system_prompt
335
+ )
336
+ if output_format == "audio":
337
+ cleaned_audio_buffer = self._clean_for_audio(vision_text)
338
+ async for audio_chunk in self.llm_provider.tts(
339
+ text=cleaned_audio_buffer,
340
+ voice=audio_voice,
341
+ response_format=audio_output_format,
342
+ instructions=audio_instructions,
343
+ ):
344
+ yield audio_chunk
342
345
  else:
343
- response = await self.llm_provider.generate_text_with_images(
344
- prompt=full_prompt,
345
- system_prompt=system_prompt,
346
- api_key=self.api_key,
347
- base_url=self.base_url,
348
- model=self.model,
349
- tools=tools if tools else None,
350
- images=images,
351
- )
352
- if (
353
- not response
354
- or not hasattr(response, "choices")
355
- or not response.choices
346
+ yield vision_text
347
+ return
348
+
349
+ # Build initial messages for chat streaming
350
+ messages: List[Dict[str, Any]] = []
351
+ if system_prompt:
352
+ messages.append({"role": "system", "content": system_prompt})
353
+ messages.append({"role": "user", "content": full_prompt})
354
+
355
+ accumulated_text = ""
356
+
357
+ # Loop to handle tool calls in streaming mode
358
+ while True:
359
+ # Aggregate tool calls by index and merge late IDs
360
+ tool_calls: Dict[int, Dict[str, Any]] = {}
361
+
362
+ async for event in self.llm_provider.chat_stream(
363
+ messages=messages,
364
+ model=self.model,
365
+ tools=tools if tools else None,
356
366
  ):
357
- logger.error("No response or choices from LLM provider.")
358
- response_text = "I apologize, but I could not generate a response."
359
- break
360
-
361
- choice = response.choices[0]
362
- message = getattr(choice, "message", choice)
363
-
364
- if hasattr(message, "tool_calls") and message.tool_calls:
365
- for tool_call in message.tool_calls:
366
- if tool_call.type == "function":
367
- function_name = tool_call.function.name
368
- arguments = json.loads(tool_call.function.arguments)
369
- logger.info(
370
- f"Model requested tool '{function_name}' with args: {arguments}"
371
- )
372
- # Execute the tool (async)
373
- tool_result = await self.execute_tool(
374
- agent_name, function_name, arguments
375
- )
376
- # Add the tool result to the prompt for the next round
377
- full_prompt += (
378
- f"\n\nTool '{function_name}' was called with arguments {arguments}.\n"
379
- f"Result: {tool_result}\n"
367
+ etype = event.get("type")
368
+ if etype == "content":
369
+ delta = event.get("delta", "")
370
+ accumulated_text += delta
371
+ if output_format == "text":
372
+ yield delta
373
+ elif etype == "tool_call_delta":
374
+ tc_id = event.get("id")
375
+ index_raw = event.get("index")
376
+ try:
377
+ index = int(index_raw) if index_raw is not None else 0
378
+ except Exception:
379
+ index = 0
380
+ name = event.get("name")
381
+ args_piece = event.get("arguments_delta", "")
382
+ entry = tool_calls.setdefault(
383
+ index, {"id": None, "name": None, "arguments": ""}
384
+ )
385
+ if tc_id and not entry.get("id"):
386
+ entry["id"] = tc_id
387
+ if name and not entry.get("name"):
388
+ entry["name"] = name
389
+ entry["arguments"] += args_piece
390
+ elif etype == "message_end":
391
+ _ = event.get("finish_reason")
392
+
393
+ # If tool calls were requested, execute them and continue the loop
394
+ if tool_calls:
395
+ assistant_tool_calls: List[Dict[str, Any]] = []
396
+ call_id_map: Dict[int, str] = {}
397
+ for idx, tc in tool_calls.items():
398
+ name = (tc.get("name") or "").strip()
399
+ if not name:
400
+ logger.warning(
401
+ f"Skipping unnamed tool call at index {idx}; cannot send empty function name."
380
402
  )
381
- continue
403
+ continue
404
+ norm_id = tc.get("id") or f"call_{idx}"
405
+ call_id_map[idx] = norm_id
406
+ assistant_tool_calls.append(
407
+ {
408
+ "id": norm_id,
409
+ "type": "function",
410
+ "function": {
411
+ "name": name,
412
+ "arguments": tc.get("arguments") or "{}",
413
+ },
414
+ }
415
+ )
382
416
 
383
- # Otherwise, it's a normal message (final answer)
384
- response_text = message.content
385
- break
417
+ if assistant_tool_calls:
418
+ messages.append(
419
+ {
420
+ "role": "assistant",
421
+ "content": None,
422
+ "tool_calls": assistant_tool_calls,
423
+ }
424
+ )
386
425
 
387
- # Apply output guardrails if any
388
- processed_final_text = response_text
389
- if self.output_guardrails:
390
- for guardrail in self.output_guardrails:
391
- try:
392
- processed_final_text = await guardrail.process(
393
- processed_final_text
426
+ # Execute each tool and append the tool result messages
427
+ for idx, tc in tool_calls.items():
428
+ func_name = (tc.get("name") or "").strip()
429
+ if not func_name:
430
+ continue
431
+ try:
432
+ args = json.loads(tc.get("arguments") or "{}")
433
+ except Exception:
434
+ args = {}
435
+ logger.info(
436
+ f"Streaming: executing tool '{func_name}' with args: {args}"
394
437
  )
395
- except Exception as e:
396
- logger.error(
397
- f"Error applying output guardrail {guardrail.__class__.__name__}: {e}"
438
+ tool_result = await self.execute_tool(
439
+ agent_name, func_name, args
440
+ )
441
+ messages.append(
442
+ {
443
+ "role": "tool",
444
+ "tool_call_id": call_id_map.get(idx, f"call_{idx}"),
445
+ "content": json.dumps(tool_result),
446
+ }
398
447
  )
399
448
 
400
- self.last_text_response = processed_final_text
449
+ accumulated_text = ""
450
+ continue
401
451
 
402
- if output_format == "text":
403
- yield processed_final_text or ""
404
- elif output_format == "audio":
405
- cleaned_audio_buffer = self._clean_for_audio(processed_final_text)
406
- if cleaned_audio_buffer:
452
+ # No tool calls: we've streamed the final answer
453
+ final_text = accumulated_text
454
+ if output_format == "audio":
455
+ cleaned_audio_buffer = self._clean_for_audio(final_text)
407
456
  async for audio_chunk in self.llm_provider.tts(
408
457
  text=cleaned_audio_buffer,
409
458
  voice=audio_voice,
@@ -412,7 +461,10 @@ class AgentService(AgentServiceInterface):
412
461
  ):
413
462
  yield audio_chunk
414
463
  else:
415
- yield ""
464
+ if not final_text:
465
+ yield ""
466
+ self.last_text_response = final_text
467
+ break
416
468
  except Exception as e:
417
469
  import traceback
418
470
 
@@ -8,7 +8,18 @@ clean separation of concerns.
8
8
 
9
9
  import logging
10
10
  import re
11
- from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Type, Union
11
+ import time
12
+ from typing import (
13
+ Any,
14
+ AsyncGenerator,
15
+ Dict,
16
+ List,
17
+ Literal,
18
+ Optional,
19
+ Type,
20
+ Union,
21
+ Tuple,
22
+ )
12
23
 
13
24
  from pydantic import BaseModel
14
25
 
@@ -50,6 +61,151 @@ class QueryService(QueryServiceInterface):
50
61
  self.knowledge_base = knowledge_base
51
62
  self.kb_results_count = kb_results_count
52
63
  self.input_guardrails = input_guardrails or []
64
+ # Per-user sticky sessions (in-memory)
65
+ # { user_id: { 'agent': str, 'started_at': float, 'last_updated': float, 'required_complete': bool } }
66
+ self._sticky_sessions: Dict[str, Dict[str, Any]] = {}
67
+
68
+ def _get_sticky_agent(self, user_id: str) -> Optional[str]:
69
+ sess = self._sticky_sessions.get(user_id)
70
+ return sess.get("agent") if isinstance(sess, dict) else None
71
+
72
+ def _set_sticky_agent(
73
+ self, user_id: str, agent_name: str, required_complete: bool = False
74
+ ) -> None:
75
+ self._sticky_sessions[user_id] = {
76
+ "agent": agent_name,
77
+ "started_at": self._sticky_sessions.get(user_id, {}).get(
78
+ "started_at", time.time()
79
+ ),
80
+ "last_updated": time.time(),
81
+ "required_complete": required_complete,
82
+ }
83
+
84
+ def _update_sticky_required_complete(
85
+ self, user_id: str, required_complete: bool
86
+ ) -> None:
87
+ if user_id in self._sticky_sessions:
88
+ self._sticky_sessions[user_id]["required_complete"] = required_complete
89
+ self._sticky_sessions[user_id]["last_updated"] = time.time()
90
+
91
+ def _clear_sticky_agent(self, user_id: str) -> None:
92
+ if user_id in self._sticky_sessions:
93
+ del self._sticky_sessions[user_id]
94
+
95
+ # LLM-backed switch intent detection (gpt-4.1-mini)
96
+ class _SwitchIntentModel(BaseModel):
97
+ switch: bool = False
98
+ target_agent: Optional[str] = None
99
+ start_new: bool = False
100
+
101
+ async def _detect_switch_intent(
102
+ self, text: str, available_agents: List[str]
103
+ ) -> Tuple[bool, Optional[str], bool]:
104
+ """Detect if the user is asking to switch agents or start a new conversation.
105
+
106
+ Returns: (switch_requested, target_agent_name_or_none, start_new_conversation)
107
+ Implemented as an LLM call to gpt-4.1-mini with structured output.
108
+ """
109
+ if not text:
110
+ return (False, None, False)
111
+
112
+ # Instruction and user prompt for the classifier
113
+ instruction = (
114
+ "You are a strict intent classifier for agent routing. "
115
+ "Decide if the user's message requests switching to another agent or starting a new conversation. "
116
+ "Only return JSON with keys: switch (bool), target_agent (string|null), start_new (bool). "
117
+ "If a target agent is mentioned, it MUST be one of the provided agent names (case-insensitive). "
118
+ "If none clearly applies, set switch=false and start_new=false and target_agent=null."
119
+ )
120
+ user_prompt = (
121
+ f"Available agents (choose only from these if a target is specified): {available_agents}\n\n"
122
+ f"User message:\n{text}\n\n"
123
+ 'Return JSON only, like: {"switch": true|false, "target_agent": "<one_of_available_or_null>", "start_new": true|false}'
124
+ )
125
+
126
+ # Primary: use llm_provider.parse_structured_output
127
+ try:
128
+ if hasattr(self.agent_service.llm_provider, "parse_structured_output"):
129
+ try:
130
+ result = (
131
+ await self.agent_service.llm_provider.parse_structured_output(
132
+ prompt=user_prompt,
133
+ system_prompt=instruction,
134
+ model_class=QueryService._SwitchIntentModel,
135
+ model="gpt-4.1-mini",
136
+ )
137
+ )
138
+ except TypeError:
139
+ # Provider may not accept 'model' kwarg
140
+ result = (
141
+ await self.agent_service.llm_provider.parse_structured_output(
142
+ prompt=user_prompt,
143
+ system_prompt=instruction,
144
+ model_class=QueryService._SwitchIntentModel,
145
+ )
146
+ )
147
+ switch = bool(getattr(result, "switch", False))
148
+ target = getattr(result, "target_agent", None)
149
+ start_new = bool(getattr(result, "start_new", False))
150
+ # Normalize target to available agent name
151
+ if target:
152
+ target_lower = target.lower()
153
+ norm = None
154
+ for a in available_agents:
155
+ if a.lower() == target_lower or target_lower in a.lower():
156
+ norm = a
157
+ break
158
+ target = norm
159
+ if not switch:
160
+ target = None
161
+ return (switch, target, start_new)
162
+ except Exception as e:
163
+ logger.debug(f"LLM switch intent parse_structured_output failed: {e}")
164
+
165
+ # Fallback: generate_response with output_model
166
+ try:
167
+ async for r in self.agent_service.generate_response(
168
+ agent_name="default",
169
+ user_id="router",
170
+ query="",
171
+ images=None,
172
+ memory_context="",
173
+ output_format="text",
174
+ prompt=f"{instruction}\n\n{user_prompt}",
175
+ output_model=QueryService._SwitchIntentModel,
176
+ ):
177
+ result = r
178
+ switch = False
179
+ target = None
180
+ start_new = False
181
+ try:
182
+ switch = bool(result.switch) # type: ignore[attr-defined]
183
+ target = result.target_agent # type: ignore[attr-defined]
184
+ start_new = bool(result.start_new) # type: ignore[attr-defined]
185
+ except Exception:
186
+ try:
187
+ d = result.model_dump()
188
+ switch = bool(d.get("switch", False))
189
+ target = d.get("target_agent")
190
+ start_new = bool(d.get("start_new", False))
191
+ except Exception:
192
+ pass
193
+ if target:
194
+ target_lower = str(target).lower()
195
+ norm = None
196
+ for a in available_agents:
197
+ if a.lower() == target_lower or target_lower in a.lower():
198
+ norm = a
199
+ break
200
+ target = norm
201
+ if not switch:
202
+ target = None
203
+ return (switch, target, start_new)
204
+ except Exception as e:
205
+ logger.debug(f"LLM switch intent generate_response failed: {e}")
206
+
207
+ # Last resort: no switch
208
+ return (False, None, False)
53
209
 
54
210
  async def process(
55
211
  self,
@@ -80,7 +236,7 @@ class QueryService(QueryServiceInterface):
80
236
  router: Optional[RoutingServiceInterface] = None,
81
237
  output_model: Optional[Type[BaseModel]] = None,
82
238
  capture_schema: Optional[Dict[str, Any]] = None,
83
- capture_name: Optional[Dict[str, Any]] = None,
239
+ capture_name: Optional[str] = None,
84
240
  ) -> AsyncGenerator[Union[str, bytes, BaseModel], None]: # pragma: no cover
85
241
  """Process the user request and generate a response."""
86
242
  try:
@@ -164,7 +320,7 @@ class QueryService(QueryServiceInterface):
164
320
  except Exception:
165
321
  kb_context = ""
166
322
 
167
- # 6) Route query (and fetch previous assistant message)
323
+ # 6) Determine agent (sticky session aware; allow explicit switch/new conversation)
168
324
  agent_name = "default"
169
325
  prev_assistant = ""
170
326
  routing_input = user_text
@@ -184,19 +340,52 @@ class QueryService(QueryServiceInterface):
184
340
  "assistant_message", ""
185
341
  ) or ""
186
342
  if prev_user_msg:
187
- routing_input = (
188
- f"previous_user_message: {prev_user_msg}\n"
189
- f"current_user_message: {user_text}"
190
- )
343
+ routing_input = f"previous_user_message: {prev_user_msg}\ncurrent_user_message: {user_text}"
191
344
  except Exception:
192
345
  pass
193
- try:
194
- if router:
195
- agent_name = await router.route_query(routing_input)
196
- else:
197
- agent_name = await self.routing_service.route_query(routing_input)
198
- except Exception:
199
- agent_name = "default"
346
+
347
+ # Get available agents first so the LLM can select a valid target
348
+ agents = self.agent_service.get_all_ai_agents() or {}
349
+ available_agent_names = list(agents.keys())
350
+
351
+ # LLM detects switch intent
352
+ (
353
+ switch_requested,
354
+ requested_agent_raw,
355
+ start_new,
356
+ ) = await self._detect_switch_intent(user_text, available_agent_names)
357
+
358
+ # Normalize requested agent to an exact available key
359
+ requested_agent = None
360
+ if requested_agent_raw:
361
+ raw_lower = requested_agent_raw.lower()
362
+ for a in available_agent_names:
363
+ if a.lower() == raw_lower or raw_lower in a.lower():
364
+ requested_agent = a
365
+ break
366
+
367
+ sticky_agent = self._get_sticky_agent(user_id)
368
+
369
+ if sticky_agent and not switch_requested:
370
+ agent_name = sticky_agent
371
+ else:
372
+ try:
373
+ if start_new:
374
+ # Start fresh
375
+ self._clear_sticky_agent(user_id)
376
+ if requested_agent:
377
+ agent_name = requested_agent
378
+ else:
379
+ # Route if no explicit target
380
+ if router:
381
+ agent_name = await router.route_query(routing_input)
382
+ else:
383
+ agent_name = await self.routing_service.route_query(
384
+ routing_input
385
+ )
386
+ except Exception:
387
+ agent_name = next(iter(agents.keys())) if agents else "default"
388
+ self._set_sticky_agent(user_id, agent_name, required_complete=False)
200
389
 
201
390
  # 7) Captured data context + incremental save using previous assistant message
202
391
  capture_context = ""
@@ -285,7 +474,6 @@ class QueryService(QueryServiceInterface):
285
474
  system_prompt=instruction,
286
475
  model_class=_FieldDetect,
287
476
  )
288
- # Read result
289
477
  sel = None
290
478
  try:
291
479
  sel = getattr(result, "field", None)
@@ -544,6 +732,11 @@ class QueryService(QueryServiceInterface):
544
732
 
545
733
  if lines:
546
734
  capture_context = "\n".join(lines) + "\n\n"
735
+ # Update sticky session completion flag
736
+ try:
737
+ self._update_sticky_required_complete(user_id, required_complete)
738
+ except Exception:
739
+ pass
547
740
 
548
741
  # Merge contexts + flow rules
549
742
  combined_context = ""
File without changes