solana-agent 23.0.7-py3-none-any.whl → 24.1.0-py3-none-any.whl
- solana_agent/adapters/llm_adapter.py +185 -17
- solana_agent/client/solana_agent.py +3 -3
- solana_agent/interfaces/client/client.py +1 -1
- solana_agent/interfaces/providers/llm.py +11 -2
- solana_agent/interfaces/services/agent.py +1 -1
- solana_agent/interfaces/services/query.py +4 -1
- solana_agent/repositories/memory.py +2 -2
- solana_agent/services/agent.py +274 -156
- solana_agent/services/query.py +4 -4
- {solana_agent-23.0.7.dist-info → solana_agent-24.1.0.dist-info}/METADATA +20 -23
- {solana_agent-23.0.7.dist-info → solana_agent-24.1.0.dist-info}/RECORD +13 -13
- {solana_agent-23.0.7.dist-info → solana_agent-24.1.0.dist-info}/LICENSE +0 -0
- {solana_agent-23.0.7.dist-info → solana_agent-24.1.0.dist-info}/WHEEL +0 -0
solana_agent/adapters/llm_adapter.py
CHANGED
@@ -3,10 +3,14 @@ LLM provider adapters for the Solana Agent system.
 
 These adapters implement the LLMProvider interface for different LLM services.
 """
-
+import asyncio
+import json
+from typing import Any, AsyncGenerator, Callable, Dict, Literal, Optional, Type, TypeVar
 
-
+import httpx
+from openai import AsyncOpenAI
 from pydantic import BaseModel
+import websockets
 
 from solana_agent.interfaces.providers.llm import LLMProvider
 
@@ -17,10 +21,9 @@ class OpenAIAdapter(LLMProvider):
     """OpenAI implementation of LLMProvider with web search capabilities."""
 
     def __init__(self, api_key: str):
-        self.client =
+        self.client = AsyncOpenAI(api_key=api_key)
         self.parse_model = "gpt-4o-mini"
         self.text_model = "gpt-4o-mini"
-        self.internet_search_model = "gpt-4o-mini-search-preview"
        self.transcription_model = "gpt-4o-mini-transcribe"
        self.tts_model = "gpt-4o-mini-tts"
 
@@ -45,7 +48,7 @@ class OpenAIAdapter(LLMProvider):
             Audio bytes as they become available
         """
         try:
-            with self.client.audio.speech.with_streaming_response.create(
+            async with self.client.audio.speech.with_streaming_response.create(
                 model=self.tts_model,
                 voice=voice,
                 instructions=instructions,
@@ -53,7 +56,7 @@ class OpenAIAdapter(LLMProvider):
                 response_format=response_format
             ) as stream:
                 # Stream the bytes in 16KB chunks
-                for chunk in stream.iter_bytes(chunk_size=1024 * 16):
+                async for chunk in stream.iter_bytes(chunk_size=1024 * 16):
                     yield chunk
 
         except Exception as e:
@@ -85,13 +88,13 @@ class OpenAIAdapter(LLMProvider):
             Transcript text chunks as they become available
         """
         try:
-            with self.client.audio.transcriptions.with_streaming_response.create(
+            async with self.client.audio.transcriptions.with_streaming_response.create(
                 model=self.transcription_model,
                 file=(f"file.{input_format}", audio_bytes),
                 response_format="text",
             ) as stream:
                 # Stream the text in 16KB chunks
-                for chunk in stream.iter_text(chunk_size=1024 * 16):
+                async for chunk in stream.iter_text(chunk_size=1024 * 16):
                     yield chunk
 
         except Exception as e:
@@ -104,7 +107,6 @@ class OpenAIAdapter(LLMProvider):
         self,
         prompt: str,
         system_prompt: str = "",
-        internet_search: bool = False,
     ) -> AsyncGenerator[str, None]:  # pragma: no cover
         """Generate text from OpenAI models."""
         messages = []
@@ -114,20 +116,16 @@ class OpenAIAdapter(LLMProvider):
 
         messages.append({"role": "user", "content": prompt})
 
-        model = self.text_model
-        if internet_search:
-            model = self.internet_search_model
-
         # Prepare request parameters
         request_params = {
             "messages": messages,
             "stream": True,
-            "model":
+            "model": self.text_model,
         }
         try:
-            response = self.client.chat.completions.create(**request_params)
+            response = await self.client.chat.completions.create(**request_params)
 
-            for chunk in response:
+            async for chunk in response:
                 if chunk.choices:
                     if chunk.choices[0].delta.content:
                         text = chunk.choices[0].delta.content
@@ -154,7 +152,7 @@ class OpenAIAdapter(LLMProvider):
 
         try:
             # First try the beta parsing API
-            completion = self.client.beta.chat.completions.parse(
+            completion = await self.client.beta.chat.completions.parse(
                 model=self.parse_model,
                 messages=messages,
                 response_format=model_class,
@@ -162,3 +160,173 @@ class OpenAIAdapter(LLMProvider):
             return completion.choices[0].message.parsed
         except Exception as e:
             print(f"Error with beta.parse method: {e}")
+
+    async def create_realtime_session(
+        self,
+        model: str = "gpt-4o-mini-realtime-preview",
+        modalities: list = ["audio", "text"],
+        instructions: str = "You are a helpful assistant.",
+        voice: str = "alloy",
+        input_audio_format: str = "pcm16",
+        output_audio_format: str = "pcm16",
+    ) -> Dict[str, Any]:  # pragma: no cover
+        """Create a realtime session token for WebSocket communication."""
+        try:
+            # Get the API key from the AsyncOpenAI client
+            api_key = self.client.api_key
+
+            # Create an async HTTP client
+            async with httpx.AsyncClient() as client:
+                response = await client.post(
+                    "https://api.openai.com/v1/realtime/sessions",
+                    json={
+                        "model": model,
+                        "modalities": modalities,
+                        "instructions": instructions,
+                        "voice": voice,
+                        "input_audio_format": input_audio_format,
+                        "output_audio_format": output_audio_format,
+                    },
+                    headers={
+                        "Authorization": f"Bearer {api_key}",
+                        "Content-Type": "application/json",
+                        "OpenAI-Beta": "realtime=v1"
+                    }
+                )
+
+                if response.status_code == 200:
+                    return response.json()
+                else:
+                    raise Exception(
+                        f"Failed to create realtime session: {response.text}")
+        except Exception as e:
+            print(f"Error creating realtime session: {str(e)}")
+            raise
+
+    async def realtime_audio_transcription(
+        self,
+        audio_generator: AsyncGenerator[bytes, None],
+        transcription_config: Optional[Dict[str, Any]] = None,
+        on_event: Optional[Callable[[Dict[str, Any]], Any]] = None,
+    ) -> AsyncGenerator[str, None]:  # pragma: no cover
+        """Stream real-time audio transcription using the Realtime API.
+
+        Args:
+            audio_generator: Async generator that yields audio chunks
+            transcription_config: Optional custom configuration for transcription
+            on_event: Optional callback function for handling raw events
+
+        Yields:
+            Transcription text as it becomes available
+        """
+        # Create default transcription config if none provided
+        if transcription_config is None:
+            transcription_config = {
+                "input_audio_format": "pcm16",
+                "input_audio_transcription": {
+                    "model": "gpt-4o-mini-transcribe"
+                },
+                "turn_detection": {
+                    "type": "server_vad",
+                    "threshold": 0.5,
+                    "prefix_padding_ms": 300,
+                    "silence_duration_ms": 200
+                }
+            }
+
+        try:
+            # Get the API key from the AsyncOpenAI client
+            api_key = self.client.api_key
+
+            # Create transcription session
+            async with httpx.AsyncClient() as client:
+                response = await client.post(
+                    "https://api.openai.com/v1/realtime/transcription_sessions",
+                    json=transcription_config,
+                    headers={
+                        "Authorization": f"Bearer {api_key}",
+                        "Content-Type": "application/json",
+                        "OpenAI-Beta": "realtime=v1"
+                    }
+                )
+
+                if response.status_code != 200:
+                    raise Exception(
+                        f"Failed to create transcription session: {response.text}")
+
+                session = response.json()
+                client_secret = session["client_secret"]["value"]
+
+            # Connect to WebSocket with proper headers as dictionary
+            url = "wss://api.openai.com/v1/realtime?model=gpt-4o-mini-transcribe"
+            headers = {
+                "Authorization": f"Bearer {client_secret}",
+                "OpenAI-Beta": "realtime=v1"
+            }
+
+            async with websockets.connect(url, additional_headers=headers) as websocket:
+                # Handle WebSocket communication in the background
+                audio_task = None
+
+                async def send_audio():
+                    try:
+                        async for audio_chunk in audio_generator:
+                            # Base64 encode the audio
+                            import base64
+                            encoded_audio = base64.b64encode(
+                                audio_chunk).decode('utf-8')
+
+                            # Send audio chunk
+                            await websocket.send(json.dumps({
+                                "type": "input_audio_buffer.append",
+                                "audio": encoded_audio
+                            }))
+
+                            # Small delay to prevent flooding
+                            await asyncio.sleep(0.05)
+
+                        # Commit the audio buffer when done
+                        await websocket.send(json.dumps({
+                            "type": "input_audio_buffer.commit"
+                        }))
+                    except Exception as e:
+                        print(f"Error sending audio: {str(e)}")
+
+                # Start sending audio in the background
+                audio_task = asyncio.create_task(send_audio())
+
+                # Process transcription events
+                try:
+                    while True:
+                        message = await websocket.recv()
+                        event = json.loads(message)
+
+                        if on_event:
+                            # Check if on_event is a coroutine function and await it if needed
+                            if asyncio.iscoroutinefunction(on_event):
+                                await on_event(event)
+                            else:
+                                on_event(event)
+
+                        # Extract transcription deltas
+                        if event["type"] == "conversation.item.input_audio_transcription.delta":
+                            yield event["delta"]
+
+                        # Also handle completed transcriptions
+                        elif event["type"] == "conversation.item.input_audio_transcription.completed":
+                            yield event["transcript"]
+                            break
+                finally:
+                    # Clean up audio task if it's still running
+                    if audio_task and not audio_task.done():
+                        audio_task.cancel()
+                        try:
+                            await audio_task
+                        except asyncio.CancelledError:
+                            pass
+
+        except Exception as e:
+            print(f"Error in realtime audio transcription: {str(e)}")
+            import traceback
+            print(traceback.format_exc())
+            yield f"I apologize, but I encountered an error transcribing the audio: {str(e)}"
solana_agent/client/solana_agent.py
CHANGED
@@ -55,8 +55,8 @@ class SolanaAgent(SolanaAgentInterface):
         audio_input_format: Literal[
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
+        audio_transcription_real_time: bool = True,
         router: Optional[RoutingInterface] = None,
-        internet_search: bool = False,
     ) -> AsyncGenerator[Union[str, bytes], None]:  # pragma: no cover
         """Process a user message and return the response stream.
 
@@ -69,8 +69,8 @@ class SolanaAgent(SolanaAgentInterface):
             audio_instructions: Audio voice instructions
             audio_output_format: Audio output format
             audio_input_format: Audio input format
+            audio_transcription_real_time: Flag for real-time audio transcription
             router: Optional routing service for processing
-            internet_search: Flag to use OpenAI Internet search
 
         Returns:
             Async generator yielding response chunks (text strings or audio bytes)
@@ -85,7 +85,7 @@ class SolanaAgent(SolanaAgentInterface):
             audio_input_format=audio_input_format,
             prompt=prompt,
             router=router,
-
+            audio_transcription_real_time=audio_transcription_real_time,
         ):
             yield chunk
 
solana_agent/interfaces/client/client.py
CHANGED
@@ -24,7 +24,7 @@ class SolanaAgent(ABC):
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
         router: Optional[RoutingInterface] = None,
-
+        audio_transcription_real_time: bool = True,
     ) -> AsyncGenerator[Union[str, bytes], None]:
         """Process a user message and return the response stream."""
         pass
solana_agent/interfaces/providers/llm.py
CHANGED
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import AsyncGenerator, List, Literal, Type, TypeVar
+from typing import Any, AsyncGenerator, Callable, Dict, List, Literal, Optional, Type, TypeVar, Union
 
 from pydantic import BaseModel
 
@@ -15,7 +15,6 @@ class LLMProvider(ABC):
         self,
         prompt: str,
         system_prompt: str = "",
-        internet_search: bool = False,
     ) -> AsyncGenerator[str, None]:
         """Generate text from the language model."""
         pass
@@ -50,3 +49,13 @@ class LLMProvider(ABC):
     ) -> AsyncGenerator[str, None]:
         """Transcribe audio from the language model."""
         pass
+
+    @abstractmethod
+    async def realtime_audio_transcription(
+        self,
+        audio_generator: AsyncGenerator[bytes, None],
+        transcription_config: Optional[Dict[str, Any]] = None,
+        on_event: Optional[Callable[[Dict[str, Any]], Any]] = None,
+    ) -> AsyncGenerator[str, None]:
+        """Stream real-time audio transcription from the language model."""
+        pass
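Since `realtime_audio_transcription` is now abstract, any third-party `LLMProvider` implementation must supply it. A hedged sketch of a stub matching the new method's shape — the `EchoTranscriber` name and byte-counting behavior are illustrative only, not from the package:

```python
from typing import Any, AsyncGenerator, Callable, Dict, Optional


class EchoTranscriber:
    # Illustrative stand-in matching the new abstract method's signature;
    # a real provider would also implement the other LLMProvider methods.
    async def realtime_audio_transcription(
        self,
        audio_generator: AsyncGenerator[bytes, None],
        transcription_config: Optional[Dict[str, Any]] = None,
        on_event: Optional[Callable[[Dict[str, Any]], Any]] = None,
    ) -> AsyncGenerator[str, None]:
        total = 0
        async for chunk in audio_generator:
            total += len(chunk)
            if on_event:
                on_event({"type": "chunk.received", "bytes": len(chunk)})
        yield f"<received {total} bytes of audio>"
```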
solana_agent/interfaces/services/agent.py
CHANGED
@@ -34,7 +34,7 @@ class AgentService(ABC):
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
         prompt: Optional[str] = None,
-
+        audio_transcription_real_time: bool = True,
     ) -> AsyncGenerator[Union[str, bytes], None]:
         """Generate a response from an agent."""
         pass
solana_agent/interfaces/services/query.py
CHANGED
@@ -1,6 +1,8 @@
 from abc import ABC, abstractmethod
 from typing import Any, AsyncGenerator, Dict, Literal, Optional, Union
 
+from solana_agent.interfaces.services.routing import RoutingService as RoutingInterface
+
 
 class QueryService(ABC):
     """Interface for processing user queries."""
@@ -20,7 +22,8 @@ class QueryService(ABC):
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
         prompt: Optional[str] = None,
-
+        router: Optional[RoutingInterface] = None,
+        audio_transcription_real_time: bool = True,
     ) -> AsyncGenerator[Union[str, bytes], None]:
         """Process the user request and generate a response."""
         pass
|
@@ -69,8 +69,8 @@ class MemoryRepository(MemoryProvider):
|
|
69
69
|
# Store truncated messages
|
70
70
|
doc = {
|
71
71
|
"user_id": user_id,
|
72
|
-
"user_message":
|
73
|
-
"assistant_message":
|
72
|
+
"user_message": user_msg,
|
73
|
+
"assistant_message": assistant_msg,
|
74
74
|
"timestamp": datetime.now(timezone.utc)
|
75
75
|
}
|
76
76
|
self.mongo.insert_one(self.collection, doc)
|
solana_agent/services/agent.py
CHANGED
@@ -176,8 +176,8 @@ class AgentService(AgentServiceInterface):
         audio_input_format: Literal[
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
+        audio_transcription_real_time: bool = True,
         prompt: Optional[str] = None,
-        internet_search: bool = False,
     ) -> AsyncGenerator[Union[str, bytes], None]:  # pragma: no cover
         """Generate a response with support for text/audio input/output."""
         agent = next((a for a in self.agents if a.name == agent_name), None)
@@ -192,11 +192,25 @@ class AgentService(AgentServiceInterface):
             return
 
         try:
-            # Handle audio input if provided
+            # Handle audio input if provided - KEEP REAL-TIME AUDIO TRANSCRIPTION
             query_text = ""
             if not isinstance(query, str):
-
-
+                if audio_transcription_real_time and hasattr(self.llm_provider, "realtime_audio_transcription"):
+                    # Use realtime transcription for faster processing if available
+                    print("Using realtime audio transcription")
+                    async for transcript in self.llm_provider.realtime_audio_transcription(
+                        audio_generator=self._bytes_to_generator(query),
+                        transcription_config={
+                            "input_audio_format": audio_input_format}
+                    ):
+                        query_text += transcript
+                else:
+                    # Fall back to standard transcription
+                    print("Using standard audio transcription")
+                    async for transcript in self.llm_provider.transcribe_audio(query, input_format=audio_input_format):
+                        query_text += transcript
+
+                print(f"Transcribed query: {query_text}")
             else:
                 query_text = query
 
@@ -210,119 +224,172 @@ class AgentService(AgentServiceInterface):
             if prompt:
                 system_prompt += f"\n\nADDITIONAL PROMPT: {prompt}"
 
-            #
+            # Add tool usage prompt if tools are available
             tool_calling_system_prompt = deepcopy(system_prompt)
             if self.tool_registry:
                 tool_usage_prompt = self._get_tool_usage_prompt(agent_name)
                 if tool_usage_prompt:
                     tool_calling_system_prompt += f"\n\nTOOL CALLING PROMPT: {tool_usage_prompt}"
+                    print(
+                        f"Tools available to agent {agent_name}: {[t.get('name') for t in self.get_agent_tools(agent_name)]}")
 
-            # Variables for tracking the response
+            # Variables for tracking the complete response
             complete_text_response = ""
-
-            # For audio output, we'll collect everything first
             full_response_buffer = ""
 
-            # Variables for handling
-
-
+            # Variables for robust handling of tool call markers that may be split across chunks
+            tool_buffer = ""
+            pending_chunk = ""  # To hold text that might contain partial markers
+            is_tool_call = False
+            window_size = 30  # Increased window size for better detection
 
-            #
+            # Define start and end markers
+            start_marker = "[TOOL]"
+            end_marker = "[/TOOL]"
+
+            # Generate and stream response (ALWAYS use non-realtime for text generation)
+            print(
+                f"Generating response with {len(query_text)} characters of query text")
             async for chunk in self.llm_provider.generate_text(
                 prompt=query_text,
                 system_prompt=tool_calling_system_prompt,
-                internet_search=internet_search,
             ):
-                #
-                if
-
-
+                # If we have pending text from the previous chunk, combine it with this chunk
+                if pending_chunk:
+                    combined_chunk = pending_chunk + chunk
+                    pending_chunk = ""  # Reset pending chunk
+                else:
+                    combined_chunk = chunk
+
+                # STEP 1: Check for tool call start marker
+                if start_marker in combined_chunk and not is_tool_call:
+                    print(
+                        f"Found tool start marker in chunk of length {len(combined_chunk)}")
+                    is_tool_call = True
+
+                    # Extract text before the marker and the marker itself with everything after
+                    start_pos = combined_chunk.find(start_marker)
+                    before_marker = combined_chunk[:start_pos]
+                    after_marker = combined_chunk[start_pos:]
+
+                    # Yield text that appeared before the marker
+                    if before_marker and output_format == "text":
+                        yield before_marker
+
+                    # Start collecting the tool call
+                    tool_buffer = after_marker
+                    continue  # Skip to next chunk
+
+                # STEP 2: Handle ongoing tool call collection
+                if is_tool_call:
+                    tool_buffer += combined_chunk
+
+                    # Check if the tool call is complete
+                    if end_marker in tool_buffer:
+                        print(
+                            f"Tool call complete, buffer size: {len(tool_buffer)}")
+
+                        # Process the tool call
+                        response_text = await self._handle_tool_call(
+                            agent_name=agent_name,
+                            tool_text=tool_buffer
+                        )
+
+                        # Clean the response to remove any markers or formatting
+                        response_text = self._clean_tool_response(
+                            response_text)
+                        print(
+                            f"Tool execution complete, result size: {len(response_text)}")
+
+                        # Create new prompt with search/tool results
+                        # Using "Search Result" instead of "TOOL RESPONSE" to avoid model repeating "TOOL"
+                        user_prompt = f"{query_text}\n\nSearch Result: {response_text}"
+                        tool_system_prompt = system_prompt + \
+                            "\n DO NOT use the tool calling format again."
+
+                        # Generate a new response with the tool results
+                        print("Generating new response with tool results")
+                        if output_format == "text":
+                            # Stream the follow-up response for text output
+                            async for processed_chunk in self.llm_provider.generate_text(
+                                prompt=user_prompt,
+                                system_prompt=tool_system_prompt,
+                            ):
+                                complete_text_response += processed_chunk
+                                yield processed_chunk
+                        else:
+                            # For audio output, collect the full response first
+                            tool_response = ""
+                            async for processed_chunk in self.llm_provider.generate_text(
+                                prompt=user_prompt,
+                                system_prompt=tool_system_prompt,
+                            ):
+                                tool_response += processed_chunk
+
+                            # Clean and add to our complete text record and audio buffer
+                            tool_response = self._clean_for_audio(
+                                tool_response)
+                            complete_text_response += tool_response
+                            full_response_buffer += tool_response
+
+                        # Reset tool handling state
+                        is_tool_call = False
+                        tool_buffer = ""
+                        pending_chunk = ""
+                        break  # Exit the original generation loop after tool processing
+
+                    # Continue collecting tool call content without yielding
                     continue
 
-                #
-
-
-
-
-
-
-
-
-
-
-
-                )
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    prompt=user_prompt,
-                    system_prompt=tool_system_prompt,
-                ):
-                    complete_text_response += processed_chunk
-                    yield processed_chunk
-                else:
-                    # For audio output, collect the full tool response first
-                    tool_response = ""
-                    async for processed_chunk in self.llm_provider.generate_text(
-                        prompt=user_prompt,
-                        system_prompt=tool_system_prompt,
-                    ):
-                        tool_response += processed_chunk
-
-                    # Add to our complete text record and full audio buffer
-                    tool_response = self._clean_for_audio(
-                        tool_response)
-                    complete_text_response += tool_response
-                    full_response_buffer += tool_response
-                else:
-                    # For non-tool JSON, still capture the text
-                    complete_text_response += json_buffer
-
-                    if output_format == "text":
-                        yield json_buffer
-                    else:
-                        # Add to full response buffer for audio
-                        full_response_buffer += json_buffer
-
-                    # Reset JSON handling
-                    is_json = False
-                    json_buffer = ""
-
-                except json.JSONDecodeError:
-                    # JSON not complete yet, continue collecting
-                    pass
-                else:
-                    # For regular text
-                    complete_text_response += chunk
-
-                    if output_format == "text":
-                        # For text output, yield directly
-                        yield chunk
-                    else:
-                        # For audio output, add to the full response buffer
-                        full_response_buffer += chunk
-
-            # Handle any leftover JSON buffer
-            if json_buffer:
-                complete_text_response += json_buffer
+                # STEP 3: Check for possible partial start markers at the end of the chunk
+                # This helps detect markers split across chunks
+                potential_marker = False
+                for i in range(1, len(start_marker)):
+                    if combined_chunk.endswith(start_marker[:i]):
+                        # Found a partial marker at the end
+                        # Save the partial marker
+                        pending_chunk = combined_chunk[-i:]
+                        # Everything except the partial marker
+                        chunk_to_yield = combined_chunk[:-i]
+                        potential_marker = True
+                        print(
+                            f"Potential partial marker detected: '{pending_chunk}'")
+                        break
+
+                if potential_marker:
+                    # Process the safe part of the chunk
+                    if chunk_to_yield and output_format == "text":
+                        yield chunk_to_yield
+                    if chunk_to_yield:
+                        complete_text_response += chunk_to_yield
+                        if output_format == "audio":
+                            full_response_buffer += chunk_to_yield
+                    continue
+
+                # STEP 4: Normal text processing for non-tool call content
                 if output_format == "text":
-                    yield
-                else:
-                    full_response_buffer += json_buffer
+                    yield combined_chunk
 
-
+                complete_text_response += combined_chunk
+                if output_format == "audio":
+                    full_response_buffer += combined_chunk
+
+            # Process any incomplete tool call as regular text
+            if is_tool_call and tool_buffer:
+                print(
+                    f"Incomplete tool call detected, returning as regular text: {len(tool_buffer)} chars")
+                if output_format == "text":
+                    yield tool_buffer
+
+                complete_text_response += tool_buffer
+                if output_format == "audio":
+                    full_response_buffer += tool_buffer
+
+            # For audio output, generate speech from the complete buffer
             if output_format == "audio" and full_response_buffer:
                 # Clean text before TTS
+                print(
+                    f"Processing {len(full_response_buffer)} characters for audio output")
                 full_response_buffer = self._clean_for_audio(
                     full_response_buffer)
 
@@ -337,9 +404,15 @@ class AgentService(AgentServiceInterface):
 
             # Store the complete text response
             self.last_text_response = complete_text_response
+            print(
+                f"Response generation complete: {len(complete_text_response)} chars")
 
         except Exception as e:
             error_msg = f"I apologize, but I encountered an error: {str(e)}"
+            print(f"Error in generate_response: {str(e)}")
+            import traceback
+            print(traceback.format_exc())
+
             if output_format == "audio":
                 async for chunk in self.llm_provider.tts(
                     error_msg,
@@ -351,52 +424,73 @@ class AgentService(AgentServiceInterface):
             else:
                 yield error_msg
 
-
-
-            print(traceback.format_exc())
+    async def _bytes_to_generator(self, data: bytes) -> AsyncGenerator[bytes, None]:
+        """Convert bytes to an async generator for streaming.
 
-
-
-
-
-
-        """
+        Args:
+            data: Bytes of audio data
+
+        Yields:
+            Chunks of audio data
+        """
+        # Define a reasonable chunk size (adjust based on your needs)
+        chunk_size = 4096
+
+        for i in range(0, len(data), chunk_size):
+            yield data[i:i + chunk_size]
+            # Small delay to simulate streaming
+            await asyncio.sleep(0.01)
+
+    async def _handle_tool_call(self, agent_name: str, tool_text: str) -> str:
+        """Handle marker-based tool calls."""
         try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Extract the content between markers
+            start_marker = "[TOOL]"
+            end_marker = "[/TOOL]"
+
+            start_idx = tool_text.find(start_marker) + len(start_marker)
+            end_idx = tool_text.find(end_marker)
+
+            tool_content = tool_text[start_idx:end_idx].strip()
+
+            # Parse the lines to extract name and parameters
+            tool_name = None
+            parameters = {}
+
+            for line in tool_content.split("\n"):
+                line = line.strip()
+                if not line:
+                    continue
+
+                if line.startswith("name:"):
+                    tool_name = line[5:].strip()
+                elif line.startswith("parameters:"):
+                    params_text = line[11:].strip()
+                    # Parse comma-separated parameters
+                    param_pairs = params_text.split(",")
+                    for pair in param_pairs:
+                        if "=" in pair:
+                            k, v = pair.split("=", 1)
+                            parameters[k.strip()] = v.strip()
+
+            # Execute the tool
+            result = await self.execute_tool(agent_name, tool_name, parameters)
+
+            # Return the result as string
+            if result.get("status") == "success":
+                tool_result = str(result.get("result", ""))
+                return tool_result
             else:
-
+                error_msg = f"Error calling {tool_name}: {result.get('message', 'Unknown error')}"
+                return error_msg
 
-            # If we get here, it wasn't properly handled as a tool
-            return f"The following request was not processed as a valid tool call:\n{json_chunk}"
-        except json.JSONDecodeError as e:
-            print(f"JSON decode error in tool call: {e}")
-            return json_chunk
         except Exception as e:
-            print(f"Unexpected error in tool call handling: {str(e)}")
             import traceback
             print(traceback.format_exc())
             return f"Error processing tool call: {str(e)}"
 
     def _get_tool_usage_prompt(self, agent_name: str) -> str:
-        """Generate
+        """Generate marker-based instructions for tool usage."""
         # Get tools assigned to this agent
         tools = self.get_agent_tools(agent_name)
         if not tools:
@@ -407,29 +501,38 @@ class AgentService(AgentServiceInterface):
         tools_json = json.dumps(tools, indent=2)
 
         return f"""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+AVAILABLE TOOLS:
+{tools_json}
+
+⚠️ CRITICAL INSTRUCTION: When using a tool, NEVER include explanatory text.
+Only output the exact tool call format shown below with NO other text.
+
+TOOL USAGE FORMAT:
+[TOOL]
+name: tool_name
+parameters: key1=value1, key2=value2
+[/TOOL]
+
+EXAMPLES:
+
+✅ CORRECT - ONLY the tool call with NOTHING else:
+[TOOL]
+name: search_internet
+parameters: query=latest news on Solana
+[/TOOL]
+
+❌ INCORRECT - Never add explanatory text like this:
+To get the latest news on Solana, I will search the internet.
+[TOOL]
+name: search_internet
+parameters: query=latest news on Solana
+[/TOOL]
+
+REMEMBER:
+1. Output ONLY the exact tool call format with NO additional text
+2. After seeing your tool call, I will execute it automatically
+3. You will receive the tool results and can then respond to the user
+"""
 
     def _clean_for_audio(self, text: str) -> str:
         """Remove Markdown formatting, emojis, and non-pronounceable characters from text.
@@ -504,3 +607,18 @@ class AgentService(AgentServiceInterface):
         text = re.sub(r'\s+', ' ', text)
 
         return text.strip()
+
+    def _clean_tool_response(self, text: str) -> str:
+        """Remove any tool markers or formatting that might have leaked into the response."""
+        if not text:
+            return ""
+
+        # Remove any tool markers that might be in the response
+        text = text.replace("[TOOL]", "")
+        text = text.replace("[/TOOL]", "")
+
+        # Remove the word TOOL from start if it appears
+        if text.lstrip().startswith("TOOL"):
+            text = text.lstrip().replace("TOOL", "", 1)
+
+        return text.strip()
solana_agent/services/query.py
CHANGED
@@ -47,9 +47,9 @@ class QueryService(QueryServiceInterface):
         audio_input_format: Literal[
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
+        audio_transcription_real_time: bool = True,
         prompt: Optional[str] = None,
         router: Optional[RoutingServiceInterface] = None,
-        internet_search: bool = False,
     ) -> AsyncGenerator[Union[str, bytes], None]:  # pragma: no cover
         """Process the user request with appropriate agent.
 
@@ -61,9 +61,9 @@ class QueryService(QueryServiceInterface):
             audio_instructions: Audio voice instructions
             audio_output_format: Audio output format
             audio_input_format: Audio input format
+            audio_transcription_real_time: Flag for real-time audio transcription
             prompt: Optional prompt for the agent
             router: Optional routing service for processing
-            internet_search: Flag to use OpenAI Internet search
 
         Yields:
             Response chunks (text strings or audio bytes)
@@ -122,7 +122,7 @@ class QueryService(QueryServiceInterface):
                 audio_output_format=audio_output_format,
                 audio_instructions=audio_instructions,
                 prompt=prompt,
-
+                audio_transcription_real_time=audio_transcription_real_time,
             ):
                 yield audio_chunk
 
@@ -141,7 +141,7 @@ class QueryService(QueryServiceInterface):
                 memory_context=memory_context,
                 output_format="text",
                 prompt=prompt,
-
+                audio_transcription_real_time=audio_transcription_real_time,
             ):
                 yield chunk
                 full_text_response += chunk
{solana_agent-23.0.7.dist-info → solana_agent-24.1.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: solana-agent
-Version: 23.0.7
+Version: 24.1.0
 Summary: Agentic IQ
 License: MIT
 Keywords: ai,openai,ai agents,agi
@@ -14,9 +14,11 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Dist:
+Requires-Dist: httpx (>=0.28.1,<0.29.0)
+Requires-Dist: openai (>=1.71.0,<2.0.0)
 Requires-Dist: pydantic (>=2.11.2,<3.0.0)
 Requires-Dist: pymongo (>=4.11.3,<5.0.0)
+Requires-Dist: websockets (>=15.0.1,<16.0.0)
 Requires-Dist: zep-cloud (>=2.9.0,<3.0.0)
 Project-URL: Documentation, https://docs.solana-agent.com
 Project-URL: Repository, https://github.com/truemagic-coder/solana-agent
@@ -41,10 +43,11 @@ Build your AI business in three lines of code!
 
 ## Why?
 * Three lines of code setup
+* Fast Responses
 * Multi-Agent Swarm
 * Multi-Modal Streaming (Text & Audio)
 * Conversational Memory & History
-*
+* Internet Search
 * Intelligent Routing
 * Business Alignment
 * Extensible Tooling
@@ -56,11 +59,12 @@ Build your AI business in three lines of code!
 ## Features
 
 * Easy three lines of code setup
+* Fast AI responses
 * Designed for a multi-agent swarm
 * Seamless text and audio streaming with real-time multi-modal processing
 * Configurable audio voice characteristics via prompting
 * Persistent memory that preserves context across all agent interactions
-* Quick
+* Quick Internet search to answer users' queries
 * Streamlined message history for all agent interactions
 * Intelligent query routing to agents with optimal domain expertise or your own custom routing
 * Unified value system ensuring brand-aligned agent responses
@@ -82,7 +86,6 @@ Build your AI business in three lines of code!
 * [gpt-4o-mini](https://platform.openai.com/docs/models/gpt-4o-mini)
 * [gpt-4o-mini-tts](https://platform.openai.com/docs/models/gpt-4o-mini-tts)
 * [gpt-4o-mini-transcribe](https://platform.openai.com/docs/models/gpt-4o-mini-transcribe)
-* [gpt-4o-mini-search-preview](https://platform.openai.com/docs/models/gpt-4o-mini-search-preview)
 
 ## Installation
 
@@ -353,21 +356,6 @@ API Calls:
 
 * If the Zep user and session isn't created it creates them for 2 API calls (POST)
 
-### Internet Search
-
-This mode is great for text output where the default response from OpenAI is enough.
-
-It is not suitable for audio as the OpenAI search results contain links and markdown.
-
-Also it may not call tools when they should be called as it thinks the search results answer the user query.
-
-It is much faster than calling `search_internet` from `sakit` as it saves 2 API calls.
-
-```python
-async for response in solana_agent.process("user123", "What is the latest news on Canada?", internet_search=True):
-    print(response, end="")
-```
-
 ### Customize Speech
 
 This is an audio to audio example using the `audio_instructions` parameter.
@@ -387,16 +375,25 @@ async for response in solana_agent.process("user123", audio_content, output_form
     print(response, end="")
 ```
 
+### Real-Time Audio Transcription
+
+It is possible to disable real-time audio transcription responses to save on costs.
+
+```python
+async for response in solana_agent.process("user123", "What is the latest news on Canada?", audio_transcription_real_time=False):
+    print(response, end="")
+```
+
 ## Tools
 
 Tools can be used from plugins like Solana Agent Kit (sakit) or via inline tools. Tools available via plugins integrate automatically with Solana Agent.
 
 * Agents can only call one tool per response
 * Agents choose the best tool for the job
-*
-*
+* Solana Agent doesn't use OpenAI function calling (tools) as they don't support async functions
+* Solana Agent tools are async functions
 
-### Plugin
+### Internet Search (Plugin Example)
 
 `pip install sakit`
 
{solana_agent-23.0.7.dist-info → solana_agent-24.1.0.dist-info}/RECORD
CHANGED
@@ -1,22 +1,22 @@
 solana_agent/__init__.py,sha256=ceYeUpjIitpln8YK1r0JVJU8mzG6cRPYu-HLny3d-Tw,887
 solana_agent/adapters/__init__.py,sha256=tiEEuuy0NF3ngc_tGEcRTt71zVI58v3dYY9RvMrF2Cg,204
-solana_agent/adapters/llm_adapter.py,sha256=
+solana_agent/adapters/llm_adapter.py,sha256=LLRRIhtJcPrNd2qIAHmEsFE5YyuUg53-POoiNKIradQ,12833
 solana_agent/adapters/mongodb_adapter.py,sha256=qqEFbY_v1XGyFXBmwd5HSXSSHnA9wWo-Hm1vGEyIG0k,2718
 solana_agent/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-solana_agent/client/solana_agent.py,sha256=
+solana_agent/client/solana_agent.py,sha256=iIRuwOP1jChAgiP_ewW2lEOV-PE6AtVROlt-s8mBbyg,5415
 solana_agent/domains/__init__.py,sha256=HiC94wVPRy-QDJSSRywCRrhrFfTBeHjfi5z-QfZv46U,168
 solana_agent/domains/agent.py,sha256=WTo-pEc66V6D_35cpDE-kTsw1SJM-dtylPZ7em5em7Q,2659
 solana_agent/domains/routing.py,sha256=UDlgTjUoC9xIBVYu_dnf9-KG_bBgdEXAv_UtDOrYo0w,650
 solana_agent/factories/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 solana_agent/factories/agent_factory.py,sha256=mJQb1G0-gebizZvSVHm4NAxRMB1kemm2w_BAcYlN15Y,5496
 solana_agent/interfaces/__init__.py,sha256=IQs1WIM1FeKP1-kY2FEfyhol_dB-I-VAe2rD6jrVF6k,355
-solana_agent/interfaces/client/client.py,sha256=
+solana_agent/interfaces/client/client.py,sha256=ymZiJEVy966HKVTZR75MdcrTfct6MPielHKdvfCYF_g,1742
 solana_agent/interfaces/plugins/plugins.py,sha256=T8HPBsekmzVwfU_Rizp-vtzAeYkMlKMYD7U9d0Wjq9c,3338
 solana_agent/interfaces/providers/data_storage.py,sha256=NqGeFvAzhz9rr-liLPRNCGjooB2EIhe-EVsMmX__b0M,1658
-solana_agent/interfaces/providers/llm.py,sha256=
+solana_agent/interfaces/providers/llm.py,sha256=09E6NgMcIpf_nJGgdVLjlZAF2HGHtW5EmhIbaEiylt0,1972
 solana_agent/interfaces/providers/memory.py,sha256=oNOH8WZXVW8assDigIWZAWiwkxbpDiKupxA2RB6tQvQ,1010
-solana_agent/interfaces/services/agent.py,sha256=
-solana_agent/interfaces/services/query.py,sha256=
+solana_agent/interfaces/services/agent.py,sha256=KHGFjmxj0yE04VTeNa6Jpk-34OEMhDgAtzmPkpUBdRA,2165
+solana_agent/interfaces/services/query.py,sha256=2i-Qq4Bel5P5U1O5wWUYzYoECFwiMkNj7n0K1v1edd4,1532
 solana_agent/interfaces/services/routing.py,sha256=UzJC-z-Q9puTWPFGEo2_CAhIxuxP5IRnze7S66NSrsI,397
 solana_agent/plugins/__init__.py,sha256=coZdgJKq1ExOaj6qB810i3rEhbjdVlrkN76ozt_Ojgo,193
 solana_agent/plugins/manager.py,sha256=Il49hXeqvu0b02pURNNp7mY8kp9_sqpi_vJIWBW5Hc0,5044
@@ -24,12 +24,12 @@ solana_agent/plugins/registry.py,sha256=5S0DlUQKogsg1zLiRUIGMHEmGYHtOovU-S-5W1Mw
 solana_agent/plugins/tools/__init__.py,sha256=c0z7ij42gs94_VJrcn4Y8gUlTxMhsFNY6ahIsNswdLk,231
 solana_agent/plugins/tools/auto_tool.py,sha256=DgES_cZ6xKSf_HJpFINpvJxrjVlk5oeqa7pZRBsR9SM,1575
 solana_agent/repositories/__init__.py,sha256=fP83w83CGzXLnSdq-C5wbw9EhWTYtqE2lQTgp46-X_4,163
-solana_agent/repositories/memory.py,sha256=
+solana_agent/repositories/memory.py,sha256=75zuqAMn4YFafiLsE8RvjFNd3p5ensXbFWv6VvlhFtE,7297
 solana_agent/services/__init__.py,sha256=ab_NXJmwYUCmCrCzuTlZ47bJZINW0Y0F5jfQ9OovidU,163
-solana_agent/services/agent.py,sha256=
-solana_agent/services/query.py,sha256=
+solana_agent/services/agent.py,sha256=d6Sv6W6Vtuhf5JHknUchjAD8XSUOkXALkIImnre93j8,25524
+solana_agent/services/query.py,sha256=vWopHKES-K0KpxPCSZNyunRJrkBVGGQC13awd0Sd56M,11450
 solana_agent/services/routing.py,sha256=PMCSG5m3uLMaHMj3dxNvNfcFZaeaDi7kMr7AEBCzwDE,6499
-solana_agent-
-solana_agent-
-solana_agent-
-solana_agent-
+solana_agent-24.1.0.dist-info/LICENSE,sha256=BnSRc-NSFuyF2s496l_4EyrwAP6YimvxWcjPiJ0J7g4,1057
+solana_agent-24.1.0.dist-info/METADATA,sha256=WC9LoaQVgFHhA0bfXC_c57iYU7V-ZW1TJVDrCbmUmh0,20685
+solana_agent-24.1.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+solana_agent-24.1.0.dist-info/RECORD,,
File without changes
|
File without changes
|