PyPI - solana-agent - Versions diffs - 24.0.0__tar.gz → 24.1.0__tar.gz - Mend

solana-agent 24.0.0tar.gz → 24.1.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

{solana_agent-24.0.0 → solana_agent-24.1.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: solana-agent
-Version: 24.0.0
+Version: 24.1.0
 Summary: Agentic IQ
 License: MIT
 Keywords: ai,openai,ai agents,agi
@@ -14,9 +14,11 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Dist: httpx (>=0.28.1,<0.29.0)
 Requires-Dist: openai (>=1.71.0,<2.0.0)
 Requires-Dist: pydantic (>=2.11.2,<3.0.0)
 Requires-Dist: pymongo (>=4.11.3,<5.0.0)
+Requires-Dist: websockets (>=15.0.1,<16.0.0)
 Requires-Dist: zep-cloud (>=2.9.0,<3.0.0)
 Project-URL: Documentation, https://docs.solana-agent.com
 Project-URL: Repository, https://github.com/truemagic-coder/solana-agent
@@ -41,6 +43,7 @@ Build your AI business in three lines of code!
 ## Why?
 * Three lines of code setup
+* Fast Responses
 * Multi-Agent Swarm
 * Multi-Modal Streaming (Text & Audio)
 * Conversational Memory & History
@@ -56,6 +59,7 @@ Build your AI business in three lines of code!
 ## Features
 * Easy three lines of code setup
+* Fast AI responses
 * Designed for a multi-agent swarm
 * Seamless text and audio streaming with real-time multi-modal processing
 * Configurable audio voice characteristics via prompting
@@ -371,6 +375,15 @@ async for response in solana_agent.process("user123", audio_content, output_form
     print(response, end="")
 ```
+### Real-Time Audio Transcription
+Real-time audio transcription is enabled by default. Pass `audio_transcription_real_time=False` to disable it and save on costs.
+```python
+async for response in solana_agent.process("user123", "What is the latest news on Canada?", audio_transcription_real_time=False):
+    print(response, end="")
+```
 ## Tools
 Tools can be used from plugins like Solana Agent Kit (sakit) or via inline tools. Tools available via plugins integrate automatically with Solana Agent.

{solana_agent-24.0.0 → solana_agent-24.1.0}/README.md RENAMED Viewed

@@ -17,6 +17,7 @@ Build your AI business in three lines of code!
 ## Why?
 * Three lines of code setup
+* Fast Responses
 * Multi-Agent Swarm
 * Multi-Modal Streaming (Text & Audio)
 * Conversational Memory & History
@@ -32,6 +33,7 @@ Build your AI business in three lines of code!
 ## Features
 * Easy three lines of code setup
+* Fast AI responses
 * Designed for a multi-agent swarm
 * Seamless text and audio streaming with real-time multi-modal processing
 * Configurable audio voice characteristics via prompting
@@ -347,6 +349,15 @@ async for response in solana_agent.process("user123", audio_content, output_form
     print(response, end="")
 ```
+### Real-Time Audio Transcription
+Real-time audio transcription is enabled by default. Pass `audio_transcription_real_time=False` to disable it and save on costs.
+```python
+async for response in solana_agent.process("user123", "What is the latest news on Canada?", audio_transcription_real_time=False):
+    print(response, end="")
+```
 ## Tools
 Tools can be used from plugins like Solana Agent Kit (sakit) or via inline tools. Tools available via plugins integrate automatically with Solana Agent.

{solana_agent-24.0.0 → solana_agent-24.1.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "solana-agent"
-version = "24.0.0"
+version = "24.1.0"
 description = "Agentic IQ"
 authors = ["Bevan Hunt <bevan@bevanhunt.com>"]
 license = "MIT"
@@ -27,6 +27,8 @@ openai = "^1.71.0"
 pydantic = "^2.11.2"
 pymongo = "^4.11.3"
 zep-cloud = "^2.9.0"
+httpx = "^0.28.1"
+websockets = "^15.0.1"
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.3.5"

solana_agent-24.1.0/solana_agent/adapters/llm_adapter.py ADDED Viewed

@@ -0,0 +1,332 @@
+"""
+LLM provider adapters for the Solana Agent system.
+These adapters implement the LLMProvider interface for different LLM services.
+"""
+import asyncio
+import json
+from typing import Any, AsyncGenerator, Callable, Dict, Literal, Optional, Type, TypeVar
+import httpx
+from openai import AsyncOpenAI
+from pydantic import BaseModel
+import websockets
+from solana_agent.interfaces.providers.llm import LLMProvider
+T = TypeVar('T', bound=BaseModel)
+class OpenAIAdapter(LLMProvider):
+    """OpenAI implementation of LLMProvider with web search capabilities."""
+    def __init__(self, api_key: str):
+        """Create the adapter and its underlying async OpenAI client.
+        Args:
+            api_key: OpenAI API key handed to ``AsyncOpenAI``
+        """
+        self.client = AsyncOpenAI(api_key=api_key)
+        # Model identifiers read by the methods of this adapter
+        # (structured parsing, text generation, transcription, speech).
+        self.parse_model = "gpt-4o-mini"
+        self.text_model = "gpt-4o-mini"
+        self.transcription_model = "gpt-4o-mini-transcribe"
+        self.tts_model = "gpt-4o-mini-tts"
+    async def tts(
+        self,
+        text: str,
+        instructions: str = "You speak in a friendly and helpful manner.",
+        voice: Literal["alloy", "ash", "ballad", "coral", "echo",
+                       "fable", "onyx", "nova", "sage", "shimmer"] = "nova",
+        response_format: Literal['mp3', 'opus',
+                                 'aac', 'flac', 'wav', 'pcm'] = "aac",
+    ) -> AsyncGenerator[bytes, None]:  # pragma: no cover
+        """Stream text-to-speech audio from OpenAI models.
+        Args:
+            text: Text to convert to speech
+            instructions: Optional instructions for speech generation
+            voice: Voice to use for synthesis
+            response_format: Audio format
+        Yields:
+            Audio bytes as they become available
+        """
+        try:
+            async with self.client.audio.speech.with_streaming_response.create(
+                model=self.tts_model,
+                voice=voice,
+                instructions=instructions,
+                input=text,
+                response_format=response_format
+            ) as stream:
+                # Stream the bytes in 16KB chunks
+                async for chunk in stream.iter_bytes(chunk_size=1024 * 16):
+                    yield chunk
+        except Exception as e:
+            print(f"Error in text_to_speech: {str(e)}")
+            import traceback
+            print(traceback.format_exc())
+            yield b""  # Return empty bytes on error
+        except Exception as e:
+            print(f"Error in text_to_speech: {str(e)}")
+            import traceback
+            print(traceback.format_exc())
+            yield b""  # Return empty bytes on error
+    async def transcribe_audio(
+        self,
+        audio_bytes: bytes,
+        input_format: Literal[
+            "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
+        ] = "mp4",
+    ) -> AsyncGenerator[str, None]:  # pragma: no cover
+        """Stream transcription of an audio file.
+        Args:
+            audio_bytes: Audio file bytes
+            input_format: Format of the input audio file
+        Yields:
+            Transcript text chunks as they become available
+        """
+        try:
+            async with self.client.audio.transcriptions.with_streaming_response.create(
+                model=self.transcription_model,
+                file=(f"file.{input_format}", audio_bytes),
+                response_format="text",
+            ) as stream:
+                # Stream the text in 16KB chunks
+                async for chunk in stream.iter_text(chunk_size=1024 * 16):
+                    yield chunk
+        except Exception as e:
+            print(f"Error in transcribe_audio: {str(e)}")
+            import traceback
+            print(traceback.format_exc())
+            yield f"I apologize, but I encountered an error transcribing the audio: {str(e)}"
+    async def generate_text(
+        self,
+        prompt: str,
+        system_prompt: str = "",
+    ) -> AsyncGenerator[str, None]:  # pragma: no cover
+        """Generate text from OpenAI models."""
+        messages = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        messages.append({"role": "user", "content": prompt})
+        # Prepare request parameters
+        request_params = {
+            "messages": messages,
+            "stream": True,
+            "model": self.text_model,
+        }
+        try:
+            response = await self.client.chat.completions.create(**request_params)
+            async for chunk in response:
+                if chunk.choices:
+                    if chunk.choices[0].delta.content:
+                        text = chunk.choices[0].delta.content
+                        yield text
+        except Exception as e:
+            print(f"Error in generate_text: {str(e)}")
+            import traceback
+            print(traceback.format_exc())
+            yield f"I apologize, but I encountered an error: {str(e)}"
+    async def parse_structured_output(
+        self,
+        prompt: str,
+        system_prompt: str,
+        model_class: Type[T],
+    ) -> T:  # pragma: no cover
+        """Generate structured output using Pydantic model parsing."""
+        messages = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        messages.append({"role": "user", "content": prompt})
+        try:
+            # First try the beta parsing API
+            completion = await self.client.beta.chat.completions.parse(
+                model=self.parse_model,
+                messages=messages,
+                response_format=model_class,
+            )
+            return completion.choices[0].message.parsed
+        except Exception as e:
+            print(f"Error with beta.parse method: {e}")
+    async def create_realtime_session(
+        self,
+        model: str = "gpt-4o-mini-realtime-preview",
+        modalities: list = ["audio", "text"],
+        instructions: str = "You are a helpful assistant.",
+        voice: str = "alloy",
+        input_audio_format: str = "pcm16",
+        output_audio_format: str = "pcm16",
+    ) -> Dict[str, Any]:  # pragma: no cover
+        """Create a realtime session token for WebSocket communication."""
+        try:
+            # Get the API key from the AsyncOpenAI client
+            api_key = self.client.api_key
+            # Create an async HTTP client
+            async with httpx.AsyncClient() as client:
+                response = await client.post(
+                    "https://api.openai.com/v1/realtime/sessions",
+                    json={
+                        "model": model,
+                        "modalities": modalities,
+                        "instructions": instructions,
+                        "voice": voice,
+                        "input_audio_format": input_audio_format,
+                        "output_audio_format": output_audio_format,
+                    },
+                    headers={
+                        "Authorization": f"Bearer {api_key}",
+                        "Content-Type": "application/json",
+                        "OpenAI-Beta": "realtime=v1"
+                    }
+                )
+                if response.status_code == 200:
+                    return response.json()
+                else:
+                    raise Exception(
+                        f"Failed to create realtime session: {response.text}")
+        except Exception as e:
+            print(f"Error creating realtime session: {str(e)}")
+            raise
+    async def realtime_audio_transcription(
+        self,
+        audio_generator: AsyncGenerator[bytes, None],
+        transcription_config: Optional[Dict[str, Any]] = None,
+        on_event: Optional[Callable[[Dict[str, Any]], Any]] = None,
+    ) -> AsyncGenerator[str, None]:  # pragma: no cover
+        """Stream real-time audio transcription using the Realtime API.
+        Creates a transcription session over HTTP, opens a WebSocket
+        authenticated with the session's client secret, sends the audio from
+        a background task and yields transcription text from received events.
+        Args:
+            audio_generator: Async generator that yields audio chunks
+            transcription_config: Optional custom configuration for transcription;
+                sent as the JSON body of the session-creation request
+            on_event: Optional callback function for handling raw events;
+                awaited when it is a coroutine function, called otherwise
+        Yields:
+            Transcription text as it becomes available. On failure, a single
+            apology message that includes the error text.
+        """
+        # Create default transcription config if none provided
+        if transcription_config is None:
+            transcription_config = {
+                "input_audio_format": "pcm16",
+                "input_audio_transcription": {
+                    "model": "gpt-4o-mini-transcribe"
+                },
+                "turn_detection": {
+                    "type": "server_vad",
+                    "threshold": 0.5,
+                    "prefix_padding_ms": 300,
+                    "silence_duration_ms": 200
+                }
+            }
+        try:
+            # Get the API key from the AsyncOpenAI client
+            api_key = self.client.api_key
+            # Create transcription session
+            async with httpx.AsyncClient() as client:
+                response = await client.post(
+                    "https://api.openai.com/v1/realtime/transcription_sessions",
+                    json=transcription_config,
+                    headers={
+                        "Authorization": f"Bearer {api_key}",
+                        "Content-Type": "application/json",
+                        "OpenAI-Beta": "realtime=v1"
+                    }
+                )
+                if response.status_code != 200:
+                    raise Exception(
+                        f"Failed to create transcription session: {response.text}")
+                session = response.json()
+                # The session's client secret (not the API key) authenticates
+                # the WebSocket connection below
+                client_secret = session["client_secret"]["value"]
+            # Connect to WebSocket with proper headers as dictionary
+            # NOTE(review): the model in this URL is fixed and does not follow a
+            # model set in a caller-supplied transcription_config - confirm intended
+            url = "wss://api.openai.com/v1/realtime?model=gpt-4o-mini-transcribe"
+            headers = {
+                "Authorization": f"Bearer {client_secret}",
+                "OpenAI-Beta": "realtime=v1"
+            }
+            async with websockets.connect(url, additional_headers=headers) as websocket:
+                # Handle WebSocket communication in the background
+                audio_task = None
+                async def send_audio():
+                    # Forward every chunk from audio_generator, then commit the buffer
+                    try:
+                        async for audio_chunk in audio_generator:
+                            # Base64 encode the audio
+                            import base64
+                            encoded_audio = base64.b64encode(
+                                audio_chunk).decode('utf-8')
+                            # Send audio chunk
+                            await websocket.send(json.dumps({
+                                "type": "input_audio_buffer.append",
+                                "audio": encoded_audio
+                            }))
+                            # Small delay to prevent flooding
+                            await asyncio.sleep(0.05)
+                        # Commit the audio buffer when done
+                        await websocket.send(json.dumps({
+                            "type": "input_audio_buffer.commit"
+                        }))
+                    except Exception as e:
+                        # NOTE(review): a send failure is only printed here; the
+                        # receive loop below is not notified and keeps waiting
+                        # on recv() - confirm this is acceptable
+                        print(f"Error sending audio: {str(e)}")
+                # Start sending audio in the background
+                audio_task = asyncio.create_task(send_audio())
+                # Process transcription events
+                try:
+                    while True:
+                        message = await websocket.recv()
+                        event = json.loads(message)
+                        if on_event:
+                            # Check if on_event is a coroutine function and await it if needed
+                            if asyncio.iscoroutinefunction(on_event):
+                                await on_event(event)
+                            else:
+                                on_event(event)
+                        # Extract transcription deltas
+                        if event["type"] == "conversation.item.input_audio_transcription.delta":
+                            yield event["delta"]
+                        # Also handle completed transcriptions
+                        # NOTE(review): the full transcript is yielded even when
+                        # deltas were already yielded, and the loop stops after
+                        # the first completed event - confirm consumers expect both
+                        elif event["type"] == "conversation.item.input_audio_transcription.completed":
+                            yield event["transcript"]
+                            break
+                finally:
+                    # Clean up audio task if it's still running
+                    if audio_task and not audio_task.done():
+                        audio_task.cancel()
+                        try:
+                            await audio_task
+                        except asyncio.CancelledError:
+                            pass
+        except Exception as e:
+            # Any failure (session creation, connection, receive loop) ends the
+            # stream with a printed traceback and an apology message
+            print(f"Error in realtime audio transcription: {str(e)}")
+            import traceback
+            print(traceback.format_exc())
+            yield f"I apologize, but I encountered an error transcribing the audio: {str(e)}"

{solana_agent-24.0.0 → solana_agent-24.1.0}/solana_agent/client/solana_agent.py RENAMED Viewed

@@ -55,6 +55,7 @@ class SolanaAgent(SolanaAgentInterface):
         audio_input_format: Literal[
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
+        audio_transcription_real_time: bool = True,
         router: Optional[RoutingInterface] = None,
     ) -> AsyncGenerator[Union[str, bytes], None]:  # pragma: no cover
         """Process a user message and return the response stream.
@@ -68,6 +69,7 @@ class SolanaAgent(SolanaAgentInterface):
             audio_instructions: Audio voice instructions
             audio_output_format: Audio output format
             audio_input_format: Audio input format
+            audio_transcription_real_time: Flag for real-time audio transcription
             router: Optional routing service for processing
         Returns:
@@ -83,6 +85,7 @@ class SolanaAgent(SolanaAgentInterface):
             audio_input_format=audio_input_format,
             prompt=prompt,
             router=router,
+            audio_transcription_real_time=audio_transcription_real_time,
         ):
             yield chunk

{solana_agent-24.0.0 → solana_agent-24.1.0}/solana_agent/interfaces/client/client.py RENAMED Viewed

@@ -24,6 +24,7 @@ class SolanaAgent(ABC):
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
         router: Optional[RoutingInterface] = None,
+        audio_transcription_real_time: bool = True,
     ) -> AsyncGenerator[Union[str, bytes], None]:
         """Process a user message and return the response stream."""
         pass

{solana_agent-24.0.0 → solana_agent-24.1.0}/solana_agent/interfaces/providers/llm.py RENAMED Viewed

@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import AsyncGenerator, List, Literal, Type, TypeVar, Union
+from typing import Any, AsyncGenerator, Callable, Dict, List, Literal, Optional, Type, TypeVar, Union
 from pydantic import BaseModel
@@ -49,3 +49,13 @@ class LLMProvider(ABC):
     ) -> AsyncGenerator[str, None]:
         """Transcribe audio from the language model."""
         pass
+    @abstractmethod
+    async def realtime_audio_transcription(
+        self,
+        audio_generator: AsyncGenerator[bytes, None],
+        transcription_config: Optional[Dict[str, Any]] = None,
+        on_event: Optional[Callable[[Dict[str, Any]], Any]] = None,
+    ) -> AsyncGenerator[str, None]:
+        """Stream real-time audio transcription from the language model.
+        Args:
+            audio_generator: Async generator that yields audio chunks as bytes
+            transcription_config: Optional provider-specific configuration dictionary
+            on_event: Optional callback that receives an event dictionary
+        Yields:
+            Transcription text
+        """
+        pass

{solana_agent-24.0.0 → solana_agent-24.1.0}/solana_agent/interfaces/services/agent.py RENAMED Viewed

@@ -34,6 +34,7 @@ class AgentService(ABC):
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
         prompt: Optional[str] = None,
+        audio_transcription_real_time: bool = True,
     ) -> AsyncGenerator[Union[str, bytes], None]:
         """Generate a response from an agent."""
         pass

{solana_agent-24.0.0 → solana_agent-24.1.0}/solana_agent/interfaces/services/query.py RENAMED Viewed

@@ -1,6 +1,8 @@
 from abc import ABC, abstractmethod
 from typing import Any, AsyncGenerator, Dict, Literal, Optional, Union
+from solana_agent.interfaces.services.routing import RoutingService as RoutingInterface
 class QueryService(ABC):
     """Interface for processing user queries."""
@@ -20,6 +22,8 @@ class QueryService(ABC):
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
         prompt: Optional[str] = None,
+        router: Optional[RoutingInterface] = None,
+        audio_transcription_real_time: bool = True,
     ) -> AsyncGenerator[Union[str, bytes], None]:
         """Process the user request and generate a response."""
         pass

{solana_agent-24.0.0 → solana_agent-24.1.0}/solana_agent/repositories/memory.py RENAMED Viewed

@@ -69,8 +69,8 @@ class MemoryRepository(MemoryProvider):
                     # Store truncated messages
                     doc = {
                         "user_id": user_id,
-                        "user_message": self._truncate(user_msg),
-                        "assistant_message": self._truncate(assistant_msg),
+                        "user_message": user_msg,
+                        "assistant_message": assistant_msg,
                         "timestamp": datetime.now(timezone.utc)
                     }
                     self.mongo.insert_one(self.collection, doc)

solana-agent 24.0.0__tar.gz → 24.1.0__tar.gz

solana-agent 24.0.0tar.gz → 24.1.0tar.gz