solana-agent 14.0.2__py3-none-any.whl → 15.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,8 @@ LLM provider adapters for the Solana Agent system.
3
3
 
4
4
  These adapters implement the LLMProvider interface for different LLM services.
5
5
  """
6
- from typing import AsyncGenerator, List, Type, TypeVar
6
+ from pathlib import Path
7
+ from typing import AsyncGenerator, BinaryIO, List, Literal, Type, TypeVar, Union
7
8
 
8
9
  from openai import OpenAI
9
10
  from pydantic import BaseModel
@@ -16,22 +17,98 @@ T = TypeVar('T', bound=BaseModel)
16
17
  class OpenAIAdapter(LLMProvider):
17
18
  """OpenAI implementation of LLMProvider with web search capabilities."""
18
19
 
19
- def __init__(self, api_key: str, model: str = "gpt-4o-mini"):
20
+ def __init__(self, api_key: str):
20
21
  self.client = OpenAI(api_key=api_key)
21
- self.model = model
22
- # Add search-enabled model variants
23
- self.search_models = {
24
- "gpt-4o": "gpt-4o-search-preview",
25
- "gpt-4o-mini": "gpt-4o-mini-search-preview"
26
- }
22
+ self.parse_model = "gpt-4o-mini"
23
+ self.search_model = "gpt-4o-mini-search-preview"
24
+ self.transcription_model = "gpt-4o-mini-transcribe"
25
+ self.tts_model = "gpt-4o-mini-tts"
26
+
27
+ async def tts(
28
+ self,
29
+ text: str,
30
+ instructions: str = "",
31
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo",
32
+ "fable", "onyx", "nova", "sage", "shimmer"] = "nova",
33
+ ) -> AsyncGenerator[bytes, None]: # pragma: no cover
34
+ """Stream text-to-speech audio from OpenAI models.
35
+
36
+ Args:
37
+ text: Text to convert to speech
38
+ instructions: Optional instructions for speech generation
39
+ voice: Voice to use for synthesis
40
+
41
+ Yields:
42
+ Audio bytes as they become available
43
+ """
44
+ try:
45
+ stream = self.client.audio.speech.create(
46
+ model=self.tts_model,
47
+ voice=voice,
48
+ input=text,
49
+ instructions=instructions,
50
+ )
51
+
52
+ # Stream the bytes in chunks
53
+ for chunk in stream.iter_bytes(chunk_size=1024 * 16): # 16KB chunks
54
+ yield chunk
55
+
56
+ except Exception as e:
57
+ print(f"Error in text_to_speech: {str(e)}")
58
+ import traceback
59
+ print(traceback.format_exc())
60
+ yield b"" # Return empty bytes on error
61
+
62
+ except Exception as e:
63
+ print(f"Error in text_to_speech: {str(e)}")
64
+ import traceback
65
+ print(traceback.format_exc())
66
+ yield f"I apologize, but I encountered an error converting text to speech: {str(e)}"
67
+
68
+ async def transcribe_audio(
69
+ self,
70
+ audio_file: Union[str, Path, BinaryIO],
71
+ ) -> AsyncGenerator[str, None]: # pragma: no cover
72
+ """Stream transcription of an audio file.
73
+
74
+ Args:
75
+ audio_file: Path to audio file or file-like object
76
+
77
+ Yields:
78
+ Transcript text chunks as they become available
79
+ """
80
+ try:
81
+ # Handle file path vs file object
82
+ if isinstance(audio_file, (str, Path)):
83
+ audio_file = open(audio_file, "rb")
84
+
85
+ stream = self.client.audio.transcriptions.create(
86
+ model=self.transcription_model,
87
+ file=audio_file,
88
+ response_format="text",
89
+ stream=True
90
+ )
91
+
92
+ for event in stream:
93
+ if hasattr(event, 'text') and event.text:
94
+ yield event.text
95
+
96
+ except Exception as e:
97
+ print(f"Error in transcribe_audio: {str(e)}")
98
+ import traceback
99
+ print(traceback.format_exc())
100
+ yield f"I apologize, but I encountered an error transcribing the audio: {str(e)}"
101
+
102
+ finally:
103
+ # Close file if we opened it
104
+ if isinstance(audio_file, (str, Path)):
105
+ audio_file.close()
27
106
 
28
107
  async def generate_text(
29
108
  self,
30
109
  prompt: str,
31
110
  system_prompt: str = "",
32
- needs_search: bool = False,
33
- **kwargs,
34
- ) -> AsyncGenerator[str, None]:
111
+ ) -> AsyncGenerator[str, None]: # pragma: no cover
35
112
  """Generate text from OpenAI models with web search capability."""
36
113
  messages = []
37
114
 
@@ -43,38 +120,16 @@ class OpenAIAdapter(LLMProvider):
43
120
  # Prepare request parameters
44
121
  request_params = {
45
122
  "messages": messages,
46
- "stream": kwargs.get("stream", True),
47
- "response_format": kwargs.get("response_format", None),
123
+ "stream": True,
124
+ "model": self.search_model,
48
125
  }
49
-
50
- # If search is needed, update model and add search options
51
- if needs_search:
52
- base_model = kwargs.get("model", self.model)
53
- request_params["model"] = self.search_models.get(
54
- base_model, "gpt-4o-mini-search-preview")
55
- request_params["web_search_options"] = {
56
- "search_context_size": "medium",
57
- "user_location": {
58
- "type": "approximate",
59
- "approximate": {
60
- "country": "US",
61
- "timezone": "America/Los_Angeles"
62
- }
63
- }
64
- }
65
- else:
66
- request_params["model"] = kwargs.get("model", self.model)
67
-
68
126
  try:
69
127
  response = self.client.chat.completions.create(**request_params)
70
- current_text = ""
71
128
 
72
129
  for chunk in response:
73
130
  if chunk.choices:
74
- # Handle content
75
131
  if chunk.choices[0].delta.content:
76
132
  text = chunk.choices[0].delta.content
77
- current_text += text
78
133
  yield text
79
134
 
80
135
  except Exception as e:
@@ -83,7 +138,7 @@ class OpenAIAdapter(LLMProvider):
83
138
  print(traceback.format_exc())
84
139
  yield f"I apologize, but I encountered an error: {str(e)}"
85
140
 
86
- def generate_embedding(self, text: str) -> List[float]:
141
+ def generate_embedding(self, text: str) -> List[float]: # pragma: no cover
87
142
  """Generate embeddings for a given text using OpenAI's embedding model."""
88
143
  try:
89
144
  response = self.client.embeddings.create(
@@ -101,8 +156,7 @@ class OpenAIAdapter(LLMProvider):
101
156
  prompt: str,
102
157
  system_prompt: str,
103
158
  model_class: Type[T],
104
- **kwargs
105
- ) -> T:
159
+ ) -> T: # pragma: no cover
106
160
  """Generate structured output using Pydantic model parsing."""
107
161
  messages = []
108
162
  if system_prompt:
@@ -113,10 +167,9 @@ class OpenAIAdapter(LLMProvider):
113
167
  try:
114
168
  # First try the beta parsing API
115
169
  completion = self.client.beta.chat.completions.parse(
116
- model=kwargs.get("model", self.model),
170
+ model=self.parse_model,
117
171
  messages=messages,
118
172
  response_format=model_class,
119
- temperature=kwargs.get("temperature", 0.2),
120
173
  )
121
174
  return completion.choices[0].message.parsed
122
175
  except Exception as e:
@@ -6,13 +6,14 @@ the agent system without dealing with internal implementation details.
6
6
  """
7
7
  import json
8
8
  import importlib.util
9
- from typing import AsyncGenerator, Dict, Any
9
+ from pathlib import Path
10
+ from typing import AsyncGenerator, BinaryIO, Dict, Any, Literal, Optional, Union
10
11
 
11
12
  from solana_agent.factories.agent_factory import SolanaAgentFactory
12
- from solana_agent.interfaces.client.client import SolanaAgent
13
+ from solana_agent.interfaces.client.client import SolanaAgent as SolanaAgentInterface
13
14
 
14
15
 
15
- class SolanaAgent(SolanaAgent):
16
+ class SolanaAgent(SolanaAgentInterface):
16
17
  """Simplified client interface for interacting with the agent system."""
17
18
 
18
19
  def __init__(self, config_path: str = None, config: Dict[str, Any] = None):
@@ -39,17 +40,34 @@ class SolanaAgent(SolanaAgent):
39
40
 
40
41
  self.query_service = SolanaAgentFactory.create_from_config(config)
41
42
 
42
- async def process(self, user_id: str, message: str) -> AsyncGenerator[str, None]:
43
+ async def process(
44
+ self,
45
+ user_id: str,
46
+ message: Union[str, Path, BinaryIO],
47
+ output_format: Literal["text", "audio"] = "text",
48
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo",
49
+ "fable", "onyx", "nova", "sage", "shimmer"] = "nova",
50
+ audio_instructions: Optional[str] = None,
51
+ ) -> AsyncGenerator[Union[str, bytes], None]: # pragma: no cover
43
52
  """Process a user message and return the response stream.
44
53
 
45
54
  Args:
46
55
  user_id: User ID
47
- message: User message
56
+ message: Text message or audio file input
57
+ output_format: Response format ("text" or "audio")
58
+ voice: Voice to use for audio output (only used if output_format is "audio")
59
+ audio_instructions: Optional instructions for audio synthesis
48
60
 
49
61
  Returns:
50
- Async generator yielding response chunks
62
+ Async generator yielding response chunks (text strings or audio bytes)
51
63
  """
52
- async for chunk in self.query_service.process(user_id, message):
64
+ async for chunk in self.query_service.process(
65
+ user_id=user_id,
66
+ query=message,
67
+ output_format=output_format,
68
+ voice=voice,
69
+ audio_instructions=audio_instructions
70
+ ):
53
71
  yield chunk
54
72
 
55
73
  async def get_user_history(
@@ -53,7 +53,6 @@ class AIAgent(BaseModel):
53
53
  instructions: str = Field(...,
54
54
  description="Base instructions for the agent")
55
55
  specialization: str = Field(..., description="Agent's specialized domain")
56
- model: str = Field("gpt-4o-mini", description="Language model to use")
57
56
  created_at: datetime = Field(
58
57
  default_factory=datetime.now, description="Creation timestamp")
59
58
  updated_at: datetime = Field(
@@ -20,7 +20,7 @@ from solana_agent.adapters.llm_adapter import OpenAIAdapter
20
20
  from solana_agent.adapters.mongodb_adapter import MongoDBAdapter
21
21
 
22
22
  # Domain and plugin imports
23
- from solana_agent.domains.agents import OrganizationMission
23
+ from solana_agent.domains.agent import OrganizationMission
24
24
  from solana_agent.plugins.manager import PluginManager
25
25
 
26
26
 
@@ -45,7 +45,6 @@ class SolanaAgentFactory:
45
45
 
46
46
  llm_adapter = OpenAIAdapter(
47
47
  api_key=config["openai"]["api_key"],
48
- model=config.get("openai", {}).get("default_model", "gpt-4o-mini"),
49
48
  )
50
49
 
51
50
  # Create organization mission if specified in config
@@ -110,7 +109,6 @@ class SolanaAgentFactory:
110
109
  name=agent_config["name"],
111
110
  instructions=agent_config["instructions"],
112
111
  specialization=agent_config["specialization"],
113
- model=agent_config.get("model", "gpt-4o-mini"),
114
112
  )
115
113
 
116
114
  # Register tools for this agent
@@ -1,12 +1,21 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Any, AsyncGenerator, Dict
2
+ from pathlib import Path
3
+ from typing import Any, AsyncGenerator, BinaryIO, Dict, Literal, Union
3
4
 
4
5
 
5
6
  class SolanaAgent(ABC):
6
7
  """Interface for the Solana agent system."""
7
8
 
8
9
  @abstractmethod
9
- async def process(self, user_id: str, message: str) -> AsyncGenerator[str, None]:
10
+ async def process(
11
+ self,
12
+ user_id: str,
13
+ message: Union[str, Path, BinaryIO],
14
+ output_format: Literal["text", "audio"] = "text",
15
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo",
16
+ "fable", "onyx", "nova", "sage", "shimmer"] = "nova",
17
+ audio_instructions: str = None
18
+ ) -> AsyncGenerator[Union[str, bytes], None]:
10
19
  """Process a user message and return the response stream."""
11
20
  pass
12
21
 
@@ -1,5 +1,6 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import AsyncGenerator, List, Type, TypeVar
2
+ from pathlib import Path
3
+ from typing import AsyncGenerator, BinaryIO, List, Literal, Type, TypeVar, Union
3
4
 
4
5
  from pydantic import BaseModel
5
6
 
@@ -15,8 +16,6 @@ class LLMProvider(ABC):
15
16
  self,
16
17
  prompt: str,
17
18
  system_prompt: str = "",
18
- needs_search: bool = False,
19
- **kwargs,
20
19
  ) -> AsyncGenerator[str, None]:
21
20
  """Generate text from the language model."""
22
21
  pass
@@ -28,7 +27,26 @@ class LLMProvider(ABC):
28
27
 
29
28
  @abstractmethod
30
29
  async def parse_structured_output(
31
- self, prompt: str, system_prompt: str, model_class: Type[T], **kwargs
30
+ self, prompt: str, system_prompt: str, model_class: Type[T],
32
31
  ) -> T:
33
32
  """Generate structured output using a specific model class."""
34
33
  pass
34
+
35
+ @abstractmethod
36
+ async def tts(
37
+ self,
38
+ text: str,
39
+ instructions: str = "",
40
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo",
41
+ "fable", "onyx", "nova", "sage", "shimmer"] = "nova",
42
+ ) -> AsyncGenerator[bytes, None]:
43
+ """Stream text-to-speech audio from the language model."""
44
+ pass
45
+
46
+ @abstractmethod
47
+ async def transcribe_audio(
48
+ self,
49
+ audio_file: Union[str, Path, BinaryIO],
50
+ ) -> AsyncGenerator[str, None]:
51
+ """Transcribe audio from the language model."""
52
+ pass
@@ -1,7 +1,7 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from typing import List, Optional
3
3
 
4
- from solana_agent.domains.agents import AIAgent
4
+ from solana_agent.domains.agent import AIAgent
5
5
 
6
6
 
7
7
  class AgentRepository(ABC):
@@ -1,12 +1,15 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Any, AsyncGenerator, Dict, List
2
+ from pathlib import Path
3
+ from typing import Any, AsyncGenerator, BinaryIO, Dict, List, Literal, Union
4
+
5
+ from solana_agent.domains.agent import AIAgent
3
6
 
4
7
 
5
8
  class AgentService(ABC):
6
9
  """Interface for agent management and response generation."""
7
10
 
8
11
  @abstractmethod
9
- def register_ai_agent(self, name: str, instructions: str, specialization: str, model: str = "gpt-4o-mini") -> None:
12
+ def register_ai_agent(self, name: str, instructions: str, specialization: str) -> None:
10
13
  """Register an AI agent with its specialization."""
11
14
  pass
12
15
 
@@ -21,7 +24,17 @@ class AgentService(ABC):
21
24
  pass
22
25
 
23
26
  @abstractmethod
24
- async def generate_response(self, agent_name: str, user_id: str, query: str, memory_context: str = "", **kwargs) -> AsyncGenerator[str, None]:
27
+ async def generate_response(
28
+ self,
29
+ agent_name: str,
30
+ user_id: str,
31
+ query: Union[str, Path, BinaryIO],
32
+ memory_context: str = "",
33
+ output_format: Literal["text", "audio"] = "text",
34
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo",
35
+ "fable", "onyx", "nova", "sage", "shimmer"] = "nova",
36
+ audio_instructions: str = None,
37
+ ) -> AsyncGenerator[Union[str, bytes], None]:
25
38
  """Generate a response from an agent."""
26
39
  pass
27
40
 
@@ -39,3 +52,8 @@ class AgentService(ABC):
39
52
  def execute_tool(self, agent_name: str, tool_name: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
40
53
  """Execute a tool on behalf of an agent."""
41
54
  pass
55
+
56
+ @abstractmethod
57
+ def get_all_ai_agents(self) -> Dict[str, AIAgent]:
58
+ """Get all registered AI agents."""
59
+ pass
@@ -1,13 +1,22 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Any, AsyncGenerator, Dict
2
+ from pathlib import Path
3
+ from typing import Any, AsyncGenerator, BinaryIO, Dict, Literal, Optional, Union
3
4
 
4
5
 
5
6
  class QueryService(ABC):
6
7
  """Interface for processing user queries."""
7
8
 
8
9
  @abstractmethod
9
- async def process(self, user_id: str, query: str) -> AsyncGenerator[str, None]:
10
- """Process a query from a user."""
10
+ async def process(
11
+ self,
12
+ user_id: str,
13
+ query: Union[str, Path, BinaryIO],
14
+ output_format: Literal["text", "audio"] = "text",
15
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo",
16
+ "fable", "onyx", "nova", "sage", "shimmer"] = "nova",
17
+ audio_instructions: Optional[str] = None,
18
+ ) -> AsyncGenerator[Union[str, bytes], None]:
19
+ """Process the user request and generate a response."""
11
20
  pass
12
21
 
13
22
  @abstractmethod
@@ -6,7 +6,7 @@ class RoutingService(ABC):
6
6
  """Interface for query routing services."""
7
7
 
8
8
  @abstractmethod
9
- async def route_query(self, user_id: str, query: str) -> Tuple[str, Any]:
9
+ async def route_query(self, query: str) -> Tuple[str, Any]:
10
10
  """Route a query to the appropriate agent.
11
11
 
12
12
  Args:
@@ -3,7 +3,7 @@ MongoDB implementation of the agent repository.
3
3
  """
4
4
  from typing import List, Optional, Any
5
5
 
6
- from solana_agent.domains.agents import AIAgent
6
+ from solana_agent.domains.agent import AIAgent
7
7
  from solana_agent.interfaces.repositories.agent import AgentRepository
8
8
 
9
9
 
@@ -7,13 +7,14 @@ and response generation.
7
7
  import datetime as main_datetime
8
8
  from datetime import datetime
9
9
  import json
10
- from typing import AsyncGenerator, Dict, List, Optional, Any
10
+ from pathlib import Path
11
+ from typing import AsyncGenerator, BinaryIO, Dict, List, Literal, Optional, Any, Union
11
12
 
12
13
  from solana_agent.interfaces.services.agent import AgentService as AgentServiceInterface
13
14
  from solana_agent.interfaces.providers.llm import LLMProvider
14
15
  from solana_agent.interfaces.repositories.agent import AgentRepository
15
16
  from solana_agent.interfaces.plugins.plugins import ToolRegistry
16
- from solana_agent.domains.agents import AIAgent, OrganizationMission
17
+ from solana_agent.domains.agent import AIAgent, OrganizationMission
17
18
 
18
19
 
19
20
  class AgentService(AgentServiceInterface):
@@ -52,7 +53,7 @@ class AgentService(AgentServiceInterface):
52
53
  self.plugin_manager = None
53
54
 
54
55
  def register_ai_agent(
55
- self, name: str, instructions: str, specialization: str, model: str = "gpt-4o-mini"
56
+ self, name: str, instructions: str, specialization: str,
56
57
  ) -> None:
57
58
  """Register an AI agent with its specialization.
58
59
 
@@ -60,13 +61,11 @@ class AgentService(AgentServiceInterface):
60
61
  name: Agent name
61
62
  instructions: Agent instructions
62
63
  specialization: Agent specialization
63
- model: LLM model to use
64
64
  """
65
65
  agent = AIAgent(
66
66
  name=name,
67
67
  instructions=instructions,
68
68
  specialization=specialization,
69
- model=model
70
69
  )
71
70
  self.agent_repository.save_ai_agent(agent)
72
71
 
@@ -190,84 +189,131 @@ class AgentService(AgentServiceInterface):
190
189
  self,
191
190
  agent_name: str,
192
191
  user_id: str,
193
- query: str,
192
+ query: Union[str, Path, BinaryIO],
194
193
  memory_context: str = "",
195
- **kwargs
196
- ) -> AsyncGenerator[str, None]: # pragma: no cover
197
- """Generate a response with tool execution support."""
194
+ output_format: Literal["text", "audio"] = "text",
195
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo",
196
+ "fable", "onyx", "nova", "sage", "shimmer"] = "nova",
197
+ audio_instructions: Optional[str] = None,
198
+ ) -> AsyncGenerator[Union[str, bytes], None]: # pragma: no cover
199
+ """Generate a response with support for text/audio input/output.
200
+
201
+ Args:
202
+ agent_name: Agent name
203
+ user_id: User ID
204
+ query: Text query or audio file input
205
+ memory_context: Optional conversation context
206
+ output_format: Response format ("text" or "audio")
207
+ voice: Voice to use for audio output
208
+ audio_instructions: Optional instructions for audio synthesis
209
+
210
+ Yields:
211
+ Text chunks or audio bytes depending on output_format
212
+ """
198
213
  agent = self.agent_repository.get_ai_agent_by_name(agent_name)
199
214
  if not agent:
200
- yield f"Agent '{agent_name}' not found."
215
+ error_msg = f"Agent '{agent_name}' not found."
216
+ if output_format == "audio":
217
+ async for chunk in self.llm_provider.tts(error_msg, voice=voice):
218
+ yield chunk
219
+ else:
220
+ yield error_msg
201
221
  return
202
222
 
203
- # Get system prompt and add tool instructions
204
- system_prompt = self.get_agent_system_prompt(agent_name)
205
- if self.tool_registry:
206
- tool_usage_prompt = self._get_tool_usage_prompt(agent_name)
207
- if tool_usage_prompt:
208
- system_prompt = f"{system_prompt}\n\n{tool_usage_prompt}"
209
-
210
- # Add User ID context
211
- system_prompt += f"\n\n User ID: {user_id}"
212
-
213
- # Add memory context
214
- if memory_context:
215
- system_prompt += f"\n\n Memory Context: {memory_context}"
216
-
217
223
  try:
218
- json_response = ""
219
- is_json = False
220
-
224
+ # Handle audio input if provided
225
+ query_text = ""
226
+ if not isinstance(query, str):
227
+ async for transcript in self.llm_provider.transcribe_audio(query):
228
+ query_text += transcript
229
+ else:
230
+ query_text = query
231
+
232
+ # Get system prompt and add tool instructions
233
+ system_prompt = self.get_agent_system_prompt(agent_name)
234
+ if self.tool_registry:
235
+ tool_usage_prompt = self._get_tool_usage_prompt(agent_name)
236
+ if tool_usage_prompt:
237
+ system_prompt = f"{system_prompt}\n\n{tool_usage_prompt}"
238
+
239
+ # Add User ID and memory context
240
+ system_prompt += f"\n\nUser ID: {user_id}"
241
+ if memory_context:
242
+ system_prompt += f"\n\nMemory Context: {memory_context}"
243
+
244
+ # Buffer for collecting text when generating audio
245
+ text_buffer = ""
246
+
247
+ # Generate and stream response
221
248
  async for chunk in self.llm_provider.generate_text(
222
- user_id=user_id,
223
- prompt=query,
249
+ prompt=query_text,
224
250
  system_prompt=system_prompt,
225
- model=agent.model,
226
- needs_search=True, # Enable web search by default
227
- **kwargs
228
251
  ):
229
- # Check for JSON start
230
252
  if chunk.strip().startswith("{"):
231
- is_json = True
232
- json_response = chunk
233
- continue
234
-
235
- # Collect JSON or yield normal text
236
- if is_json:
237
- json_response += chunk
238
- try:
239
- # Try to parse complete JSON
240
- data = json.loads(json_response)
241
-
242
- # Handle tool call
243
- if "tool_call" in data:
244
- tool_data = data["tool_call"]
245
- tool_name = tool_data.get("name")
246
- parameters = tool_data.get("parameters", {})
247
-
248
- if tool_name:
249
- result = self.execute_tool(
250
- agent_name, tool_name, parameters)
251
- if result.get("status") == "success":
252
- yield result.get("result", "")
253
- else:
254
- yield f"I apologize, but I encountered an issue: {result.get('message', 'Unknown error')}"
255
- break
256
- else:
257
- # If JSON but not a tool call, yield as text
258
- yield json_response
259
- break
260
- except json.JSONDecodeError:
261
- # Not complete JSON yet, keep collecting
262
- continue
253
+ # Handle tool calls
254
+ result = await self._handle_tool_call(
255
+ agent_name, chunk, output_format, voice
256
+ )
257
+ if output_format == "audio":
258
+ async for audio_chunk in self.llm_provider.tts(result, instructions=audio_instructions, voice=voice):
259
+ yield audio_chunk
260
+ else:
261
+ yield result
263
262
  else:
264
- yield chunk
263
+ if output_format == "audio":
264
+ # Buffer text until we have a complete sentence
265
+ text_buffer += chunk
266
+ if any(punct in chunk for punct in ".!?"):
267
+ async for audio_chunk in self.llm_provider.tts(
268
+ text_buffer, instructions=audio_instructions, voice=voice
269
+ ):
270
+ yield audio_chunk
271
+ text_buffer = ""
272
+ else:
273
+ yield chunk
274
+
275
+ # Handle any remaining text in buffer
276
+ if output_format == "audio" and text_buffer:
277
+ async for audio_chunk in self.llm_provider.tts(
278
+ text_buffer, instructions=audio_instructions, voice=voice
279
+ ):
280
+ yield audio_chunk
265
281
 
266
282
  except Exception as e:
283
+ error_msg = f"I apologize, but I encountered an error: {str(e)}"
284
+ if output_format == "audio":
285
+ async for chunk in self.llm_provider.tts(error_msg, instructions=audio_instructions, voice=voice):
286
+ yield chunk
287
+ else:
288
+ yield error_msg
289
+
267
290
  print(f"Error in generate_response: {str(e)}")
268
291
  import traceback
269
292
  print(traceback.format_exc())
270
- yield f"I apologize, but I encountered an error: {str(e)}"
293
+
294
+ async def _handle_tool_call(
295
+ self,
296
+ agent_name: str,
297
+ json_chunk: str,
298
+ ) -> str:
299
+ """Handle tool calls and return formatted response."""
300
+ try:
301
+ data = json.loads(json_chunk)
302
+ if "tool_call" in data:
303
+ tool_data = data["tool_call"]
304
+ tool_name = tool_data.get("name")
305
+ parameters = tool_data.get("parameters", {})
306
+
307
+ if tool_name:
308
+ result = self.execute_tool(
309
+ agent_name, tool_name, parameters)
310
+ if result.get("status") == "success":
311
+ return result.get("result", "")
312
+ else:
313
+ return f"I apologize, but I encountered an issue: {result.get('message', 'Unknown error')}"
314
+ return json_chunk
315
+ except json.JSONDecodeError:
316
+ return json_chunk
271
317
 
272
318
  def _get_tool_usage_prompt(self, agent_name: str) -> str:
273
319
  """Generate JSON-based instructions for tool usage."""
@@ -5,7 +5,8 @@ This service orchestrates the processing of user queries, coordinating
5
5
  other services to provide comprehensive responses while maintaining
6
6
  clean separation of concerns.
7
7
  """
8
- from typing import Any, AsyncGenerator, Dict, Optional
8
+ from pathlib import Path
9
+ from typing import Any, AsyncGenerator, BinaryIO, Dict, Literal, Optional, Union
9
10
 
10
11
  from solana_agent.interfaces.services.query import QueryService as QueryServiceInterface
11
12
  from solana_agent.services.agent import AgentService
@@ -34,23 +35,44 @@ class QueryService(QueryServiceInterface):
34
35
  self.memory_provider = memory_provider
35
36
 
36
37
  async def process(
37
- self, user_id: str, user_text: str, timezone: str = None
38
- ) -> AsyncGenerator[str, None]: # pragma: no cover
38
+ self,
39
+ user_id: str,
40
+ query: Union[str, Path, BinaryIO],
41
+ output_format: Literal["text", "audio"] = "text",
42
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo",
43
+ "fable", "onyx", "nova", "sage", "shimmer"] = "nova",
44
+ audio_instructions: Optional[str] = None,
45
+ ) -> AsyncGenerator[Union[str, bytes], None]: # pragma: no cover
39
46
  """Process the user request with appropriate agent.
40
47
 
41
48
  Args:
42
49
  user_id: User ID
43
- user_text: User query text
44
- timezone: Optional user timezone
50
+ query: Text query or audio file input
51
+ output_format: Response format ("text" or "audio")
52
+ voice: Voice to use for audio output
53
+ audio_instructions: Optional instructions for audio synthesis
45
54
 
46
55
  Yields:
47
- Response text chunks
56
+ Response chunks (text strings or audio bytes)
48
57
  """
49
58
  try:
59
+ # Handle audio input if provided
60
+ user_text = ""
61
+ if not isinstance(query, str):
62
+ async for transcript in self.agent_service.llm_provider.transcribe_audio(query):
63
+ user_text += transcript
64
+ else:
65
+ user_text = query
66
+
50
67
  # Handle simple greetings
51
68
  if user_text.strip().lower() in ["test", "hello", "hi", "hey", "ping"]:
52
- response = f"Hello! How can I help you today?"
53
- yield response
69
+ response = "Hello! How can I help you today?"
70
+ if output_format == "audio":
71
+ async for chunk in self.agent_service.llm_provider.tts(response, instructions=audio_instructions, voice=voice):
72
+ yield chunk
73
+ else:
74
+ yield response
75
+
54
76
  # Store simple interaction in memory
55
77
  if self.memory_provider:
56
78
  await self._store_conversation(user_id, user_text, response)
@@ -62,27 +84,48 @@ class QueryService(QueryServiceInterface):
62
84
  memory_context = await self.memory_provider.retrieve(user_id)
63
85
 
64
86
  # Route query to appropriate agent
65
- agent_name = await self.routing_service.route_query(user_id, user_text)
87
+ agent_name = await self.routing_service.route_query(user_text)
66
88
 
67
- # Generate response
89
+ # Generate response using agent service
68
90
  full_response = ""
69
91
  async for chunk in self.agent_service.generate_response(
70
92
  agent_name=agent_name,
71
93
  user_id=user_id,
72
94
  query=user_text,
73
- memory_context=memory_context
95
+ memory_context=memory_context,
96
+ output_format=output_format,
97
+ voice=voice
74
98
  ):
75
99
  yield chunk
76
- full_response += chunk
100
+ if output_format == "text":
101
+ full_response += chunk
102
+
103
+ # For audio responses, get transcription for storage
104
+ if output_format == "audio":
105
+ # Re-generate response in text format for storage
106
+ async for chunk in self.agent_service.generate_response(
107
+ agent_name=agent_name,
108
+ user_id=user_id,
109
+ query=user_text,
110
+ memory_context=memory_context,
111
+ output_format="text"
112
+ ):
113
+ full_response += chunk
77
114
 
78
115
  # Store conversation and extract insights
79
116
  if self.memory_provider:
80
117
  await self._store_conversation(user_id, user_text, full_response)
81
118
 
82
119
  except Exception as e:
83
- yield f"I apologize for the technical difficulty. {str(e)}"
84
- import traceback
120
+ error_msg = f"I apologize for the technical difficulty. {str(e)}"
121
+ if output_format == "audio":
122
+ async for chunk in self.agent_service.llm_provider.tts(error_msg, instructions=audio_instructions, voice=voice):
123
+ yield chunk
124
+ else:
125
+ yield error_msg
126
+
85
127
  print(f"Error in query processing: {str(e)}")
128
+ import traceback
86
129
  print(traceback.format_exc())
87
130
 
88
131
  async def get_user_history(
@@ -54,7 +54,6 @@ class RoutingService(RoutingServiceInterface):
54
54
  prompt=prompt,
55
55
  system_prompt="Analyze user queries to determine appropriate routing.",
56
56
  model_class=QueryAnalysis,
57
- temperature=0.2
58
57
  )
59
58
 
60
59
  return {
@@ -75,27 +74,30 @@ class RoutingService(RoutingServiceInterface):
75
74
  "confidence": 0.0
76
75
  }
77
76
 
78
- async def route_query(self, user_id: str, query: str) -> str:
77
+ async def route_query(self, query: str) -> str:
79
78
  """Route a query to the appropriate agent.
80
79
 
81
80
  Args:
82
- user_id: ID of the user making the query
83
81
  query: The query text
84
82
 
85
83
  Returns:
86
- Name of the selected agent
84
+ Name of the best agent
87
85
  """
86
+ # If only one agent - use that agent
87
+ if len(self.agent_service.get_all_ai_agents()) == 1:
88
+ return next(iter(self.agent_service.get_all_ai_agents().keys()))
89
+
88
90
  # Analyze query
89
91
  analysis = await self._analyze_query(query)
90
92
 
91
93
  # Find best agent based on analysis
92
- selected_agent = await self._find_best_ai_agent(
94
+ best_agent = await self._find_best_ai_agent(
93
95
  analysis["primary_specialization"],
94
96
  analysis["secondary_specializations"]
95
97
  )
96
98
 
97
- # Return default agent if none found
98
- return selected_agent or "general_ai"
99
+ # Return best agent
100
+ return best_agent
99
101
 
100
102
  async def _find_best_ai_agent(
101
103
  self,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: solana-agent
3
- Version: 14.0.2
3
+ Version: 15.0.0
4
4
  Summary: The Future of Work
5
5
  License: MIT
6
6
  Keywords: ai,openai,ai agents,agi
@@ -34,7 +34,7 @@ Description-Content-Type: text/markdown
34
34
 
35
35
  ## Features
36
36
 
37
- * Text streaming messages by AI Agents
37
+ * Multi-modal input-output streaming with text or audio by AI Agents
38
38
  * Conversational memory per user shared by all AI Agents
39
39
  * Routing based on AI Agent specializations
40
40
  * Built-in Internet Search for all AI Agents
@@ -85,21 +85,18 @@ config = {
85
85
  },
86
86
  "openai": {
87
87
  "api_key": "your-openai-key",
88
- "default_model": "gpt-4o-mini"
89
88
  },
90
89
  "agents": [
91
90
  {
92
91
  "name": "research_specialist",
93
92
  "instructions": "You are an expert researcher who synthesizes complex information clearly.",
94
93
  "specialization": "Research and knowledge synthesis",
95
- "model": "o3-mini",
96
94
  "tools": ["some_tool"]
97
95
  },
98
96
  {
99
97
  "name": "customer_support",
100
98
  "instructions": "You provide friendly, helpful customer support responses.",
101
99
  "specialization": "Customer inquiries",
102
- "model": "gpt-4o-mini"
103
100
  }
104
101
  ],
105
102
  }
@@ -112,6 +109,12 @@ async for response in solana_agent.process("user123", "What are the latest AI de
112
109
  print(response, end="")
113
110
  ```
114
111
 
112
+ ## LLMs Used
113
+ * The model used for AI Agents is `gpt-4o-mini-search-preview`
114
+ * The model used for internal structured outputs is `gpt-4o-mini`
115
+ * The model used for audio transcription is `gpt-4o-mini-transcribe`
116
+ * The model used for text-to-speech (TTS) is `gpt-4o-mini-tts`
117
+
115
118
  ## Solana Agent Kit
116
119
 
117
120
  [Solana Agent Kit](https://github.com/truemagic-coder/solana-agent-kit)
@@ -1,37 +1,37 @@
1
1
  solana_agent/__init__.py,sha256=ceYeUpjIitpln8YK1r0JVJU8mzG6cRPYu-HLny3d-Tw,887
2
2
  solana_agent/adapters/__init__.py,sha256=tiEEuuy0NF3ngc_tGEcRTt71zVI58v3dYY9RvMrF2Cg,204
3
- solana_agent/adapters/llm_adapter.py,sha256=-MmQL71JlNJeWr16a-qZ5OzTg1_69ewgAR9ZSwXpsbw,4326
3
+ solana_agent/adapters/llm_adapter.py,sha256=Fj-UGLc3vAcdtO_ZN0dX2zdh6wW7ae5Olvkg3q-Jtv4,6085
4
4
  solana_agent/adapters/mongodb_adapter.py,sha256=zvcIZ61zx45cwfjMimXC2RV_D_s6sL5b2Dz6H3HCgFc,2456
5
5
  solana_agent/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- solana_agent/client/solana_agent.py,sha256=Y2SRwysvUsW3_NM8dqURdabkmS1lMsiAKrPG9dtSQ4g,2676
6
+ solana_agent/client/solana_agent.py,sha256=-mwUoCrSyiY6k6Gw4RbaqpV3gWC9n8t1JRWalFvTWPo,3550
7
7
  solana_agent/domains/__init__.py,sha256=HiC94wVPRy-QDJSSRywCRrhrFfTBeHjfi5z-QfZv46U,168
8
- solana_agent/domains/agents.py,sha256=S8OKtkUQ7npl8bZrSH64TZuu5bnwnMYXXx3IbKvJOuU,3005
8
+ solana_agent/domains/agent.py,sha256=Ak_hD5gTCzRqAHLmqtxnny0Xki1qAKR7RzLW9LOQBTg,2930
9
9
  solana_agent/domains/routing.py,sha256=UDlgTjUoC9xIBVYu_dnf9-KG_bBgdEXAv_UtDOrYo0w,650
10
10
  solana_agent/factories/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- solana_agent/factories/agent_factory.py,sha256=yjB3G8ItXFH3DaoRf1BB1acPnL84Pd2pbqz5W03-5Jc,5711
11
+ solana_agent/factories/agent_factory.py,sha256=zrDezSNT167ePTYAZSPLg9imPlz8nTXoSr5kO83ePCg,5566
12
12
  solana_agent/interfaces/__init__.py,sha256=IQs1WIM1FeKP1-kY2FEfyhol_dB-I-VAe2rD6jrVF6k,355
13
- solana_agent/interfaces/client/client.py,sha256=SouFRSUhXK5qN88ln5anHnStrZfPJyY2cr5sVRRDBEw,668
13
+ solana_agent/interfaces/client/client.py,sha256=NzsY2vBv3BbBOfgfRoQCluqS9oBCPkHBHbrzde4gG1Y,1027
14
14
  solana_agent/interfaces/plugins/plugins.py,sha256=TMmTXwHhmkdJpIhgADfrpGGGk7PHP7O9Qi89uA26uMI,3013
15
15
  solana_agent/interfaces/providers/data_storage.py,sha256=Qjui9ISvX_NtOUPTUyjPMNxDoYRpml-aMG8DZy_Qxzc,1509
16
- solana_agent/interfaces/providers/llm.py,sha256=y4OFj2Wq4XicMxArWsYBHSp6cFe3BcK9sCemfyaWV_A,887
16
+ solana_agent/interfaces/providers/llm.py,sha256=uJE1-WClY0K2n2ZIzylnwBl-q_7YXyKjkL3ao-b2t3A,1461
17
17
  solana_agent/interfaces/providers/memory.py,sha256=oNOH8WZXVW8assDigIWZAWiwkxbpDiKupxA2RB6tQvQ,1010
18
- solana_agent/interfaces/repositories/agent.py,sha256=HZL5q7DoOj-qK5IDSShAJnu4_A75OR0xgJD_2W6Zr6k,820
19
- solana_agent/interfaces/services/agent.py,sha256=JHUVsxAnOsopiNilU_zDBAhJfQT_BFrtOczDL2atoZo,1407
20
- solana_agent/interfaces/services/query.py,sha256=w2ZeAX3j0n7dfh5EtesWqEy4YZ-cqjI3EmR3lyGnyJs,641
21
- solana_agent/interfaces/services/routing.py,sha256=tKMK97m6U5I__F406sm60az4QInGLX_N3knc_AbMZ80,452
18
+ solana_agent/interfaces/repositories/agent.py,sha256=r2MzVYOpEBVN00yqRxr3bUgWUgSwqoI1hRrdHhgFpFU,819
19
+ solana_agent/interfaces/services/agent.py,sha256=nTLJVymnVHrCzm0gQPhQiU3KZoH93OeXdFjZU3gs3Bc,1926
20
+ solana_agent/interfaces/services/query.py,sha256=1ubfhQLx5l2b1UZCKnqUhGg-v_qpf7Ve_8KD5KRXvx8,1042
21
+ solana_agent/interfaces/services/routing.py,sha256=gohkt5f9uYDLpu4iDVDk9yj8js9P56R6QHSIDNylgwA,438
22
22
  solana_agent/plugins/__init__.py,sha256=coZdgJKq1ExOaj6qB810i3rEhbjdVlrkN76ozt_Ojgo,193
23
23
  solana_agent/plugins/manager.py,sha256=GWwhfMBn9THwVn7biOvVa25GLthCA1ilWIoDkt5hXNI,5084
24
24
  solana_agent/plugins/registry.py,sha256=dRKWoOEqiU7OLsjpBWf4VJfDQYZdJPjW5AKxeITmVMA,2283
25
25
  solana_agent/plugins/tools/__init__.py,sha256=c0z7ij42gs94_VJrcn4Y8gUlTxMhsFNY6ahIsNswdLk,231
26
26
  solana_agent/plugins/tools/auto_tool.py,sha256=Z3CcOzwdXpzciH-5yphhd9qt1b9owTxhwC-dYmPF6B0,1489
27
27
  solana_agent/repositories/__init__.py,sha256=fP83w83CGzXLnSdq-C5wbw9EhWTYtqE2lQTgp46-X_4,163
28
- solana_agent/repositories/agent.py,sha256=7FTT3WvOaBacWme7d-qaOyqAlUhf9LVLXnIiPb16FDk,3188
28
+ solana_agent/repositories/agent.py,sha256=e1rnsQiigkKwJNLKro86a3b6TBiky3GMfmCRc5b_jPw,3187
29
29
  solana_agent/repositories/memory.py,sha256=0wgoa2bXhpgdBgn9-i9G10PB1bMGYObxcoY9Newll40,4742
30
30
  solana_agent/services/__init__.py,sha256=ab_NXJmwYUCmCrCzuTlZ47bJZINW0Y0F5jfQ9OovidU,163
31
- solana_agent/services/agent.py,sha256=Z5b6aOuEAMw8CHjvQ__reG4b9P056hKquFisNzwzosg,11509
32
- solana_agent/services/query.py,sha256=5_Py2t3p8oB4EVZZnbi7BezP9yigRe1EU9ZQ9AzQAog,7901
33
- solana_agent/services/routing.py,sha256=L3nZaMeX4ENYfHoc2KrOtfzhScCWfrXS5RRaUIJPwNY,4956
34
- solana_agent-14.0.2.dist-info/LICENSE,sha256=BnSRc-NSFuyF2s496l_4EyrwAP6YimvxWcjPiJ0J7g4,1057
35
- solana_agent-14.0.2.dist-info/METADATA,sha256=8RG-LRrM9BBzXlb-159YvTacsao2BW4c7MgrutTY2ps,4774
36
- solana_agent-14.0.2.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
37
- solana_agent-14.0.2.dist-info/RECORD,,
31
+ solana_agent/services/agent.py,sha256=RTd7ulqBFd8Y-Ix1sao3BYJvhaAUuVtv_Qqu-26zI2Y,13625
32
+ solana_agent/services/query.py,sha256=d7p7Uop-UmthHqYN_rTv3_fb0idxzo026SHttXE4uGk,9865
33
+ solana_agent/services/routing.py,sha256=TPJ2Pas4acE93QzMEV6ZP670OtTNrVEPa76fz6urEV4,4996
34
+ solana_agent-15.0.0.dist-info/LICENSE,sha256=BnSRc-NSFuyF2s496l_4EyrwAP6YimvxWcjPiJ0J7g4,1057
35
+ solana_agent-15.0.0.dist-info/METADATA,sha256=4ktEyE89ap3ZBOA137EVG9_Bod4IYpZtlOAYIMcMyYc,4956
36
+ solana_agent-15.0.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
37
+ solana_agent-15.0.0.dist-info/RECORD,,