solana-agent 23.0.7__tar.gz → 24.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {solana_agent-23.0.7 → solana_agent-24.1.0}/PKG-INFO +20 -23
  2. {solana_agent-23.0.7 → solana_agent-24.1.0}/README.md +16 -21
  3. {solana_agent-23.0.7 → solana_agent-24.1.0}/pyproject.toml +4 -2
  4. solana_agent-24.1.0/solana_agent/adapters/llm_adapter.py +332 -0
  5. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/client/solana_agent.py +3 -3
  6. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/interfaces/client/client.py +1 -1
  7. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/interfaces/providers/llm.py +11 -2
  8. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/interfaces/services/agent.py +1 -1
  9. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/interfaces/services/query.py +4 -1
  10. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/repositories/memory.py +2 -2
  11. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/services/agent.py +274 -156
  12. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/services/query.py +4 -4
  13. solana_agent-23.0.7/solana_agent/adapters/llm_adapter.py +0 -164
  14. {solana_agent-23.0.7 → solana_agent-24.1.0}/LICENSE +0 -0
  15. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/__init__.py +0 -0
  16. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/adapters/__init__.py +0 -0
  17. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/adapters/mongodb_adapter.py +0 -0
  18. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/client/__init__.py +0 -0
  19. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/domains/__init__.py +0 -0
  20. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/domains/agent.py +0 -0
  21. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/domains/routing.py +0 -0
  22. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/factories/__init__.py +0 -0
  23. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/factories/agent_factory.py +0 -0
  24. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/interfaces/__init__.py +0 -0
  25. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/interfaces/plugins/plugins.py +0 -0
  26. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/interfaces/providers/data_storage.py +0 -0
  27. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/interfaces/providers/memory.py +0 -0
  28. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/interfaces/services/routing.py +0 -0
  29. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/plugins/__init__.py +0 -0
  30. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/plugins/manager.py +0 -0
  31. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/plugins/registry.py +0 -0
  32. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/plugins/tools/__init__.py +0 -0
  33. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/plugins/tools/auto_tool.py +0 -0
  34. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/repositories/__init__.py +0 -0
  35. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/services/__init__.py +0 -0
  36. {solana_agent-23.0.7 → solana_agent-24.1.0}/solana_agent/services/routing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: solana-agent
3
- Version: 23.0.7
3
+ Version: 24.1.0
4
4
  Summary: Agentic IQ
5
5
  License: MIT
6
6
  Keywords: ai,openai,ai agents,agi
@@ -14,9 +14,11 @@ Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
16
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
- Requires-Dist: openai (>=1.70.0,<2.0.0)
17
+ Requires-Dist: httpx (>=0.28.1,<0.29.0)
18
+ Requires-Dist: openai (>=1.71.0,<2.0.0)
18
19
  Requires-Dist: pydantic (>=2.11.2,<3.0.0)
19
20
  Requires-Dist: pymongo (>=4.11.3,<5.0.0)
21
+ Requires-Dist: websockets (>=15.0.1,<16.0.0)
20
22
  Requires-Dist: zep-cloud (>=2.9.0,<3.0.0)
21
23
  Project-URL: Documentation, https://docs.solana-agent.com
22
24
  Project-URL: Repository, https://github.com/truemagic-coder/solana-agent
@@ -41,10 +43,11 @@ Build your AI business in three lines of code!
41
43
 
42
44
  ## Why?
43
45
  * Three lines of code setup
46
+ * Fast Responses
44
47
  * Multi-Agent Swarm
45
48
  * Multi-Modal Streaming (Text & Audio)
46
49
  * Conversational Memory & History
47
- * Built-in Internet Search
50
+ * Internet Search
48
51
  * Intelligent Routing
49
52
  * Business Alignment
50
53
  * Extensible Tooling
@@ -56,11 +59,12 @@ Build your AI business in three lines of code!
56
59
  ## Features
57
60
 
58
61
  * Easy three lines of code setup
62
+ * Fast AI responses
59
63
  * Designed for a multi-agent swarm
60
64
  * Seamless text and audio streaming with real-time multi-modal processing
61
65
  * Configurable audio voice characteristics via prompting
62
66
  * Persistent memory that preserves context across all agent interactions
63
- * Quick built-in Internet search to answer users' queries
67
+ * Quick Internet search to answer users' queries
64
68
  * Streamlined message history for all agent interactions
65
69
  * Intelligent query routing to agents with optimal domain expertise or your own custom routing
66
70
  * Unified value system ensuring brand-aligned agent responses
@@ -82,7 +86,6 @@ Build your AI business in three lines of code!
82
86
  * [gpt-4o-mini](https://platform.openai.com/docs/models/gpt-4o-mini)
83
87
  * [gpt-4o-mini-tts](https://platform.openai.com/docs/models/gpt-4o-mini-tts)
84
88
  * [gpt-4o-mini-transcribe](https://platform.openai.com/docs/models/gpt-4o-mini-transcribe)
85
- * [gpt-4o-mini-search-preview](https://platform.openai.com/docs/models/gpt-4o-mini-search-preview)
86
89
 
87
90
  ## Installation
88
91
 
@@ -353,21 +356,6 @@ API Calls:
353
356
 
354
357
  * If the Zep user and session don't exist yet, they are created, which costs 2 API calls (POST)
355
358
 
356
- ### Internet Search
357
-
358
- This mode is great for text output where the default response from OpenAI is enough.
359
-
360
- It is not suitable for audio as the OpenAI search results contain links and markdown.
361
-
362
- Also it may not call tools when they should be called as it thinks the search results answer the user query.
363
-
364
- It is much faster than calling `search_internet` from `sakit` as it saves 2 API calls.
365
-
366
- ```python
367
- async for response in solana_agent.process("user123", "What is the latest news on Canada?", internet_search=True):
368
- print(response, end="")
369
- ```
370
-
371
359
  ### Customize Speech
372
360
 
373
361
  This is an audio to audio example using the `audio_instructions` parameter.
@@ -387,16 +375,25 @@ async for response in solana_agent.process("user123", audio_content, output_form
387
375
  print(response, end="")
388
376
  ```
389
377
 
378
+ ### Real-Time Audio Transcription
379
+
380
+ It is possible to disable real-time audio transcription responses to save on costs.
381
+
382
+ ```python
383
+ async for response in solana_agent.process("user123", "What is the latest news on Canada?", audio_transcription_real_time=False):
384
+ print(response, end="")
385
+ ```
386
+
390
387
  ## Tools
391
388
 
392
389
  Tools can be used from plugins like Solana Agent Kit (sakit) or via inline tools. Tools available via plugins integrate automatically with Solana Agent.
393
390
 
394
391
  * Agents can only call one tool per response
395
392
  * Agents choose the best tool for the job
396
- * Tools do not use OpenAI function calling
397
- * Tools are async functions
393
+ * Solana Agent doesn't use OpenAI function calling (tools) because OpenAI tools don't support async functions
394
+ * Solana Agent tools are async functions
398
395
 
399
- ### Plugin Tool Example
396
+ ### Internet Search (Plugin Example)
400
397
 
401
398
  `pip install sakit`
402
399
 
@@ -17,10 +17,11 @@ Build your AI business in three lines of code!
17
17
 
18
18
  ## Why?
19
19
  * Three lines of code setup
20
+ * Fast Responses
20
21
  * Multi-Agent Swarm
21
22
  * Multi-Modal Streaming (Text & Audio)
22
23
  * Conversational Memory & History
23
- * Built-in Internet Search
24
+ * Internet Search
24
25
  * Intelligent Routing
25
26
  * Business Alignment
26
27
  * Extensible Tooling
@@ -32,11 +33,12 @@ Build your AI business in three lines of code!
32
33
  ## Features
33
34
 
34
35
  * Easy three lines of code setup
36
+ * Fast AI responses
35
37
  * Designed for a multi-agent swarm
36
38
  * Seamless text and audio streaming with real-time multi-modal processing
37
39
  * Configurable audio voice characteristics via prompting
38
40
  * Persistent memory that preserves context across all agent interactions
39
- * Quick built-in Internet search to answer users' queries
41
+ * Quick Internet search to answer users' queries
40
42
  * Streamlined message history for all agent interactions
41
43
  * Intelligent query routing to agents with optimal domain expertise or your own custom routing
42
44
  * Unified value system ensuring brand-aligned agent responses
@@ -58,7 +60,6 @@ Build your AI business in three lines of code!
58
60
  * [gpt-4o-mini](https://platform.openai.com/docs/models/gpt-4o-mini)
59
61
  * [gpt-4o-mini-tts](https://platform.openai.com/docs/models/gpt-4o-mini-tts)
60
62
  * [gpt-4o-mini-transcribe](https://platform.openai.com/docs/models/gpt-4o-mini-transcribe)
61
- * [gpt-4o-mini-search-preview](https://platform.openai.com/docs/models/gpt-4o-mini-search-preview)
62
63
 
63
64
  ## Installation
64
65
 
@@ -329,21 +330,6 @@ API Calls:
329
330
 
330
331
  * If the Zep user and session don't exist yet, they are created, which costs 2 API calls (POST)
331
332
 
332
- ### Internet Search
333
-
334
- This mode is great for text output where the default response from OpenAI is enough.
335
-
336
- It is not suitable for audio as the OpenAI search results contain links and markdown.
337
-
338
- Also it may not call tools when they should be called as it thinks the search results answer the user query.
339
-
340
- It is much faster than calling `search_internet` from `sakit` as it saves 2 API calls.
341
-
342
- ```python
343
- async for response in solana_agent.process("user123", "What is the latest news on Canada?", internet_search=True):
344
- print(response, end="")
345
- ```
346
-
347
333
  ### Customize Speech
348
334
 
349
335
  This is an audio to audio example using the `audio_instructions` parameter.
@@ -363,16 +349,25 @@ async for response in solana_agent.process("user123", audio_content, output_form
363
349
  print(response, end="")
364
350
  ```
365
351
 
352
+ ### Real-Time Audio Transcription
353
+
354
+ It is possible to disable real-time audio transcription responses to save on costs.
355
+
356
+ ```python
357
+ async for response in solana_agent.process("user123", "What is the latest news on Canada?", audio_transcription_real_time=False):
358
+ print(response, end="")
359
+ ```
360
+
366
361
  ## Tools
367
362
 
368
363
  Tools can be used from plugins like Solana Agent Kit (sakit) or via inline tools. Tools available via plugins integrate automatically with Solana Agent.
369
364
 
370
365
  * Agents can only call one tool per response
371
366
  * Agents choose the best tool for the job
372
- * Tools do not use OpenAI function calling
373
- * Tools are async functions
367
+ * Solana Agent doesn't use OpenAI function calling (tools) because OpenAI tools don't support async functions
368
+ * Solana Agent tools are async functions
374
369
 
375
- ### Plugin Tool Example
370
+ ### Internet Search (Plugin Example)
376
371
 
377
372
  `pip install sakit`
378
373
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "solana-agent"
3
- version = "23.0.7"
3
+ version = "24.1.0"
4
4
  description = "Agentic IQ"
5
5
  authors = ["Bevan Hunt <bevan@bevanhunt.com>"]
6
6
  license = "MIT"
@@ -23,10 +23,12 @@ python_paths = [".", "tests"]
23
23
 
24
24
  [tool.poetry.dependencies]
25
25
  python = ">=3.12,<4.0"
26
- openai = "^1.70.0"
26
+ openai = "^1.71.0"
27
27
  pydantic = "^2.11.2"
28
28
  pymongo = "^4.11.3"
29
29
  zep-cloud = "^2.9.0"
30
+ httpx = "^0.28.1"
31
+ websockets = "^15.0.1"
30
32
 
31
33
  [tool.poetry.group.dev.dependencies]
32
34
  pytest = "^8.3.5"
@@ -0,0 +1,332 @@
1
+ """
2
+ LLM provider adapters for the Solana Agent system.
3
+
4
+ These adapters implement the LLMProvider interface for different LLM services.
5
+ """
6
+ import asyncio
7
+ import json
8
+ from typing import Any, AsyncGenerator, Callable, Dict, Literal, Optional, Type, TypeVar
9
+
10
+ import httpx
11
+ from openai import AsyncOpenAI
12
+ from pydantic import BaseModel
13
+ import websockets
14
+
15
+ from solana_agent.interfaces.providers.llm import LLMProvider
16
+
17
+ T = TypeVar('T', bound=BaseModel)
18
+
19
+
20
+ class OpenAIAdapter(LLMProvider):
21
+ """OpenAI implementation of LLMProvider with web search capabilities."""
22
+
23
+ def __init__(self, api_key: str):
24
+ self.client = AsyncOpenAI(api_key=api_key)
25
+ self.parse_model = "gpt-4o-mini"
26
+ self.text_model = "gpt-4o-mini"
27
+ self.transcription_model = "gpt-4o-mini-transcribe"
28
+ self.tts_model = "gpt-4o-mini-tts"
29
+
30
+ async def tts(
31
+ self,
32
+ text: str,
33
+ instructions: str = "You speak in a friendly and helpful manner.",
34
+ voice: Literal["alloy", "ash", "ballad", "coral", "echo",
35
+ "fable", "onyx", "nova", "sage", "shimmer"] = "nova",
36
+ response_format: Literal['mp3', 'opus',
37
+ 'aac', 'flac', 'wav', 'pcm'] = "aac",
38
+ ) -> AsyncGenerator[bytes, None]: # pragma: no cover
39
+ """Stream text-to-speech audio from OpenAI models.
40
+
41
+ Args:
42
+ text: Text to convert to speech
43
+ instructions: Optional instructions for speech generation
44
+ voice: Voice to use for synthesis
45
+ response_format: Audio format
46
+
47
+ Yields:
48
+ Audio bytes as they become available
49
+ """
50
+ try:
51
+ async with self.client.audio.speech.with_streaming_response.create(
52
+ model=self.tts_model,
53
+ voice=voice,
54
+ instructions=instructions,
55
+ input=text,
56
+ response_format=response_format
57
+ ) as stream:
58
+ # Stream the bytes in 16KB chunks
59
+ async for chunk in stream.iter_bytes(chunk_size=1024 * 16):
60
+ yield chunk
61
+
62
+ except Exception as e:
63
+ print(f"Error in text_to_speech: {str(e)}")
64
+ import traceback
65
+ print(traceback.format_exc())
66
+ yield b"" # Return empty bytes on error
67
+
68
+ except Exception as e:
69
+ print(f"Error in text_to_speech: {str(e)}")
70
+ import traceback
71
+ print(traceback.format_exc())
72
+ yield b"" # Return empty bytes on error
73
+
74
+ async def transcribe_audio(
75
+ self,
76
+ audio_bytes: bytes,
77
+ input_format: Literal[
78
+ "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
79
+ ] = "mp4",
80
+ ) -> AsyncGenerator[str, None]: # pragma: no cover
81
+ """Stream transcription of an audio file.
82
+
83
+ Args:
84
+ audio_bytes: Audio file bytes
85
+ input_format: Format of the input audio file
86
+
87
+ Yields:
88
+ Transcript text chunks as they become available
89
+ """
90
+ try:
91
+ async with self.client.audio.transcriptions.with_streaming_response.create(
92
+ model=self.transcription_model,
93
+ file=(f"file.{input_format}", audio_bytes),
94
+ response_format="text",
95
+ ) as stream:
96
+ # Stream the text in 16KB chunks
97
+ async for chunk in stream.iter_text(chunk_size=1024 * 16):
98
+ yield chunk
99
+
100
+ except Exception as e:
101
+ print(f"Error in transcribe_audio: {str(e)}")
102
+ import traceback
103
+ print(traceback.format_exc())
104
+ yield f"I apologize, but I encountered an error transcribing the audio: {str(e)}"
105
+
106
+ async def generate_text(
107
+ self,
108
+ prompt: str,
109
+ system_prompt: str = "",
110
+ ) -> AsyncGenerator[str, None]: # pragma: no cover
111
+ """Generate text from OpenAI models."""
112
+ messages = []
113
+
114
+ if system_prompt:
115
+ messages.append({"role": "system", "content": system_prompt})
116
+
117
+ messages.append({"role": "user", "content": prompt})
118
+
119
+ # Prepare request parameters
120
+ request_params = {
121
+ "messages": messages,
122
+ "stream": True,
123
+ "model": self.text_model,
124
+ }
125
+ try:
126
+ response = await self.client.chat.completions.create(**request_params)
127
+
128
+ async for chunk in response:
129
+ if chunk.choices:
130
+ if chunk.choices[0].delta.content:
131
+ text = chunk.choices[0].delta.content
132
+ yield text
133
+
134
+ except Exception as e:
135
+ print(f"Error in generate_text: {str(e)}")
136
+ import traceback
137
+ print(traceback.format_exc())
138
+ yield f"I apologize, but I encountered an error: {str(e)}"
139
+
140
+ async def parse_structured_output(
141
+ self,
142
+ prompt: str,
143
+ system_prompt: str,
144
+ model_class: Type[T],
145
+ ) -> T: # pragma: no cover
146
+ """Generate structured output using Pydantic model parsing."""
147
+ messages = []
148
+ if system_prompt:
149
+ messages.append({"role": "system", "content": system_prompt})
150
+
151
+ messages.append({"role": "user", "content": prompt})
152
+
153
+ try:
154
+ # First try the beta parsing API
155
+ completion = await self.client.beta.chat.completions.parse(
156
+ model=self.parse_model,
157
+ messages=messages,
158
+ response_format=model_class,
159
+ )
160
+ return completion.choices[0].message.parsed
161
+ except Exception as e:
162
+ print(f"Error with beta.parse method: {e}")
163
+
164
+ async def create_realtime_session(
165
+ self,
166
+ model: str = "gpt-4o-mini-realtime-preview",
167
+ modalities: list = ["audio", "text"],
168
+ instructions: str = "You are a helpful assistant.",
169
+ voice: str = "alloy",
170
+ input_audio_format: str = "pcm16",
171
+ output_audio_format: str = "pcm16",
172
+ ) -> Dict[str, Any]: # pragma: no cover
173
+ """Create a realtime session token for WebSocket communication."""
174
+ try:
175
+ # Get the API key from the AsyncOpenAI client
176
+ api_key = self.client.api_key
177
+
178
+ # Create an async HTTP client
179
+ async with httpx.AsyncClient() as client:
180
+ response = await client.post(
181
+ "https://api.openai.com/v1/realtime/sessions",
182
+ json={
183
+ "model": model,
184
+ "modalities": modalities,
185
+ "instructions": instructions,
186
+ "voice": voice,
187
+ "input_audio_format": input_audio_format,
188
+ "output_audio_format": output_audio_format,
189
+ },
190
+ headers={
191
+ "Authorization": f"Bearer {api_key}",
192
+ "Content-Type": "application/json",
193
+ "OpenAI-Beta": "realtime=v1"
194
+ }
195
+ )
196
+
197
+ if response.status_code == 200:
198
+ return response.json()
199
+ else:
200
+ raise Exception(
201
+ f"Failed to create realtime session: {response.text}")
202
+ except Exception as e:
203
+ print(f"Error creating realtime session: {str(e)}")
204
+ raise
205
+
206
+ async def realtime_audio_transcription(
207
+ self,
208
+ audio_generator: AsyncGenerator[bytes, None],
209
+ transcription_config: Optional[Dict[str, Any]] = None,
210
+ on_event: Optional[Callable[[Dict[str, Any]], Any]] = None,
211
+ ) -> AsyncGenerator[str, None]: # pragma: no cover
212
+ """Stream real-time audio transcription using the Realtime API.
213
+
214
+ Args:
215
+ audio_generator: Async generator that yields audio chunks
216
+ transcription_config: Optional custom configuration for transcription
217
+ on_event: Optional callback function for handling raw events
218
+
219
+ Yields:
220
+ Transcription text as it becomes available
221
+ """
222
+ # Create default transcription config if none provided
223
+ if transcription_config is None:
224
+ transcription_config = {
225
+ "input_audio_format": "pcm16",
226
+ "input_audio_transcription": {
227
+ "model": "gpt-4o-mini-transcribe"
228
+ },
229
+ "turn_detection": {
230
+ "type": "server_vad",
231
+ "threshold": 0.5,
232
+ "prefix_padding_ms": 300,
233
+ "silence_duration_ms": 200
234
+ }
235
+ }
236
+
237
+ try:
238
+ # Get the API key from the AsyncOpenAI client
239
+ api_key = self.client.api_key
240
+
241
+ # Create transcription session
242
+ async with httpx.AsyncClient() as client:
243
+ response = await client.post(
244
+ "https://api.openai.com/v1/realtime/transcription_sessions",
245
+ json=transcription_config,
246
+ headers={
247
+ "Authorization": f"Bearer {api_key}",
248
+ "Content-Type": "application/json",
249
+ "OpenAI-Beta": "realtime=v1"
250
+ }
251
+ )
252
+
253
+ if response.status_code != 200:
254
+ raise Exception(
255
+ f"Failed to create transcription session: {response.text}")
256
+
257
+ session = response.json()
258
+ client_secret = session["client_secret"]["value"]
259
+
260
+ # Connect to WebSocket with proper headers as dictionary
261
+ url = "wss://api.openai.com/v1/realtime?model=gpt-4o-mini-transcribe"
262
+ headers = {
263
+ "Authorization": f"Bearer {client_secret}",
264
+ "OpenAI-Beta": "realtime=v1"
265
+ }
266
+
267
+ async with websockets.connect(url, additional_headers=headers) as websocket:
268
+ # Handle WebSocket communication in the background
269
+ audio_task = None
270
+
271
+ async def send_audio():
272
+ try:
273
+ async for audio_chunk in audio_generator:
274
+ # Base64 encode the audio
275
+ import base64
276
+ encoded_audio = base64.b64encode(
277
+ audio_chunk).decode('utf-8')
278
+
279
+ # Send audio chunk
280
+ await websocket.send(json.dumps({
281
+ "type": "input_audio_buffer.append",
282
+ "audio": encoded_audio
283
+ }))
284
+
285
+ # Small delay to prevent flooding
286
+ await asyncio.sleep(0.05)
287
+
288
+ # Commit the audio buffer when done
289
+ await websocket.send(json.dumps({
290
+ "type": "input_audio_buffer.commit"
291
+ }))
292
+ except Exception as e:
293
+ print(f"Error sending audio: {str(e)}")
294
+
295
+ # Start sending audio in the background
296
+ audio_task = asyncio.create_task(send_audio())
297
+
298
+ # Process transcription events
299
+ try:
300
+ while True:
301
+ message = await websocket.recv()
302
+ event = json.loads(message)
303
+
304
+ if on_event:
305
+ # Check if on_event is a coroutine function and await it if needed
306
+ if asyncio.iscoroutinefunction(on_event):
307
+ await on_event(event)
308
+ else:
309
+ on_event(event)
310
+
311
+ # Extract transcription deltas
312
+ if event["type"] == "conversation.item.input_audio_transcription.delta":
313
+ yield event["delta"]
314
+
315
+ # Also handle completed transcriptions
316
+ elif event["type"] == "conversation.item.input_audio_transcription.completed":
317
+ yield event["transcript"]
318
+ break
319
+ finally:
320
+ # Clean up audio task if it's still running
321
+ if audio_task and not audio_task.done():
322
+ audio_task.cancel()
323
+ try:
324
+ await audio_task
325
+ except asyncio.CancelledError:
326
+ pass
327
+
328
+ except Exception as e:
329
+ print(f"Error in realtime audio transcription: {str(e)}")
330
+ import traceback
331
+ print(traceback.format_exc())
332
+ yield f"I apologize, but I encountered an error transcribing the audio: {str(e)}"
@@ -55,8 +55,8 @@ class SolanaAgent(SolanaAgentInterface):
55
55
  audio_input_format: Literal[
56
56
  "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
57
57
  ] = "mp4",
58
+ audio_transcription_real_time: bool = True,
58
59
  router: Optional[RoutingInterface] = None,
59
- internet_search: bool = False,
60
60
  ) -> AsyncGenerator[Union[str, bytes], None]: # pragma: no cover
61
61
  """Process a user message and return the response stream.
62
62
 
@@ -69,8 +69,8 @@ class SolanaAgent(SolanaAgentInterface):
69
69
  audio_instructions: Audio voice instructions
70
70
  audio_output_format: Audio output format
71
71
  audio_input_format: Audio input format
72
+ audio_transcription_real_time: Flag for real-time audio transcription
72
73
  router: Optional routing service for processing
73
- internet_search: Flag to use OpenAI Internet search
74
74
 
75
75
  Returns:
76
76
  Async generator yielding response chunks (text strings or audio bytes)
@@ -85,7 +85,7 @@ class SolanaAgent(SolanaAgentInterface):
85
85
  audio_input_format=audio_input_format,
86
86
  prompt=prompt,
87
87
  router=router,
88
- internet_search=internet_search,
88
+ audio_transcription_real_time=audio_transcription_real_time,
89
89
  ):
90
90
  yield chunk
91
91
 
@@ -24,7 +24,7 @@ class SolanaAgent(ABC):
24
24
  "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
25
25
  ] = "mp4",
26
26
  router: Optional[RoutingInterface] = None,
27
- internet_search: bool = False,
27
+ audio_transcription_real_time: bool = True,
28
28
  ) -> AsyncGenerator[Union[str, bytes], None]:
29
29
  """Process a user message and return the response stream."""
30
30
  pass
@@ -1,5 +1,5 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import AsyncGenerator, List, Literal, Type, TypeVar, Union
2
+ from typing import Any, AsyncGenerator, Callable, Dict, List, Literal, Optional, Type, TypeVar, Union
3
3
 
4
4
  from pydantic import BaseModel
5
5
 
@@ -15,7 +15,6 @@ class LLMProvider(ABC):
15
15
  self,
16
16
  prompt: str,
17
17
  system_prompt: str = "",
18
- internet_search: bool = False,
19
18
  ) -> AsyncGenerator[str, None]:
20
19
  """Generate text from the language model."""
21
20
  pass
@@ -50,3 +49,13 @@ class LLMProvider(ABC):
50
49
  ) -> AsyncGenerator[str, None]:
51
50
  """Transcribe audio from the language model."""
52
51
  pass
52
+
53
+ @abstractmethod
54
+ async def realtime_audio_transcription(
55
+ self,
56
+ audio_generator: AsyncGenerator[bytes, None],
57
+ transcription_config: Optional[Dict[str, Any]] = None,
58
+ on_event: Optional[Callable[[Dict[str, Any]], Any]] = None,
59
+ ) -> AsyncGenerator[str, None]:
60
+ """Stream real-time audio transcription from the language model."""
61
+ pass
@@ -34,7 +34,7 @@ class AgentService(ABC):
34
34
  "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
35
35
  ] = "mp4",
36
36
  prompt: Optional[str] = None,
37
- internet_search: bool = False,
37
+ audio_transcription_real_time: bool = True,
38
38
  ) -> AsyncGenerator[Union[str, bytes], None]:
39
39
  """Generate a response from an agent."""
40
40
  pass
@@ -1,6 +1,8 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from typing import Any, AsyncGenerator, Dict, Literal, Optional, Union
3
3
 
4
+ from solana_agent.interfaces.services.routing import RoutingService as RoutingInterface
5
+
4
6
 
5
7
  class QueryService(ABC):
6
8
  """Interface for processing user queries."""
@@ -20,7 +22,8 @@ class QueryService(ABC):
20
22
  "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
21
23
  ] = "mp4",
22
24
  prompt: Optional[str] = None,
23
- internet_search: bool = False,
25
+ router: Optional[RoutingInterface] = None,
26
+ audio_transcription_real_time: bool = True,
24
27
  ) -> AsyncGenerator[Union[str, bytes], None]:
25
28
  """Process the user request and generate a response."""
26
29
  pass
@@ -69,8 +69,8 @@ class MemoryRepository(MemoryProvider):
69
69
  # Store truncated messages
70
70
  doc = {
71
71
  "user_id": user_id,
72
- "user_message": self._truncate(user_msg),
73
- "assistant_message": self._truncate(assistant_msg),
72
+ "user_message": user_msg,
73
+ "assistant_message": assistant_msg,
74
74
  "timestamp": datetime.now(timezone.utc)
75
75
  }
76
76
  self.mongo.insert_one(self.collection, doc)