solana-agent 31.1.7__py3-none-any.whl → 31.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,506 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+ from typing import Any, AsyncGenerator, Dict, Optional
6
+
7
+ from solana_agent.interfaces.providers.realtime import (
8
+ BaseRealtimeSession,
9
+ RealtimeSessionOptions,
10
+ )
11
+ from solana_agent.interfaces.providers.audio import AudioTranscoder
12
+
13
logger = logging.getLogger(__name__)


class RealtimeService:
    """High-level service to manage a realtime audio session.

    Responsibilities:
    - Connect/close a realtime session (WebSocket-based)
    - Update voice and VAD at runtime via session.update
    - Append/commit/clear input audio buffers
    - Expose separate async generators for audio and input/output transcripts
    - Allow out-of-band response.create (e.g., text-to-speech without new audio)
    """

    def __init__(
        self,
        session: BaseRealtimeSession,
        options: Optional[RealtimeSessionOptions] = None,
        transcoder: Optional[AudioTranscoder] = None,
        accept_compressed_input: bool = False,
        client_input_mime: str = "audio/mp4",
        encode_output: bool = False,
        client_output_mime: str = "audio/aac",
    ) -> None:
        """Create the service.

        Args:
            session: Underlying realtime session (WebSocket-based).
            options: Initial session options; defaults are used when omitted.
            transcoder: Optional transcoder for compressed client audio I/O.
            accept_compressed_input: Accept compressed client audio (e.g.
                MP4/AAC) and transcode it to PCM16 before forwarding.
            client_input_mime: MIME type of compressed client input audio.
            encode_output: Encode outgoing PCM16 audio for the client.
            client_output_mime: MIME type for encoded client output audio.
        """
        self._session = session
        self._options = options or RealtimeSessionOptions()
        self._connected = False
        self._lock = asyncio.Lock()
        self._transcoder = transcoder
        # Client-side transport controls (do not affect OpenAI session formats)
        self._accept_compressed_input = accept_compressed_input
        self._client_input_mime = client_input_mime
        self._encode_output = encode_output
        self._client_output_mime = client_output_mime

    async def start(self) -> None:  # pragma: no cover
        """Connect the underlying session. Idempotent under the lock."""
        async with self._lock:
            if self._connected:
                return
            logger.info("RealtimeService: starting session")
            await self._session.connect()
            self._connected = True

    async def stop(self) -> None:  # pragma: no cover
        """Close the underlying session. Idempotent under the lock."""
        async with self._lock:
            if not self._connected:
                return
            logger.info("RealtimeService: stopping session")
            await self._session.close()
            self._connected = False

    # --- Configuration ---
    async def configure(
        self,
        *,
        voice: Optional[str] = None,
        vad_enabled: Optional[bool] = None,
        instructions: Optional[str] = None,
        input_rate_hz: Optional[int] = None,
        output_rate_hz: Optional[int] = None,
        input_mime: Optional[str] = None,
        output_mime: Optional[str] = None,
        tools: Optional[list[dict[str, Any]]] = None,
        tool_choice: Optional[str] = None,
    ) -> None:  # pragma: no cover
        """Update session settings (voice, VAD, formats, tools).

        Only parameters explicitly provided (non-``None``) are patched; the
        patch is sent via ``session.update`` and the local options snapshot
        is kept in sync afterwards.
        """
        patch: Dict[str, Any] = {}

        audio_patch: Dict[str, Any] = {}
        if (
            input_mime is not None
            or input_rate_hz is not None
            or vad_enabled is not None
        ):
            input_patch: Dict[str, Any] = {
                "format": "pcm16",  # session is fixed to PCM16 server-side
            }
            # Only include turn_detection when the caller explicitly toggled
            # VAD; unconditionally sending None here would silently disable
            # server-side VAD whenever an unrelated input setting changed.
            if vad_enabled is not None:
                input_patch["turn_detection"] = (
                    {"type": "semantic_vad", "create_response": True}
                    if vad_enabled
                    else None
                )
            audio_patch["input"] = input_patch

        if output_mime is not None or output_rate_hz is not None or voice is not None:
            audio_patch["output"] = {
                "format": "pcm16",  # session is fixed to PCM16 server-side
                "voice": voice or self._options.voice,
                "speed": 1.0,
            }

        if audio_patch:
            patch["audio"] = audio_patch

        if instructions is not None:
            patch["instructions"] = instructions
        if tools is not None:
            patch["tools"] = tools
        if tool_choice is not None:
            patch["tool_choice"] = tool_choice

        if patch:
            logger.debug("RealtimeService.configure patch: %s", patch)
            await self._session.update_session(patch)

        # Update local options snapshot
        if voice is not None:
            self._options.voice = voice
        if vad_enabled is not None:
            self._options.vad_enabled = vad_enabled
        if instructions is not None:
            self._options.instructions = instructions
        if input_rate_hz is not None:
            self._options.input_rate_hz = input_rate_hz
        if output_rate_hz is not None:
            self._options.output_rate_hz = output_rate_hz
        if input_mime is not None:
            self._options.input_mime = input_mime
        if output_mime is not None:
            self._options.output_mime = output_mime
        if tools is not None:
            self._options.tools = tools
        if tool_choice is not None:
            self._options.tool_choice = tool_choice

    # --- Audio input ---
    async def append_audio(self, chunk_bytes: bytes) -> None:  # pragma: no cover
        """Forward one chunk of input audio to the session.

        Accepts PCM16 by default; if ``accept_compressed_input`` is True the
        client audio is transcoded to PCM16 first. This keeps the server
        session configured for PCM while allowing mobile clients to send
        MP4/AAC.

        Raises:
            ValueError: Compressed input is enabled but no transcoder is set.
        """
        logger.debug(
            "RealtimeService.append_audio: len=%d, accept_compressed_input=%s, client_input_mime=%s",
            len(chunk_bytes),
            self._accept_compressed_input,
            self._client_input_mime,
        )
        if self._accept_compressed_input:
            if not self._transcoder:
                raise ValueError(
                    "Compressed input enabled but no transcoder configured"
                )
            pcm16 = await self._transcoder.to_pcm16(
                chunk_bytes, self._client_input_mime, self._options.input_rate_hz
            )
            await self._session.append_audio(pcm16)
            logger.debug("RealtimeService.append_audio: sent PCM16 len=%d", len(pcm16))
            return
        # Default: pass-through PCM16
        await self._session.append_audio(chunk_bytes)
        logger.debug(
            "RealtimeService.append_audio: sent passthrough len=%d", len(chunk_bytes)
        )

    async def commit_input(self) -> None:  # pragma: no cover
        """Commit the pending input audio buffer."""
        logger.debug("RealtimeService.commit_input")
        await self._session.commit_input()

    async def clear_input(self) -> None:  # pragma: no cover
        """Clear the pending input audio buffer."""
        logger.debug("RealtimeService.clear_input")
        await self._session.clear_input()

    # --- Out-of-band response (e.g., TTS without new audio) ---
    async def create_response(  # pragma: no cover
        self, response_patch: Optional[Dict[str, Any]] = None
    ) -> None:
        """Request an out-of-band response (e.g., TTS without new input audio)."""
        await self._session.create_response(response_patch)

    # --- Streams ---
    def iter_events(self) -> AsyncGenerator[Dict[str, Any], None]:  # pragma: no cover
        """Async stream of raw session events."""
        return self._session.iter_events()

    def iter_output_audio(self) -> AsyncGenerator[bytes, None]:  # pragma: no cover
        """Async stream of raw PCM16 output audio."""
        return self._session.iter_output_audio()

    async def iter_output_audio_encoded(
        self,
    ) -> AsyncGenerator[bytes, None]:  # pragma: no cover
        """Stream output audio, tolerating long tool executions.

        - If no audio arrives immediately, keep waiting as long as a
          function/tool call is pending (up to a hard cap).
        - Bridge across multiple audio segments (e.g., pre-call and post-call
          responses).
        - End the stream only when no audio is available and no pending tool
          call remains.

        When ``encode_output`` is enabled and a transcoder is configured, the
        PCM16 stream is re-encoded to ``client_output_mime``.
        """

        def _has_pending_tool() -> bool:
            # Sessions may not implement has_pending_tool_call; default False.
            try:
                return bool(
                    getattr(self._session, "has_pending_tool_call", lambda: False)()
                )
            except Exception:
                return False

        async def _produce_pcm():
            max_wait_pending_sec = 600.0  # allow up to 10 minutes while tools run
            base_idle_timeout = 12.0  # idle cap when nothing is pending
            idle_slice = 1.0
            # Track pending-tool waiting and plain idling separately so a long
            # tool run does not consume the post-tool idle budget.
            pending_waited = 0.0
            idle_waited = 0.0

            while True:
                gen = self._session.iter_output_audio()
                try:
                    # Inner loop for one segment until the generator ends
                    while True:
                        try:
                            chunk = await asyncio.wait_for(
                                gen.__anext__(), timeout=idle_slice
                            )
                        except asyncio.TimeoutError:
                            if _has_pending_tool():
                                idle_waited = 0.0
                                pending_waited += idle_slice
                                if pending_waited > max_wait_pending_sec:
                                    logger.warning(
                                        "RealtimeService: exceeded max pending-tool wait; ending stream"
                                    )
                                    return
                                continue
                            # No pending tool: stop once the idle budget is spent
                            idle_waited += idle_slice
                            if idle_waited >= base_idle_timeout:
                                logger.warning(
                                    "RealtimeService: idle with no pending tool; ending stream"
                                )
                                return
                            continue
                        # Got a chunk; reset wait counters and yield
                        pending_waited = 0.0
                        idle_waited = 0.0
                        if not chunk:
                            continue
                        yield chunk
                except StopAsyncIteration:
                    # Segment ended; if a tool is pending, wait for the next one
                    if _has_pending_tool():
                        await asyncio.sleep(0.25)
                        continue
                    # Otherwise, no more audio segments expected
                    return

        if self._encode_output and self._transcoder:
            async for out in self._transcoder.stream_from_pcm16(
                _produce_pcm(), self._client_output_mime, self._options.output_rate_hz
            ):
                yield out
        else:
            async for chunk in _produce_pcm():
                yield chunk

    def iter_input_transcript(self) -> AsyncGenerator[str, None]:  # pragma: no cover
        """Async stream of user (input) transcript deltas."""
        return self._session.iter_input_transcript()

    def iter_output_transcript(self) -> AsyncGenerator[str, None]:  # pragma: no cover
        """Async stream of assistant (output) transcript deltas."""
        return self._session.iter_output_transcript()
267
+
268
+
269
class TwinRealtimeService:
    """Orchestrates two realtime sessions in parallel.

    - conversation: full duplex (audio out + assistant transcript, tools, etc.)
    - transcription: transcription-only session per GA (input transcript deltas)

    Audio input is fanned out to both sessions. Output audio is sourced from
    the conversation session only. Input transcript is sourced from the
    transcription session only. This aligns with the GA guidance to use a
    dedicated transcription session for reliable realtime STT, while the
    conversation session handles assistant speech.
    """

    def __init__(
        self,
        conversation: BaseRealtimeSession,
        transcription: BaseRealtimeSession,
        *,
        conv_options: Optional[RealtimeSessionOptions] = None,
        trans_options: Optional[RealtimeSessionOptions] = None,
        transcoder: Optional[AudioTranscoder] = None,
        accept_compressed_input: bool = False,
        client_input_mime: str = "audio/mp4",
        encode_output: bool = False,
        client_output_mime: str = "audio/aac",
    ) -> None:
        """Create the twin service.

        Args:
            conversation: Full-duplex conversation session.
            transcription: Dedicated transcription-only session.
            conv_options: Options snapshot for the conversation session.
            trans_options: Options snapshot for the transcription session.
            transcoder: Optional transcoder for compressed client audio I/O.
            accept_compressed_input: Accept compressed client audio and
                transcode it to PCM16 before fanning out.
            client_input_mime: MIME type of compressed client input audio.
            encode_output: Encode outgoing PCM16 audio for the client.
            client_output_mime: MIME type for encoded client output audio.
        """
        self._conv = conversation
        self._trans = transcription
        self._conv_opts = conv_options or RealtimeSessionOptions()
        self._trans_opts = trans_options or RealtimeSessionOptions()
        self._transcoder = transcoder
        self._accept_compressed_input = accept_compressed_input
        self._client_input_mime = client_input_mime
        self._encode_output = encode_output
        self._client_output_mime = client_output_mime
        self._connected = False
        self._lock = asyncio.Lock()

    async def start(self) -> None:  # pragma: no cover
        """Connect both sessions concurrently. Idempotent under the lock."""
        async with self._lock:
            if self._connected:
                return
            logger.info("TwinRealtimeService: starting conversation + transcription")
            await asyncio.gather(self._conv.connect(), self._trans.connect())
            self._connected = True

    async def stop(self) -> None:  # pragma: no cover
        """Close both sessions; always mark disconnected, even on error."""
        async with self._lock:
            if not self._connected:
                return
            logger.info("TwinRealtimeService: stopping both sessions")
            try:
                await asyncio.gather(self._conv.close(), self._trans.close())
            finally:
                self._connected = False

    async def reconnect(self) -> None:  # pragma: no cover
        """Best-effort close of both sessions, then reconnect via start()."""
        async with self._lock:
            try:
                await asyncio.gather(self._conv.close(), self._trans.close())
            except Exception:
                pass
            self._connected = False
        await self.start()

    async def configure(
        self,
        *,
        voice: Optional[str] = None,
        vad_enabled: Optional[bool] = None,
        instructions: Optional[str] = None,
        input_rate_hz: Optional[int] = None,
        output_rate_hz: Optional[int] = None,
        input_mime: Optional[str] = None,
        output_mime: Optional[str] = None,
        tools: Optional[list[dict[str, Any]]] = None,
        tool_choice: Optional[str] = None,
    ) -> None:  # pragma: no cover
        """Update conversation-session settings; sync local snapshots.

        Only the conversation session needs voice/tools; the transcription
        session already has its own VAD model configured at connect-time.
        """
        patch: Dict[str, Any] = {}
        audio_patch: Dict[str, Any] = {}
        if (
            vad_enabled is not None
            or input_rate_hz is not None
            or input_mime is not None
        ):
            input_patch: Dict[str, Any] = {"format": "pcm16"}
            # Only include turn_detection when the caller explicitly toggled
            # VAD; unconditionally sending None here would silently disable
            # server-side VAD whenever an unrelated input setting changed.
            if vad_enabled is not None:
                input_patch["turn_detection"] = (
                    {"type": "semantic_vad", "create_response": True}
                    if vad_enabled
                    else None
                )
            audio_patch["input"] = input_patch
        if output_rate_hz is not None or output_mime is not None or voice is not None:
            audio_patch["output"] = {
                "format": "pcm16",
                "voice": voice or self._conv_opts.voice,
                "speed": 1.0,
            }
        if audio_patch:
            patch["audio"] = audio_patch
        if instructions is not None:
            patch["instructions"] = instructions
        if tools is not None:
            patch["tools"] = tools
        if tool_choice is not None:
            patch["tool_choice"] = tool_choice

        if patch:
            logger.debug("TwinRealtimeService.configure patch (conv): %s", patch)
            await self._conv.update_session(patch)

        # Update local snapshots
        if voice is not None:
            self._conv_opts.voice = voice
        if vad_enabled is not None:
            self._conv_opts.vad_enabled = vad_enabled
            self._trans_opts.vad_enabled = vad_enabled
        if instructions is not None:
            self._conv_opts.instructions = instructions
        if input_rate_hz is not None:
            self._conv_opts.input_rate_hz = input_rate_hz
            self._trans_opts.input_rate_hz = input_rate_hz
        if output_rate_hz is not None:
            self._conv_opts.output_rate_hz = output_rate_hz
        if input_mime is not None:
            self._conv_opts.input_mime = input_mime
            self._trans_opts.input_mime = input_mime
        if output_mime is not None:
            self._conv_opts.output_mime = output_mime
        if tools is not None:
            self._conv_opts.tools = tools
        if tool_choice is not None:
            self._conv_opts.tool_choice = tool_choice

    async def append_audio(self, chunk_bytes: bytes) -> None:  # pragma: no cover
        """Fan one audio chunk out to both sessions (transcoding at most once).

        Raises:
            ValueError: Compressed input is enabled but no transcoder is set.
        """
        if self._accept_compressed_input:
            if not self._transcoder:
                raise ValueError(
                    "Compressed input enabled but no transcoder configured"
                )
            pcm16 = await self._transcoder.to_pcm16(
                chunk_bytes, self._client_input_mime, self._conv_opts.input_rate_hz
            )
            await asyncio.gather(
                self._conv.append_audio(pcm16), self._trans.append_audio(pcm16)
            )
            return
        await asyncio.gather(
            self._conv.append_audio(chunk_bytes),
            self._trans.append_audio(chunk_bytes),
        )

    async def commit_input(self) -> None:  # pragma: no cover
        """Commit pending input audio on both sessions."""
        await asyncio.gather(self._conv.commit_input(), self._trans.commit_input())

    async def commit_conversation(self) -> None:  # pragma: no cover
        """Commit pending input audio on the conversation session only."""
        await self._conv.commit_input()

    async def commit_transcription(self) -> None:  # pragma: no cover
        """Commit pending input audio on the transcription session only."""
        await self._trans.commit_input()

    async def clear_input(self) -> None:  # pragma: no cover
        """Clear pending input audio on both sessions."""
        await asyncio.gather(self._conv.clear_input(), self._trans.clear_input())

    async def create_response(
        self, response_patch: Optional[Dict[str, Any]] = None
    ) -> None:  # pragma: no cover
        """Request an out-of-band response.

        Only the conversation session creates assistant responses.
        """
        await self._conv.create_response(response_patch)

    # --- Streams ---
    def iter_events(self) -> AsyncGenerator[Dict[str, Any], None]:  # pragma: no cover
        """Conversation-session events; use iter_input_transcript for STT."""
        return self._conv.iter_events()

    def iter_output_audio(self) -> AsyncGenerator[bytes, None]:  # pragma: no cover
        """Raw PCM16 output audio from the conversation session."""
        return self._conv.iter_output_audio()

    async def iter_output_audio_encoded(
        self,
    ) -> AsyncGenerator[bytes, None]:  # pragma: no cover
        """Stream (optionally encoded) output audio from the conversation session.

        If no PCM arrives within the initial timeout, both sessions are closed
        so the next turn starts from a clean state.
        """
        pcm_gen = self._conv.iter_output_audio()

        try:
            first_chunk = await asyncio.wait_for(pcm_gen.__anext__(), timeout=12.0)
        except StopAsyncIteration:
            logger.warning("TwinRealtimeService: no PCM produced (ended immediately)")
            return
        except asyncio.TimeoutError:
            logger.warning("TwinRealtimeService: no PCM within timeout; closing conv")
            try:
                # Close both sessions to ensure clean restart on next turn
                await asyncio.gather(self._conv.close(), self._trans.close())
                self._connected = False
            except Exception:
                pass
            return

        async def _pcm_iter():
            # Re-emit the probed first chunk, then drain the rest, skipping
            # empty chunks.
            if first_chunk:
                yield first_chunk
            async for c in pcm_gen:
                if not c:
                    continue
                yield c

        if self._encode_output and self._transcoder:
            async for out in self._transcoder.stream_from_pcm16(
                _pcm_iter(), self._client_output_mime, self._conv_opts.output_rate_hz
            ):
                yield out
        else:
            async for chunk in _pcm_iter():
                yield chunk

    def iter_input_transcript(self) -> AsyncGenerator[str, None]:  # pragma: no cover
        """User (input) transcript deltas from the transcription session."""
        return self._trans.iter_input_transcript()

    def iter_output_transcript(self) -> AsyncGenerator[str, None]:  # pragma: no cover
        """Assistant (output) transcript deltas from the conversation session."""
        return self._conv.iter_output_transcript()

    def iter_transcription_events(
        self,
    ) -> AsyncGenerator[Dict[str, Any], None]:  # pragma: no cover
        """Transcription-session events, e.g. for completion detection."""
        return self._trans.iter_events()

    def is_connected(self) -> bool:  # pragma: no cover
        """Whether both sessions are currently considered connected."""
        return self._connected

    def set_tool_executor(self, executor) -> None:  # pragma: no cover
        """Forward the tool executor to the conversation session (tools only apply there)."""
        if hasattr(self._conv, "set_tool_executor"):
            self._conv.set_tool_executor(executor)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: solana-agent
3
- Version: 31.1.7
3
+ Version: 31.2.1
4
4
  Summary: AI Agents for Solana
5
5
  License: MIT
6
6
  Keywords: solana,solana ai,solana agent,ai,ai agent,ai agents
@@ -14,11 +14,11 @@ Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
16
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
- Requires-Dist: instructor (==1.11.2)
18
- Requires-Dist: llama-index-core (==0.13.5)
19
- Requires-Dist: llama-index-embeddings-openai (==0.5.0)
20
- Requires-Dist: logfire (==4.3.6)
21
- Requires-Dist: openai (==1.106.1)
17
+ Requires-Dist: instructor (==1.11.3)
18
+ Requires-Dist: llama-index-core (==0.14.0)
19
+ Requires-Dist: llama-index-embeddings-openai (==0.5.1)
20
+ Requires-Dist: logfire (==4.5.0)
21
+ Requires-Dist: openai (==1.107.0)
22
22
  Requires-Dist: pillow (==11.3.0)
23
23
  Requires-Dist: pinecone[asyncio] (==7.3.0)
24
24
  Requires-Dist: pydantic (>=2)
@@ -26,7 +26,8 @@ Requires-Dist: pymongo (==4.14.1)
26
26
  Requires-Dist: pypdf (==6.0.0)
27
27
  Requires-Dist: rich (>=13,<14.0)
28
28
  Requires-Dist: scrubadub (==2.0.1)
29
- Requires-Dist: typer (==0.17.3)
29
+ Requires-Dist: typer (==0.17.4)
30
+ Requires-Dist: websockets (>=13,<16)
30
31
  Requires-Dist: zep-cloud (==3.4.3)
31
32
  Project-URL: Documentation, https://docs.solana-agent.com
32
33
  Project-URL: Homepage, https://solana-agent.com
@@ -52,7 +53,7 @@ Build your AI agents in three lines of code!
52
53
  ## Why?
53
54
  * Three lines of code setup
54
55
  * Simple Agent Definition
55
- * Fast & Streaming Responses
56
+ * Streaming or Realtime Responses
56
57
  * Solana Integration
57
58
  * Multi-Agent Swarm
58
59
  * Multi-Modal (Images & Audio & Text)
@@ -131,6 +132,7 @@ Smart workflows are as easy as combining your tools and prompts.
131
132
  **OpenAI**
132
133
  * [gpt-4.1](https://platform.openai.com/docs/models/gpt-4.1) (agent & router)
133
134
  * [text-embedding-3-large](https://platform.openai.com/docs/models/text-embedding-3-large) (embedding)
135
+ * [gpt-realtime](https://platform.openai.com/docs/models/gpt-realtime) (realtime audio agent)
134
136
  * [tts-1](https://platform.openai.com/docs/models/tts-1) (audio TTS)
135
137
  * [gpt-4o-mini-transcribe](https://platform.openai.com/docs/models/gpt-4o-mini-transcribe) (audio transcription)
136
138
 
@@ -307,6 +309,36 @@ async for response in solana_agent.process("user123", audio_content, audio_input
307
309
  print(response, end="")
308
310
  ```
309
311
 
312
+ ### Realtime Audio Streaming
313
+
314
+ If input and/or output is encoded (compressed) like mp4/aac then you must have `ffmpeg` installed.
315
+
316
+ Due to the overhead of the router (API call) - realtime only supports a single agent setup.
317
+
318
+ Realtime uses MongoDB for memory so Zep is not needed.
319
+
320
+ ```python
321
+ from solana_agent import SolanaAgent
322
+
323
+ solana_agent = SolanaAgent(config=config)
324
+
325
+ # Example: mobile sends MP4/AAC; server encodes output to AAC
326
+ audio_content = await audio_file.read() # bytes
327
+ async for audio_chunk in solana_agent.process(
328
+ "user123", # required
329
+ audio_content, # required
330
+ realtime=True, # optional (default False)
331
+ output_format="audio", # required
332
+ vad=True, # enable VAD (optional)
333
+ rt_encode_input=True, # accept compressed input (optional)
334
+ rt_encode_output=True, # encode output for client (optional)
335
+ rt_voice="marin" # the voice to use for interactions (optional)
336
+ audio_input_format="mp4", # client transport (optional)
337
+ audio_output_format="aac" # client transport (optional)
338
+ ):
339
+ handle_audio(audio_chunk)
340
+ ```
341
+
310
342
  ### Image/Text Streaming
311
343
 
312
344
  ```python
@@ -1,28 +1,32 @@
1
1
  solana_agent/__init__.py,sha256=iu0PnSAEZ6qzzHFVu7WVCQVbeCZmiZ6axUDDgWRn1j4,1070
2
2
  solana_agent/adapters/__init__.py,sha256=tiEEuuy0NF3ngc_tGEcRTt71zVI58v3dYY9RvMrF2Cg,204
3
+ solana_agent/adapters/ffmpeg_transcoder.py,sha256=X8k4oz7CWcmhwvzXriatbE15wvQoU1jGdRdk7zp5mSA,9332
3
4
  solana_agent/adapters/mongodb_adapter.py,sha256=Hq3S8VzfLmnPjV40z8yJXGqUamOJcX5GbOMd-1nNWO4,3175
4
- solana_agent/adapters/openai_adapter.py,sha256=QaLOl48DhwtRk6AwRzEcFa_g29iOZW_KU3sCKckgsLE,26502
5
+ solana_agent/adapters/openai_adapter.py,sha256=U3x6fMRmdvfvNt7M9-RKzV835WtPxNGrV1VRBMiRHV8,26714
6
+ solana_agent/adapters/openai_realtime_ws.py,sha256=XTgwWXF4aD38UJSH7PCLIAqt9EuUUwsHRoM1Q6Fq66A,74352
5
7
  solana_agent/adapters/pinecone_adapter.py,sha256=XlfOpoKHwzpaU4KZnovO2TnEYbsw-3B53ZKQDtBeDgU,23847
6
8
  solana_agent/cli.py,sha256=FGvTIQmKLp6XsQdyKtuhIIfbBtMmcCCXfigNrj4bzMc,4704
7
9
  solana_agent/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- solana_agent/client/solana_agent.py,sha256=Ivi18kQaHu8Jp395SNe-dr751AEjmNxkt2dGfVXzVew,9374
10
+ solana_agent/client/solana_agent.py,sha256=hLtiR3xD1eFww7XRdg4dTvxlJnTCepilYmEfABn9L7E,10344
9
11
  solana_agent/domains/__init__.py,sha256=HiC94wVPRy-QDJSSRywCRrhrFfTBeHjfi5z-QfZv46U,168
10
12
  solana_agent/domains/agent.py,sha256=8pAi1-kIgzFNANt3dyQjw-1zbThcNdpEllbAGWi79uI,2841
11
13
  solana_agent/domains/routing.py,sha256=1yR4IswGcmREGgbOOI6TKCfuM7gYGOhQjLkBqnZ-rNo,582
12
14
  solana_agent/factories/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- solana_agent/factories/agent_factory.py,sha256=FgwRpATEc_pFZV2My-yoql_nT1SBIqjU8KEPBleWE6M,14128
15
+ solana_agent/factories/agent_factory.py,sha256=d9VuD5E9khqVXU_Qu67zKU2yVvXHK2EmercDmSZ4stk,14226
14
16
  solana_agent/guardrails/pii.py,sha256=FCz1IC3mmkr41QFFf5NaC0fwJrVkwFsxgyOCS2POO5I,4428
15
17
  solana_agent/interfaces/__init__.py,sha256=IQs1WIM1FeKP1-kY2FEfyhol_dB-I-VAe2rD6jrVF6k,355
16
- solana_agent/interfaces/client/client.py,sha256=9hg35-hp_CI-WVGOXehBE1ZCKYahLmbeAvtQOYmML4o,3245
18
+ solana_agent/interfaces/client/client.py,sha256=VWMoxCflhxjwgmaCqDlU5Z9xSWgq0lrHa7ANagfsGVg,3660
17
19
  solana_agent/interfaces/guardrails/guardrails.py,sha256=gZCQ1FrirW-mX6s7FoYrbRs6golsp-x269kk4kQiZzc,572
18
20
  solana_agent/interfaces/plugins/plugins.py,sha256=Rz52cWBLdotwf4kV-2mC79tRYlN29zHSu1z9-y1HVPk,3329
21
+ solana_agent/interfaces/providers/audio.py,sha256=CescIuGBEUQZ4XRyxb_1VYrO9x3Q80ilp-sxpYpxAyQ,1213
19
22
  solana_agent/interfaces/providers/data_storage.py,sha256=Y92Cq8BtC55VlsYLD7bo3ofqQabNnlg7Q4H1Q6CDsLU,1713
20
- solana_agent/interfaces/providers/llm.py,sha256=pvhatUVeaw7OE0iX_6O4LXnd-vtNCsoD-GYRn9uN4pc,3559
23
+ solana_agent/interfaces/providers/llm.py,sha256=nerYO7QcbdSY44_YFqf_f4lftL0HbwC_G_er6oW80tw,3484
21
24
  solana_agent/interfaces/providers/memory.py,sha256=28X1LeS-bEac4yoIXdRPyuRU91oW9Kdt2NZtDmwSTxM,1360
25
+ solana_agent/interfaces/providers/realtime.py,sha256=P0xKgMOWa0Zrp46g_Z9dzbWhjGdzRgnDeman-bc1xyQ,3089
22
26
  solana_agent/interfaces/providers/vector_storage.py,sha256=XPYzvoWrlDVFCS9ItBmoqCFWXXWNYY-d9I7_pvP7YYk,1561
23
- solana_agent/interfaces/services/agent.py,sha256=A-Hmgelr3g_qaNB0PEPMFHxB5nSCBK0WJ5hauJtIcmI,2257
27
+ solana_agent/interfaces/services/agent.py,sha256=Hz3ldNb0NDMp8Rm9E3GM0L3kMAO3XLJ6U2HAh6gdPeU,2176
24
28
  solana_agent/interfaces/services/knowledge_base.py,sha256=Mu8lCGFXPmI_IW5LRGti7octLoWZIg4k5PmGwPfe7LQ,1479
25
- solana_agent/interfaces/services/query.py,sha256=uu_qV-DcMEAjj-XQkIc29-inXgERohui4FXrbJj7tBo,1838
29
+ solana_agent/interfaces/services/query.py,sha256=jk-k6UeBFfWyZdPUr9imYLmlikTAuHegP0oWg2_ioew,2014
26
30
  solana_agent/interfaces/services/routing.py,sha256=Qbn3-DQGVSQKaegHDekSFmn_XCklA0H2f0XUx9-o3wA,367
27
31
  solana_agent/plugins/__init__.py,sha256=coZdgJKq1ExOaj6qB810i3rEhbjdVlrkN76ozt_Ojgo,193
28
32
  solana_agent/plugins/manager.py,sha256=mO_dKSVJ8GToD3wZflMcpKDEBXRoaaMRtY267HENCI0,5542
@@ -30,14 +34,15 @@ solana_agent/plugins/registry.py,sha256=VAG0BWdUUIsEE-VpATtHi8qat7ziPuh7pKuzGXau
30
34
  solana_agent/plugins/tools/__init__.py,sha256=VDjJxvUjefIy10VztQ9WDKgIegvDbIXBQWsHLhxdZ3o,125
31
35
  solana_agent/plugins/tools/auto_tool.py,sha256=uihijtlc9CCqCIaRcwPuuN7o1SHIpWL2GV3vr33GG3E,1576
32
36
  solana_agent/repositories/__init__.py,sha256=fP83w83CGzXLnSdq-C5wbw9EhWTYtqE2lQTgp46-X_4,163
33
- solana_agent/repositories/memory.py,sha256=YYbpUZi9iQ9-RfnlZSiFn5OhtEUyfqXV5n55Kgibf3M,10109
37
+ solana_agent/repositories/memory.py,sha256=cipt9eC5YApi8ozFXAV5xq7QxQJExJmVdgGjkBjHwF0,17279
34
38
  solana_agent/services/__init__.py,sha256=iko0c2MlF8b_SA_nuBGFllr2E3g_JowOrOzGcnU9tkA,162
35
- solana_agent/services/agent.py,sha256=m6nqBFHB7IxUOwK0c2nlH6rt1hsRq4csxbT_oJaLBAA,23438
39
+ solana_agent/services/agent.py,sha256=LWjsdmCeygwmjFoazOCVhrb0hdZHQDEQo_DFWZe57Lk,23133
36
40
  solana_agent/services/knowledge_base.py,sha256=ZvOPrSmcNDgUzz4bJIQ4LeRl9vMZiK9hOfs71IpB7Bk,32735
37
- solana_agent/services/query.py,sha256=X8SpNskx_UkLCPsk_GUFxa3rtmYPLowi7QStxKOzI_c,43829
41
+ solana_agent/services/query.py,sha256=3nNGTNNQm-kYN60mIALIbc8ocWN5hQx8zT-pueKl0kA,67602
42
+ solana_agent/services/realtime.py,sha256=kd9THmffBgquUr4Oho0q8QAa7kPwH6OIR74j7YAoc6s,20592
38
43
  solana_agent/services/routing.py,sha256=hsHe8HSGO_xFc0A17WIOGTidLTfLSfApQw3l2HHqkLo,7614
39
- solana_agent-31.1.7.dist-info/LICENSE,sha256=BnSRc-NSFuyF2s496l_4EyrwAP6YimvxWcjPiJ0J7g4,1057
40
- solana_agent-31.1.7.dist-info/METADATA,sha256=Q7bcSMIFKuwOoWvsej9dNBj9v4XVk5wbaTAZPicqX_I,29932
41
- solana_agent-31.1.7.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
42
- solana_agent-31.1.7.dist-info/entry_points.txt,sha256=-AuT_mfqk8dlZ0pHuAjx1ouAWpTRjpqvEUa6YV3lmc0,53
43
- solana_agent-31.1.7.dist-info/RECORD,,
44
+ solana_agent-31.2.1.dist-info/LICENSE,sha256=BnSRc-NSFuyF2s496l_4EyrwAP6YimvxWcjPiJ0J7g4,1057
45
+ solana_agent-31.2.1.dist-info/METADATA,sha256=1GipkxJJmxfMzgAdqtpCpoBKXfsKQBbAPG2Zn0Qx4I4,31240
46
+ solana_agent-31.2.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
47
+ solana_agent-31.2.1.dist-info/entry_points.txt,sha256=-AuT_mfqk8dlZ0pHuAjx1ouAWpTRjpqvEUa6YV3lmc0,53
48
+ solana_agent-31.2.1.dist-info/RECORD,,