solana-agent 31.1.7__py3-none-any.whl → 31.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- solana_agent/adapters/ffmpeg_transcoder.py +282 -0
- solana_agent/adapters/openai_adapter.py +5 -0
- solana_agent/adapters/openai_realtime_ws.py +1613 -0
- solana_agent/client/solana_agent.py +29 -3
- solana_agent/factories/agent_factory.py +2 -1
- solana_agent/interfaces/client/client.py +18 -1
- solana_agent/interfaces/providers/audio.py +40 -0
- solana_agent/interfaces/providers/llm.py +0 -1
- solana_agent/interfaces/providers/realtime.py +100 -0
- solana_agent/interfaces/services/agent.py +0 -1
- solana_agent/interfaces/services/query.py +12 -1
- solana_agent/repositories/memory.py +184 -19
- solana_agent/services/agent.py +0 -5
- solana_agent/services/query.py +561 -6
- solana_agent/services/realtime.py +506 -0
- {solana_agent-31.1.7.dist-info → solana_agent-31.2.1.dist-info}/METADATA +40 -8
- {solana_agent-31.1.7.dist-info → solana_agent-31.2.1.dist-info}/RECORD +20 -15
- {solana_agent-31.1.7.dist-info → solana_agent-31.2.1.dist-info}/LICENSE +0 -0
- {solana_agent-31.1.7.dist-info → solana_agent-31.2.1.dist-info}/WHEEL +0 -0
- {solana_agent-31.1.7.dist-info → solana_agent-31.2.1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,506 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import asyncio
|
4
|
+
import logging
|
5
|
+
from typing import Any, AsyncGenerator, Dict, Optional
|
6
|
+
|
7
|
+
from solana_agent.interfaces.providers.realtime import (
|
8
|
+
BaseRealtimeSession,
|
9
|
+
RealtimeSessionOptions,
|
10
|
+
)
|
11
|
+
from solana_agent.interfaces.providers.audio import AudioTranscoder
|
12
|
+
|
13
|
+
logger = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
|
16
|
+
class RealtimeService:
    """High-level service to manage a realtime audio session.

    Responsibilities:
    - Connect/close a realtime session (WebSocket-based)
    - Update voice and VAD at runtime via session.update
    - Append/commit/clear input audio buffers
    - Expose separate async generators for audio and input/output transcripts
    - Allow out-of-band response.create (e.g., text-to-speech without new audio)
    """

    def __init__(
        self,
        session: BaseRealtimeSession,
        options: Optional[RealtimeSessionOptions] = None,
        transcoder: Optional[AudioTranscoder] = None,
        accept_compressed_input: bool = False,
        client_input_mime: str = "audio/mp4",
        encode_output: bool = False,
        client_output_mime: str = "audio/aac",
    ) -> None:
        """Create the service.

        Args:
            session: Underlying realtime session (WebSocket transport).
            options: Initial options snapshot; a default is created if omitted.
            transcoder: Audio transcoder; required when either transcoding
                direction is enabled.
            accept_compressed_input: If True, transcode client audio
                (e.g. MP4/AAC) to PCM16 before forwarding to the session.
            client_input_mime: MIME type of the compressed client input.
            encode_output: If True, encode outgoing PCM16 for the client.
            client_output_mime: MIME type of the encoded client output.
        """
        self._session = session
        self._options = options or RealtimeSessionOptions()
        self._connected = False
        self._lock = asyncio.Lock()
        self._transcoder = transcoder
        # Client-side transport controls (do not affect OpenAI session formats)
        self._accept_compressed_input = accept_compressed_input
        self._client_input_mime = client_input_mime
        self._encode_output = encode_output
        self._client_output_mime = client_output_mime

    async def start(self) -> None:  # pragma: no cover
        """Connect the underlying session; no-op when already connected."""
        async with self._lock:
            if self._connected:
                return
            logger.info("RealtimeService: starting session")
            await self._session.connect()
            self._connected = True

    async def stop(self) -> None:  # pragma: no cover
        """Close the underlying session; no-op when not connected."""
        async with self._lock:
            if not self._connected:
                return
            logger.info("RealtimeService: stopping session")
            await self._session.close()
            self._connected = False

    # --- Configuration ---
    async def configure(
        self,
        *,
        voice: Optional[str] = None,
        vad_enabled: Optional[bool] = None,
        instructions: Optional[str] = None,
        input_rate_hz: Optional[int] = None,
        output_rate_hz: Optional[int] = None,
        input_mime: Optional[str] = None,
        output_mime: Optional[str] = None,
        tools: Optional[list[dict[str, Any]]] = None,
        tool_choice: Optional[str] = None,
    ) -> None:  # pragma: no cover
        """Update session settings (voice, VAD, formats, tools).

        Only explicitly-provided arguments are patched on the server; the
        local options snapshot is then kept in sync with what was sent.
        """
        patch: Dict[str, Any] = {}

        audio_patch: Dict[str, Any] = {}
        if input_mime or input_rate_hz is not None or vad_enabled is not None:
            input_patch: Dict[str, Any] = {
                "format": "pcm16",  # session is fixed to PCM16 server-side
            }
            # Fix: only include turn_detection when the caller explicitly
            # passed vad_enabled. Previously a patch triggered by input_mime
            # or input_rate_hz alone always sent "turn_detection": None,
            # which silently disabled server-side VAD.
            if vad_enabled is not None:
                input_patch["turn_detection"] = (
                    {
                        "type": "semantic_vad",
                        "create_response": True,
                    }
                    if vad_enabled
                    else None
                )
            audio_patch["input"] = input_patch

        if output_mime or output_rate_hz is not None or voice is not None:
            audio_patch["output"] = {
                "format": "pcm16",  # session is fixed to PCM16 server-side
                "voice": voice or self._options.voice,
                "speed": 1.0,
            }

        if audio_patch:
            patch["audio"] = audio_patch

        if instructions is not None:
            patch["instructions"] = instructions
        if tools is not None:
            patch["tools"] = tools
        if tool_choice is not None:
            patch["tool_choice"] = tool_choice

        if patch:
            logger.debug("RealtimeService.configure patch: %s", patch)
            await self._session.update_session(patch)

        # Update local options snapshot
        if voice is not None:
            self._options.voice = voice
        if vad_enabled is not None:
            self._options.vad_enabled = vad_enabled
        if instructions is not None:
            self._options.instructions = instructions
        if input_rate_hz is not None:
            self._options.input_rate_hz = input_rate_hz
        if output_rate_hz is not None:
            self._options.output_rate_hz = output_rate_hz
        if input_mime is not None:
            self._options.input_mime = input_mime
        if output_mime is not None:
            self._options.output_mime = output_mime
        if tools is not None:
            self._options.tools = tools
        if tool_choice is not None:
            self._options.tool_choice = tool_choice

    # --- Audio input ---
    async def append_audio(self, chunk_bytes: bytes) -> None:  # pragma: no cover
        """Accepts PCM16 by default; if accept_compressed_input is True, transcodes client audio to PCM16.

        This keeps the server session configured for PCM while allowing mobile clients to send MP4/AAC.

        Raises:
            ValueError: Compressed input is enabled but no transcoder was provided.
        """
        logger.debug(
            "RealtimeService.append_audio: len=%d, accept_compressed_input=%s, client_input_mime=%s",
            len(chunk_bytes),
            self._accept_compressed_input,
            self._client_input_mime,
        )
        if self._accept_compressed_input:
            if not self._transcoder:
                raise ValueError(
                    "Compressed input enabled but no transcoder configured"
                )
            pcm16 = await self._transcoder.to_pcm16(
                chunk_bytes, self._client_input_mime, self._options.input_rate_hz
            )
            await self._session.append_audio(pcm16)
            logger.debug("RealtimeService.append_audio: sent PCM16 len=%d", len(pcm16))
            return
        # Default: pass-through PCM16
        await self._session.append_audio(chunk_bytes)
        logger.debug(
            "RealtimeService.append_audio: sent passthrough len=%d", len(chunk_bytes)
        )

    async def commit_input(self) -> None:  # pragma: no cover
        """Commit the buffered input audio for processing."""
        logger.debug("RealtimeService.commit_input")
        await self._session.commit_input()

    async def clear_input(self) -> None:  # pragma: no cover
        """Discard buffered, uncommitted input audio."""
        logger.debug("RealtimeService.clear_input")
        await self._session.clear_input()

    # --- Out-of-band response (e.g., TTS without new audio) ---
    async def create_response(  # pragma: no cover
        self, response_patch: Optional[Dict[str, Any]] = None
    ) -> None:
        """Request a model response without appending new input audio."""
        await self._session.create_response(response_patch)

    # --- Streams ---
    def iter_events(self) -> AsyncGenerator[Dict[str, Any], None]:  # pragma: no cover
        """Yield raw session events."""
        return self._session.iter_events()

    def iter_output_audio(self) -> AsyncGenerator[bytes, None]:  # pragma: no cover
        """Yield raw PCM16 output audio chunks."""
        return self._session.iter_output_audio()

    async def iter_output_audio_encoded(
        self,
    ) -> AsyncGenerator[bytes, None]:  # pragma: no cover
        """Stream PCM16 audio, tolerating long tool executions by waiting while calls are pending.

        - If no audio arrives immediately, we keep waiting as long as a function/tool call is pending.
        - Bridge across multiple audio segments (e.g., pre-call and post-call responses).
        - Only end the stream when no audio is available and no pending tool call remains.
        """

        def _has_pending_tool() -> bool:
            # Sessions that do not implement has_pending_tool_call report False.
            try:
                return bool(
                    getattr(self._session, "has_pending_tool_call", lambda: False)()
                )
            except Exception:
                return False

        async def _produce_pcm():
            max_wait_pending_sec = 600.0  # allow up to 10 minutes while tools run
            waited_while_pending = 0.0
            base_idle_timeout = 12.0
            idle_slice = 1.0

            while True:
                gen = self._session.iter_output_audio()
                try:
                    # Inner loop for one segment until generator ends
                    while True:
                        try:
                            chunk = await asyncio.wait_for(
                                gen.__anext__(), timeout=idle_slice
                            )
                        except asyncio.TimeoutError:
                            if _has_pending_tool():
                                # Tool still running: keep waiting, bounded
                                # by the generous pending-tool budget.
                                waited_while_pending += idle_slice
                                if waited_while_pending <= max_wait_pending_sec:
                                    continue
                                else:
                                    logger.warning(
                                        "RealtimeService: exceeded max pending-tool wait; ending stream"
                                    )
                                    return
                            else:
                                # No pending tool: accumulate idle time; stop after base timeout
                                # NOTE(review): the pending and idle budgets share one
                                # counter, so time spent waiting on a tool also counts
                                # toward the idle cutoff — confirm intended.
                                waited_while_pending += idle_slice
                                if waited_while_pending >= base_idle_timeout:
                                    logger.warning(
                                        "RealtimeService: idle with no pending tool; ending stream"
                                    )
                                    return
                                continue
                        # Got a chunk; reset idle counter and yield
                        waited_while_pending = 0.0
                        if not chunk:
                            continue
                        yield chunk
                except StopAsyncIteration:
                    # Segment ended; if a tool is pending, continue to next segment
                    if _has_pending_tool():
                        await asyncio.sleep(0.25)
                        continue
                    # Otherwise, no more audio segments expected
                    return

        if self._encode_output and self._transcoder:
            # Re-encode PCM16 into the client transport format (e.g. AAC).
            async for out in self._transcoder.stream_from_pcm16(
                _produce_pcm(), self._client_output_mime, self._options.output_rate_hz
            ):
                yield out
        else:
            async for chunk in _produce_pcm():
                yield chunk

    def iter_input_transcript(self) -> AsyncGenerator[str, None]:  # pragma: no cover
        """Yield transcript deltas of the user's input audio."""
        return self._session.iter_input_transcript()

    def iter_output_transcript(self) -> AsyncGenerator[str, None]:  # pragma: no cover
        """Yield transcript deltas of the assistant's output audio."""
        return self._session.iter_output_transcript()
class TwinRealtimeService:
    """Orchestrates two realtime sessions in parallel:

    - conversation: full duplex (audio out + assistant transcript, tools, etc.)
    - transcription: transcription-only session per GA (input transcript deltas)

    Audio input is fanned out to both sessions. Output audio is sourced from the
    conversation session only. Input transcript is sourced from the transcription
    session only. This aligns with the GA guidance to use a dedicated
    transcription session for reliable realtime STT, while the conversation
    session handles assistant speech.
    """

    def __init__(
        self,
        conversation: BaseRealtimeSession,
        transcription: BaseRealtimeSession,
        *,
        conv_options: Optional[RealtimeSessionOptions] = None,
        trans_options: Optional[RealtimeSessionOptions] = None,
        transcoder: Optional[AudioTranscoder] = None,
        accept_compressed_input: bool = False,
        client_input_mime: str = "audio/mp4",
        encode_output: bool = False,
        client_output_mime: str = "audio/aac",
    ) -> None:
        """Create the twin service.

        Args:
            conversation: Full-duplex session producing assistant audio.
            transcription: Transcription-only session producing input STT.
            conv_options: Options snapshot for the conversation session.
            trans_options: Options snapshot for the transcription session.
            transcoder: Audio transcoder; required when transcoding is enabled.
            accept_compressed_input: If True, transcode client audio to PCM16.
            client_input_mime: MIME type of the compressed client input.
            encode_output: If True, encode outgoing PCM16 for the client.
            client_output_mime: MIME type of the encoded client output.
        """
        self._conv = conversation
        self._trans = transcription
        self._conv_opts = conv_options or RealtimeSessionOptions()
        self._trans_opts = trans_options or RealtimeSessionOptions()
        self._transcoder = transcoder
        self._accept_compressed_input = accept_compressed_input
        self._client_input_mime = client_input_mime
        self._encode_output = encode_output
        self._client_output_mime = client_output_mime
        self._connected = False
        self._lock = asyncio.Lock()

    async def start(self) -> None:  # pragma: no cover
        """Connect both sessions concurrently; no-op when already connected."""
        async with self._lock:
            if self._connected:
                return
            logger.info("TwinRealtimeService: starting conversation + transcription")
            await asyncio.gather(self._conv.connect(), self._trans.connect())
            self._connected = True

    async def stop(self) -> None:  # pragma: no cover
        """Close both sessions; the connected flag clears even on failure."""
        async with self._lock:
            if not self._connected:
                return
            logger.info("TwinRealtimeService: stopping both sessions")
            try:
                await asyncio.gather(self._conv.close(), self._trans.close())
            finally:
                self._connected = False

    async def reconnect(self) -> None:  # pragma: no cover
        """Tear down both sessions and connect them again.

        Fix: the whole close+connect cycle runs inline under a single
        acquisition of the instance lock instead of re-entering start().
        asyncio.Lock is not reentrant, so awaiting start() while holding
        the lock would deadlock; this also leaves no window in which another
        coroutine could observe a half-restarted service.
        """
        async with self._lock:
            try:
                await asyncio.gather(self._conv.close(), self._trans.close())
            except Exception:
                # Best-effort teardown; stale connections are replaced below.
                pass
            self._connected = False
            logger.info("TwinRealtimeService: reconnecting conversation + transcription")
            await asyncio.gather(self._conv.connect(), self._trans.connect())
            self._connected = True

    async def configure(
        self,
        *,
        voice: Optional[str] = None,
        vad_enabled: Optional[bool] = None,
        instructions: Optional[str] = None,
        input_rate_hz: Optional[int] = None,
        output_rate_hz: Optional[int] = None,
        input_mime: Optional[str] = None,
        output_mime: Optional[str] = None,
        tools: Optional[list[dict[str, Any]]] = None,
        tool_choice: Optional[str] = None,
    ) -> None:  # pragma: no cover
        """Update conversation-session settings and local snapshots.

        Only the conversation session needs voice/tools; the transcription
        session already has its own VAD model configured at connect-time.
        """
        patch: Dict[str, Any] = {}
        audio_patch: Dict[str, Any] = {}
        if (
            vad_enabled is not None
            or input_rate_hz is not None
            or input_mime is not None
        ):
            input_patch: Dict[str, Any] = {"format": "pcm16"}
            # Fix: only include turn_detection when vad_enabled was explicitly
            # passed; otherwise a rate/mime-only patch would send
            # "turn_detection": None and silently disable server-side VAD.
            if vad_enabled is not None:
                input_patch["turn_detection"] = (
                    {"type": "semantic_vad", "create_response": True}
                    if vad_enabled
                    else None
                )
            audio_patch["input"] = input_patch
        if output_rate_hz is not None or output_mime is not None or voice is not None:
            audio_patch["output"] = {
                "format": "pcm16",
                "voice": voice or self._conv_opts.voice,
                "speed": 1.0,
            }
        if audio_patch:
            patch["audio"] = audio_patch
        if instructions is not None:
            patch["instructions"] = instructions
        if tools is not None:
            patch["tools"] = tools
        if tool_choice is not None:
            patch["tool_choice"] = tool_choice

        if patch:
            logger.debug("TwinRealtimeService.configure patch (conv): %s", patch)
            await self._conv.update_session(patch)

        # Update local snapshots
        if voice is not None:
            self._conv_opts.voice = voice
        if vad_enabled is not None:
            self._conv_opts.vad_enabled = vad_enabled
            self._trans_opts.vad_enabled = vad_enabled
        if instructions is not None:
            self._conv_opts.instructions = instructions
        if input_rate_hz is not None:
            self._conv_opts.input_rate_hz = input_rate_hz
            self._trans_opts.input_rate_hz = input_rate_hz
        if output_rate_hz is not None:
            self._conv_opts.output_rate_hz = output_rate_hz
        if input_mime is not None:
            self._conv_opts.input_mime = input_mime
            self._trans_opts.input_mime = input_mime
        if output_mime is not None:
            self._conv_opts.output_mime = output_mime
        if tools is not None:
            self._conv_opts.tools = tools
        if tool_choice is not None:
            self._conv_opts.tool_choice = tool_choice

    async def append_audio(self, chunk_bytes: bytes) -> None:  # pragma: no cover
        """Fan audio out to both sessions, transcoding once if needed.

        Raises:
            ValueError: Compressed input is enabled but no transcoder was provided.
        """
        # Transcode once if needed, then fan out to both
        if self._accept_compressed_input:
            if not self._transcoder:
                raise ValueError(
                    "Compressed input enabled but no transcoder configured"
                )
            pcm16 = await self._transcoder.to_pcm16(
                chunk_bytes, self._client_input_mime, self._conv_opts.input_rate_hz
            )
            await asyncio.gather(
                self._conv.append_audio(pcm16), self._trans.append_audio(pcm16)
            )
            return
        await asyncio.gather(
            self._conv.append_audio(chunk_bytes),
            self._trans.append_audio(chunk_bytes),
        )

    async def commit_input(self) -> None:  # pragma: no cover
        """Commit buffered input audio on both sessions."""
        await asyncio.gather(self._conv.commit_input(), self._trans.commit_input())

    async def commit_conversation(self) -> None:  # pragma: no cover
        """Commit buffered input audio on the conversation session only."""
        await self._conv.commit_input()

    async def commit_transcription(self) -> None:  # pragma: no cover
        """Commit buffered input audio on the transcription session only."""
        await self._trans.commit_input()

    async def clear_input(self) -> None:  # pragma: no cover
        """Discard buffered input audio on both sessions."""
        await asyncio.gather(self._conv.clear_input(), self._trans.clear_input())

    async def create_response(
        self, response_patch: Optional[Dict[str, Any]] = None
    ) -> None:  # pragma: no cover
        """Request an assistant response (conversation session only)."""
        # Only conversation session creates assistant responses
        await self._conv.create_response(response_patch)

    # --- Streams ---
    def iter_events(self) -> AsyncGenerator[Dict[str, Any], None]:  # pragma: no cover
        """Yield conversation-session events.

        Callers can listen to transcription events via iter_transcription_events.
        """
        return self._conv.iter_events()

    def iter_output_audio(self) -> AsyncGenerator[bytes, None]:  # pragma: no cover
        """Yield raw PCM16 output audio from the conversation session."""
        return self._conv.iter_output_audio()

    async def iter_output_audio_encoded(
        self,
    ) -> AsyncGenerator[bytes, None]:  # pragma: no cover
        """Stream (optionally encoded) output audio from the conversation session.

        Reuses the same encoding pipeline as RealtimeService but sources from
        the conversation session. If no PCM arrives within the initial timeout
        both sessions are closed so the next turn starts cleanly.
        """
        pcm_gen = self._conv.iter_output_audio()

        try:
            first_chunk = await asyncio.wait_for(pcm_gen.__anext__(), timeout=12.0)
        except StopAsyncIteration:
            logger.warning("TwinRealtimeService: no PCM produced (ended immediately)")
            return
        except asyncio.TimeoutError:
            logger.warning("TwinRealtimeService: no PCM within timeout; closing conv")
            try:
                # Close both sessions to ensure clean restart on next turn
                # NOTE(review): done without holding self._lock — a concurrent
                # start()/stop() could interleave here; confirm callers serialize.
                await asyncio.gather(self._conv.close(), self._trans.close())
                self._connected = False
            except Exception:
                pass
            return

        async def _pcm_iter():
            # Re-yield the probed first chunk, then drain the rest, skipping
            # empty chunks.
            if first_chunk:
                yield first_chunk
            async for c in pcm_gen:
                if not c:
                    continue
                yield c

        if self._encode_output and self._transcoder:
            async for out in self._transcoder.stream_from_pcm16(
                _pcm_iter(), self._client_output_mime, self._conv_opts.output_rate_hz
            ):
                yield out
        else:
            async for chunk in _pcm_iter():
                yield chunk

    def iter_input_transcript(self) -> AsyncGenerator[str, None]:  # pragma: no cover
        """Yield input transcript deltas from the transcription session."""
        return self._trans.iter_input_transcript()

    def iter_output_transcript(self) -> AsyncGenerator[str, None]:  # pragma: no cover
        """Yield assistant transcript deltas from the conversation session."""
        return self._conv.iter_output_transcript()

    def iter_transcription_events(
        self,
    ) -> AsyncGenerator[Dict[str, Any], None]:  # pragma: no cover
        """Yield transcription-session events (for completion detection)."""
        # Expose transcription session events for completion detection
        return self._trans.iter_events()

    def is_connected(self) -> bool:  # pragma: no cover
        """Return True when both sessions were connected via start()."""
        return self._connected

    def set_tool_executor(self, executor) -> None:  # pragma: no cover
        """Forward the tool executor to the conversation session (tools only apply there)."""
        if hasattr(self._conv, "set_tool_executor"):
            self._conv.set_tool_executor(executor)
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: solana-agent
|
3
|
-
Version: 31.1
|
3
|
+
Version: 31.2.1
|
4
4
|
Summary: AI Agents for Solana
|
5
5
|
License: MIT
|
6
6
|
Keywords: solana,solana ai,solana agent,ai,ai agent,ai agents
|
@@ -14,11 +14,11 @@ Classifier: Programming Language :: Python :: 3
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
16
16
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
17
|
-
Requires-Dist: instructor (==1.11.
|
18
|
-
Requires-Dist: llama-index-core (==0.
|
19
|
-
Requires-Dist: llama-index-embeddings-openai (==0.5.
|
20
|
-
Requires-Dist: logfire (==4.
|
21
|
-
Requires-Dist: openai (==1.
|
17
|
+
Requires-Dist: instructor (==1.11.3)
|
18
|
+
Requires-Dist: llama-index-core (==0.14.0)
|
19
|
+
Requires-Dist: llama-index-embeddings-openai (==0.5.1)
|
20
|
+
Requires-Dist: logfire (==4.5.0)
|
21
|
+
Requires-Dist: openai (==1.107.0)
|
22
22
|
Requires-Dist: pillow (==11.3.0)
|
23
23
|
Requires-Dist: pinecone[asyncio] (==7.3.0)
|
24
24
|
Requires-Dist: pydantic (>=2)
|
@@ -26,7 +26,8 @@ Requires-Dist: pymongo (==4.14.1)
|
|
26
26
|
Requires-Dist: pypdf (==6.0.0)
|
27
27
|
Requires-Dist: rich (>=13,<14.0)
|
28
28
|
Requires-Dist: scrubadub (==2.0.1)
|
29
|
-
Requires-Dist: typer (==0.17.
|
29
|
+
Requires-Dist: typer (==0.17.4)
|
30
|
+
Requires-Dist: websockets (>=13,<16)
|
30
31
|
Requires-Dist: zep-cloud (==3.4.3)
|
31
32
|
Project-URL: Documentation, https://docs.solana-agent.com
|
32
33
|
Project-URL: Homepage, https://solana-agent.com
|
@@ -52,7 +53,7 @@ Build your AI agents in three lines of code!
|
|
52
53
|
## Why?
|
53
54
|
* Three lines of code setup
|
54
55
|
* Simple Agent Definition
|
55
|
-
*
|
56
|
+
* Streaming or Realtime Responses
|
56
57
|
* Solana Integration
|
57
58
|
* Multi-Agent Swarm
|
58
59
|
* Multi-Modal (Images & Audio & Text)
|
@@ -131,6 +132,7 @@ Smart workflows are as easy as combining your tools and prompts.
|
|
131
132
|
**OpenAI**
|
132
133
|
* [gpt-4.1](https://platform.openai.com/docs/models/gpt-4.1) (agent & router)
|
133
134
|
* [text-embedding-3-large](https://platform.openai.com/docs/models/text-embedding-3-large) (embedding)
|
135
|
+
* [gpt-realtime](https://platform.openai.com/docs/models/gpt-realtime) (realtime audio agent)
|
134
136
|
* [tts-1](https://platform.openai.com/docs/models/tts-1) (audio TTS)
|
135
137
|
* [gpt-4o-mini-transcribe](https://platform.openai.com/docs/models/gpt-4o-mini-transcribe) (audio transcription)
|
136
138
|
|
@@ -307,6 +309,36 @@ async for response in solana_agent.process("user123", audio_content, audio_input
|
|
307
309
|
print(response, end="")
|
308
310
|
```
|
309
311
|
|
312
|
+
### Realtime Audio Streaming
|
313
|
+
|
314
|
+
If input and/or output is encoded (compressed) like mp4/aac then you must have `ffmpeg` installed.
|
315
|
+
|
316
|
+
Because the router adds the overhead of an extra API call, realtime mode supports only a single-agent setup.
|
317
|
+
|
318
|
+
Realtime uses MongoDB for memory so Zep is not needed.
|
319
|
+
|
320
|
+
```python
|
321
|
+
from solana_agent import SolanaAgent
|
322
|
+
|
323
|
+
solana_agent = SolanaAgent(config=config)
|
324
|
+
|
325
|
+
# Example: mobile sends MP4/AAC; server encodes output to AAC
|
326
|
+
audio_content = await audio_file.read() # bytes
|
327
|
+
async for audio_chunk in solana_agent.process(
|
328
|
+
"user123", # required
|
329
|
+
audio_content, # required
|
330
|
+
realtime=True, # optional (default False)
|
331
|
+
output_format="audio", # required
|
332
|
+
vad=True, # enable VAD (optional)
|
333
|
+
rt_encode_input=True, # accept compressed input (optional)
|
334
|
+
rt_encode_output=True, # encode output for client (optional)
|
335
|
+
rt_voice="marin",  # the voice to use for interactions (optional)
|
336
|
+
audio_input_format="mp4", # client transport (optional)
|
337
|
+
audio_output_format="aac" # client transport (optional)
|
338
|
+
):
|
339
|
+
handle_audio(audio_chunk)
|
340
|
+
```
|
341
|
+
|
310
342
|
### Image/Text Streaming
|
311
343
|
|
312
344
|
```python
|
@@ -1,28 +1,32 @@
|
|
1
1
|
solana_agent/__init__.py,sha256=iu0PnSAEZ6qzzHFVu7WVCQVbeCZmiZ6axUDDgWRn1j4,1070
|
2
2
|
solana_agent/adapters/__init__.py,sha256=tiEEuuy0NF3ngc_tGEcRTt71zVI58v3dYY9RvMrF2Cg,204
|
3
|
+
solana_agent/adapters/ffmpeg_transcoder.py,sha256=X8k4oz7CWcmhwvzXriatbE15wvQoU1jGdRdk7zp5mSA,9332
|
3
4
|
solana_agent/adapters/mongodb_adapter.py,sha256=Hq3S8VzfLmnPjV40z8yJXGqUamOJcX5GbOMd-1nNWO4,3175
|
4
|
-
solana_agent/adapters/openai_adapter.py,sha256=
|
5
|
+
solana_agent/adapters/openai_adapter.py,sha256=U3x6fMRmdvfvNt7M9-RKzV835WtPxNGrV1VRBMiRHV8,26714
|
6
|
+
solana_agent/adapters/openai_realtime_ws.py,sha256=XTgwWXF4aD38UJSH7PCLIAqt9EuUUwsHRoM1Q6Fq66A,74352
|
5
7
|
solana_agent/adapters/pinecone_adapter.py,sha256=XlfOpoKHwzpaU4KZnovO2TnEYbsw-3B53ZKQDtBeDgU,23847
|
6
8
|
solana_agent/cli.py,sha256=FGvTIQmKLp6XsQdyKtuhIIfbBtMmcCCXfigNrj4bzMc,4704
|
7
9
|
solana_agent/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
-
solana_agent/client/solana_agent.py,sha256=
|
10
|
+
solana_agent/client/solana_agent.py,sha256=hLtiR3xD1eFww7XRdg4dTvxlJnTCepilYmEfABn9L7E,10344
|
9
11
|
solana_agent/domains/__init__.py,sha256=HiC94wVPRy-QDJSSRywCRrhrFfTBeHjfi5z-QfZv46U,168
|
10
12
|
solana_agent/domains/agent.py,sha256=8pAi1-kIgzFNANt3dyQjw-1zbThcNdpEllbAGWi79uI,2841
|
11
13
|
solana_agent/domains/routing.py,sha256=1yR4IswGcmREGgbOOI6TKCfuM7gYGOhQjLkBqnZ-rNo,582
|
12
14
|
solana_agent/factories/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
-
solana_agent/factories/agent_factory.py,sha256=
|
15
|
+
solana_agent/factories/agent_factory.py,sha256=d9VuD5E9khqVXU_Qu67zKU2yVvXHK2EmercDmSZ4stk,14226
|
14
16
|
solana_agent/guardrails/pii.py,sha256=FCz1IC3mmkr41QFFf5NaC0fwJrVkwFsxgyOCS2POO5I,4428
|
15
17
|
solana_agent/interfaces/__init__.py,sha256=IQs1WIM1FeKP1-kY2FEfyhol_dB-I-VAe2rD6jrVF6k,355
|
16
|
-
solana_agent/interfaces/client/client.py,sha256=
|
18
|
+
solana_agent/interfaces/client/client.py,sha256=VWMoxCflhxjwgmaCqDlU5Z9xSWgq0lrHa7ANagfsGVg,3660
|
17
19
|
solana_agent/interfaces/guardrails/guardrails.py,sha256=gZCQ1FrirW-mX6s7FoYrbRs6golsp-x269kk4kQiZzc,572
|
18
20
|
solana_agent/interfaces/plugins/plugins.py,sha256=Rz52cWBLdotwf4kV-2mC79tRYlN29zHSu1z9-y1HVPk,3329
|
21
|
+
solana_agent/interfaces/providers/audio.py,sha256=CescIuGBEUQZ4XRyxb_1VYrO9x3Q80ilp-sxpYpxAyQ,1213
|
19
22
|
solana_agent/interfaces/providers/data_storage.py,sha256=Y92Cq8BtC55VlsYLD7bo3ofqQabNnlg7Q4H1Q6CDsLU,1713
|
20
|
-
solana_agent/interfaces/providers/llm.py,sha256=
|
23
|
+
solana_agent/interfaces/providers/llm.py,sha256=nerYO7QcbdSY44_YFqf_f4lftL0HbwC_G_er6oW80tw,3484
|
21
24
|
solana_agent/interfaces/providers/memory.py,sha256=28X1LeS-bEac4yoIXdRPyuRU91oW9Kdt2NZtDmwSTxM,1360
|
25
|
+
solana_agent/interfaces/providers/realtime.py,sha256=P0xKgMOWa0Zrp46g_Z9dzbWhjGdzRgnDeman-bc1xyQ,3089
|
22
26
|
solana_agent/interfaces/providers/vector_storage.py,sha256=XPYzvoWrlDVFCS9ItBmoqCFWXXWNYY-d9I7_pvP7YYk,1561
|
23
|
-
solana_agent/interfaces/services/agent.py,sha256=
|
27
|
+
solana_agent/interfaces/services/agent.py,sha256=Hz3ldNb0NDMp8Rm9E3GM0L3kMAO3XLJ6U2HAh6gdPeU,2176
|
24
28
|
solana_agent/interfaces/services/knowledge_base.py,sha256=Mu8lCGFXPmI_IW5LRGti7octLoWZIg4k5PmGwPfe7LQ,1479
|
25
|
-
solana_agent/interfaces/services/query.py,sha256=
|
29
|
+
solana_agent/interfaces/services/query.py,sha256=jk-k6UeBFfWyZdPUr9imYLmlikTAuHegP0oWg2_ioew,2014
|
26
30
|
solana_agent/interfaces/services/routing.py,sha256=Qbn3-DQGVSQKaegHDekSFmn_XCklA0H2f0XUx9-o3wA,367
|
27
31
|
solana_agent/plugins/__init__.py,sha256=coZdgJKq1ExOaj6qB810i3rEhbjdVlrkN76ozt_Ojgo,193
|
28
32
|
solana_agent/plugins/manager.py,sha256=mO_dKSVJ8GToD3wZflMcpKDEBXRoaaMRtY267HENCI0,5542
|
@@ -30,14 +34,15 @@ solana_agent/plugins/registry.py,sha256=VAG0BWdUUIsEE-VpATtHi8qat7ziPuh7pKuzGXau
|
|
30
34
|
solana_agent/plugins/tools/__init__.py,sha256=VDjJxvUjefIy10VztQ9WDKgIegvDbIXBQWsHLhxdZ3o,125
|
31
35
|
solana_agent/plugins/tools/auto_tool.py,sha256=uihijtlc9CCqCIaRcwPuuN7o1SHIpWL2GV3vr33GG3E,1576
|
32
36
|
solana_agent/repositories/__init__.py,sha256=fP83w83CGzXLnSdq-C5wbw9EhWTYtqE2lQTgp46-X_4,163
|
33
|
-
solana_agent/repositories/memory.py,sha256=
|
37
|
+
solana_agent/repositories/memory.py,sha256=cipt9eC5YApi8ozFXAV5xq7QxQJExJmVdgGjkBjHwF0,17279
|
34
38
|
solana_agent/services/__init__.py,sha256=iko0c2MlF8b_SA_nuBGFllr2E3g_JowOrOzGcnU9tkA,162
|
35
|
-
solana_agent/services/agent.py,sha256=
|
39
|
+
solana_agent/services/agent.py,sha256=LWjsdmCeygwmjFoazOCVhrb0hdZHQDEQo_DFWZe57Lk,23133
|
36
40
|
solana_agent/services/knowledge_base.py,sha256=ZvOPrSmcNDgUzz4bJIQ4LeRl9vMZiK9hOfs71IpB7Bk,32735
|
37
|
-
solana_agent/services/query.py,sha256=
|
41
|
+
solana_agent/services/query.py,sha256=3nNGTNNQm-kYN60mIALIbc8ocWN5hQx8zT-pueKl0kA,67602
|
42
|
+
solana_agent/services/realtime.py,sha256=kd9THmffBgquUr4Oho0q8QAa7kPwH6OIR74j7YAoc6s,20592
|
38
43
|
solana_agent/services/routing.py,sha256=hsHe8HSGO_xFc0A17WIOGTidLTfLSfApQw3l2HHqkLo,7614
|
39
|
-
solana_agent-31.1.
|
40
|
-
solana_agent-31.1.
|
41
|
-
solana_agent-31.1.
|
42
|
-
solana_agent-31.1.
|
43
|
-
solana_agent-31.1.
|
44
|
+
solana_agent-31.2.1.dist-info/LICENSE,sha256=BnSRc-NSFuyF2s496l_4EyrwAP6YimvxWcjPiJ0J7g4,1057
|
45
|
+
solana_agent-31.2.1.dist-info/METADATA,sha256=1GipkxJJmxfMzgAdqtpCpoBKXfsKQBbAPG2Zn0Qx4I4,31240
|
46
|
+
solana_agent-31.2.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
47
|
+
solana_agent-31.2.1.dist-info/entry_points.txt,sha256=-AuT_mfqk8dlZ0pHuAjx1ouAWpTRjpqvEUa6YV3lmc0,53
|
48
|
+
solana_agent-31.2.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|