solana-agent 31.2.2__py3-none-any.whl → 31.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- solana_agent/adapters/ffmpeg_transcoder.py +61 -4
- solana_agent/adapters/openai_realtime_ws.py +38 -2
- solana_agent/services/query.py +243 -154
- {solana_agent-31.2.2.dist-info → solana_agent-31.2.3.dist-info}/METADATA +29 -17
- {solana_agent-31.2.2.dist-info → solana_agent-31.2.3.dist-info}/RECORD +8 -8
- {solana_agent-31.2.2.dist-info → solana_agent-31.2.3.dist-info}/LICENSE +0 -0
- {solana_agent-31.2.2.dist-info → solana_agent-31.2.3.dist-info}/WHEEL +0 -0
- {solana_agent-31.2.2.dist-info → solana_agent-31.2.3.dist-info}/entry_points.txt +0 -0
solana_agent/adapters/ffmpeg_transcoder.py
CHANGED
@@ -88,13 +88,14 @@ class FFmpegTranscoder(AudioTranscoder):
    async def from_pcm16(  # pragma: no cover
        self, pcm16_bytes: bytes, output_mime: str, rate_hz: int
    ) -> bytes:
-        """Encode PCM16LE to desired format (
+        """Encode PCM16LE to desired format (AAC ADTS, fragmented MP4, or MP3)."""
        logger.info(
            "Encode from PCM16: output_mime=%s, rate_hz=%d, input_len=%d",
            output_mime,
            rate_hz,
            len(pcm16_bytes),
        )
+
        if output_mime in ("audio/mpeg", "audio/mp3"):
            # Encode to MP3 (often better streaming compatibility on mobile)
            args = [
@@ -122,8 +123,9 @@ class FFmpegTranscoder(AudioTranscoder):
                "Encoded from PCM16 to %s: output_len=%d", output_mime, len(out)
            )
            return out
-
-
+
+        if output_mime in ("audio/aac",):
+            # Encode to AAC in ADTS stream; good for streaming over sockets/HTTP chunked
            args = [
                "-hide_banner",
                "-loglevel",
@@ -149,6 +151,38 @@ class FFmpegTranscoder(AudioTranscoder):
                "Encoded from PCM16 to %s: output_len=%d", output_mime, len(out)
            )
            return out
+
+        if output_mime in ("audio/mp4", "audio/m4a"):
+            # Encode to fragmented MP4 (fMP4) with AAC for better iOS compatibility
+            # For streaming, write an initial moov and fragment over stdout.
+            args = [
+                "-hide_banner",
+                "-loglevel",
+                "error",
+                "-f",
+                "s16le",
+                "-ac",
+                "1",
+                "-ar",
+                str(rate_hz),
+                "-i",
+                "pipe:0",
+                "-c:a",
+                "aac",
+                "-b:a",
+                "96k",
+                "-movflags",
+                "+frag_keyframe+empty_moov",
+                "-f",
+                "mp4",
+                "pipe:1",
+            ]
+            out = await self._run_ffmpeg(args, pcm16_bytes)
+            logger.info(
+                "Encoded from PCM16 to %s (fMP4): output_len=%d", output_mime, len(out)
+            )
+            return out
+
        # Default: passthrough
        logger.info("Encode passthrough (no change), output_len=%d", len(pcm16_bytes))
        return pcm16_bytes
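The new `audio/mp4` / `audio/m4a` branch is the substantive change here: PCM16 is encoded to AAC inside a fragmented MP4 container, which iOS players accept over a chunked HTTP stream. The standalone sketch below shows how that argument list maps onto an ffmpeg subprocess call; the helper name `encode_pcm16_to_fmp4` and the direct `asyncio.create_subprocess_exec` invocation are illustrative only (the adapter routes the same args through its own `_run_ffmpeg`), and it assumes mono 24 kHz input with `ffmpeg` on PATH.

```python
import asyncio


async def encode_pcm16_to_fmp4(pcm16: bytes, rate_hz: int = 24000) -> bytes:
    """Illustrative one-shot encode of mono PCM16LE to fragmented MP4 (fMP4)."""
    args = [
        "ffmpeg", "-hide_banner", "-loglevel", "error",
        # Raw input: signed 16-bit little-endian, 1 channel, given sample rate, from stdin
        "-f", "s16le", "-ac", "1", "-ar", str(rate_hz), "-i", "pipe:0",
        # AAC at 96 kbps
        "-c:a", "aac", "-b:a", "96k",
        # frag_keyframe+empty_moov writes an empty moov up front and then
        # self-contained fragments, so the MP4 is playable while still streaming
        "-movflags", "+frag_keyframe+empty_moov",
        "-f", "mp4", "pipe:1",
    ]
    proc = await asyncio.create_subprocess_exec(
        *args,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    out, err = await proc.communicate(pcm16)
    if proc.returncode != 0:
        raise RuntimeError(f"ffmpeg failed: {err.decode(errors='ignore')}")
    return out
```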
@@ -187,7 +221,7 @@ class FFmpegTranscoder(AudioTranscoder):
                "mp3",
                "pipe:1",
            ]
-        elif output_mime in ("audio/aac",
+        elif output_mime in ("audio/aac",):
            args = [
                "-hide_banner",
                "-loglevel",
@@ -208,6 +242,29 @@ class FFmpegTranscoder(AudioTranscoder):
                "adts",
                "pipe:1",
            ]
+        elif output_mime in ("audio/mp4", "audio/m4a"):
+            args = [
+                "-hide_banner",
+                "-loglevel",
+                "error",
+                "-f",
+                "s16le",
+                "-ac",
+                "1",
+                "-ar",
+                str(rate_hz),
+                "-i",
+                "pipe:0",
+                "-c:a",
+                "aac",
+                "-b:a",
+                "96k",
+                "-movflags",
+                "+frag_keyframe+empty_moov",
+                "-f",
+                "mp4",
+                "pipe:1",
+            ]
        else:
            # Passthrough streaming: just yield input
            async for chunk in pcm_iter:
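The same fMP4 argument list is also added to the streaming path, which transcodes an async iterator of PCM chunks (`pcm_iter`). The diff only shows the new `elif` branch, so the sketch below is a guess at the surrounding pump pattern: feed chunks into a long-lived ffmpeg process on one task while yielding whatever appears on stdout. The function name and chunk size are assumptions, and the real adapter presumably handles stderr and cleanup differently.

```python
import asyncio
from typing import AsyncIterator, List


async def pump_through_ffmpeg(
    pcm_iter: AsyncIterator[bytes], args: List[str]
) -> AsyncIterator[bytes]:
    """Illustrative pump: stdin is fed from pcm_iter while stdout chunks are yielded."""
    proc = await asyncio.create_subprocess_exec(
        "ffmpeg", *args,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.DEVNULL,
    )

    async def _feed() -> None:
        try:
            async for chunk in pcm_iter:
                proc.stdin.write(chunk)
                await proc.stdin.drain()
        finally:
            proc.stdin.close()  # EOF lets ffmpeg flush the final fragment

    feeder = asyncio.create_task(_feed())
    try:
        while True:
            out = await proc.stdout.read(4096)
            if not out:
                break
            yield out
    finally:
        feeder.cancel()
        if proc.returncode is None:
            proc.kill()  # early consumer exit: don't leave ffmpeg waiting on stdin
        await proc.wait()
```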
solana_agent/adapters/openai_realtime_ws.py
CHANGED
@@ -325,7 +325,26 @@ class OpenAIRealtimeWebSocketSession(BaseRealtimeSession):
            try:
                chunk = base64.b64decode(b64)
                self._audio_queue.put_nowait(chunk)
-
+                # Ownership/response tagging for diagnostics
+                try:
+                    owner = getattr(self, "_owner_user_id", None)
+                except Exception:
+                    owner = None
+                try:
+                    rid = getattr(self, "_active_response_id", None)
+                except Exception:
+                    rid = None
+                try:
+                    gen = int(getattr(self, "_response_generation", 0))
+                except Exception:
+                    gen = None
+                logger.info(
+                    "Audio delta bytes=%d owner=%s rid=%s gen=%s",
+                    len(chunk),
+                    owner,
+                    rid,
+                    gen,
+                )
                try:
                    # New response detected if we were previously inactive
                    if not getattr(self, "_response_active", False):
@@ -492,8 +511,25 @@ class OpenAIRealtimeWebSocketSession(BaseRealtimeSession):
                "response.audio.done",
            ):
                # End of audio stream for the response; stop audio iterator but keep WS open for transcripts
+                try:
+                    owner = getattr(self, "_owner_user_id", None)
+                except Exception:
+                    owner = None
+                try:
+                    rid = (data.get("response") or {}).get("id") or getattr(
+                        self, "_active_response_id", None
+                    )
+                except Exception:
+                    rid = None
+                try:
+                    gen = int(getattr(self, "_response_generation", 0))
+                except Exception:
+                    gen = None
                logger.info(
-                    "Realtime WS: output audio done;
+                    "Realtime WS: output audio done; owner=%s rid=%s gen=%s",
+                    owner,
+                    rid,
+                    gen,
                )
                # If we have a buffered transcript for this response, flush it now
                try:
solana_agent/services/query.py
CHANGED
@@ -67,10 +67,117 @@ class QueryService(QueryServiceInterface):
        self._sticky_sessions: Dict[str, Dict[str, Any]] = {}
        # Optional realtime service attached by factory (populated in factory)
        self.realtime = None  # type: ignore[attr-defined]
-        # Persistent realtime WS per user for
-
+        # Persistent realtime WS pool per user for reuse across turns/devices
+        # { user_id: [RealtimeService, ...] }
+        self._rt_services: Dict[str, List[Any]] = {}
+        # Global lock for creating/finding per-user sessions
        self._rt_lock = asyncio.Lock()

+    async def _try_acquire_lock(self, lock: asyncio.Lock) -> bool:
+        try:
+            await asyncio.wait_for(lock.acquire(), timeout=0)
+            return True
+        except asyncio.TimeoutError:
+            return False
+        except Exception:
+            return False
+
+    async def _alloc_realtime_session(
+        self,
+        user_id: str,
+        *,
+        api_key: str,
+        rt_voice: str,
+        final_instructions: str,
+        initial_tools: Optional[List[Dict[str, Any]]],
+        encode_in: bool,
+        encode_out: bool,
+        audio_input_format: str,
+        audio_output_format: str,
+    ) -> Any:
+        """Get a free (or new) realtime session for this user. Marks it busy via an internal lock.
+
+        Returns the RealtimeService with an acquired _in_use_lock that MUST be released by caller.
+        """
+        from solana_agent.interfaces.providers.realtime import (
+            RealtimeSessionOptions,
+        )
+        from solana_agent.adapters.openai_realtime_ws import (
+            OpenAIRealtimeWebSocketSession,
+        )
+        from solana_agent.adapters.ffmpeg_transcoder import FFmpegTranscoder
+
+        def _mime_from(fmt: str) -> str:
+            f = (fmt or "").lower()
+            return {
+                "aac": "audio/aac",
+                "mp3": "audio/mpeg",
+                "mp4": "audio/mp4",
+                "m4a": "audio/mp4",
+                "mpeg": "audio/mpeg",
+                "mpga": "audio/mpeg",
+                "wav": "audio/wav",
+                "flac": "audio/flac",
+                "opus": "audio/opus",
+                "ogg": "audio/ogg",
+                "webm": "audio/webm",
+                "pcm": "audio/pcm",
+            }.get(f, "audio/pcm")
+
+        async with self._rt_lock:
+            pool = self._rt_services.get(user_id) or []
+            # Try to reuse an idle session strictly owned by this user
+            for rt in pool:
+                # Extra safety: never reuse a session from another user
+                owner = getattr(rt, "_owner_user_id", None)
+                if owner is not None and owner != user_id:
+                    continue
+                lock = getattr(rt, "_in_use_lock", None)
+                if lock is None:
+                    lock = asyncio.Lock()
+                    setattr(rt, "_in_use_lock", lock)
+                if not lock.locked():
+                    if await self._try_acquire_lock(lock):
+                        return rt
+            # None free: create a new session
+            opts = RealtimeSessionOptions(
+                model="gpt-realtime",
+                voice=rt_voice,
+                vad_enabled=False,
+                input_rate_hz=24000,
+                output_rate_hz=24000,
+                input_mime="audio/pcm",
+                output_mime="audio/pcm",
+                tools=initial_tools or None,
+                tool_choice="auto",
+            )
+            try:
+                opts.instructions = final_instructions
+                opts.voice = rt_voice
+            except Exception:
+                pass
+            conv_session = OpenAIRealtimeWebSocketSession(api_key=api_key, options=opts)
+            transcoder = FFmpegTranscoder() if (encode_in or encode_out) else None
+            from solana_agent.services.realtime import RealtimeService
+
+            rt = RealtimeService(
+                session=conv_session,
+                options=opts,
+                transcoder=transcoder,
+                accept_compressed_input=encode_in,
+                client_input_mime=_mime_from(audio_input_format),
+                encode_output=encode_out,
+                client_output_mime=_mime_from(audio_output_format),
+            )
+            # Tag ownership to prevent any cross-user reuse
+            setattr(rt, "_owner_user_id", user_id)
+            setattr(rt, "_in_use_lock", asyncio.Lock())
+            # Mark busy
+            await getattr(rt, "_in_use_lock").acquire()
+            pool.append(rt)
+            self._rt_services[user_id] = pool
+            return rt
+
    def _get_sticky_agent(self, user_id: str) -> Optional[str]:
        sess = self._sticky_sessions.get(user_id)
        return sess.get("agent") if isinstance(sess, dict) else None
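`_alloc_realtime_session` implements a small per-user pool: reuse an idle session owned by the same user, otherwise build a new `RealtimeService`, tag it with the owner, and hand it back already marked busy. Below is a stripped-down sketch of that pooling pattern with a generic `Session` placeholder instead of `RealtimeService`; it uses a plain `locked()` check for the non-blocking acquire rather than the `asyncio.wait_for(lock.acquire(), timeout=0)` helper the diff adds, so it illustrates the pattern rather than mirroring the package code line by line.

```python
import asyncio
from typing import Dict, List


class Session:
    """Placeholder for the pooled resource (RealtimeService in the diff)."""

    def __init__(self, owner: str) -> None:
        self.owner = owner
        self.in_use = asyncio.Lock()  # held while a request is using the session


class SessionPool:
    def __init__(self) -> None:
        self._pools: Dict[str, List[Session]] = {}
        self._lock = asyncio.Lock()  # guards pool lookup/creation

    async def acquire(self, user_id: str) -> Session:
        async with self._lock:
            pool = self._pools.setdefault(user_id, [])
            for sess in pool:
                # Never hand one user's session to another user
                if sess.owner != user_id:
                    continue
                # A free asyncio.Lock acquires without suspending, so this
                # check-then-acquire is effectively immediate inside this task.
                if not sess.in_use.locked():
                    await sess.in_use.acquire()
                    return sess
            # No idle session: create one and mark it busy before exposing it
            sess = Session(user_id)
            await sess.in_use.acquire()
            pool.append(sess)
            return sess

    def release(self, sess: Session) -> None:
        if sess.in_use.locked():
            sess.in_use.release()


async def demo() -> None:
    pool = SessionPool()
    a = await pool.acquire("alice")
    b = await pool.acquire("alice")   # a is busy, so a second session is created
    assert a is not b
    pool.release(a)
    c = await pool.acquire("alice")   # a is idle again and gets reused
    assert c is a


if __name__ == "__main__":
    asyncio.run(demo())
```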
@@ -554,14 +661,7 @@ class QueryService(QueryServiceInterface):
        final_instructions = "\n\n".join([p for p in parts if p])

        # 4) Open a single WS session for assistant audio
-
-            OpenAIRealtimeWebSocketSession,
-        )
-        from solana_agent.interfaces.providers.realtime import (
-            RealtimeSessionOptions,
-        )
-        from solana_agent.services.realtime import RealtimeService
-        from solana_agent.adapters.ffmpeg_transcoder import FFmpegTranscoder
+        # Realtime imports handled inside allocator helper

        api_key = None
        try:
@@ -600,171 +700,160 @@ class QueryService(QueryServiceInterface):
                or (is_audio_bytes and audio_input_format.lower() != "pcm")
            )

- [15 removed lines not captured in the source diff]
+            # Allocate or reuse a realtime session for this specific request/user
+            rt = await self._alloc_realtime_session(
+                user_id,
+                api_key=api_key,
+                rt_voice=rt_voice,
+                final_instructions=final_instructions,
+                initial_tools=initial_tools,
+                encode_in=encode_in,
+                encode_out=encode_out,
+                audio_input_format=audio_input_format,
+                audio_output_format=audio_output_format,
+            )
+            # Ensure lock is released no matter what
+            try:
+                # Tool executor
+                async def _exec(
+                    tool_name: str, args: Dict[str, Any]
+                ) -> Dict[str, Any]:
                    try:
- [5 removed lines not captured in the source diff]
-                        api_key=api_key, options=opts
-                    )
-                    transcoder = (
-                        FFmpegTranscoder() if (encode_in or encode_out) else None
-                    )
-                    rt = RealtimeService(
-                        session=conv_session,
-                        options=opts,
-                        transcoder=transcoder,
-                        accept_compressed_input=encode_in,
-                        client_input_mime=_mime_from(audio_input_format),
-                        encode_output=encode_out,
-                        client_output_mime=_mime_from(audio_output_format),
-                    )
-                    self._rt_services[user_id] = rt
+                        return await self.agent_service.execute_tool(
+                            agent_name, tool_name, args or {}
+                        )
+                    except Exception as e:
+                        return {"status": "error", "message": str(e)}

-
-                async def _exec(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:
+                # If possible, set on underlying session
                try:
- [4 removed lines not captured in the source diff]
-                        return {"status": "error", "message": str(e)}
-
-                # If possible, set on underlying session
-                try:
-                    if hasattr(rt, "_session"):
-                        getattr(rt, "_session").set_tool_executor(_exec)  # type: ignore[attr-defined]
-                except Exception:
-                    pass
-
-                # Connect/configure
-                if not getattr(rt, "_connected", False):
-                    await rt.start()
-                await rt.configure(
-                    voice=rt_voice,
-                    vad_enabled=bool(vad) if vad is not None else False,
-                    instructions=final_instructions,
-                    tools=initial_tools or None,
-                    tool_choice="auto",
-                )
+                    if hasattr(rt, "_session"):
+                        getattr(rt, "_session").set_tool_executor(_exec)  # type: ignore[attr-defined]
+                except Exception:
+                    pass

- [10 removed lines not captured in the source diff]
-                    pass
+                # Connect/configure
+                if not getattr(rt, "_connected", False):
+                    await rt.start()
+                await rt.configure(
+                    voice=rt_voice,
+                    vad_enabled=bool(vad) if vad is not None else False,
+                    instructions=final_instructions,
+                    tools=initial_tools or None,
+                    tool_choice="auto",
+                )

- [3 removed lines not captured in the source diff]
+                # Ensure clean input buffers for this turn
+                try:
+                    await rt.clear_input()
+                except Exception:
+                    pass
+                # Also reset any leftover output audio so new turn doesn't replay old chunks
                try:
-
+                    if hasattr(rt, "reset_output_stream"):
+                        rt.reset_output_stream()
                except Exception:
                    pass

- [14 removed lines not captured in the source diff]
+                # Persist once per turn
+                turn_id = await self.realtime_begin_turn(user_id)
+                if turn_id and user_text:
+                    try:
+                        await self.realtime_update_user(user_id, turn_id, user_text)
+                    except Exception:
+                        pass
+
+                # Feed audio into WS if audio bytes provided; else use input_text
+                if is_audio_bytes:
+                    bq = bytes(query)
+                    logger.info(
+                        "Realtime: appending input audio to WS via FFmpeg, len=%d, fmt=%s",
+                        len(bq),
+                        audio_input_format,
+                    )
+                    await rt.append_audio(bq)
+                    vad_enabled_value = bool(vad) if vad is not None else False
+                    if not vad_enabled_value:
+                        await rt.commit_input()
+                        # Manually trigger response when VAD is disabled
+                        await rt.create_response({})
+                    else:
+                        # With server VAD enabled, the model will auto-create a response at end of speech
+                        logger.debug(
+                            "Realtime: VAD enabled — skipping manual response.create"
+                        )
                else:
-                    #
-
-
+                    # Rely on configured session voice; attach input_text only
+                    await rt.create_response(
+                        {
+                            "modalities": ["audio"],
+                            "input": [
+                                {"type": "input_text", "text": user_text or ""}
+                            ],
+                        }
                    )
-                else:
-                    # Rely on configured session voice; attach input_text only
-                    await rt.create_response(
-                        {
-                            "modalities": ["audio"],
-                            "input": [{"type": "input_text", "text": user_text or ""}],
-                        }
-                    )

- [3 removed lines not captured in the source diff]
+                # Collect audio and transcripts
+                user_tr = ""
+                asst_tr = ""

- [5 removed lines not captured in the source diff]
+                async def _drain_in_tr():
+                    nonlocal user_tr
+                    async for t in rt.iter_input_transcript():
+                        if t:
+                            user_tr += t

- [5 removed lines not captured in the source diff]
+                async def _drain_out_tr():
+                    nonlocal asst_tr
+                    async for t in rt.iter_output_transcript():
+                        if t:
+                            asst_tr += t

- [27 removed lines not captured in the source diff]
+                in_task = asyncio.create_task(_drain_in_tr())
+                out_task = asyncio.create_task(_drain_out_tr())
+                try:
+                    async for audio_chunk in rt.iter_output_audio_encoded():
+                        yield audio_chunk
+                finally:
+                    in_task.cancel()
+                    out_task.cancel()
+                # If no WS input transcript was captured, fall back to HTTP STT result
+                if not user_tr:
+                    try:
+                        if "stt_task" in locals() and stt_task is not None:
+                            user_tr = await stt_task
+                    except Exception:
+                        pass
+                if turn_id:
+                    try:
+                        if user_tr:
+                            await self.realtime_update_user(
+                                user_id, turn_id, user_tr
+                            )
+                        if asst_tr:
+                            await self.realtime_update_assistant(
+                                user_id, turn_id, asst_tr
+                            )
+                    except Exception:
+                        pass
+                    try:
+                        await self.realtime_finalize_turn(user_id, turn_id)
+                    except Exception:
+                        pass
+                # Clear input buffer for next turn reuse
                try:
-                    await
+                    await rt.clear_input()
                except Exception:
                    pass
-
+            finally:
+                # Always release the session for reuse by other concurrent requests/devices
                try:
-
+                    lock = getattr(rt, "_in_use_lock", None)
+                    if lock and lock.locked():
+                        lock.release()
                except Exception:
                    pass
-
+            return

            # 1) Transcribe audio or accept text
            user_text = ""
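The rewritten turn loop yields encoded audio to the caller while two background tasks drain the input and output transcript iterators; it then cancels the drains, persists the transcripts against the turn (begin/update/finalize), and releases the pooled session in a `finally` block. A self-contained sketch of that fan-out pattern, with a fake session standing in for `RealtimeService`, looks roughly like this:

```python
import asyncio
from typing import AsyncIterator


class FakeRealtimeSession:
    """Stand-in exposing the three iterators the turn loop consumes."""

    async def iter_output_audio_encoded(self) -> AsyncIterator[bytes]:
        for i in range(3):
            await asyncio.sleep(0.01)
            yield f"audio-{i}".encode()

    async def iter_input_transcript(self) -> AsyncIterator[str]:
        yield "hello "
        yield "world"

    async def iter_output_transcript(self) -> AsyncIterator[str]:
        yield "hi "
        yield "there"


async def run_turn(rt: FakeRealtimeSession) -> None:
    user_tr, asst_tr = "", ""

    async def drain_in() -> None:
        nonlocal user_tr
        async for t in rt.iter_input_transcript():
            user_tr += t

    async def drain_out() -> None:
        nonlocal asst_tr
        async for t in rt.iter_output_transcript():
            asst_tr += t

    in_task = asyncio.create_task(drain_in())
    out_task = asyncio.create_task(drain_out())
    try:
        # Audio chunks are forwarded as soon as they arrive; the transcript
        # drains accumulate text concurrently in the background.
        async for chunk in rt.iter_output_audio_encoded():
            print("chunk:", chunk)
    finally:
        in_task.cancel()
        out_task.cancel()
        await asyncio.gather(in_task, out_task, return_exceptions=True)
    # At this point the real service persists user_tr/asst_tr for the turn
    # and releases the session's _in_use_lock.
    print("user transcript:", user_tr)
    print("assistant transcript:", asst_tr)


if __name__ == "__main__":
    asyncio.run(run_turn(FakeRealtimeSession()))
```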
{solana_agent-31.2.2.dist-info → solana_agent-31.2.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
Metadata-Version: 2.3
Name: solana-agent
-Version: 31.2.2
+Version: 31.2.3
Summary: AI Agents for Solana
License: MIT
Keywords: solana,solana ai,solana agent,ai,ai agent,ai agents
@@ -317,27 +317,39 @@ Due to the overhead of the router (API call) - realtime only supports a single a

Realtime uses MongoDB for memory so Zep is not needed.

+This example will work using expo-audio on Android and iOS.
+
```python
from solana_agent import SolanaAgent

solana_agent = SolanaAgent(config=config)

-
-
-async
- [10 removed lines not captured in the source diff]
-):
-
-
+audio_content = await audio_file.read()
+
+async def generate():
+    async for chunk in solana_agent.process(
+        user_id=user_id,
+        message=audio_content,
+        realtime=True,
+        rt_encode_input=True,
+        rt_encode_output=True,
+        rt_voice="marin",
+        output_format="audio",
+        audio_output_format="m4a",
+        audio_input_format="mp4",
+    ):
+        yield chunk
+
+return StreamingResponse(
+    content=generate(),
+    media_type="audio/mp4",
+    headers={
+        "Cache-Control": "no-store",
+        "Pragma": "no-cache",
+        "Content-Disposition": "inline; filename=stream.m4a",
+        "X-Accel-Buffering": "no",
+    },
+)

### Image/Text Streaming

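The README snippet uses `await audio_file.read()` and a bare `return StreamingResponse(...)`, so it presumably sits inside a FastAPI (or similar Starlette-based) route handler. A minimal sketch of that assumed surrounding endpoint is shown below; the route path, upload parameter, and config placeholder are illustrative and not part of the package documentation.

```python
# Hypothetical FastAPI wrapper around the README snippet; route path and
# parameter names are assumptions, not part of the package docs.
from fastapi import FastAPI, UploadFile
from fastapi.responses import StreamingResponse

from solana_agent import SolanaAgent

app = FastAPI()
config = {}  # placeholder: use the config dict shown earlier in the README
solana_agent = SolanaAgent(config=config)


@app.post("/chat/{user_id}")
async def chat(user_id: str, audio_file: UploadFile):
    audio_content = await audio_file.read()

    async def generate():
        async for chunk in solana_agent.process(
            user_id=user_id,
            message=audio_content,
            realtime=True,
            rt_encode_input=True,
            rt_encode_output=True,
            rt_voice="marin",
            output_format="audio",
            audio_output_format="m4a",
            audio_input_format="mp4",
        ):
            yield chunk

    return StreamingResponse(
        content=generate(),
        media_type="audio/mp4",
        headers={
            "Cache-Control": "no-store",
            "Pragma": "no-cache",
            "Content-Disposition": "inline; filename=stream.m4a",
            "X-Accel-Buffering": "no",
        },
    )
```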
{solana_agent-31.2.2.dist-info → solana_agent-31.2.3.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
solana_agent/__init__.py,sha256=iu0PnSAEZ6qzzHFVu7WVCQVbeCZmiZ6axUDDgWRn1j4,1070
solana_agent/adapters/__init__.py,sha256=tiEEuuy0NF3ngc_tGEcRTt71zVI58v3dYY9RvMrF2Cg,204
-solana_agent/adapters/ffmpeg_transcoder.py,sha256=
+solana_agent/adapters/ffmpeg_transcoder.py,sha256=uq5tp5b2TMvkYptwvQ9_GX4n87mClILMA8SJGNxRWRc,10881
solana_agent/adapters/mongodb_adapter.py,sha256=Hq3S8VzfLmnPjV40z8yJXGqUamOJcX5GbOMd-1nNWO4,3175
solana_agent/adapters/openai_adapter.py,sha256=U3x6fMRmdvfvNt7M9-RKzV835WtPxNGrV1VRBMiRHV8,26714
-solana_agent/adapters/openai_realtime_ws.py,sha256=
+solana_agent/adapters/openai_realtime_ws.py,sha256=2AFtfJGft5OE8oYZMRKVw89Yg-a4Wgc7bIrMYeRT49g,77252
solana_agent/adapters/pinecone_adapter.py,sha256=XlfOpoKHwzpaU4KZnovO2TnEYbsw-3B53ZKQDtBeDgU,23847
solana_agent/cli.py,sha256=FGvTIQmKLp6XsQdyKtuhIIfbBtMmcCCXfigNrj4bzMc,4704
solana_agent/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -38,11 +38,11 @@ solana_agent/repositories/memory.py,sha256=cipt9eC5YApi8ozFXAV5xq7QxQJExJmVdgGjk
solana_agent/services/__init__.py,sha256=iko0c2MlF8b_SA_nuBGFllr2E3g_JowOrOzGcnU9tkA,162
solana_agent/services/agent.py,sha256=LWjsdmCeygwmjFoazOCVhrb0hdZHQDEQo_DFWZe57Lk,23133
solana_agent/services/knowledge_base.py,sha256=ZvOPrSmcNDgUzz4bJIQ4LeRl9vMZiK9hOfs71IpB7Bk,32735
-solana_agent/services/query.py,sha256=
+solana_agent/services/query.py,sha256=oqNFbQsz2FiSswGkt8ZlNaOR8DAz66hgWXD5kHc7c-M,71428
solana_agent/services/realtime.py,sha256=6_44-JaKN0V4gkizaisGLPsopM5Z8xymQcCbq5V3yEc,21054
solana_agent/services/routing.py,sha256=hsHe8HSGO_xFc0A17WIOGTidLTfLSfApQw3l2HHqkLo,7614
-solana_agent-31.2.
-solana_agent-31.2.
-solana_agent-31.2.
-solana_agent-31.2.
-solana_agent-31.2.
+solana_agent-31.2.3.dist-info/LICENSE,sha256=BnSRc-NSFuyF2s496l_4EyrwAP6YimvxWcjPiJ0J7g4,1057
+solana_agent-31.2.3.dist-info/METADATA,sha256=FiPGGHeW_QJ6tpSDreJKXFDz5Abbe0bSTZFW0V7C4gM,31168
+solana_agent-31.2.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+solana_agent-31.2.3.dist-info/entry_points.txt,sha256=-AuT_mfqk8dlZ0pHuAjx1ouAWpTRjpqvEUa6YV3lmc0,53
+solana_agent-31.2.3.dist-info/RECORD,,
{solana_agent-31.2.2.dist-info → solana_agent-31.2.3.dist-info}/LICENSE
File without changes
{solana_agent-31.2.2.dist-info → solana_agent-31.2.3.dist-info}/WHEEL
File without changes
{solana_agent-31.2.2.dist-info → solana_agent-31.2.3.dist-info}/entry_points.txt
File without changes