solana-agent 31.2.2__py3-none-any.whl → 31.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -88,13 +88,14 @@ class FFmpegTranscoder(AudioTranscoder):
     async def from_pcm16(  # pragma: no cover
         self, pcm16_bytes: bytes, output_mime: str, rate_hz: int
     ) -> bytes:
-        """Encode PCM16LE to desired format (currently AAC ADTS for mobile streaming)."""
+        """Encode PCM16LE to desired format (AAC ADTS, fragmented MP4, or MP3)."""
         logger.info(
             "Encode from PCM16: output_mime=%s, rate_hz=%d, input_len=%d",
             output_mime,
             rate_hz,
             len(pcm16_bytes),
         )
+
         if output_mime in ("audio/mpeg", "audio/mp3"):
             # Encode to MP3 (often better streaming compatibility on mobile)
             args = [
@@ -122,8 +123,9 @@ class FFmpegTranscoder(AudioTranscoder):
                 "Encoded from PCM16 to %s: output_len=%d", output_mime, len(out)
             )
             return out
-        if output_mime in ("audio/aac", "audio/mp4", "audio/m4a"):
-            # Encode to AAC in ADTS stream; clients can play it as AAC.
+
+        if output_mime in ("audio/aac",):
+            # Encode to AAC in ADTS stream; good for streaming over sockets/HTTP chunked
             args = [
                 "-hide_banner",
                 "-loglevel",
@@ -149,6 +151,38 @@ class FFmpegTranscoder(AudioTranscoder):
                 "Encoded from PCM16 to %s: output_len=%d", output_mime, len(out)
             )
             return out
+
+        if output_mime in ("audio/mp4", "audio/m4a"):
+            # Encode to fragmented MP4 (fMP4) with AAC for better iOS compatibility
+            # For streaming, write an initial moov and fragment over stdout.
+            args = [
+                "-hide_banner",
+                "-loglevel",
+                "error",
+                "-f",
+                "s16le",
+                "-ac",
+                "1",
+                "-ar",
+                str(rate_hz),
+                "-i",
+                "pipe:0",
+                "-c:a",
+                "aac",
+                "-b:a",
+                "96k",
+                "-movflags",
+                "+frag_keyframe+empty_moov",
+                "-f",
+                "mp4",
+                "pipe:1",
+            ]
+            out = await self._run_ffmpeg(args, pcm16_bytes)
+            logger.info(
+                "Encoded from PCM16 to %s (fMP4): output_len=%d", output_mime, len(out)
+            )
+            return out
+
         # Default: passthrough
         logger.info("Encode passthrough (no change), output_len=%d", len(pcm16_bytes))
         return pcm16_bytes
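
The `+frag_keyframe+empty_moov` movflags are what make this MP4 output pipeable: ffmpeg writes an empty `moov` header up front and then emits self-contained fragments, instead of seeking back over the file at the end of the encode. The `_run_ffmpeg` helper itself is not shown in this diff; a minimal sketch of what such a pipe-through wrapper plausibly does (names and error handling here are illustrative, not the package's actual code):

```python
import asyncio

async def run_ffmpeg(args: list[str], input_bytes: bytes) -> bytes:
    # Feed raw PCM on stdin (pipe:0) and collect the encoded container
    # bytes from stdout (pipe:1); stderr is captured for diagnostics.
    proc = await asyncio.create_subprocess_exec(
        "ffmpeg",
        *args,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    out, err = await proc.communicate(input=input_bytes)
    if proc.returncode != 0:
        raise RuntimeError(f"ffmpeg exited {proc.returncode}: {err.decode(errors='ignore')}")
    return out
```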
@@ -187,7 +221,7 @@ class FFmpegTranscoder(AudioTranscoder):
                 "mp3",
                 "pipe:1",
             ]
-        elif output_mime in ("audio/aac", "audio/mp4", "audio/m4a"):
+        elif output_mime in ("audio/aac",):
             args = [
                 "-hide_banner",
                 "-loglevel",
@@ -208,6 +242,29 @@ class FFmpegTranscoder(AudioTranscoder):
                 "adts",
                 "pipe:1",
             ]
+        elif output_mime in ("audio/mp4", "audio/m4a"):
+            args = [
+                "-hide_banner",
+                "-loglevel",
+                "error",
+                "-f",
+                "s16le",
+                "-ac",
+                "1",
+                "-ar",
+                str(rate_hz),
+                "-i",
+                "pipe:0",
+                "-c:a",
+                "aac",
+                "-b:a",
+                "96k",
+                "-movflags",
+                "+frag_keyframe+empty_moov",
+                "-f",
+                "mp4",
+                "pipe:1",
+            ]
         else:
             # Passthrough streaming: just yield input
             async for chunk in pcm_iter:
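
In this streaming variant the same argument list drives a long-lived ffmpeg process: PCM chunks are written to stdin while encoded fragments are read back from stdout concurrently, which is exactly what fMP4's early `moov` makes possible. A rough sketch of that producer/consumer shape, assuming an `args` list like the one above (illustrative only; the package's real plumbing is not part of this hunk):

```python
import asyncio
from typing import AsyncIterator

async def stream_ffmpeg(
    args: list[str], pcm_iter: AsyncIterator[bytes]
) -> AsyncIterator[bytes]:
    # Sketch only: omits stderr handling and timeout/cleanup corner cases.
    proc = await asyncio.create_subprocess_exec(
        "ffmpeg",
        *args,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.DEVNULL,
    )

    async def feed() -> None:
        # Copy PCM into ffmpeg's stdin, then close it so the encoder
        # flushes its final fragment.
        async for chunk in pcm_iter:
            proc.stdin.write(chunk)
            await proc.stdin.drain()
        proc.stdin.close()

    writer = asyncio.create_task(feed())
    try:
        while True:
            data = await proc.stdout.read(4096)
            if not data:
                break
            yield data
    finally:
        writer.cancel()
        await proc.wait()
```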
@@ -325,7 +325,26 @@ class OpenAIRealtimeWebSocketSession(BaseRealtimeSession):
             try:
                 chunk = base64.b64decode(b64)
                 self._audio_queue.put_nowait(chunk)
-                logger.info("Audio delta bytes=%d", len(chunk))
+                # Ownership/response tagging for diagnostics
+                try:
+                    owner = getattr(self, "_owner_user_id", None)
+                except Exception:
+                    owner = None
+                try:
+                    rid = getattr(self, "_active_response_id", None)
+                except Exception:
+                    rid = None
+                try:
+                    gen = int(getattr(self, "_response_generation", 0))
+                except Exception:
+                    gen = None
+                logger.info(
+                    "Audio delta bytes=%d owner=%s rid=%s gen=%s",
+                    len(chunk),
+                    owner,
+                    rid,
+                    gen,
+                )
                 try:
                     # New response detected if we were previously inactive
                     if not getattr(self, "_response_active", False):
@@ -492,8 +511,25 @@ class OpenAIRealtimeWebSocketSession(BaseRealtimeSession):
                 "response.audio.done",
             ):
                 # End of audio stream for the response; stop audio iterator but keep WS open for transcripts
+                try:
+                    owner = getattr(self, "_owner_user_id", None)
+                except Exception:
+                    owner = None
+                try:
+                    rid = (data.get("response") or {}).get("id") or getattr(
+                        self, "_active_response_id", None
+                    )
+                except Exception:
+                    rid = None
+                try:
+                    gen = int(getattr(self, "_response_generation", 0))
+                except Exception:
+                    gen = None
                 logger.info(
-                    "Realtime WS: output audio done; ending audio stream"
+                    "Realtime WS: output audio done; owner=%s rid=%s gen=%s",
+                    owner,
+                    rid,
+                    gen,
                 )
                 # If we have a buffered transcript for this response, flush it now
                 try:
@@ -67,10 +67,117 @@ class QueryService(QueryServiceInterface):
         self._sticky_sessions: Dict[str, Dict[str, Any]] = {}
         # Optional realtime service attached by factory (populated in factory)
         self.realtime = None  # type: ignore[attr-defined]
-        # Persistent realtime WS per user for push-to-talk reuse
-        self._rt_services = {}
+        # Persistent realtime WS pool per user for reuse across turns/devices
+        # { user_id: [RealtimeService, ...] }
+        self._rt_services: Dict[str, List[Any]] = {}
+        # Global lock for creating/finding per-user sessions
         self._rt_lock = asyncio.Lock()

+    async def _try_acquire_lock(self, lock: asyncio.Lock) -> bool:
+        try:
+            await asyncio.wait_for(lock.acquire(), timeout=0)
+            return True
+        except asyncio.TimeoutError:
+            return False
+        except Exception:
+            return False
+
+    async def _alloc_realtime_session(
+        self,
+        user_id: str,
+        *,
+        api_key: str,
+        rt_voice: str,
+        final_instructions: str,
+        initial_tools: Optional[List[Dict[str, Any]]],
+        encode_in: bool,
+        encode_out: bool,
+        audio_input_format: str,
+        audio_output_format: str,
+    ) -> Any:
+        """Get a free (or new) realtime session for this user. Marks it busy via an internal lock.
+
+        Returns the RealtimeService with an acquired _in_use_lock that MUST be released by caller.
+        """
+        from solana_agent.interfaces.providers.realtime import (
+            RealtimeSessionOptions,
+        )
+        from solana_agent.adapters.openai_realtime_ws import (
+            OpenAIRealtimeWebSocketSession,
+        )
+        from solana_agent.adapters.ffmpeg_transcoder import FFmpegTranscoder
+
+        def _mime_from(fmt: str) -> str:
+            f = (fmt or "").lower()
+            return {
+                "aac": "audio/aac",
+                "mp3": "audio/mpeg",
+                "mp4": "audio/mp4",
+                "m4a": "audio/mp4",
+                "mpeg": "audio/mpeg",
+                "mpga": "audio/mpeg",
+                "wav": "audio/wav",
+                "flac": "audio/flac",
+                "opus": "audio/opus",
+                "ogg": "audio/ogg",
+                "webm": "audio/webm",
+                "pcm": "audio/pcm",
+            }.get(f, "audio/pcm")
+
+        async with self._rt_lock:
+            pool = self._rt_services.get(user_id) or []
+            # Try to reuse an idle session strictly owned by this user
+            for rt in pool:
+                # Extra safety: never reuse a session from another user
+                owner = getattr(rt, "_owner_user_id", None)
+                if owner is not None and owner != user_id:
+                    continue
+                lock = getattr(rt, "_in_use_lock", None)
+                if lock is None:
+                    lock = asyncio.Lock()
+                    setattr(rt, "_in_use_lock", lock)
+                if not lock.locked():
+                    if await self._try_acquire_lock(lock):
+                        return rt
+            # None free: create a new session
+            opts = RealtimeSessionOptions(
+                model="gpt-realtime",
+                voice=rt_voice,
+                vad_enabled=False,
+                input_rate_hz=24000,
+                output_rate_hz=24000,
+                input_mime="audio/pcm",
+                output_mime="audio/pcm",
+                tools=initial_tools or None,
+                tool_choice="auto",
+            )
+            try:
+                opts.instructions = final_instructions
+                opts.voice = rt_voice
+            except Exception:
+                pass
+            conv_session = OpenAIRealtimeWebSocketSession(api_key=api_key, options=opts)
+            transcoder = FFmpegTranscoder() if (encode_in or encode_out) else None
+            from solana_agent.services.realtime import RealtimeService
+
+            rt = RealtimeService(
+                session=conv_session,
+                options=opts,
+                transcoder=transcoder,
+                accept_compressed_input=encode_in,
+                client_input_mime=_mime_from(audio_input_format),
+                encode_output=encode_out,
+                client_output_mime=_mime_from(audio_output_format),
+            )
+            # Tag ownership to prevent any cross-user reuse
+            setattr(rt, "_owner_user_id", user_id)
+            setattr(rt, "_in_use_lock", asyncio.Lock())
+            # Mark busy
+            await getattr(rt, "_in_use_lock").acquire()
+            pool.append(rt)
+            self._rt_services[user_id] = pool
+            return rt
+
     def _get_sticky_agent(self, user_id: str) -> Optional[str]:
         sess = self._sticky_sessions.get(user_id)
         return sess.get("agent") if isinstance(sess, dict) else None
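
The allocator leans on a try-lock idiom: `asyncio.Lock` has no public non-blocking acquire, so `_try_acquire_lock` uses `asyncio.wait_for(..., timeout=0)` to fail fast when a session is busy. Zero-timeout semantics are subtle and have shifted across Python versions (pre-3.12 `wait_for` wraps the awaitable in a task, which can be cancelled before a free lock is actually taken), so the `lock.locked()` pre-check does the primary gatekeeping. A condensed sketch of the checkout step under those assumptions (names mirror the diff, but the function itself is illustrative):

```python
import asyncio
from typing import Any, List, Optional

async def checkout_idle(pool: List[Any], registry_lock: asyncio.Lock) -> Optional[Any]:
    # Scan for an idle session while holding the registry lock, so two
    # concurrent turns cannot both grab the same session; the per-session
    # _in_use_lock then marks it busy until the turn's finally-block
    # releases it.
    async with registry_lock:
        for rt in pool:
            lock = getattr(rt, "_in_use_lock", None)
            if lock is None:
                lock = asyncio.Lock()
                setattr(rt, "_in_use_lock", lock)
            if not lock.locked():
                try:
                    await asyncio.wait_for(lock.acquire(), timeout=0)
                    return rt
                except asyncio.TimeoutError:
                    continue
        return None  # caller creates a fresh session, as in the diff
```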
@@ -554,14 +661,7 @@ class QueryService(QueryServiceInterface):
             final_instructions = "\n\n".join([p for p in parts if p])

             # 4) Open a single WS session for assistant audio
-            from solana_agent.adapters.openai_realtime_ws import (
-                OpenAIRealtimeWebSocketSession,
-            )
-            from solana_agent.interfaces.providers.realtime import (
-                RealtimeSessionOptions,
-            )
-            from solana_agent.services.realtime import RealtimeService
-            from solana_agent.adapters.ffmpeg_transcoder import FFmpegTranscoder
+            # Realtime imports handled inside allocator helper

             api_key = None
             try:
@@ -600,171 +700,160 @@ class QueryService(QueryServiceInterface):
                 or (is_audio_bytes and audio_input_format.lower() != "pcm")
             )

-            async with self._rt_lock:
-                rt = self._rt_services.get(user_id)
-                if not rt or not isinstance(rt, RealtimeService):
-                    opts = RealtimeSessionOptions(
-                        model="gpt-realtime",
-                        voice=rt_voice,
-                        vad_enabled=False,  # no input audio
-                        input_rate_hz=24000,
-                        output_rate_hz=24000,
-                        input_mime="audio/pcm",
-                        output_mime="audio/pcm",
-                        tools=initial_tools or None,
-                        tool_choice="auto",
-                    )
-                    # Ensure initial session.update carries instructions/voice
+            # Allocate or reuse a realtime session for this specific request/user
+            rt = await self._alloc_realtime_session(
+                user_id,
+                api_key=api_key,
+                rt_voice=rt_voice,
+                final_instructions=final_instructions,
+                initial_tools=initial_tools,
+                encode_in=encode_in,
+                encode_out=encode_out,
+                audio_input_format=audio_input_format,
+                audio_output_format=audio_output_format,
+            )
+            # Ensure lock is released no matter what
+            try:
+                # Tool executor
+                async def _exec(
+                    tool_name: str, args: Dict[str, Any]
+                ) -> Dict[str, Any]:
                     try:
-                        opts.instructions = final_instructions
-                        opts.voice = rt_voice
-                    except Exception:
-                        pass
-                    conv_session = OpenAIRealtimeWebSocketSession(
-                        api_key=api_key, options=opts
-                    )
-                    transcoder = (
-                        FFmpegTranscoder() if (encode_in or encode_out) else None
-                    )
-                    rt = RealtimeService(
-                        session=conv_session,
-                        options=opts,
-                        transcoder=transcoder,
-                        accept_compressed_input=encode_in,
-                        client_input_mime=_mime_from(audio_input_format),
-                        encode_output=encode_out,
-                        client_output_mime=_mime_from(audio_output_format),
-                    )
-                    self._rt_services[user_id] = rt
+                        return await self.agent_service.execute_tool(
+                            agent_name, tool_name, args or {}
+                        )
+                    except Exception as e:
+                        return {"status": "error", "message": str(e)}

-            # Tool executor
-            async def _exec(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:
+                # If possible, set on underlying session
                 try:
-                    return await self.agent_service.execute_tool(
-                        agent_name, tool_name, args or {}
-                    )
-                except Exception as e:
-                    return {"status": "error", "message": str(e)}
-
-            # If possible, set on underlying session
-            try:
-                if hasattr(rt, "_session"):
-                    getattr(rt, "_session").set_tool_executor(_exec)  # type: ignore[attr-defined]
-            except Exception:
-                pass
-
-            # Connect/configure
-            if not getattr(rt, "_connected", False):
-                await rt.start()
-                await rt.configure(
-                    voice=rt_voice,
-                    vad_enabled=bool(vad) if vad is not None else False,
-                    instructions=final_instructions,
-                    tools=initial_tools or None,
-                    tool_choice="auto",
-                )
+                    if hasattr(rt, "_session"):
+                        getattr(rt, "_session").set_tool_executor(_exec)  # type: ignore[attr-defined]
+                except Exception:
+                    pass

-            # Ensure clean input buffers for this turn
-            try:
-                await rt.clear_input()
-            except Exception:
-                pass
-            # Also reset any leftover output audio so new turn doesn't replay old chunks
-            try:
-                if hasattr(rt, "reset_output_stream"):
-                    rt.reset_output_stream()
-            except Exception:
-                pass
+                # Connect/configure
+                if not getattr(rt, "_connected", False):
+                    await rt.start()
+                    await rt.configure(
+                        voice=rt_voice,
+                        vad_enabled=bool(vad) if vad is not None else False,
+                        instructions=final_instructions,
+                        tools=initial_tools or None,
+                        tool_choice="auto",
+                    )

-            # Persist once per turn
-            turn_id = await self.realtime_begin_turn(user_id)
-            if turn_id and user_text:
+                # Ensure clean input buffers for this turn
+                try:
+                    await rt.clear_input()
+                except Exception:
+                    pass
+                # Also reset any leftover output audio so new turn doesn't replay old chunks
                 try:
-                    await self.realtime_update_user(user_id, turn_id, user_text)
+                    if hasattr(rt, "reset_output_stream"):
+                        rt.reset_output_stream()
                 except Exception:
                     pass

-            # Feed audio into WS if audio bytes provided; else use input_text
-            if is_audio_bytes:
-                bq = bytes(query)
-                logger.info(
-                    "Realtime: appending input audio to WS via FFmpeg, len=%d, fmt=%s",
-                    len(bq),
-                    audio_input_format,
-                )
-                await rt.append_audio(bq)
-                vad_enabled_value = bool(vad) if vad is not None else False
-                if not vad_enabled_value:
-                    await rt.commit_input()
-                    # Manually trigger response when VAD is disabled
-                    await rt.create_response({})
+                # Persist once per turn
+                turn_id = await self.realtime_begin_turn(user_id)
+                if turn_id and user_text:
+                    try:
+                        await self.realtime_update_user(user_id, turn_id, user_text)
+                    except Exception:
+                        pass
+
+                # Feed audio into WS if audio bytes provided; else use input_text
+                if is_audio_bytes:
+                    bq = bytes(query)
+                    logger.info(
+                        "Realtime: appending input audio to WS via FFmpeg, len=%d, fmt=%s",
+                        len(bq),
+                        audio_input_format,
+                    )
+                    await rt.append_audio(bq)
+                    vad_enabled_value = bool(vad) if vad is not None else False
+                    if not vad_enabled_value:
+                        await rt.commit_input()
+                        # Manually trigger response when VAD is disabled
+                        await rt.create_response({})
+                    else:
+                        # With server VAD enabled, the model will auto-create a response at end of speech
+                        logger.debug(
+                            "Realtime: VAD enabled — skipping manual response.create"
+                        )
                 else:
-                    # With server VAD enabled, the model will auto-create a response at end of speech
-                    logger.debug(
-                        "Realtime: VAD enabled — skipping manual response.create"
+                    # Rely on configured session voice; attach input_text only
+                    await rt.create_response(
+                        {
+                            "modalities": ["audio"],
+                            "input": [
+                                {"type": "input_text", "text": user_text or ""}
+                            ],
+                        }
                     )
-            else:
-                # Rely on configured session voice; attach input_text only
-                await rt.create_response(
-                    {
-                        "modalities": ["audio"],
-                        "input": [{"type": "input_text", "text": user_text or ""}],
-                    }
-                )

-            # Collect audio and transcripts
-            user_tr = ""
-            asst_tr = ""
+                # Collect audio and transcripts
+                user_tr = ""
+                asst_tr = ""

-            async def _drain_in_tr():
-                nonlocal user_tr
-                async for t in rt.iter_input_transcript():
-                    if t:
-                        user_tr += t
+                async def _drain_in_tr():
+                    nonlocal user_tr
+                    async for t in rt.iter_input_transcript():
+                        if t:
+                            user_tr += t

-            async def _drain_out_tr():
-                nonlocal asst_tr
-                async for t in rt.iter_output_transcript():
-                    if t:
-                        asst_tr += t
+                async def _drain_out_tr():
+                    nonlocal asst_tr
+                    async for t in rt.iter_output_transcript():
+                        if t:
+                            asst_tr += t

-            in_task = asyncio.create_task(_drain_in_tr())
-            out_task = asyncio.create_task(_drain_out_tr())
-            try:
-                async for audio_chunk in rt.iter_output_audio_encoded():
-                    yield audio_chunk
-            finally:
-                in_task.cancel()
-                out_task.cancel()
-                # If no WS input transcript was captured, fall back to HTTP STT result
-                if not user_tr:
-                    try:
-                        if "stt_task" in locals() and stt_task is not None:
-                            user_tr = await stt_task
-                    except Exception:
-                        pass
-                if turn_id:
-                    try:
-                        if user_tr:
-                            await self.realtime_update_user(
-                                user_id, turn_id, user_tr
-                            )
-                        if asst_tr:
-                            await self.realtime_update_assistant(
-                                user_id, turn_id, asst_tr
-                            )
-                    except Exception:
-                        pass
+                in_task = asyncio.create_task(_drain_in_tr())
+                out_task = asyncio.create_task(_drain_out_tr())
+                try:
+                    async for audio_chunk in rt.iter_output_audio_encoded():
+                        yield audio_chunk
+                finally:
+                    in_task.cancel()
+                    out_task.cancel()
+                    # If no WS input transcript was captured, fall back to HTTP STT result
+                    if not user_tr:
+                        try:
+                            if "stt_task" in locals() and stt_task is not None:
+                                user_tr = await stt_task
+                        except Exception:
+                            pass
+                    if turn_id:
+                        try:
+                            if user_tr:
+                                await self.realtime_update_user(
+                                    user_id, turn_id, user_tr
+                                )
+                            if asst_tr:
+                                await self.realtime_update_assistant(
+                                    user_id, turn_id, asst_tr
+                                )
+                        except Exception:
+                            pass
+                    try:
+                        await self.realtime_finalize_turn(user_id, turn_id)
+                    except Exception:
+                        pass
+                    # Clear input buffer for next turn reuse
                 try:
-                    await self.realtime_finalize_turn(user_id, turn_id)
+                    await rt.clear_input()
                 except Exception:
                     pass
-                # Clear input buffer for next turn reuse
+            finally:
+                # Always release the session for reuse by other concurrent requests/devices
                 try:
-                    await rt.clear_input()
+                    lock = getattr(rt, "_in_use_lock", None)
+                    if lock and lock.locked():
+                        lock.release()
                 except Exception:
                     pass
-            return
+            return

         # 1) Transcribe audio or accept text
         user_text = ""
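
The net effect of this hunk is a checkout/checkin discipline: a turn checks a session out of the per-user pool (the allocator returns it with `_in_use_lock` already held), runs inside `try`, and the `finally` block checks it back in, so an exception mid-stream cannot strand a session. A toy model of the pool behavior (illustrative, not package code):

```python
import asyncio

async def demo_pool() -> None:
    # Each session is modeled by its _in_use_lock: held means busy.
    # Two concurrent turns for the same user get distinct sessions;
    # the second never blocks on, or cross-talks with, the first.
    pool: list[asyncio.Lock] = []

    async def checkout() -> int:
        for i, lock in enumerate(pool):
            if not lock.locked():
                await lock.acquire()
                return i
        lock = asyncio.Lock()
        await lock.acquire()
        pool.append(lock)
        return len(pool) - 1

    first = await checkout()
    second = await checkout()
    print(first, second)  # 0 1: the busy session was skipped
    for lock in pool:
        lock.release()

asyncio.run(demo_pool())
```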
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: solana-agent
-Version: 31.2.2
+Version: 31.2.3
 Summary: AI Agents for Solana
 License: MIT
 Keywords: solana,solana ai,solana agent,ai,ai agent,ai agents
@@ -317,27 +317,39 @@ Due to the overhead of the router (API call) - realtime only supports a single a

 Realtime uses MongoDB for memory so Zep is not needed.

+This example will work using expo-audio on Android and iOS.
+
 ```python
 from solana_agent import SolanaAgent

 solana_agent = SolanaAgent(config=config)

-# Example: mobile sends MP4/AAC; server encodes output to AAC
-audio_content = await audio_file.read() # bytes
-async for audio_chunk in solana_agent.process(
-    "user123", # required
-    audio_content, # required
-    realtime=True, # optional (default False)
-    output_format="audio", # required
-    vad=True, # enable VAD (optional)
-    rt_encode_input=True, # accept compressed input (optional)
-    rt_encode_output=True, # encode output for client (optional)
-    rt_voice="marin" # the voice to use for interactions (optional)
-    audio_input_format="mp4", # client transport (optional)
-    audio_output_format="aac" # client transport (optional)
-):
-    handle_audio(audio_chunk)
-```
+audio_content = await audio_file.read()
+
+async def generate():
+    async for chunk in solana_agent.process(
+        user_id=user_id,
+        message=audio_content,
+        realtime=True,
+        rt_encode_input=True,
+        rt_encode_output=True,
+        rt_voice="marin",
+        output_format="audio",
+        audio_output_format="m4a",
+        audio_input_format="mp4",
+    ):
+        yield chunk
+
+return StreamingResponse(
+    content=generate(),
+    media_type="audio/mp4",
+    headers={
+        "Cache-Control": "no-store",
+        "Pragma": "no-cache",
+        "Content-Disposition": "inline; filename=stream.m4a",
+        "X-Accel-Buffering": "no",
+    },
+)
+```
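
The new snippet returns a `StreamingResponse`, so it is implicitly the body of a web route; the imports and the endpoint itself are elided. A fuller sketch assuming FastAPI, whose `StreamingResponse` the snippet matches (the route path and upload handling here are illustrative):

```python
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import StreamingResponse
from solana_agent import SolanaAgent

config = {}  # fill in as shown earlier in the README
app = FastAPI()
solana_agent = SolanaAgent(config=config)

@app.post("/audio/{user_id}")
async def realtime_audio(user_id: str, audio_file: UploadFile = File(...)):
    audio_content = await audio_file.read()

    async def generate():
        # Stream fMP4 fragments straight through to the client.
        async for chunk in solana_agent.process(
            user_id=user_id,
            message=audio_content,
            realtime=True,
            rt_encode_input=True,
            rt_encode_output=True,
            rt_voice="marin",
            output_format="audio",
            audio_output_format="m4a",
            audio_input_format="mp4",
        ):
            yield chunk

    return StreamingResponse(
        content=generate(),
        media_type="audio/mp4",
        headers={
            "Cache-Control": "no-store",
            "Pragma": "no-cache",
            "Content-Disposition": "inline; filename=stream.m4a",
            "X-Accel-Buffering": "no",
        },
    )
```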

 ### Image/Text Streaming

@@ -1,9 +1,9 @@
 solana_agent/__init__.py,sha256=iu0PnSAEZ6qzzHFVu7WVCQVbeCZmiZ6axUDDgWRn1j4,1070
 solana_agent/adapters/__init__.py,sha256=tiEEuuy0NF3ngc_tGEcRTt71zVI58v3dYY9RvMrF2Cg,204
-solana_agent/adapters/ffmpeg_transcoder.py,sha256=X8k4oz7CWcmhwvzXriatbE15wvQoU1jGdRdk7zp5mSA,9332
+solana_agent/adapters/ffmpeg_transcoder.py,sha256=uq5tp5b2TMvkYptwvQ9_GX4n87mClILMA8SJGNxRWRc,10881
 solana_agent/adapters/mongodb_adapter.py,sha256=Hq3S8VzfLmnPjV40z8yJXGqUamOJcX5GbOMd-1nNWO4,3175
 solana_agent/adapters/openai_adapter.py,sha256=U3x6fMRmdvfvNt7M9-RKzV835WtPxNGrV1VRBMiRHV8,26714
-solana_agent/adapters/openai_realtime_ws.py,sha256=o21Ftm7fMlYpbaJNFVveJdNTHO_DxxpYCd2Tu-xXEGo,75489
+solana_agent/adapters/openai_realtime_ws.py,sha256=2AFtfJGft5OE8oYZMRKVw89Yg-a4Wgc7bIrMYeRT49g,77252
 solana_agent/adapters/pinecone_adapter.py,sha256=XlfOpoKHwzpaU4KZnovO2TnEYbsw-3B53ZKQDtBeDgU,23847
 solana_agent/cli.py,sha256=FGvTIQmKLp6XsQdyKtuhIIfbBtMmcCCXfigNrj4bzMc,4704
 solana_agent/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -38,11 +38,11 @@ solana_agent/repositories/memory.py,sha256=cipt9eC5YApi8ozFXAV5xq7QxQJExJmVdgGjk
 solana_agent/services/__init__.py,sha256=iko0c2MlF8b_SA_nuBGFllr2E3g_JowOrOzGcnU9tkA,162
 solana_agent/services/agent.py,sha256=LWjsdmCeygwmjFoazOCVhrb0hdZHQDEQo_DFWZe57Lk,23133
 solana_agent/services/knowledge_base.py,sha256=ZvOPrSmcNDgUzz4bJIQ4LeRl9vMZiK9hOfs71IpB7Bk,32735
-solana_agent/services/query.py,sha256=aydUIjwAz2bq7NwHEmPEPUy5kW9q2YKMoNvY856N4-c,67883
+solana_agent/services/query.py,sha256=oqNFbQsz2FiSswGkt8ZlNaOR8DAz66hgWXD5kHc7c-M,71428
 solana_agent/services/realtime.py,sha256=6_44-JaKN0V4gkizaisGLPsopM5Z8xymQcCbq5V3yEc,21054
 solana_agent/services/routing.py,sha256=hsHe8HSGO_xFc0A17WIOGTidLTfLSfApQw3l2HHqkLo,7614
-solana_agent-31.2.2.dist-info/LICENSE,sha256=BnSRc-NSFuyF2s496l_4EyrwAP6YimvxWcjPiJ0J7g4,1057
-solana_agent-31.2.2.dist-info/METADATA,sha256=v8m96t0zBLOlCEUdZC-dZIsSb-GzfqqztgVS8WYV1ZI,31240
-solana_agent-31.2.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-solana_agent-31.2.2.dist-info/entry_points.txt,sha256=-AuT_mfqk8dlZ0pHuAjx1ouAWpTRjpqvEUa6YV3lmc0,53
-solana_agent-31.2.2.dist-info/RECORD,,
+solana_agent-31.2.3.dist-info/LICENSE,sha256=BnSRc-NSFuyF2s496l_4EyrwAP6YimvxWcjPiJ0J7g4,1057
+solana_agent-31.2.3.dist-info/METADATA,sha256=FiPGGHeW_QJ6tpSDreJKXFDz5Abbe0bSTZFW0V7C4gM,31168
+solana_agent-31.2.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+solana_agent-31.2.3.dist-info/entry_points.txt,sha256=-AuT_mfqk8dlZ0pHuAjx1ouAWpTRjpqvEUa6YV3lmc0,53
+solana_agent-31.2.3.dist-info/RECORD,,