solana-agent 31.1.7__py3-none-any.whl → 31.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- solana_agent/adapters/ffmpeg_transcoder.py +279 -0
- solana_agent/adapters/openai_adapter.py +5 -0
- solana_agent/adapters/openai_realtime_ws.py +1613 -0
- solana_agent/client/solana_agent.py +29 -3
- solana_agent/factories/agent_factory.py +2 -1
- solana_agent/interfaces/client/client.py +18 -1
- solana_agent/interfaces/providers/audio.py +40 -0
- solana_agent/interfaces/providers/llm.py +0 -1
- solana_agent/interfaces/providers/realtime.py +100 -0
- solana_agent/interfaces/services/agent.py +0 -1
- solana_agent/interfaces/services/query.py +12 -1
- solana_agent/repositories/memory.py +184 -19
- solana_agent/services/agent.py +0 -5
- solana_agent/services/query.py +561 -6
- solana_agent/services/realtime.py +506 -0
- {solana_agent-31.1.7.dist-info → solana_agent-31.2.0.dist-info}/METADATA +40 -8
- {solana_agent-31.1.7.dist-info → solana_agent-31.2.0.dist-info}/RECORD +20 -15
- {solana_agent-31.1.7.dist-info → solana_agent-31.2.0.dist-info}/LICENSE +0 -0
- {solana_agent-31.1.7.dist-info → solana_agent-31.2.0.dist-info}/WHEEL +0 -0
- {solana_agent-31.1.7.dist-info → solana_agent-31.2.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,279 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import asyncio
|
4
|
+
import contextlib
|
5
|
+
import logging
|
6
|
+
from typing import List, AsyncGenerator
|
7
|
+
|
8
|
+
from solana_agent.interfaces.providers.audio import AudioTranscoder
|
9
|
+
|
10
|
+
logger = logging.getLogger(__name__)
|
11
|
+
|
12
|
+
|
13
|
+
class FFmpegTranscoder(AudioTranscoder):
    """FFmpeg-based transcoder. Requires 'ffmpeg' binary in PATH.

    Audio bytes are piped through an ``ffmpeg`` subprocess (stdin -> stdout)
    both for decoding to mono PCM16LE and for encoding PCM16LE to MP3/AAC.
    """

    # Input MIME type -> ffmpeg demuxer name passed as ``-f`` ahead of ``-i``.
    # MIME types not listed here (e.g. audio/ogg, audio/webm) get no ``-f`` and
    # are left to ffmpeg's own container detection.
    # NOTE(review): an explicit ``-f`` normally forces the demuxer instead of
    # merely hinting it -- confirm raw ADTS "audio/aac" input decodes as "mp4".
    _INPUT_FORMAT_HINTS = {
        "audio/mp4": "mp4",
        "audio/aac": "mp4",
        "audio/m4a": "mp4",
        "audio/wav": "wav",
        "audio/x-wav": "wav",
    }

    @staticmethod
    def _encoder_args(output_mime: str, rate_hz: int) -> List[str]:
        """Build the ffmpeg arguments that encode mono PCM16LE from stdin.

        Args:
            output_mime: Requested output MIME type.
            rate_hz: Sample rate of the PCM input, in Hz.

        Returns:
            The ffmpeg argument list (without the leading ``ffmpeg``), or an
            empty list when ``output_mime`` has no encoder and the PCM should
            be passed through unchanged.
        """
        if output_mime in ("audio/mpeg", "audio/mp3"):
            codec, bitrate, container = "libmp3lame", "128k", "mp3"
        elif output_mime in ("audio/aac", "audio/mp4", "audio/m4a"):
            # AAC is always emitted as an ADTS stream, even for mp4/m4a MIME.
            codec, bitrate, container = "aac", "96k", "adts"
        else:
            return []
        return [
            "-hide_banner",
            "-loglevel",
            "error",
            "-f",
            "s16le",
            "-ac",
            "1",
            "-ar",
            str(rate_hz),
            "-i",
            "pipe:0",
            "-c:a",
            codec,
            "-b:a",
            bitrate,
            "-f",
            container,
            "pipe:1",
        ]

    async def _run_ffmpeg(
        self, args: List[str], data: bytes
    ) -> bytes:  # pragma: no cover
        """Run ffmpeg once, feeding ``data`` on stdin and returning stdout.

        Args:
            args: ffmpeg command-line arguments (without the leading ``ffmpeg``).
            data: Bytes written to the process's stdin.

        Returns:
            Everything ffmpeg wrote to stdout.

        Raises:
            RuntimeError: If ffmpeg exits with a non-zero return code.
        """
        logger.info("FFmpeg: starting process args=%s, input_len=%d", args, len(data))
        proc = await asyncio.create_subprocess_exec(
            "ffmpeg",
            *args,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout, stderr = await proc.communicate(input=data)
        except BaseException:
            # Cancellation or an I/O failure must not leave ffmpeg running.
            if proc.returncode is None:
                with contextlib.suppress(ProcessLookupError):
                    proc.kill()
            raise
        if proc.returncode != 0:
            err = (stderr or b"").decode("utf-8", errors="ignore")
            logger.error("FFmpeg failed (code=%s): %s", proc.returncode, err[:2000])
            raise RuntimeError("ffmpeg failed to transcode audio")
        logger.info("FFmpeg: finished successfully, output_len=%d", len(stdout or b""))
        if stderr:
            logger.debug(
                "FFmpeg stderr: %s", stderr.decode("utf-8", errors="ignore")[:2000]
            )
        return stdout

    async def to_pcm16(  # pragma: no cover
        self, audio_bytes: bytes, input_mime: str, rate_hz: int
    ) -> bytes:
        """Decode compressed audio to mono PCM16LE at rate_hz.

        Args:
            audio_bytes: Encoded input audio.
            input_mime: MIME type of the input; selects an optional ``-f``
                input format (see ``_INPUT_FORMAT_HINTS``).
            rate_hz: Output sample rate in Hz.

        Returns:
            Raw mono PCM16LE bytes.

        Raises:
            RuntimeError: If ffmpeg fails to transcode the input.
        """
        logger.info(
            "Transcode to PCM16: input_mime=%s, rate_hz=%d, input_len=%d",
            input_mime,
            rate_hz,
            len(audio_bytes),
        )
        hinted_format = self._INPUT_FORMAT_HINTS.get(input_mime)

        args = ["-hide_banner", "-loglevel", "error"]
        if hinted_format:
            # Input options must precede the ``-i`` they apply to.
            args += ["-f", hinted_format]
        args += [
            "-i",
            "pipe:0",
            "-acodec",
            "pcm_s16le",
            "-ac",
            "1",
            "-ar",
            str(rate_hz),
            "-f",
            "s16le",
            "pipe:1",
        ]
        out = await self._run_ffmpeg(args, audio_bytes)
        logger.info("Transcoded to PCM16: output_len=%d", len(out))
        return out

    async def from_pcm16(  # pragma: no cover
        self, pcm16_bytes: bytes, output_mime: str, rate_hz: int
    ) -> bytes:
        """Encode mono PCM16LE to the format selected by ``output_mime``.

        ``audio/mpeg`` / ``audio/mp3`` produce MP3; ``audio/aac`` /
        ``audio/mp4`` / ``audio/m4a`` produce AAC in an ADTS stream. Any other
        MIME type returns the PCM input unchanged.

        Args:
            pcm16_bytes: Raw mono PCM16LE audio.
            output_mime: Requested output MIME type.
            rate_hz: Sample rate of ``pcm16_bytes`` in Hz.

        Returns:
            Encoded audio bytes, or ``pcm16_bytes`` itself for passthrough.

        Raises:
            RuntimeError: If ffmpeg fails to encode the audio.
        """
        logger.info(
            "Encode from PCM16: output_mime=%s, rate_hz=%d, input_len=%d",
            output_mime,
            rate_hz,
            len(pcm16_bytes),
        )
        args = self._encoder_args(output_mime, rate_hz)
        if not args:
            # Default: passthrough
            logger.info(
                "Encode passthrough (no change), output_len=%d", len(pcm16_bytes)
            )
            return pcm16_bytes

        out = await self._run_ffmpeg(args, pcm16_bytes)
        logger.info("Encoded from PCM16 to %s: output_len=%d", output_mime, len(out))
        return out

    async def stream_from_pcm16(  # pragma: no cover
        self,
        pcm_iter: AsyncGenerator[bytes, None],
        output_mime: str,
        rate_hz: int,
        read_chunk_size: int = 4096,
    ) -> AsyncGenerator[bytes, None]:
        """Start a single continuous encoder and stream encoded audio chunks.

        - Launches one ffmpeg subprocess for the entire response.
        - Feeds PCM16LE mono bytes from pcm_iter into stdin.
        - Yields encoded bytes from stdout as they become available.

        For MIME types without an encoder the chunks of ``pcm_iter`` are
        yielded unchanged and no subprocess is started.

        Args:
            pcm_iter: Async iterator of raw mono PCM16LE chunks.
            output_mime: Requested output MIME type.
            rate_hz: Sample rate of the PCM input in Hz.
            read_chunk_size: Size of each yielded chunk; only the final chunk
                may be shorter.

        Raises:
            ValueError: If an encoder is needed and ``read_chunk_size`` is not
                positive.
        """
        args = self._encoder_args(output_mime, rate_hz)
        if not args:
            # Passthrough streaming: just yield input
            async for chunk in pcm_iter:
                yield chunk
            return

        if read_chunk_size <= 0:
            # 0 makes every read return b"" (looks like EOF); a negative size
            # would make the re-chunking loop below spin on empty slices.
            raise ValueError("read_chunk_size must be a positive integer")

        logger.info("FFmpeg(stream): starting args=%s", args)
        proc = await asyncio.create_subprocess_exec(
            "ffmpeg",
            *args,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        assert proc.stdin is not None and proc.stdout is not None
        stdin = proc.stdin
        stdout = proc.stdout

        async def _writer():
            """Pump PCM chunks into ffmpeg's stdin, then close it to signal EOF."""
            try:
                async for pcm in pcm_iter:
                    if not pcm:
                        continue
                    stdin.write(pcm)
                    # Backpressure
                    await stdin.drain()
            except asyncio.CancelledError:
                # Swallow cancellation; stdin will be closed below.
                pass
            except Exception as e:
                logger.debug("FFmpeg(stream) writer error: %s", str(e))
            finally:
                with contextlib.suppress(Exception):
                    stdin.close()

        writer_task = asyncio.create_task(_writer())

        buf = bytearray()
        completed = False
        try:
            while True:
                data = await stdout.read(read_chunk_size)
                if not data:
                    break
                buf.extend(data)
                # Emit fixed-size chunks even if read returns a larger blob
                while len(buf) >= read_chunk_size:
                    yield bytes(buf[:read_chunk_size])
                    del buf[:read_chunk_size]
            # Flush any remainder
            if buf:
                yield bytes(buf)
            completed = True
        finally:
            if not completed and proc.returncode is None:
                # The consumer stopped early, was cancelled, or a read failed.
                # Nobody will drain stdout any more, so ffmpeg could block on
                # a full pipe forever and stall the waits below; kill it.
                with contextlib.suppress(ProcessLookupError):
                    proc.kill()
            # Ensure writer is done
            if not writer_task.done():
                writer_task.cancel()
            try:
                await writer_task
            except asyncio.CancelledError:
                pass
            except Exception:
                pass
            # Drain remaining stderr and check return code
            try:
                stderr = await proc.stderr.read() if proc.stderr else b""
                code = await proc.wait()
                if code != 0:
                    err = (stderr or b"").decode("utf-8", errors="ignore")
                    if completed:
                        logger.error(
                            "FFmpeg(stream) failed (code=%s): %s", code, err[:2000]
                        )
                    else:
                        # A non-zero code is expected after the kill above.
                        logger.debug(
                            "FFmpeg(stream) stopped early (code=%s): %s",
                            code,
                            err[:2000],
                        )
            except Exception as e:
                # Best-effort cleanup: never mask the original error, but do
                # leave a trace instead of swallowing silently.
                logger.debug("FFmpeg(stream) cleanup error: %s", str(e))
|
@@ -56,6 +56,7 @@ class OpenAIAdapter(LLMProvider):
|
|
56
56
|
"""OpenAI implementation of LLMProvider with web search capabilities."""
|
57
57
|
|
58
58
|
def __init__(self, api_key: str, logfire_api_key: Optional[str] = None):
|
59
|
+
self.api_key = api_key
|
59
60
|
self.client = AsyncOpenAI(api_key=api_key)
|
60
61
|
|
61
62
|
self.logfire = False
|
@@ -76,6 +77,10 @@ class OpenAIAdapter(LLMProvider):
|
|
76
77
|
self.embedding_model = DEFAULT_EMBEDDING_MODEL
|
77
78
|
self.embedding_dimensions = DEFAULT_EMBEDDING_DIMENSIONS
|
78
79
|
|
80
|
+
def get_api_key(self) -> Optional[str]:  # pragma: no cover
    """Return the API key stored on this adapter, or None if none is set."""
    try:
        return self.api_key
    except AttributeError:
        # No api_key attribute on this instance.
        return None
|
83
|
+
|
79
84
|
async def tts(
|
80
85
|
self,
|
81
86
|
text: str,
|