solana-agent 24.1.0__tar.gz → 24.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {solana_agent-24.1.0 → solana_agent-24.1.2}/PKG-INFO +1 -12
- {solana_agent-24.1.0 → solana_agent-24.1.2}/README.md +0 -9
- {solana_agent-24.1.0 → solana_agent-24.1.2}/pyproject.toml +1 -3
- solana_agent-24.1.2/solana_agent/adapters/llm_adapter.py +158 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/client/solana_agent.py +0 -3
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/interfaces/client/client.py +0 -1
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/interfaces/providers/llm.py +0 -10
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/interfaces/services/agent.py +0 -1
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/interfaces/services/query.py +0 -1
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/services/agent.py +2 -17
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/services/query.py +0 -4
- solana_agent-24.1.0/solana_agent/adapters/llm_adapter.py +0 -332
- {solana_agent-24.1.0 → solana_agent-24.1.2}/LICENSE +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/__init__.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/adapters/__init__.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/adapters/mongodb_adapter.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/client/__init__.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/domains/__init__.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/domains/agent.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/domains/routing.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/factories/__init__.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/factories/agent_factory.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/interfaces/__init__.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/interfaces/plugins/plugins.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/interfaces/providers/data_storage.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/interfaces/providers/memory.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/interfaces/services/routing.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/plugins/__init__.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/plugins/manager.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/plugins/registry.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/plugins/tools/__init__.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/plugins/tools/auto_tool.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/repositories/__init__.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/repositories/memory.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/services/__init__.py +0 -0
- {solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/services/routing.py +0 -0
{solana_agent-24.1.0 → solana_agent-24.1.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: solana-agent
-Version: 24.1.0
+Version: 24.1.2
 Summary: Agentic IQ
 License: MIT
 Keywords: ai,openai,ai agents,agi
@@ -14,11 +14,9 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Dist: httpx (>=0.28.1,<0.29.0)
 Requires-Dist: openai (>=1.71.0,<2.0.0)
 Requires-Dist: pydantic (>=2.11.2,<3.0.0)
 Requires-Dist: pymongo (>=4.11.3,<5.0.0)
-Requires-Dist: websockets (>=15.0.1,<16.0.0)
 Requires-Dist: zep-cloud (>=2.9.0,<3.0.0)
 Project-URL: Documentation, https://docs.solana-agent.com
 Project-URL: Repository, https://github.com/truemagic-coder/solana-agent
@@ -375,15 +373,6 @@ async for response in solana_agent.process("user123", audio_content, output_form
     print(response, end="")
 ```
 
-### Real-Time Audio Transcription
-
-It is possible to disable real-time audio transcription responses to save on costs.
-
-```python
-async for response in solana_agent.process("user123", "What is the latest news on Canada?", audio_transcription_real_time=False):
-    print(response, end="")
-```
-
 ## Tools
 
 Tools can be used from plugins like Solana Agent Kit (sakit) or via inline tools. Tools available via plugins integrate automatically with Solana Agent.
{solana_agent-24.1.0 → solana_agent-24.1.2}/README.md

@@ -349,15 +349,6 @@ async for response in solana_agent.process("user123", audio_content, output_form
     print(response, end="")
 ```
 
-### Real-Time Audio Transcription
-
-It is possible to disable real-time audio transcription responses to save on costs.
-
-```python
-async for response in solana_agent.process("user123", "What is the latest news on Canada?", audio_transcription_real_time=False):
-    print(response, end="")
-```
-
 ## Tools
 
 Tools can be used from plugins like Solana Agent Kit (sakit) or via inline tools. Tools available via plugins integrate automatically with Solana Agent.
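With the "Real-Time Audio Transcription" section removed, audio input in 24.1.2 always goes through standard (non-realtime) transcription. A minimal sketch of the remaining audio call pattern, assuming a configured `solana_agent` client and an `audio_content` bytes object as in the surrounding README examples (the parameter values shown are illustrative, not from the diff):

```python
# Illustrative sketch only: audio in, audio out, no realtime-transcription flag.
# `solana_agent` and `audio_content` are assumed from the README setup above.
async for response in solana_agent.process(
    "user123",
    audio_content,              # audio bytes; transcribed via standard transcription
    output_format="audio",      # or "text" to stream text back
    audio_input_format="mp4",   # default per the client signature in this release
):
    print(response, end="")
```
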
{solana_agent-24.1.0 → solana_agent-24.1.2}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "solana-agent"
-version = "24.1.0"
+version = "24.1.2"
 description = "Agentic IQ"
 authors = ["Bevan Hunt <bevan@bevanhunt.com>"]
 license = "MIT"
@@ -27,8 +27,6 @@ openai = "^1.71.0"
 pydantic = "^2.11.2"
 pymongo = "^4.11.3"
 zep-cloud = "^2.9.0"
-httpx = "^0.28.1"
-websockets = "^15.0.1"
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.3.5"
solana_agent-24.1.2/solana_agent/adapters/llm_adapter.py

@@ -0,0 +1,158 @@
+"""
+LLM provider adapters for the Solana Agent system.
+
+These adapters implement the LLMProvider interface for different LLM services.
+"""
+from typing import AsyncGenerator, Literal, Type, TypeVar
+
+from openai import AsyncOpenAI
+from pydantic import BaseModel
+
+from solana_agent.interfaces.providers.llm import LLMProvider
+
+T = TypeVar('T', bound=BaseModel)
+
+
+class OpenAIAdapter(LLMProvider):
+    """OpenAI implementation of LLMProvider with web search capabilities."""
+
+    def __init__(self, api_key: str):
+        self.client = AsyncOpenAI(api_key=api_key)
+        self.parse_model = "gpt-4o-mini"
+        self.text_model = "gpt-4o-mini"
+        self.transcription_model = "gpt-4o-mini-transcribe"
+        self.tts_model = "gpt-4o-mini-tts"
+
+    async def tts(
+        self,
+        text: str,
+        instructions: str = "You speak in a friendly and helpful manner.",
+        voice: Literal["alloy", "ash", "ballad", "coral", "echo",
+                       "fable", "onyx", "nova", "sage", "shimmer"] = "nova",
+        response_format: Literal['mp3', 'opus',
+                                 'aac', 'flac', 'wav', 'pcm'] = "aac",
+    ) -> AsyncGenerator[bytes, None]:  # pragma: no cover
+        """Stream text-to-speech audio from OpenAI models.
+
+        Args:
+            text: Text to convert to speech
+            instructions: Optional instructions for speech generation
+            voice: Voice to use for synthesis
+            response_format: Audio format
+
+        Yields:
+            Audio bytes as they become available
+        """
+        try:
+            async with self.client.audio.speech.with_streaming_response.create(
+                model=self.tts_model,
+                voice=voice,
+                instructions=instructions,
+                input=text,
+                response_format=response_format
+            ) as stream:
+                # Stream the bytes in 16KB chunks
+                async for chunk in stream.iter_bytes(chunk_size=1024 * 16):
+                    yield chunk
+
+        except Exception as e:
+            print(f"Error in text_to_speech: {str(e)}")
+            import traceback
+            print(traceback.format_exc())
+            yield b""  # Return empty bytes on error
+
+        except Exception as e:
+            print(f"Error in text_to_speech: {str(e)}")
+            import traceback
+            print(traceback.format_exc())
+            yield b""  # Return empty bytes on error
+
+    async def transcribe_audio(
+        self,
+        audio_bytes: bytes,
+        input_format: Literal[
+            "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
+        ] = "mp4",
+    ) -> AsyncGenerator[str, None]:  # pragma: no cover
+        """Stream transcription of an audio file.
+
+        Args:
+            audio_bytes: Audio file bytes
+            input_format: Format of the input audio file
+
+        Yields:
+            Transcript text chunks as they become available
+        """
+        try:
+            async with self.client.audio.transcriptions.with_streaming_response.create(
+                model=self.transcription_model,
+                file=(f"file.{input_format}", audio_bytes),
+                response_format="text",
+            ) as stream:
+                # Stream the text in 16KB chunks
+                async for chunk in stream.iter_text(chunk_size=1024 * 16):
+                    yield chunk
+
+        except Exception as e:
+            print(f"Error in transcribe_audio: {str(e)}")
+            import traceback
+            print(traceback.format_exc())
+            yield f"I apologize, but I encountered an error transcribing the audio: {str(e)}"
+
+    async def generate_text(
+        self,
+        prompt: str,
+        system_prompt: str = "",
+    ) -> AsyncGenerator[str, None]:  # pragma: no cover
+        """Generate text from OpenAI models."""
+        messages = []
+
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+
+        messages.append({"role": "user", "content": prompt})
+
+        # Prepare request parameters
+        request_params = {
+            "messages": messages,
+            "stream": True,
+            "model": self.text_model,
+        }
+        try:
+            response = await self.client.chat.completions.create(**request_params)
+
+            async for chunk in response:
+                if chunk.choices:
+                    if chunk.choices[0].delta.content:
+                        text = chunk.choices[0].delta.content
+                        yield text
+
+        except Exception as e:
+            print(f"Error in generate_text: {str(e)}")
+            import traceback
+            print(traceback.format_exc())
+            yield f"I apologize, but I encountered an error: {str(e)}"
+
+    async def parse_structured_output(
+        self,
+        prompt: str,
+        system_prompt: str,
+        model_class: Type[T],
+    ) -> T:  # pragma: no cover
+        """Generate structured output using Pydantic model parsing."""
+        messages = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+
+        messages.append({"role": "user", "content": prompt})
+
+        try:
+            # First try the beta parsing API
+            completion = await self.client.beta.chat.completions.parse(
+                model=self.parse_model,
+                messages=messages,
+                response_format=model_class,
+            )
+            return completion.choices[0].message.parsed
+        except Exception as e:
+            print(f"Error with beta.parse method: {e}")
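For orientation (not part of the published diff), a minimal usage sketch of the rewritten adapter; it assumes a valid OpenAI API key and drives the streaming `generate_text` path shown above:

```python
import asyncio

from solana_agent.adapters.llm_adapter import OpenAIAdapter


async def main():
    adapter = OpenAIAdapter(api_key="sk-...")  # assumed: a real OpenAI API key
    # generate_text streams completion deltas from gpt-4o-mini as they arrive.
    async for chunk in adapter.generate_text(
        prompt="Summarize what an LLM adapter does in one sentence.",
        system_prompt="You are a concise assistant.",
    ):
        print(chunk, end="", flush=True)


asyncio.run(main())
```
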
{solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/client/solana_agent.py

@@ -55,7 +55,6 @@ class SolanaAgent(SolanaAgentInterface):
         audio_input_format: Literal[
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
-        audio_transcription_real_time: bool = True,
         router: Optional[RoutingInterface] = None,
     ) -> AsyncGenerator[Union[str, bytes], None]:  # pragma: no cover
         """Process a user message and return the response stream.
@@ -69,7 +68,6 @@ class SolanaAgent(SolanaAgentInterface):
             audio_instructions: Audio voice instructions
             audio_output_format: Audio output format
             audio_input_format: Audio input format
-            audio_transcription_real_time: Flag for real-time audio transcription
             router: Optional routing service for processing
 
         Returns:
@@ -85,7 +83,6 @@ class SolanaAgent(SolanaAgentInterface):
             audio_input_format=audio_input_format,
             prompt=prompt,
             router=router,
-            audio_transcription_real_time=audio_transcription_real_time,
         ):
             yield chunk
 
{solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/interfaces/client/client.py

@@ -24,7 +24,6 @@ class SolanaAgent(ABC):
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
         router: Optional[RoutingInterface] = None,
-        audio_transcription_real_time: bool = True,
     ) -> AsyncGenerator[Union[str, bytes], None]:
         """Process a user message and return the response stream."""
         pass
{solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/interfaces/providers/llm.py

@@ -49,13 +49,3 @@ class LLMProvider(ABC):
     ) -> AsyncGenerator[str, None]:
         """Transcribe audio from the language model."""
         pass
-
-    @abstractmethod
-    async def realtime_audio_transcription(
-        self,
-        audio_generator: AsyncGenerator[bytes, None],
-        transcription_config: Optional[Dict[str, Any]] = None,
-        on_event: Optional[Callable[[Dict[str, Any]], Any]] = None,
-    ) -> AsyncGenerator[str, None]:
-        """Stream real-time audio transcription from the language model."""
-        pass
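After this removal the abstract interface ends at `transcribe_audio`. A sketch of the trimmed `LLMProvider` contract as inferred from this hunk and the adapter above; the full file is not shown in this diff, so signatures are abbreviated and everything beyond the visible hunks is an assumption:

```python
# Inferred sketch only: method names, return types, and docstrings are taken
# from the hunks in this diff and from llm_adapter.py; the rest is assumed.
from abc import ABC, abstractmethod
from typing import AsyncGenerator, Type, TypeVar

from pydantic import BaseModel

T = TypeVar("T", bound=BaseModel)


class LLMProvider(ABC):
    """What an LLM backend must implement after 24.1.2."""

    @abstractmethod
    async def generate_text(
        self, prompt: str, system_prompt: str = ""
    ) -> AsyncGenerator[str, None]:
        """Stream text generation from the language model."""
        pass

    @abstractmethod
    async def parse_structured_output(
        self, prompt: str, system_prompt: str, model_class: Type[T]
    ) -> T:
        """Parse a response into a Pydantic model."""
        pass

    @abstractmethod
    async def tts(self, text: str) -> AsyncGenerator[bytes, None]:
        """Stream text-to-speech audio from the language model."""
        pass

    @abstractmethod
    async def transcribe_audio(self, audio_bytes: bytes) -> AsyncGenerator[str, None]:
        """Transcribe audio from the language model."""
        pass
```
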
{solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/interfaces/services/agent.py

@@ -34,7 +34,6 @@ class AgentService(ABC):
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
         prompt: Optional[str] = None,
-        audio_transcription_real_time: bool = True,
     ) -> AsyncGenerator[Union[str, bytes], None]:
         """Generate a response from an agent."""
         pass
{solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/interfaces/services/query.py

@@ -23,7 +23,6 @@ class QueryService(ABC):
         ] = "mp4",
         prompt: Optional[str] = None,
         router: Optional[RoutingInterface] = None,
-        audio_transcription_real_time: bool = True,
     ) -> AsyncGenerator[Union[str, bytes], None]:
         """Process the user request and generate a response."""
         pass
{solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/services/agent.py

@@ -176,7 +176,6 @@ class AgentService(AgentServiceInterface):
         audio_input_format: Literal[
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
-        audio_transcription_real_time: bool = True,
         prompt: Optional[str] = None,
     ) -> AsyncGenerator[Union[str, bytes], None]:  # pragma: no cover
         """Generate a response with support for text/audio input/output."""
@@ -195,22 +194,8 @@ class AgentService(AgentServiceInterface):
         # Handle audio input if provided - KEEP REAL-TIME AUDIO TRANSCRIPTION
         query_text = ""
         if not isinstance(query, str):
-
-            if audio_transcription_real_time:
-                print("Using realtime audio transcription")
-                async for transcript in self.llm_provider.realtime_audio_transcription(
-                    audio_generator=self._bytes_to_generator(query),
-                    transcription_config={
-                        "input_audio_format": audio_input_format}
-                ):
-                    query_text += transcript
-            else:
-                # Fall back to standard transcription
-                print("Using standard audio transcription")
-                async for transcript in self.llm_provider.transcribe_audio(query, input_format=audio_input_format):
-                    query_text += transcript
-
-            print(f"Transcribed query: {query_text}")
+            async for transcript in self.llm_provider.transcribe_audio(query, input_format=audio_input_format):
+                query_text += transcript
         else:
             query_text = query
 
{solana_agent-24.1.0 → solana_agent-24.1.2}/solana_agent/services/query.py

@@ -47,7 +47,6 @@ class QueryService(QueryServiceInterface):
         audio_input_format: Literal[
             "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
         ] = "mp4",
-        audio_transcription_real_time: bool = True,
         prompt: Optional[str] = None,
         router: Optional[RoutingServiceInterface] = None,
     ) -> AsyncGenerator[Union[str, bytes], None]:  # pragma: no cover
@@ -61,7 +60,6 @@ class QueryService(QueryServiceInterface):
             audio_instructions: Audio voice instructions
             audio_output_format: Audio output format
             audio_input_format: Audio input format
-            audio_transcription_real_time: Flag for real-time audio transcription
             prompt: Optional prompt for the agent
             router: Optional routing service for processing
 
@@ -122,7 +120,6 @@ class QueryService(QueryServiceInterface):
                 audio_output_format=audio_output_format,
                 audio_instructions=audio_instructions,
                 prompt=prompt,
-                audio_transcription_real_time=audio_transcription_real_time,
             ):
                 yield audio_chunk
 
@@ -141,7 +138,6 @@ class QueryService(QueryServiceInterface):
                 memory_context=memory_context,
                 output_format="text",
                 prompt=prompt,
-                audio_transcription_real_time=audio_transcription_real_time,
             ):
                 yield chunk
                 full_text_response += chunk
solana_agent-24.1.0/solana_agent/adapters/llm_adapter.py

@@ -1,332 +0,0 @@
-"""
-LLM provider adapters for the Solana Agent system.
-
-These adapters implement the LLMProvider interface for different LLM services.
-"""
-import asyncio
-import json
-from typing import Any, AsyncGenerator, Callable, Dict, Literal, Optional, Type, TypeVar
-
-import httpx
-from openai import AsyncOpenAI
-from pydantic import BaseModel
-import websockets
-
-from solana_agent.interfaces.providers.llm import LLMProvider
-
-T = TypeVar('T', bound=BaseModel)
-
-
-class OpenAIAdapter(LLMProvider):
-    """OpenAI implementation of LLMProvider with web search capabilities."""
-
-    def __init__(self, api_key: str):
-        self.client = AsyncOpenAI(api_key=api_key)
-        self.parse_model = "gpt-4o-mini"
-        self.text_model = "gpt-4o-mini"
-        self.transcription_model = "gpt-4o-mini-transcribe"
-        self.tts_model = "gpt-4o-mini-tts"
-
-    async def tts(
-        self,
-        text: str,
-        instructions: str = "You speak in a friendly and helpful manner.",
-        voice: Literal["alloy", "ash", "ballad", "coral", "echo",
-                       "fable", "onyx", "nova", "sage", "shimmer"] = "nova",
-        response_format: Literal['mp3', 'opus',
-                                 'aac', 'flac', 'wav', 'pcm'] = "aac",
-    ) -> AsyncGenerator[bytes, None]:  # pragma: no cover
-        """Stream text-to-speech audio from OpenAI models.
-
-        Args:
-            text: Text to convert to speech
-            instructions: Optional instructions for speech generation
-            voice: Voice to use for synthesis
-            response_format: Audio format
-
-        Yields:
-            Audio bytes as they become available
-        """
-        try:
-            async with self.client.audio.speech.with_streaming_response.create(
-                model=self.tts_model,
-                voice=voice,
-                instructions=instructions,
-                input=text,
-                response_format=response_format
-            ) as stream:
-                # Stream the bytes in 16KB chunks
-                async for chunk in stream.iter_bytes(chunk_size=1024 * 16):
-                    yield chunk
-
-        except Exception as e:
-            print(f"Error in text_to_speech: {str(e)}")
-            import traceback
-            print(traceback.format_exc())
-            yield b""  # Return empty bytes on error
-
-        except Exception as e:
-            print(f"Error in text_to_speech: {str(e)}")
-            import traceback
-            print(traceback.format_exc())
-            yield b""  # Return empty bytes on error
-
-    async def transcribe_audio(
-        self,
-        audio_bytes: bytes,
-        input_format: Literal[
-            "flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "wav", "webm"
-        ] = "mp4",
-    ) -> AsyncGenerator[str, None]:  # pragma: no cover
-        """Stream transcription of an audio file.
-
-        Args:
-            audio_bytes: Audio file bytes
-            input_format: Format of the input audio file
-
-        Yields:
-            Transcript text chunks as they become available
-        """
-        try:
-            async with self.client.audio.transcriptions.with_streaming_response.create(
-                model=self.transcription_model,
-                file=(f"file.{input_format}", audio_bytes),
-                response_format="text",
-            ) as stream:
-                # Stream the text in 16KB chunks
-                async for chunk in stream.iter_text(chunk_size=1024 * 16):
-                    yield chunk
-
-        except Exception as e:
-            print(f"Error in transcribe_audio: {str(e)}")
-            import traceback
-            print(traceback.format_exc())
-            yield f"I apologize, but I encountered an error transcribing the audio: {str(e)}"
-
-    async def generate_text(
-        self,
-        prompt: str,
-        system_prompt: str = "",
-    ) -> AsyncGenerator[str, None]:  # pragma: no cover
-        """Generate text from OpenAI models."""
-        messages = []
-
-        if system_prompt:
-            messages.append({"role": "system", "content": system_prompt})
-
-        messages.append({"role": "user", "content": prompt})
-
-        # Prepare request parameters
-        request_params = {
-            "messages": messages,
-            "stream": True,
-            "model": self.text_model,
-        }
-        try:
-            response = await self.client.chat.completions.create(**request_params)
-
-            async for chunk in response:
-                if chunk.choices:
-                    if chunk.choices[0].delta.content:
-                        text = chunk.choices[0].delta.content
-                        yield text
-
-        except Exception as e:
-            print(f"Error in generate_text: {str(e)}")
-            import traceback
-            print(traceback.format_exc())
-            yield f"I apologize, but I encountered an error: {str(e)}"
-
-    async def parse_structured_output(
-        self,
-        prompt: str,
-        system_prompt: str,
-        model_class: Type[T],
-    ) -> T:  # pragma: no cover
-        """Generate structured output using Pydantic model parsing."""
-        messages = []
-        if system_prompt:
-            messages.append({"role": "system", "content": system_prompt})
-
-        messages.append({"role": "user", "content": prompt})
-
-        try:
-            # First try the beta parsing API
-            completion = await self.client.beta.chat.completions.parse(
-                model=self.parse_model,
-                messages=messages,
-                response_format=model_class,
-            )
-            return completion.choices[0].message.parsed
-        except Exception as e:
-            print(f"Error with beta.parse method: {e}")
-
-    async def create_realtime_session(
-        self,
-        model: str = "gpt-4o-mini-realtime-preview",
-        modalities: list = ["audio", "text"],
-        instructions: str = "You are a helpful assistant.",
-        voice: str = "alloy",
-        input_audio_format: str = "pcm16",
-        output_audio_format: str = "pcm16",
-    ) -> Dict[str, Any]:  # pragma: no cover
-        """Create a realtime session token for WebSocket communication."""
-        try:
-            # Get the API key from the AsyncOpenAI client
-            api_key = self.client.api_key
-
-            # Create an async HTTP client
-            async with httpx.AsyncClient() as client:
-                response = await client.post(
-                    "https://api.openai.com/v1/realtime/sessions",
-                    json={
-                        "model": model,
-                        "modalities": modalities,
-                        "instructions": instructions,
-                        "voice": voice,
-                        "input_audio_format": input_audio_format,
-                        "output_audio_format": output_audio_format,
-                    },
-                    headers={
-                        "Authorization": f"Bearer {api_key}",
-                        "Content-Type": "application/json",
-                        "OpenAI-Beta": "realtime=v1"
-                    }
-                )
-
-                if response.status_code == 200:
-                    return response.json()
-                else:
-                    raise Exception(
-                        f"Failed to create realtime session: {response.text}")
-        except Exception as e:
-            print(f"Error creating realtime session: {str(e)}")
-            raise
-
-    async def realtime_audio_transcription(
-        self,
-        audio_generator: AsyncGenerator[bytes, None],
-        transcription_config: Optional[Dict[str, Any]] = None,
-        on_event: Optional[Callable[[Dict[str, Any]], Any]] = None,
-    ) -> AsyncGenerator[str, None]:  # pragma: no cover
-        """Stream real-time audio transcription using the Realtime API.
-
-        Args:
-            audio_generator: Async generator that yields audio chunks
-            transcription_config: Optional custom configuration for transcription
-            on_event: Optional callback function for handling raw events
-
-        Yields:
-            Transcription text as it becomes available
-        """
-        # Create default transcription config if none provided
-        if transcription_config is None:
-            transcription_config = {
-                "input_audio_format": "pcm16",
-                "input_audio_transcription": {
-                    "model": "gpt-4o-mini-transcribe"
-                },
-                "turn_detection": {
-                    "type": "server_vad",
-                    "threshold": 0.5,
-                    "prefix_padding_ms": 300,
-                    "silence_duration_ms": 200
-                }
-            }
-
-        try:
-            # Get the API key from the AsyncOpenAI client
-            api_key = self.client.api_key
-
-            # Create transcription session
-            async with httpx.AsyncClient() as client:
-                response = await client.post(
-                    "https://api.openai.com/v1/realtime/transcription_sessions",
-                    json=transcription_config,
-                    headers={
-                        "Authorization": f"Bearer {api_key}",
-                        "Content-Type": "application/json",
-                        "OpenAI-Beta": "realtime=v1"
-                    }
-                )
-
-                if response.status_code != 200:
-                    raise Exception(
-                        f"Failed to create transcription session: {response.text}")
-
-                session = response.json()
-                client_secret = session["client_secret"]["value"]
-
-            # Connect to WebSocket with proper headers as dictionary
-            url = "wss://api.openai.com/v1/realtime?model=gpt-4o-mini-transcribe"
-            headers = {
-                "Authorization": f"Bearer {client_secret}",
-                "OpenAI-Beta": "realtime=v1"
-            }
-
-            async with websockets.connect(url, additional_headers=headers) as websocket:
-                # Handle WebSocket communication in the background
-                audio_task = None
-
-                async def send_audio():
-                    try:
-                        async for audio_chunk in audio_generator:
-                            # Base64 encode the audio
-                            import base64
-                            encoded_audio = base64.b64encode(
-                                audio_chunk).decode('utf-8')
-
-                            # Send audio chunk
-                            await websocket.send(json.dumps({
-                                "type": "input_audio_buffer.append",
-                                "audio": encoded_audio
-                            }))
-
-                            # Small delay to prevent flooding
-                            await asyncio.sleep(0.05)
-
-                        # Commit the audio buffer when done
-                        await websocket.send(json.dumps({
-                            "type": "input_audio_buffer.commit"
-                        }))
-                    except Exception as e:
-                        print(f"Error sending audio: {str(e)}")
-
-                # Start sending audio in the background
-                audio_task = asyncio.create_task(send_audio())
-
-                # Process transcription events
-                try:
-                    while True:
-                        message = await websocket.recv()
-                        event = json.loads(message)
-
-                        if on_event:
-                            # Check if on_event is a coroutine function and await it if needed
-                            if asyncio.iscoroutinefunction(on_event):
-                                await on_event(event)
-                            else:
-                                on_event(event)
-
-                        # Extract transcription deltas
-                        if event["type"] == "conversation.item.input_audio_transcription.delta":
-                            yield event["delta"]
-
-                        # Also handle completed transcriptions
-                        elif event["type"] == "conversation.item.input_audio_transcription.completed":
-                            yield event["transcript"]
-                            break
-                finally:
-                    # Clean up audio task if it's still running
-                    if audio_task and not audio_task.done():
-                        audio_task.cancel()
-                        try:
-                            await audio_task
-                        except asyncio.CancelledError:
-                            pass
-
-        except Exception as e:
-            print(f"Error in realtime audio transcription: {str(e)}")
-            import traceback
-            print(traceback.format_exc())
-            yield f"I apologize, but I encountered an error transcribing the audio: {str(e)}"