webscout 8.2.2__py3-none-any.whl → 8.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of webscout might be problematic.
- webscout/AIbase.py +144 -7
- webscout/Provider/Cloudflare.py +5 -0
- webscout/Provider/OPENAI/textpollinations.py +90 -44
- webscout/Provider/OPENAI/toolbaz.py +4 -4
- webscout/Provider/TTS/__init__.py +1 -0
- webscout/Provider/TTS/base.py +159 -0
- webscout/Provider/TTS/deepgram.py +16 -16
- webscout/Provider/TTS/elevenlabs.py +5 -5
- webscout/Provider/TTS/gesserit.py +6 -5
- webscout/Provider/TTS/murfai.py +7 -7
- webscout/Provider/TTS/parler.py +6 -6
- webscout/Provider/TTS/speechma.py +22 -22
- webscout/Provider/TTS/streamElements.py +7 -7
- webscout/Provider/TextPollinationsAI.py +56 -41
- webscout/Provider/toolbaz.py +4 -4
- webscout/version.py +1 -1
- {webscout-8.2.2.dist-info → webscout-8.2.3.dist-info}/METADATA +1 -1
- {webscout-8.2.2.dist-info → webscout-8.2.3.dist-info}/RECORD +22 -21
- {webscout-8.2.2.dist-info → webscout-8.2.3.dist-info}/LICENSE.md +0 -0
- {webscout-8.2.2.dist-info → webscout-8.2.3.dist-info}/WHEEL +0 -0
- {webscout-8.2.2.dist-info → webscout-8.2.3.dist-info}/entry_points.txt +0 -0
- {webscout-8.2.2.dist-info → webscout-8.2.3.dist-info}/top_level.txt +0 -0
webscout/AIbase.py
CHANGED
@@ -68,15 +68,152 @@ class AsyncProvider(ABC):
 class TTSProvider(ABC):

     @abstractmethod
-    def tts(self, text: str) ->
+    def tts(self, text: str, voice: str = None, verbose: bool = False) -> str:
+        """Convert text to speech and save to a temporary file.
+
+        Args:
+            text (str): The text to convert to speech
+            voice (str, optional): The voice to use. Defaults to provider's default voice.
+            verbose (bool, optional): Whether to print debug information. Defaults to False.
+
+        Returns:
+            str: Path to the generated audio file
+        """
         raise NotImplementedError("Method needs to be implemented in subclass")

+    def save_audio(self, audio_file: str, destination: str = None, verbose: bool = False) -> str:
+        """Save audio to a specific destination.
+
+        Args:
+            audio_file (str): Path to the source audio file
+            destination (str, optional): Destination path. Defaults to current directory with timestamp.
+            verbose (bool, optional): Whether to print debug information. Defaults to False.
+
+        Returns:
+            str: Path to the saved audio file
+        """
+        import shutil
+        import os
+        from pathlib import Path
+        import time
+
+        source_path = Path(audio_file)
+
+        if not source_path.exists():
+            raise FileNotFoundError(f"Audio file not found: {audio_file}")
+
+        if destination is None:
+            # Create a default destination with timestamp in current directory
+            timestamp = int(time.time())
+            destination = os.path.join(os.getcwd(), f"tts_audio_{timestamp}{source_path.suffix}")
+
+        # Ensure the destination directory exists
+        os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)
+
+        # Copy the file
+        shutil.copy2(source_path, destination)
+
+        if verbose:
+            print(f"[debug] Audio saved to {destination}")
+
+        return destination
+
+    def stream_audio(self, text: str, voice: str = None, chunk_size: int = 1024, verbose: bool = False) -> Generator[bytes, None, None]:
+        """Stream audio in chunks.
+
+        Args:
+            text (str): The text to convert to speech
+            voice (str, optional): The voice to use. Defaults to provider's default voice.
+            chunk_size (int, optional): Size of audio chunks to yield. Defaults to 1024.
+            verbose (bool, optional): Whether to print debug information. Defaults to False.
+
+        Yields:
+            Generator[bytes, None, None]: Audio data chunks
+        """
+        # Generate the audio file
+        audio_file = self.tts(text, voice=voice, verbose=verbose)
+
+        # Stream the file in chunks
+        with open(audio_file, 'rb') as f:
+            while chunk := f.read(chunk_size):
+                yield chunk
+
 class AsyncTTSProvider(ABC):

     @abstractmethod
-    async def tts(self, text: str) ->
+    async def tts(self, text: str, voice: str = None, verbose: bool = False) -> str:
+        """Convert text to speech and save to a temporary file asynchronously.
+
+        Args:
+            text (str): The text to convert to speech
+            voice (str, optional): The voice to use. Defaults to provider's default voice.
+            verbose (bool, optional): Whether to print debug information. Defaults to False.
+
+        Returns:
+            str: Path to the generated audio file
+        """
         raise NotImplementedError("Method needs to be implemented in subclass")

+    async def save_audio(self, audio_file: str, destination: str = None, verbose: bool = False) -> str:
+        """Save audio to a specific destination asynchronously.
+
+        Args:
+            audio_file (str): Path to the source audio file
+            destination (str, optional): Destination path. Defaults to current directory with timestamp.
+            verbose (bool, optional): Whether to print debug information. Defaults to False.
+
+        Returns:
+            str: Path to the saved audio file
+        """
+        import shutil
+        import os
+        from pathlib import Path
+        import time
+        import asyncio
+
+        source_path = Path(audio_file)
+
+        if not source_path.exists():
+            raise FileNotFoundError(f"Audio file not found: {audio_file}")
+
+        if destination is None:
+            # Create a default destination with timestamp in current directory
+            timestamp = int(time.time())
+            destination = os.path.join(os.getcwd(), f"tts_audio_{timestamp}{source_path.suffix}")
+
+        # Ensure the destination directory exists
+        os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)
+
+        # Copy the file using asyncio to avoid blocking
+        await asyncio.to_thread(shutil.copy2, source_path, destination)
+
+        if verbose:
+            print(f"[debug] Audio saved to {destination}")
+
+        return destination
+
+    async def stream_audio(self, text: str, voice: str = None, chunk_size: int = 1024, verbose: bool = False) -> AsyncGenerator[bytes, None]:
+        """Stream audio in chunks asynchronously.
+
+        Args:
+            text (str): The text to convert to speech
+            voice (str, optional): The voice to use. Defaults to provider's default voice.
+            chunk_size (int, optional): Size of audio chunks to yield. Defaults to 1024.
+            verbose (bool, optional): Whether to print debug information. Defaults to False.
+
+        Yields:
+            AsyncGenerator[bytes, None]: Audio data chunks
+        """
+        import aiofiles
+
+        # Generate the audio file
+        audio_file = await self.tts(text, voice=voice, verbose=verbose)
+
+        # Stream the file in chunks
+        async with aiofiles.open(audio_file, 'rb') as f:
+            while chunk := await f.read(chunk_size):
+                yield chunk
+
 class ImageProvider(ABC):

     @abstractmethod
@@ -85,9 +222,9 @@ class ImageProvider(ABC):

     @abstractmethod
     def save(
-        self,
-        response: List[bytes],
-        name: Optional[str] = None,
+        self,
+        response: List[bytes],
+        name: Optional[str] = None,
         dir: Optional[Union[str, Path]] = None
     ) -> List[str]:
         raise NotImplementedError("Method needs to be implemented in subclass")
@@ -96,8 +233,8 @@ class AsyncImageProvider(ABC):

     @abstractmethod
     async def generate(
-        self,
-        prompt: str,
+        self,
+        prompt: str,
         amount: int = 1
     ) -> Union[AsyncGenerator[bytes, None], List[bytes]]:
         raise NotImplementedError("Method needs to be implemented in subclass")
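With this change, `TTSProvider` and `AsyncTTSProvider` are no longer pure interfaces: a subclass implements only `tts`, and inherits working `save_audio` and `stream_audio` helpers. Below is a minimal sketch (not part of the package) of how a concrete provider could plug in; the `EchoTTS` class and its placeholder audio bytes are invented for illustration.

```python
# A minimal sketch (not from the package): implement tts() and inherit
# save_audio() / stream_audio() from the updated TTSProvider base.
# EchoTTS and its placeholder bytes are invented for illustration.
import tempfile

from webscout.AIbase import TTSProvider

class EchoTTS(TTSProvider):
    def tts(self, text: str, voice: str = None, verbose: bool = False) -> str:
        # A real provider would call its HTTP API here; we just write
        # placeholder bytes to a temp file and return the path.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            f.write(b"FAKE-AUDIO:" + text.encode())
            return f.name

provider = EchoTTS()
audio_path = provider.tts("Hello, world!")
saved = provider.save_audio(audio_path, "hello.mp3", verbose=True)

for chunk in provider.stream_audio("Hello again", chunk_size=512):
    pass  # feed chunks to a player, websocket, etc.
```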
webscout/Provider/Cloudflare.py
CHANGED
@@ -23,6 +23,7 @@ class Cloudflare(Provider):
         "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b",
         "@cf/thebloke/discolm-german-7b-v1-awq",
         "@cf/tiiuae/falcon-7b-instruct",
+        "@cf/google/gemma-3-12b-it",
         "@hf/google/gemma-7b-it",
         "@hf/nousresearch/hermes-2-pro-mistral-7b",
         "@hf/thebloke/llama-2-13b-chat-awq",
@@ -37,12 +38,14 @@ class Cloudflare(Provider):
         "@cf/meta/llama-3.2-1b-instruct",
         "@cf/meta/llama-3.2-3b-instruct",
         "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
+        "@cf/meta/llama-4-scout-17b-16e-instruct",
         "@cf/meta/llama-guard-3-8b",
         "@hf/thebloke/llamaguard-7b-awq",
         "@hf/meta-llama/meta-llama-3-8b-instruct",
         "@cf/mistral/mistral-7b-instruct-v0.1",
         "@hf/thebloke/mistral-7b-instruct-v0.1-awq",
         "@hf/mistral/mistral-7b-instruct-v0.2",
+        "@cf/mistralai/mistral-small-3.1-24b-instruct",
         "@hf/thebloke/neural-chat-7b-v3-1-awq",
         "@cf/openchat/openchat-3.5-0106",
         "@hf/thebloke/openhermes-2.5-mistral-7b-awq",
@@ -51,6 +54,8 @@ class Cloudflare(Provider):
         "@cf/qwen/qwen1.5-1.8b-chat",
         "@cf/qwen/qwen1.5-14b-chat-awq",
         "@cf/qwen/qwen1.5-7b-chat-awq",
+        "@cf/qwen/qwen2.5-coder-32b-instruct",
+        "@cf/qwen/qwq-32b",
         "@cf/defog/sqlcoder-7b-2",
         "@hf/nexusflow/starling-lm-7b-beta",
         "@cf/tinyllama/tinyllama-1.1b-chat-v1.0",
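The Cloudflare provider gains five Workers AI models. A hedged usage sketch follows, assuming the provider's constructor accepts a `model` argument the way its `AVAILABLE_MODELS` list suggests; this is not confirmed by the diff above, so check the actual signature before relying on it.

```python
# Assumption: Cloudflare() accepts model= like other webscout providers;
# the constructor signature is not shown in the diff above.
from webscout.Provider.Cloudflare import Cloudflare

ai = Cloudflare(model="@cf/qwen/qwq-32b")  # one of the newly added models
```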
webscout/Provider/OPENAI/textpollinations.py
CHANGED
@@ -8,7 +8,7 @@ from typing import List, Dict, Optional, Union, Generator, Any
 from .base import OpenAICompatibleProvider, BaseChat, BaseCompletions
 from .utils import (
     ChatCompletionChunk, ChatCompletion, Choice, ChoiceDelta,
-    ChatCompletionMessage, CompletionUsage
+    ChatCompletionMessage, CompletionUsage, ToolCall, ToolFunction
 )

 # Import LitAgent for browser fingerprinting
@@ -32,6 +32,8 @@ class Completions(BaseCompletions):
         stream: bool = False,
         temperature: Optional[float] = None,
         top_p: Optional[float] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         **kwargs: Any
     ) -> Union[ChatCompletion, Generator[ChatCompletionChunk, None, None]]:
         """
@@ -49,6 +51,10 @@ class Completions(BaseCompletions):
             payload["temperature"] = temperature
         if top_p is not None:
             payload["top_p"] = top_p
+        if tools is not None:
+            payload["tools"] = tools
+        if tool_choice is not None:
+            payload["tool_choice"] = tool_choice

         payload.update(kwargs)

@@ -95,16 +101,39 @@ class Completions(BaseCompletions):
                             json_data = json.loads(line[6:])
                             if 'choices' in json_data and len(json_data['choices']) > 0:
                                 choice = json_data['choices'][0]
-                                if 'delta' in choice
-
-
+                                if 'delta' in choice:
+                                    delta_obj = ChoiceDelta()
+
+                                    # Handle content in delta
+                                    if 'content' in choice['delta']:
+                                        content = choice['delta']['content']
+                                        full_response += content
+                                        delta_obj.content = content
+
+                                    # Handle tool calls in delta
+                                    if 'tool_calls' in choice['delta']:
+                                        tool_calls = []
+                                        for tool_call_data in choice['delta']['tool_calls']:
+                                            if 'function' in tool_call_data:
+                                                function = ToolFunction(
+                                                    name=tool_call_data['function'].get('name', ''),
+                                                    arguments=tool_call_data['function'].get('arguments', '')
+                                                )
+                                                tool_call = ToolCall(
+                                                    id=tool_call_data.get('id', str(uuid.uuid4())),
+                                                    type=tool_call_data.get('type', 'function'),
+                                                    function=function
+                                                )
+                                                tool_calls.append(tool_call)
+
+                                        if tool_calls:
+                                            delta_obj.tool_calls = tool_calls

                                     # Create and yield a chunk
-
-                                    choice = Choice(index=0, delta=delta, finish_reason=None)
+                                    choice_obj = Choice(index=0, delta=delta_obj, finish_reason=None)
                                     chunk = ChatCompletionChunk(
                                         id=request_id,
-                                        choices=[
+                                        choices=[choice_obj],
                                         created=created_time,
                                         model=model
                                     )
@@ -155,18 +184,40 @@ class Completions(BaseCompletions):

             # Extract the content
             if 'choices' in response_json and len(response_json['choices']) > 0:
-
-
+                choice_data = response_json['choices'][0]
+                if 'message' in choice_data:
+                    message_data = choice_data['message']
+
+                    # Extract content
+                    full_content = message_data.get('content', '')
+
+                    # Create the completion message with potential tool calls
+                    message = ChatCompletionMessage(role="assistant", content=full_content)
+
+                    # Handle tool calls if present
+                    if 'tool_calls' in message_data:
+                        tool_calls = []
+                        for tool_call_data in message_data['tool_calls']:
+                            if 'function' in tool_call_data:
+                                function = ToolFunction(
+                                    name=tool_call_data['function'].get('name', ''),
+                                    arguments=tool_call_data['function'].get('arguments', '')
+                                )
+                                tool_call = ToolCall(
+                                    id=tool_call_data.get('id', str(uuid.uuid4())),
+                                    type=tool_call_data.get('type', 'function'),
+                                    function=function
+                                )
+                                tool_calls.append(tool_call)
+
+                        if tool_calls:
+                            message.tool_calls = tool_calls
                 else:
-
+                    # Fallback if no message is present
+                    message = ChatCompletionMessage(role="assistant", content="")
             else:
-
-
-            # Create the completion message
-            message = ChatCompletionMessage(
-                role="assistant",
-                content=full_content
-            )
+                # Fallback if no choices are present
+                message = ChatCompletionMessage(role="assistant", content="")

             # Create the choice
             choice = Choice(
@@ -217,33 +268,28 @@ class TextPollinations(OpenAICompatibleProvider):
     """

     AVAILABLE_MODELS = [
-        "openai",
-        "openai-large",
-        "openai-reasoning",
-        "qwen-coder",
-        "llama",
-        "llamascout",
-        "mistral",
-        "unity",
-        "midijourney",
-        "rtist",
-        "searchgpt",
-        "evil",
-        "deepseek-reasoning",
-        "deepseek-reasoning-large",
-        "
-        "
-        "
-        "
-        "
-        "
-        "
-        "
-        "roblox-rp",
-        "deepseek",
-        "sur",
-        "llama-scaleway",
-        "openai-audio",
+        "openai",                    # OpenAI GPT-4.1-nano (Azure) - vision capable
+        "openai-large",              # OpenAI GPT-4.1 mini (Azure) - vision capable
+        "openai-reasoning",          # OpenAI o4-mini (Azure) - vision capable, reasoning
+        "qwen-coder",                # Qwen 2.5 Coder 32B (Scaleway)
+        "llama",                     # Llama 3.3 70B (Cloudflare)
+        "llamascout",                # Llama 4 Scout 17B (Cloudflare)
+        "mistral",                   # Mistral Small 3 (Scaleway) - vision capable
+        "unity",                     # Unity Mistral Large (Scaleway) - vision capable, uncensored
+        "midijourney",               # Midijourney (Azure)
+        "rtist",                     # Rtist (Azure)
+        "searchgpt",                 # SearchGPT (Azure) - vision capable
+        "evil",                      # Evil (Scaleway) - vision capable, uncensored
+        "deepseek-reasoning",        # DeepSeek-R1 Distill Qwen 32B (Cloudflare) - reasoning
+        "deepseek-reasoning-large",  # DeepSeek R1 - Llama 70B (Scaleway) - reasoning
+        "phi",                       # Phi-4 Instruct (Cloudflare) - vision and audio capable
+        "llama-vision",              # Llama 3.2 11B Vision (Cloudflare) - vision capable
+        "gemini",                    # gemini-2.5-flash-preview-04-17 (Azure) - vision and audio capable
+        "hormoz",                    # Hormoz 8b (Modal)
+        "hypnosis-tracy",            # Hypnosis Tracy 7B (Azure) - audio capable
+        "deepseek",                  # DeepSeek-V3 (DeepSeek)
+        "sur",                       # Sur AI Assistant (Mistral) (Scaleway) - vision capable
+        "openai-audio",              # OpenAI GPT-4o-audio-preview (Azure) - vision and audio capable
     ]

     def __init__(
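The new `tools`/`tool_choice` parameters follow the OpenAI chat-completions convention, and parsed tool calls come back as `ToolCall` objects on the message (or on stream deltas). A sketch of a function-calling request, assuming webscout's usual OpenAI-compatible client surface (`client.chat.completions.create`); the `get_weather` schema is invented for illustration.

```python
# Sketch assuming webscout's OpenAI-compatible surface
# (client.chat.completions.create); the get_weather schema is invented.
from webscout.Provider.OPENAI.textpollinations import TextPollinations

client = TextPollinations()
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical tool
        "description": "Get the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

response = client.chat.completions.create(
    model="openai",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="auto",
)

# tool_calls may be absent if the model answered in plain text
for call in (getattr(response.choices[0].message, "tool_calls", None) or []):
    print(call.function.name, call.function.arguments)
```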
webscout/Provider/OPENAI/toolbaz.py
CHANGED
@@ -284,20 +284,20 @@ class Toolbaz(OpenAICompatibleProvider):
     """

     AVAILABLE_MODELS = [
+        "gemini-2.5-flash",
         "gemini-2.0-flash-thinking",
         "gemini-2.0-flash",
         "gemini-1.5-flash",
         "gpt-4o-latest",
-        "gpt-4o-mini",
         "gpt-4o",
         "deepseek-r1",
+        "Llama-4-Maverick",
+        "Llama-4-Scout",
         "Llama-3.3-70B",
-        "Llama-3.1-405B",
-        "Llama-3.1-70B",
         "Qwen2.5-72B",
         "Qwen2-72B",
         "grok-2-1212",
-        "grok-beta",
+        "grok-3-beta",
         "toolbaz_v3.5_pro",
         "toolbaz_v3",
         "mixtral_8x22b",
webscout/Provider/TTS/base.py
ADDED
@@ -0,0 +1,159 @@
+"""
+Base class for TTS providers with common functionality.
+"""
+import os
+import tempfile
+from pathlib import Path
+from typing import Generator, Optional
+from webscout.AIbase import TTSProvider
+
+class BaseTTSProvider(TTSProvider):
+    """
+    Base class for TTS providers with common functionality.
+
+    This class implements common methods like save_audio and stream_audio
+    that can be used by all TTS providers.
+    """
+
+    def __init__(self):
+        """Initialize the base TTS provider."""
+        self.temp_dir = tempfile.mkdtemp(prefix="webscout_tts_")
+
+    def save_audio(self, audio_file: str, destination: str = None, verbose: bool = False) -> str:
+        """
+        Save audio to a specific destination.
+
+        Args:
+            audio_file (str): Path to the source audio file
+            destination (str, optional): Destination path. Defaults to current directory with timestamp.
+            verbose (bool, optional): Whether to print debug information. Defaults to False.
+
+        Returns:
+            str: Path to the saved audio file
+
+        Raises:
+            FileNotFoundError: If the audio file doesn't exist
+        """
+        import shutil
+        import time
+
+        source_path = Path(audio_file)
+
+        if not source_path.exists():
+            raise FileNotFoundError(f"Audio file not found: {audio_file}")
+
+        if destination is None:
+            # Create a default destination with timestamp in current directory
+            timestamp = int(time.time())
+            destination = os.path.join(os.getcwd(), f"tts_audio_{timestamp}{source_path.suffix}")
+
+        # Ensure the destination directory exists
+        os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)
+
+        # Copy the file
+        shutil.copy2(source_path, destination)
+
+        if verbose:
+            print(f"[debug] Audio saved to {destination}")
+
+        return destination
+
+    def stream_audio(self, text: str, voice: str = None, chunk_size: int = 1024, verbose: bool = False) -> Generator[bytes, None, None]:
+        """
+        Stream audio in chunks.
+
+        Args:
+            text (str): The text to convert to speech
+            voice (str, optional): The voice to use. Defaults to provider's default voice.
+            chunk_size (int, optional): Size of audio chunks to yield. Defaults to 1024.
+            verbose (bool, optional): Whether to print debug information. Defaults to False.
+
+        Yields:
+            Generator[bytes, None, None]: Audio data chunks
+        """
+        # Generate the audio file
+        audio_file = self.tts(text, voice=voice, verbose=verbose)
+
+        # Stream the file in chunks
+        with open(audio_file, 'rb') as f:
+            while chunk := f.read(chunk_size):
+                yield chunk
+
+
+class AsyncBaseTTSProvider:
+    """
+    Base class for async TTS providers with common functionality.
+
+    This class implements common async methods like save_audio and stream_audio
+    that can be used by all async TTS providers.
+    """
+
+    def __init__(self):
+        """Initialize the async base TTS provider."""
+        self.temp_dir = tempfile.mkdtemp(prefix="webscout_tts_")
+
+    async def save_audio(self, audio_file: str, destination: str = None, verbose: bool = False) -> str:
+        """
+        Save audio to a specific destination asynchronously.
+
+        Args:
+            audio_file (str): Path to the source audio file
+            destination (str, optional): Destination path. Defaults to current directory with timestamp.
+            verbose (bool, optional): Whether to print debug information. Defaults to False.
+
+        Returns:
+            str: Path to the saved audio file
+
+        Raises:
+            FileNotFoundError: If the audio file doesn't exist
+        """
+        import shutil
+        import time
+        import asyncio
+
+        source_path = Path(audio_file)
+
+        if not source_path.exists():
+            raise FileNotFoundError(f"Audio file not found: {audio_file}")
+
+        if destination is None:
+            # Create a default destination with timestamp in current directory
+            timestamp = int(time.time())
+            destination = os.path.join(os.getcwd(), f"tts_audio_{timestamp}{source_path.suffix}")
+
+        # Ensure the destination directory exists
+        os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)
+
+        # Copy the file using asyncio to avoid blocking
+        await asyncio.to_thread(shutil.copy2, source_path, destination)
+
+        if verbose:
+            print(f"[debug] Audio saved to {destination}")
+
+        return destination
+
+    async def stream_audio(self, text: str, voice: str = None, chunk_size: int = 1024, verbose: bool = False):
+        """
+        Stream audio in chunks asynchronously.
+
+        Args:
+            text (str): The text to convert to speech
+            voice (str, optional): The voice to use. Defaults to provider's default voice.
+            chunk_size (int, optional): Size of audio chunks to yield. Defaults to 1024.
+            verbose (bool, optional): Whether to print debug information. Defaults to False.
+
+        Yields:
+            AsyncGenerator[bytes, None]: Audio data chunks
+        """
+        try:
+            import aiofiles
+        except ImportError:
+            raise ImportError("The 'aiofiles' package is required for async streaming. Install it with 'pip install aiofiles'.")
+
+        # Generate the audio file
+        audio_file = await self.tts(text, voice=voice, verbose=verbose)
+
+        # Stream the file in chunks
+        async with aiofiles.open(audio_file, 'rb') as f:
+            while chunk := await f.read(chunk_size):
+                yield chunk
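Because `AsyncBaseTTSProvider.stream_audio` is itself an async generator that calls `await self.tts(...)`, an async provider only has to supply an async `tts`. A minimal sketch, assuming a hypothetical `DummyAsyncTTS` subclass and `aiofiles` installed:

```python
# Hedged sketch of an async provider built on AsyncBaseTTSProvider;
# DummyAsyncTTS is hypothetical, and aiofiles must be installed for streaming.
import asyncio
import os

from webscout.Provider.TTS.base import AsyncBaseTTSProvider

class DummyAsyncTTS(AsyncBaseTTSProvider):
    async def tts(self, text: str, voice: str = None, verbose: bool = False) -> str:
        # A real provider would await an HTTP call; we write placeholder
        # bytes into the temp_dir created by AsyncBaseTTSProvider.__init__.
        path = os.path.join(self.temp_dir, "out.mp3")
        with open(path, "wb") as f:
            f.write(b"FAKE-AUDIO:" + text.encode())
        return path

async def main():
    provider = DummyAsyncTTS()
    async for chunk in provider.stream_audio("Hello"):
        print(f"got {len(chunk)} bytes")

asyncio.run(main())
```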
webscout/Provider/TTS/deepgram.py
CHANGED
@@ -5,12 +5,12 @@ import base64
 import tempfile
 from io import BytesIO
 from webscout import exceptions
-from webscout.AIbase import TTSProvider
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from webscout.litagent import LitAgent
-from . import utils
+from . import utils
+from .base import BaseTTSProvider

-class DeepgramTTS(
+class DeepgramTTS(BaseTTSProvider):
     """
     Text-to-speech provider using the DeepgramTTS API.
     """
@@ -27,12 +27,12 @@ class DeepgramTTS(TTSProvider):

     def __init__(self, timeout: int = 20, proxies: dict = None):
         """Initializes the DeepgramTTS TTS client."""
+        super().__init__()
         self.session = requests.Session()
         self.session.headers.update(self.headers)
         if proxies:
             self.session.proxies.update(proxies)
         self.timeout = timeout
-        self.temp_dir = tempfile.mkdtemp(prefix="webscout_tts_")

     def tts(self, text: str, voice: str = "Brian", verbose: bool = True) -> str:
         """
@@ -80,15 +80,15 @@ class DeepgramTTS(TTSProvider):
             """
             max_retries = 3
             retry_count = 0
-
+
             while retry_count < max_retries:
                 try:
                     payload = {"text": part_text, "model": self.all_voices[voice]}
                     response = self.session.post(
-                        url=url,
-                        headers=self.headers,
-                        json=payload,
-                        stream=True,
+                        url=url,
+                        headers=self.headers,
+                        json=payload,
+                        stream=True,
                         timeout=self.timeout
                     )
                     response.raise_for_status()
@@ -99,29 +99,29 @@ class DeepgramTTS(TTSProvider):
                        if verbose:
                            print(f"[debug] Chunk {part_number} processed successfully")
                        return part_number, audio_data
-
+
                    if verbose:
                        print(f"[debug] No data received for chunk {part_number}. Attempt {retry_count + 1}/{max_retries}")
-
+
                 except requests.RequestException as e:
                     if verbose:
                         print(f"[debug] Error processing chunk {part_number}: {str(e)}. Attempt {retry_count + 1}/{max_retries}")
                     if retry_count == max_retries - 1:
                         raise
-
+
                 retry_count += 1
                 time.sleep(1)
-
+
             raise RuntimeError(f"Failed to generate audio for chunk {part_number} after {max_retries} attempts")

         try:
             # Using ThreadPoolExecutor to handle requests concurrently
             with ThreadPoolExecutor() as executor:
                 futures = {
-                    executor.submit(generate_audio_for_chunk, sentence.strip(), chunk_num): chunk_num
+                    executor.submit(generate_audio_for_chunk, sentence.strip(), chunk_num): chunk_num
                     for chunk_num, sentence in enumerate(sentences, start=1)
                 }
-
+
                 # Dictionary to store results with order preserved
                 audio_chunks = {}

@@ -152,5 +152,5 @@ if __name__ == "__main__":
     text = "This is a test of the DeepgramTTS text-to-speech API. It supports multiple sentences. Let's see how it works!"

     print("[debug] Generating audio...")
-    audio_file = deepgram.tts(text, voice="Asteria")
+    audio_file = deepgram.tts(text, voice="Asteria")
     print(f"Audio saved to: {audio_file}")