webscout 8.3.5__py3-none-any.whl → 8.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (159) hide show
  1. webscout/AIutel.py +2 -0
  2. webscout/Bard.py +12 -6
  3. webscout/DWEBS.py +66 -57
  4. webscout/Provider/{UNFINISHED → AISEARCH}/PERPLEXED_search.py +34 -74
  5. webscout/Provider/AISEARCH/__init__.py +18 -11
  6. webscout/Provider/AISEARCH/scira_search.py +3 -1
  7. webscout/Provider/Aitopia.py +2 -3
  8. webscout/Provider/Andi.py +3 -3
  9. webscout/Provider/ChatGPTClone.py +1 -1
  10. webscout/Provider/ChatSandbox.py +1 -0
  11. webscout/Provider/Cloudflare.py +1 -1
  12. webscout/Provider/Cohere.py +1 -0
  13. webscout/Provider/Deepinfra.py +13 -10
  14. webscout/Provider/ExaAI.py +1 -1
  15. webscout/Provider/ExaChat.py +1 -80
  16. webscout/Provider/Flowith.py +6 -1
  17. webscout/Provider/Gemini.py +7 -5
  18. webscout/Provider/GeminiProxy.py +1 -0
  19. webscout/Provider/GithubChat.py +4 -1
  20. webscout/Provider/Groq.py +1 -1
  21. webscout/Provider/HeckAI.py +8 -4
  22. webscout/Provider/Jadve.py +23 -38
  23. webscout/Provider/K2Think.py +308 -0
  24. webscout/Provider/Koboldai.py +8 -186
  25. webscout/Provider/LambdaChat.py +2 -4
  26. webscout/Provider/Nemotron.py +3 -4
  27. webscout/Provider/Netwrck.py +6 -8
  28. webscout/Provider/OLLAMA.py +1 -0
  29. webscout/Provider/OPENAI/Cloudflare.py +6 -7
  30. webscout/Provider/OPENAI/FalconH1.py +2 -7
  31. webscout/Provider/OPENAI/FreeGemini.py +6 -8
  32. webscout/Provider/OPENAI/{monochat.py → K2Think.py} +180 -77
  33. webscout/Provider/OPENAI/NEMOTRON.py +3 -6
  34. webscout/Provider/OPENAI/PI.py +5 -4
  35. webscout/Provider/OPENAI/Qwen3.py +2 -3
  36. webscout/Provider/OPENAI/README.md +2 -1
  37. webscout/Provider/OPENAI/TogetherAI.py +52 -57
  38. webscout/Provider/OPENAI/TwoAI.py +3 -4
  39. webscout/Provider/OPENAI/__init__.py +17 -56
  40. webscout/Provider/OPENAI/ai4chat.py +313 -303
  41. webscout/Provider/OPENAI/base.py +9 -29
  42. webscout/Provider/OPENAI/chatgpt.py +7 -2
  43. webscout/Provider/OPENAI/chatgptclone.py +4 -7
  44. webscout/Provider/OPENAI/chatsandbox.py +84 -59
  45. webscout/Provider/OPENAI/deepinfra.py +12 -6
  46. webscout/Provider/OPENAI/e2b.py +60 -8
  47. webscout/Provider/OPENAI/flowith.py +4 -3
  48. webscout/Provider/OPENAI/generate_api_key.py +48 -0
  49. webscout/Provider/OPENAI/heckai.py +4 -1
  50. webscout/Provider/OPENAI/netwrck.py +9 -12
  51. webscout/Provider/OPENAI/refact.py +274 -0
  52. webscout/Provider/OPENAI/scirachat.py +6 -0
  53. webscout/Provider/OPENAI/textpollinations.py +3 -14
  54. webscout/Provider/OPENAI/toolbaz.py +14 -10
  55. webscout/Provider/OpenGPT.py +1 -1
  56. webscout/Provider/Openai.py +150 -402
  57. webscout/Provider/PI.py +1 -0
  58. webscout/Provider/Perplexitylabs.py +1 -2
  59. webscout/Provider/QwenLM.py +107 -89
  60. webscout/Provider/STT/__init__.py +17 -2
  61. webscout/Provider/{Llama3.py → Sambanova.py} +9 -10
  62. webscout/Provider/StandardInput.py +1 -1
  63. webscout/Provider/TTI/__init__.py +18 -12
  64. webscout/Provider/TTI/bing.py +14 -2
  65. webscout/Provider/TTI/together.py +10 -9
  66. webscout/Provider/TTS/README.md +0 -1
  67. webscout/Provider/TTS/__init__.py +18 -11
  68. webscout/Provider/TTS/base.py +479 -159
  69. webscout/Provider/TTS/deepgram.py +409 -156
  70. webscout/Provider/TTS/elevenlabs.py +425 -111
  71. webscout/Provider/TTS/freetts.py +317 -140
  72. webscout/Provider/TTS/gesserit.py +192 -128
  73. webscout/Provider/TTS/murfai.py +248 -113
  74. webscout/Provider/TTS/openai_fm.py +347 -129
  75. webscout/Provider/TTS/speechma.py +620 -586
  76. webscout/Provider/TeachAnything.py +1 -0
  77. webscout/Provider/TextPollinationsAI.py +5 -15
  78. webscout/Provider/TogetherAI.py +136 -142
  79. webscout/Provider/TwoAI.py +53 -309
  80. webscout/Provider/TypliAI.py +2 -1
  81. webscout/Provider/{GizAI.py → UNFINISHED/GizAI.py} +1 -1
  82. webscout/Provider/UNFINISHED/VercelAIGateway.py +339 -0
  83. webscout/Provider/Venice.py +2 -1
  84. webscout/Provider/VercelAI.py +1 -0
  85. webscout/Provider/WiseCat.py +2 -1
  86. webscout/Provider/WrDoChat.py +2 -1
  87. webscout/Provider/__init__.py +18 -174
  88. webscout/Provider/ai4chat.py +1 -1
  89. webscout/Provider/akashgpt.py +7 -10
  90. webscout/Provider/cerebras.py +194 -38
  91. webscout/Provider/chatglm.py +170 -83
  92. webscout/Provider/cleeai.py +1 -2
  93. webscout/Provider/deepseek_assistant.py +1 -1
  94. webscout/Provider/elmo.py +1 -1
  95. webscout/Provider/geminiapi.py +1 -1
  96. webscout/Provider/granite.py +1 -1
  97. webscout/Provider/hermes.py +1 -3
  98. webscout/Provider/julius.py +1 -0
  99. webscout/Provider/learnfastai.py +1 -1
  100. webscout/Provider/llama3mitril.py +1 -1
  101. webscout/Provider/llmchat.py +1 -1
  102. webscout/Provider/llmchatco.py +1 -1
  103. webscout/Provider/meta.py +3 -3
  104. webscout/Provider/oivscode.py +2 -2
  105. webscout/Provider/scira_chat.py +51 -124
  106. webscout/Provider/searchchat.py +1 -0
  107. webscout/Provider/sonus.py +1 -1
  108. webscout/Provider/toolbaz.py +15 -11
  109. webscout/Provider/turboseek.py +31 -22
  110. webscout/Provider/typefully.py +2 -1
  111. webscout/Provider/x0gpt.py +1 -0
  112. webscout/Provider/yep.py +2 -1
  113. webscout/conversation.py +22 -20
  114. webscout/sanitize.py +14 -10
  115. webscout/scout/README.md +20 -23
  116. webscout/scout/core/crawler.py +125 -38
  117. webscout/scout/core/scout.py +26 -5
  118. webscout/tempid.py +6 -0
  119. webscout/version.py +1 -1
  120. webscout/webscout_search.py +13 -6
  121. webscout/webscout_search_async.py +10 -8
  122. webscout/yep_search.py +13 -5
  123. {webscout-8.3.5.dist-info → webscout-8.3.7.dist-info}/METADATA +3 -1
  124. {webscout-8.3.5.dist-info → webscout-8.3.7.dist-info}/RECORD +132 -155
  125. webscout/Provider/AllenAI.py +0 -440
  126. webscout/Provider/Blackboxai.py +0 -793
  127. webscout/Provider/FreeGemini.py +0 -250
  128. webscout/Provider/Glider.py +0 -225
  129. webscout/Provider/Hunyuan.py +0 -283
  130. webscout/Provider/MCPCore.py +0 -322
  131. webscout/Provider/MiniMax.py +0 -207
  132. webscout/Provider/OPENAI/BLACKBOXAI.py +0 -1045
  133. webscout/Provider/OPENAI/MiniMax.py +0 -298
  134. webscout/Provider/OPENAI/autoproxy.py +0 -1067
  135. webscout/Provider/OPENAI/c4ai.py +0 -394
  136. webscout/Provider/OPENAI/copilot.py +0 -305
  137. webscout/Provider/OPENAI/glider.py +0 -330
  138. webscout/Provider/OPENAI/mcpcore.py +0 -431
  139. webscout/Provider/OPENAI/multichat.py +0 -378
  140. webscout/Provider/Reka.py +0 -214
  141. webscout/Provider/TTS/sthir.py +0 -94
  142. webscout/Provider/UNFINISHED/fetch_together_models.py +0 -90
  143. webscout/Provider/asksteve.py +0 -220
  144. webscout/Provider/copilot.py +0 -422
  145. webscout/Provider/freeaichat.py +0 -294
  146. webscout/Provider/koala.py +0 -182
  147. webscout/Provider/lmarena.py +0 -198
  148. webscout/Provider/monochat.py +0 -275
  149. webscout/Provider/multichat.py +0 -375
  150. webscout/Provider/scnet.py +0 -244
  151. webscout/Provider/talkai.py +0 -194
  152. /webscout/Provider/{Marcus.py → UNFINISHED/Marcus.py} +0 -0
  153. /webscout/Provider/{Qodo.py → UNFINISHED/Qodo.py} +0 -0
  154. /webscout/Provider/{XenAI.py → UNFINISHED/XenAI.py} +0 -0
  155. /webscout/Provider/{samurai.py → UNFINISHED/samurai.py} +0 -0
  156. {webscout-8.3.5.dist-info → webscout-8.3.7.dist-info}/WHEEL +0 -0
  157. {webscout-8.3.5.dist-info → webscout-8.3.7.dist-info}/entry_points.txt +0 -0
  158. {webscout-8.3.5.dist-info → webscout-8.3.7.dist-info}/licenses/LICENSE.md +0 -0
  159. {webscout-8.3.5.dist-info → webscout-8.3.7.dist-info}/top_level.txt +0 -0
@@ -1,159 +1,479 @@
1
- """
2
- Base class for TTS providers with common functionality.
3
- """
4
- import os
5
- import tempfile
6
- from pathlib import Path
7
- from typing import Generator, Optional
8
- from webscout.AIbase import TTSProvider
9
-
10
- class BaseTTSProvider(TTSProvider):
11
- """
12
- Base class for TTS providers with common functionality.
13
-
14
- This class implements common methods like save_audio and stream_audio
15
- that can be used by all TTS providers.
16
- """
17
-
18
- def __init__(self):
19
- """Initialize the base TTS provider."""
20
- self.temp_dir = tempfile.mkdtemp(prefix="webscout_tts_")
21
-
22
- def save_audio(self, audio_file: str, destination: str = None, verbose: bool = False) -> str:
23
- """
24
- Save audio to a specific destination.
25
-
26
- Args:
27
- audio_file (str): Path to the source audio file
28
- destination (str, optional): Destination path. Defaults to current directory with timestamp.
29
- verbose (bool, optional): Whether to print debug information. Defaults to False.
30
-
31
- Returns:
32
- str: Path to the saved audio file
33
-
34
- Raises:
35
- FileNotFoundError: If the audio file doesn't exist
36
- """
37
- import shutil
38
- import time
39
-
40
- source_path = Path(audio_file)
41
-
42
- if not source_path.exists():
43
- raise FileNotFoundError(f"Audio file not found: {audio_file}")
44
-
45
- if destination is None:
46
- # Create a default destination with timestamp in current directory
47
- timestamp = int(time.time())
48
- destination = os.path.join(os.getcwd(), f"tts_audio_{timestamp}{source_path.suffix}")
49
-
50
- # Ensure the destination directory exists
51
- os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)
52
-
53
- # Copy the file
54
- shutil.copy2(source_path, destination)
55
-
56
- if verbose:
57
- print(f"[debug] Audio saved to {destination}")
58
-
59
- return destination
60
-
61
- def stream_audio(self, text: str, voice: str = None, chunk_size: int = 1024, verbose: bool = False) -> Generator[bytes, None, None]:
62
- """
63
- Stream audio in chunks.
64
-
65
- Args:
66
- text (str): The text to convert to speech
67
- voice (str, optional): The voice to use. Defaults to provider's default voice.
68
- chunk_size (int, optional): Size of audio chunks to yield. Defaults to 1024.
69
- verbose (bool, optional): Whether to print debug information. Defaults to False.
70
-
71
- Yields:
72
- Generator[bytes, None, None]: Audio data chunks
73
- """
74
- # Generate the audio file
75
- audio_file = self.tts(text, voice=voice, verbose=verbose)
76
-
77
- # Stream the file in chunks
78
- with open(audio_file, 'rb') as f:
79
- while chunk := f.read(chunk_size):
80
- yield chunk
81
-
82
-
83
- class AsyncBaseTTSProvider:
84
- """
85
- Base class for async TTS providers with common functionality.
86
-
87
- This class implements common async methods like save_audio and stream_audio
88
- that can be used by all async TTS providers.
89
- """
90
-
91
- def __init__(self):
92
- """Initialize the async base TTS provider."""
93
- self.temp_dir = tempfile.mkdtemp(prefix="webscout_tts_")
94
-
95
- async def save_audio(self, audio_file: str, destination: str = None, verbose: bool = False) -> str:
96
- """
97
- Save audio to a specific destination asynchronously.
98
-
99
- Args:
100
- audio_file (str): Path to the source audio file
101
- destination (str, optional): Destination path. Defaults to current directory with timestamp.
102
- verbose (bool, optional): Whether to print debug information. Defaults to False.
103
-
104
- Returns:
105
- str: Path to the saved audio file
106
-
107
- Raises:
108
- FileNotFoundError: If the audio file doesn't exist
109
- """
110
- import shutil
111
- import time
112
- import asyncio
113
-
114
- source_path = Path(audio_file)
115
-
116
- if not source_path.exists():
117
- raise FileNotFoundError(f"Audio file not found: {audio_file}")
118
-
119
- if destination is None:
120
- # Create a default destination with timestamp in current directory
121
- timestamp = int(time.time())
122
- destination = os.path.join(os.getcwd(), f"tts_audio_{timestamp}{source_path.suffix}")
123
-
124
- # Ensure the destination directory exists
125
- os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)
126
-
127
- # Copy the file using asyncio to avoid blocking
128
- await asyncio.to_thread(shutil.copy2, source_path, destination)
129
-
130
- if verbose:
131
- print(f"[debug] Audio saved to {destination}")
132
-
133
- return destination
134
-
135
- async def stream_audio(self, text: str, voice: str = None, chunk_size: int = 1024, verbose: bool = False):
136
- """
137
- Stream audio in chunks asynchronously.
138
-
139
- Args:
140
- text (str): The text to convert to speech
141
- voice (str, optional): The voice to use. Defaults to provider's default voice.
142
- chunk_size (int, optional): Size of audio chunks to yield. Defaults to 1024.
143
- verbose (bool, optional): Whether to print debug information. Defaults to False.
144
-
145
- Yields:
146
- AsyncGenerator[bytes, None]: Audio data chunks
147
- """
148
- try:
149
- import aiofiles
150
- except ImportError:
151
- raise ImportError("The 'aiofiles' package is required for async streaming. Install it with 'pip install aiofiles'.")
152
-
153
- # Generate the audio file
154
- audio_file = await self.tts(text, voice=voice, verbose=verbose)
155
-
156
- # Stream the file in chunks
157
- async with aiofiles.open(audio_file, 'rb') as f:
158
- while chunk := await f.read(chunk_size):
159
- yield chunk
1
+ """
2
+ Base class for TTS providers with OpenAI-compatible functionality.
3
+ """
4
+ import os
5
+ import tempfile
6
+ from pathlib import Path
7
+ from typing import Generator, Optional, Dict, List, Union
8
+ from webscout.AIbase import TTSProvider
9
+
10
class BaseTTSProvider(TTSProvider):
    """
    Base class for TTS providers with OpenAI-compatible functionality.

    This class implements common methods and follows OpenAI TTS API patterns
    for speech generation, streaming, and audio handling.
    """

    # Supported models (can be overridden by subclasses; set to None to skip validation)
    SUPPORTED_MODELS = [
        "gpt-4o-mini-tts",  # Latest intelligent realtime model
        "tts-1",            # Lower latency model
        "tts-1-hd"          # Higher quality model
    ]

    # Supported voices (can be overridden by subclasses; set to None to skip validation)
    SUPPORTED_VOICES = [
        "alloy", "ash", "ballad", "coral", "echo",
        "fable", "nova", "onyx", "sage", "shimmer"
    ]

    # Supported output formats (can be overridden by subclasses; set to None to skip validation)
    SUPPORTED_FORMATS = [
        "mp3",   # Default format
        "opus",  # Internet streaming, low latency
        "aac",   # Digital audio compression
        "flac",  # Lossless compression
        "wav",   # Uncompressed, low latency
        "pcm"    # Raw samples, 24kHz 16-bit
    ]

    def __init__(self):
        """Initialize the base TTS provider with a private temp dir and OpenAI-style defaults."""
        self.temp_dir = tempfile.mkdtemp(prefix="webscout_tts_")
        self.default_model = "gpt-4o-mini-tts"
        self.default_voice = "coral"
        self.default_format = "mp3"

    def validate_model(self, model: str) -> str:
        """
        Validate and return the model name.

        Args:
            model (str): Model name to validate

        Returns:
            str: Validated model name

        Raises:
            ValueError: If model is not supported
        """
        # If provider doesn't restrict models, return the model as-is
        if self.SUPPORTED_MODELS is None:
            return model

        if model not in self.SUPPORTED_MODELS:
            raise ValueError(f"Model '{model}' not supported. Available models: {', '.join(self.SUPPORTED_MODELS)}")
        return model

    def validate_voice(self, voice: str) -> str:
        """
        Validate and return the voice name.

        Args:
            voice (str): Voice name to validate

        Returns:
            str: Validated voice name

        Raises:
            ValueError: If voice is not supported
        """
        # Mirror validate_model: a provider may disable voice validation with None.
        # (Previously this crashed with a TypeError on `in None`.)
        if self.SUPPORTED_VOICES is None:
            return voice

        if voice not in self.SUPPORTED_VOICES:
            raise ValueError(f"Voice '{voice}' not supported. Available voices: {', '.join(self.SUPPORTED_VOICES)}")
        return voice

    def validate_format(self, response_format: str) -> str:
        """
        Validate and return the response format.

        Args:
            response_format (str): Response format to validate

        Returns:
            str: Validated response format

        Raises:
            ValueError: If format is not supported
        """
        # Mirror validate_model: a provider may disable format validation with None.
        if self.SUPPORTED_FORMATS is None:
            return response_format

        if response_format not in self.SUPPORTED_FORMATS:
            raise ValueError(f"Format '{response_format}' not supported. Available formats: {', '.join(self.SUPPORTED_FORMATS)}")
        return response_format

    def save_audio(self, audio_file: str, destination: str = None, verbose: bool = False) -> str:
        """
        Save audio to a specific destination.

        Args:
            audio_file (str): Path to the source audio file
            destination (str, optional): Destination path. Defaults to current directory with timestamp.
            verbose (bool, optional): Whether to print debug information. Defaults to False.

        Returns:
            str: Path to the saved audio file

        Raises:
            FileNotFoundError: If the audio file doesn't exist
        """
        import shutil
        import time

        source_path = Path(audio_file)

        if not source_path.exists():
            raise FileNotFoundError(f"Audio file not found: {audio_file}")

        if destination is None:
            # Create a default destination with timestamp in current directory
            timestamp = int(time.time())
            destination = os.path.join(os.getcwd(), f"speech_{timestamp}{source_path.suffix}")

        # Ensure the destination directory exists
        os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)

        # Copy the file (copy2 preserves metadata)
        shutil.copy2(source_path, destination)

        if verbose:
            print(f"[debug] Audio saved to {destination}")

        return destination

    def create_speech(
        self,
        input_text: str,
        model: str = None,
        voice: str = None,
        response_format: str = None,
        instructions: str = None,
        verbose: bool = False
    ) -> str:
        """
        Create speech from input text (OpenAI-compatible interface).

        Args:
            input_text (str): The text to convert to speech
            model (str, optional): The TTS model to use
            voice (str, optional): The voice to use
            response_format (str, optional): Audio format (mp3, opus, aac, flac, wav, pcm)
            instructions (str, optional): Voice instructions for controlling speech aspects
            verbose (bool, optional): Whether to print debug information

        Returns:
            str: Path to the generated audio file
        """
        # Use defaults if not provided
        model = model or self.default_model
        voice = voice or self.default_voice
        response_format = response_format or self.default_format

        # Validate parameters before dispatching to the provider
        self.validate_model(model)
        self.validate_voice(voice)
        self.validate_format(response_format)

        # Call the provider-specific TTS implementation
        return self.tts(
            text=input_text,
            model=model,
            voice=voice,
            response_format=response_format,
            instructions=instructions,
            verbose=verbose
        )

    def stream_audio(
        self,
        input_text: str,
        model: str = None,
        voice: str = None,
        response_format: str = None,
        instructions: str = None,
        chunk_size: int = 1024,
        verbose: bool = False
    ) -> Generator[bytes, None, None]:
        """
        Stream audio in chunks with OpenAI-compatible parameters.

        Args:
            input_text (str): The text to convert to speech
            model (str, optional): The TTS model to use
            voice (str, optional): The voice to use
            response_format (str, optional): Audio format
            instructions (str, optional): Voice instructions
            chunk_size (int, optional): Size of audio chunks to yield. Defaults to 1024.
            verbose (bool, optional): Whether to print debug information. Defaults to False.

        Yields:
            Generator[bytes, None, None]: Audio data chunks
        """
        # Generate the audio file using create_speech
        audio_file = self.create_speech(
            input_text=input_text,
            model=model,
            voice=voice,
            response_format=response_format,
            instructions=instructions,
            verbose=verbose
        )

        # Stream the file in chunks
        with open(audio_file, 'rb') as f:
            while chunk := f.read(chunk_size):
                yield chunk

    def tts(self, text: str, **kwargs) -> str:
        """
        Abstract method for text-to-speech conversion.
        Must be implemented by subclasses.

        Args:
            text (str): The text to convert to speech
            **kwargs: Additional provider-specific parameters
                (create_speech passes model, voice, response_format,
                instructions and verbose)

        Returns:
            str: Path to the generated audio file

        Raises:
            NotImplementedError: If not implemented by subclass
        """
        raise NotImplementedError("Subclasses must implement the tts method")
241
+
242
+
243
class AsyncBaseTTSProvider:
    """
    Base class for async TTS providers with OpenAI-compatible functionality.

    This class implements common async methods following OpenAI TTS API patterns
    for speech generation, streaming, and audio handling.
    """

    # Supported models (can be overridden by subclasses; set to None to skip validation)
    SUPPORTED_MODELS = [
        "gpt-4o-mini-tts",  # Latest intelligent realtime model
        "tts-1",            # Lower latency model
        "tts-1-hd"          # Higher quality model
    ]

    # Supported voices (can be overridden by subclasses; set to None to skip validation)
    SUPPORTED_VOICES = [
        "alloy", "ash", "ballad", "coral", "echo",
        "fable", "nova", "onyx", "sage", "shimmer"
    ]

    # Supported output formats (can be overridden by subclasses; set to None to skip validation)
    SUPPORTED_FORMATS = [
        "mp3",   # Default format
        "opus",  # Internet streaming, low latency
        "aac",   # Digital audio compression
        "flac",  # Lossless compression
        "wav",   # Uncompressed, low latency
        "pcm"    # Raw samples, 24kHz 16-bit
    ]

    def __init__(self):
        """Initialize the async base TTS provider with a private temp dir and OpenAI-style defaults."""
        self.temp_dir = tempfile.mkdtemp(prefix="webscout_tts_")
        self.default_model = "gpt-4o-mini-tts"
        self.default_voice = "coral"
        self.default_format = "mp3"

    async def validate_model(self, model: str) -> str:
        """
        Validate and return the model name.

        Args:
            model (str): Model name to validate

        Returns:
            str: Validated model name

        Raises:
            ValueError: If model is not supported
        """
        # If provider doesn't restrict models, return the model as-is
        if self.SUPPORTED_MODELS is None:
            return model

        if model not in self.SUPPORTED_MODELS:
            raise ValueError(f"Model '{model}' not supported. Available models: {', '.join(self.SUPPORTED_MODELS)}")
        return model

    async def validate_voice(self, voice: str) -> str:
        """
        Validate and return the voice name.

        Args:
            voice (str): Voice name to validate

        Returns:
            str: Validated voice name

        Raises:
            ValueError: If voice is not supported
        """
        # Mirror validate_model: a provider may disable voice validation with None.
        # (Previously this crashed with a TypeError on `in None`.)
        if self.SUPPORTED_VOICES is None:
            return voice

        if voice not in self.SUPPORTED_VOICES:
            raise ValueError(f"Voice '{voice}' not supported. Available voices: {', '.join(self.SUPPORTED_VOICES)}")
        return voice

    async def validate_format(self, response_format: str) -> str:
        """
        Validate and return the response format.

        Args:
            response_format (str): Response format to validate

        Returns:
            str: Validated response format

        Raises:
            ValueError: If format is not supported
        """
        # Mirror validate_model: a provider may disable format validation with None.
        if self.SUPPORTED_FORMATS is None:
            return response_format

        if response_format not in self.SUPPORTED_FORMATS:
            raise ValueError(f"Format '{response_format}' not supported. Available formats: {', '.join(self.SUPPORTED_FORMATS)}")
        return response_format

    async def save_audio(self, audio_file: str, destination: str = None, verbose: bool = False) -> str:
        """
        Save audio to a specific destination asynchronously.

        Args:
            audio_file (str): Path to the source audio file
            destination (str, optional): Destination path. Defaults to current directory with timestamp.
            verbose (bool, optional): Whether to print debug information. Defaults to False.

        Returns:
            str: Path to the saved audio file

        Raises:
            FileNotFoundError: If the audio file doesn't exist
        """
        import shutil
        import time
        import asyncio

        source_path = Path(audio_file)

        if not source_path.exists():
            raise FileNotFoundError(f"Audio file not found: {audio_file}")

        if destination is None:
            # Create a default destination with timestamp in current directory
            timestamp = int(time.time())
            destination = os.path.join(os.getcwd(), f"speech_{timestamp}{source_path.suffix}")

        # Ensure the destination directory exists
        os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)

        # Copy the file in a worker thread to avoid blocking the event loop
        await asyncio.to_thread(shutil.copy2, source_path, destination)

        if verbose:
            print(f"[debug] Audio saved to {destination}")

        return destination

    async def create_speech(
        self,
        input_text: str,
        model: str = None,
        voice: str = None,
        response_format: str = None,
        instructions: str = None,
        verbose: bool = False
    ) -> str:
        """
        Create speech from input text asynchronously (OpenAI-compatible interface).

        Args:
            input_text (str): The text to convert to speech
            model (str, optional): The TTS model to use
            voice (str, optional): The voice to use
            response_format (str, optional): Audio format (mp3, opus, aac, flac, wav, pcm)
            instructions (str, optional): Voice instructions for controlling speech aspects
            verbose (bool, optional): Whether to print debug information

        Returns:
            str: Path to the generated audio file
        """
        # Use defaults if not provided
        model = model or self.default_model
        voice = voice or self.default_voice
        response_format = response_format or self.default_format

        # Validate parameters before dispatching to the provider
        await self.validate_model(model)
        await self.validate_voice(voice)
        await self.validate_format(response_format)

        # Call the provider-specific TTS implementation
        return await self.tts(
            text=input_text,
            model=model,
            voice=voice,
            response_format=response_format,
            instructions=instructions,
            verbose=verbose
        )

    async def stream_audio(
        self,
        input_text: str,
        model: str = None,
        voice: str = None,
        response_format: str = None,
        instructions: str = None,
        chunk_size: int = 1024,
        verbose: bool = False
    ):
        """
        Stream audio in chunks asynchronously with OpenAI-compatible parameters.

        Args:
            input_text (str): The text to convert to speech
            model (str, optional): The TTS model to use
            voice (str, optional): The voice to use
            response_format (str, optional): Audio format
            instructions (str, optional): Voice instructions
            chunk_size (int, optional): Size of audio chunks to yield. Defaults to 1024.
            verbose (bool, optional): Whether to print debug information. Defaults to False.

        Yields:
            AsyncGenerator[bytes, None]: Audio data chunks
        """
        try:
            import aiofiles
        except ImportError:
            raise ImportError("The 'aiofiles' package is required for async streaming. Install it with 'pip install aiofiles'.")

        # Generate the audio file using create_speech
        audio_file = await self.create_speech(
            input_text=input_text,
            model=model,
            voice=voice,
            response_format=response_format,
            instructions=instructions,
            verbose=verbose
        )

        # Stream the file in chunks
        async with aiofiles.open(audio_file, 'rb') as f:
            while chunk := await f.read(chunk_size):
                yield chunk

    async def tts(self, text: str, **kwargs) -> str:
        """
        Abstract async method for text-to-speech conversion.
        Must be implemented by subclasses.

        Args:
            text (str): The text to convert to speech
            **kwargs: Additional provider-specific parameters
                (create_speech passes model, voice, response_format,
                instructions and verbose)

        Returns:
            str: Path to the generated audio file

        Raises:
            NotImplementedError: If not implemented by subclass
        """
        raise NotImplementedError("Subclasses must implement the async tts method")