webscout 8.2.1-py3-none-any.whl → 8.2.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of webscout has been flagged as possibly problematic by the registry diff service.
- webscout/AIbase.py +144 -7
- webscout/Bard.py +5 -0
- webscout/Extra/tempmail/__init__.py +2 -0
- webscout/Extra/tempmail/base.py +6 -1
- webscout/Extra/tempmail/emailnator.py +84 -0
- webscout/Local/__init__.py +8 -2
- webscout/Local/cli.py +178 -0
- webscout/Local/llm.py +104 -5
- webscout/Local/model_manager.py +48 -0
- webscout/Local/server.py +547 -13
- webscout/Provider/Cloudflare.py +5 -0
- webscout/Provider/Gemini.py +2 -0
- webscout/Provider/OPENAI/e2b.py +159 -1
- webscout/Provider/OPENAI/textpollinations.py +90 -44
- webscout/Provider/OPENAI/toolbaz.py +4 -4
- webscout/Provider/TTS/__init__.py +1 -0
- webscout/Provider/TTS/base.py +159 -0
- webscout/Provider/TTS/deepgram.py +16 -16
- webscout/Provider/TTS/elevenlabs.py +5 -5
- webscout/Provider/TTS/gesserit.py +6 -5
- webscout/Provider/TTS/murfai.py +7 -7
- webscout/Provider/TTS/parler.py +6 -6
- webscout/Provider/TTS/speechma.py +22 -22
- webscout/Provider/TTS/streamElements.py +7 -7
- webscout/Provider/TextPollinationsAI.py +56 -41
- webscout/Provider/toolbaz.py +4 -4
- webscout/version.py +1 -1
- {webscout-8.2.1.dist-info → webscout-8.2.3.dist-info}/METADATA +1 -1
- {webscout-8.2.1.dist-info → webscout-8.2.3.dist-info}/RECORD +33 -31
- {webscout-8.2.1.dist-info → webscout-8.2.3.dist-info}/LICENSE.md +0 -0
- {webscout-8.2.1.dist-info → webscout-8.2.3.dist-info}/WHEEL +0 -0
- {webscout-8.2.1.dist-info → webscout-8.2.3.dist-info}/entry_points.txt +0 -0
- {webscout-8.2.1.dist-info → webscout-8.2.3.dist-info}/top_level.txt +0 -0
webscout/Local/llm.py
CHANGED
@@ -37,29 +37,75 @@ class LLMInterface:
             raise ValueError(f"Model {model_name} not found. Please download it first.")
         self.llm = None
 
-    def load_model(
+    def load_model(
+        self,
+        n_gpu_layers: Optional[int] = None,
+        n_ctx: Optional[int] = None,
+        verbose: bool = False,
+        n_threads: Optional[int] = None,
+        n_batch: Optional[int] = None,
+        use_mlock: bool = False,
+        use_mmap: bool = True,
+        rope_freq_base: Optional[float] = None,
+        rope_freq_scale: Optional[float] = None,
+        low_vram: bool = False,
+    ) -> None:
         """
         Load the model into memory.
         Args:
             n_gpu_layers (Optional[int]): Number of layers to offload to GPU (-1 for all).
             n_ctx (Optional[int]): Context size.
             verbose (bool): Whether to show verbose output.
+            n_threads (Optional[int]): Number of threads to use.
+            n_batch (Optional[int]): Batch size for prompt processing.
+            use_mlock (bool): Whether to use mlock to keep model in memory.
+            use_mmap (bool): Whether to use memory mapping for the model.
+            rope_freq_base (Optional[float]): RoPE base frequency.
+            rope_freq_scale (Optional[float]): RoPE frequency scaling factor.
+            low_vram (bool): Whether to optimize for low VRAM usage.
         Raises:
             ValueError: If model loading fails.
         """
+        # If model is already loaded, check if we need to reload with different parameters
+        if self.llm is not None:
+            if n_ctx is not None and hasattr(self.llm, 'n_ctx') and self.llm.n_ctx != n_ctx:
+                # Need to reload with new context size
+                self.llm = None
+            else:
+                # Model already loaded with compatible parameters
+                return
+
         if n_gpu_layers is None:
             n_gpu_layers = config.get("default_gpu_layers", -1)
         if n_ctx is None:
             n_ctx = config.get("default_context_length", 4096)
+
+        # Determine number of threads if not specified
+        if n_threads is None:
+            import multiprocessing
+            n_threads = max(1, multiprocessing.cpu_count() // 2)
+
         console.print(f"[bold blue]Loading model {self.model_name}...[/bold blue]")
         try:
             self.llm = Llama(
                 model_path=self.model_path,
                 n_gpu_layers=n_gpu_layers,
                 n_ctx=n_ctx,
-                verbose=verbose
+                verbose=verbose,
+                n_threads=n_threads,
+                n_batch=n_batch or 512,
+                use_mlock=use_mlock,
+                use_mmap=use_mmap,
+                rope_freq_base=rope_freq_base,
+                rope_freq_scale=rope_freq_scale,
+                low_vram=low_vram,
             )
+
             console.print(f"[bold green]Model {self.model_name} loaded successfully[/bold green]")
+            if verbose:
+                console.print(f"[dim]Using {n_threads} threads, context size: {n_ctx}[/dim]")
+                if n_gpu_layers and n_gpu_layers > 0:
+                    console.print(f"[dim]GPU acceleration: {n_gpu_layers} layers offloaded to GPU[/dim]")
         except Exception as e:
             raise ValueError(f"Failed to load model from file: {self.model_path}\n{str(e)}")
 
@@ -71,6 +117,13 @@ class LLMInterface:
         top_p: float = 0.95,
         stream: bool = False,
         stop: Optional[List[str]] = None,
+        suffix: Optional[str] = None,
+        images: Optional[List[str]] = None,
+        system: Optional[str] = None,
+        template: Optional[str] = None,
+        context: Optional[List[int]] = None,
+        raw: bool = False,
+        format: Optional[Union[str, Dict[str, Any]]] = None,
     ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
         """
         Create a completion for the given prompt.
@@ -107,12 +160,14 @@ class LLMInterface:
 
     def create_chat_completion(
         self,
-        messages: List[Dict[str,
+        messages: List[Dict[str, Any]],
         max_tokens: int = 256,
         temperature: float = 0.7,
         top_p: float = 0.95,
         stream: bool = False,
         stop: Optional[List[str]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        format: Optional[Union[str, Dict[str, Any]]] = None,
     ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
         """
         Create a chat completion for the given messages.
@@ -156,22 +211,26 @@ class LLMInterface:
 
     def stream_chat_completion(
         self,
-        messages: List[Dict[str,
+        messages: List[Dict[str, Any]],
         callback: Callable[[str], None],
         max_tokens: int = 256,
         temperature: float = 0.7,
         top_p: float = 0.95,
         stop: Optional[List[str]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        format: Optional[Union[str, Dict[str, Any]]] = None,
    ) -> None:
         """
         Stream a chat completion with a callback for each token.
         Args:
-            messages (List[Dict[str,
+            messages (List[Dict[str, Any]]): List of chat messages.
             callback (Callable[[str], None]): Function to call with each token.
             max_tokens (int): Maximum number of tokens to generate.
             temperature (float): Sampling temperature.
             top_p (float): Top-p sampling.
             stop (Optional[List[str]]): List of strings to stop generation when encountered.
+            tools (Optional[List[Dict[str, Any]]]): List of tools for function calling.
+            format (Optional[Union[str, Dict[str, Any]]]): Format for structured output.
         """
         stream = self.create_chat_completion(
             messages=messages,
@@ -186,3 +245,43 @@ class LLMInterface:
             if "delta" in chunk["choices"][0] and "content" in chunk["choices"][0]["delta"]:
                 content = chunk["choices"][0]["delta"]["content"]
                 callback(content)
+
+    def create_embeddings(
+        self,
+        input: Union[str, List[str]],
+        truncate: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Generate embeddings for the given input.
+        Args:
+            input (Union[str, List[str]]): Text or list of texts to generate embeddings for.
+            truncate (bool): Whether to truncate the input to fit within context length.
+        Returns:
+            Dict[str, Any]: Embeddings response.
+        """
+        if self.llm is None:
+            self.load_model()
+
+        # Convert input to list if it's a string
+        if isinstance(input, str):
+            input_texts = [input]
+        else:
+            input_texts = input
+
+        # Generate embeddings for each input text
+        embeddings = []
+        for text in input_texts:
+            # Use llama-cpp-python's embedding method
+            embedding = self.llm.embed(text)
+            embeddings.append(embedding)
+
+        # Create response
+        response = {
+            "model": self.model_name,
+            "embeddings": embeddings,
+            "total_duration": 0,  # Could be improved with actual timing
+            "load_duration": 0,  # Could be improved with actual timing
+            "prompt_eval_count": len(input_texts)
+        }
+
+        return response
webscout/Local/model_manager.py
CHANGED
@@ -203,3 +203,51 @@ class ModelManager:
                 return model_info.get("path")
             return None
         return info["path"]
+
+    def copy_model(self, source_model: str, destination_model: str) -> bool:
+        """
+        Copy a model to a new name.
+        Args:
+            source_model (str): Name of the source model.
+            destination_model (str): Name for the destination model.
+        Returns:
+            bool: True if copied successfully, False otherwise.
+        """
+        # Get source model info
+        source_info = self.get_model_info(source_model)
+        if not source_info or "path" not in source_info:
+            console.print(f"[bold red]Source model {source_model} not found[/bold red]")
+            return False
+
+        # Create destination directory
+        dest_dir = config.get_model_path(destination_model)
+        dest_dir.mkdir(exist_ok=True, parents=True)
+
+        # Copy the model file
+        source_path = Path(source_info["path"])
+        dest_path = dest_dir / source_path.name
+
+        try:
+            console.print(f"[bold blue]Copying model from {source_path} to {dest_path}...[/bold blue]")
+            shutil.copy2(source_path, dest_path)
+
+            # Create info file for the destination model
+            dest_info = source_info.copy()
+            dest_info["name"] = destination_model
+            dest_info["path"] = str(dest_path)
+            dest_info["copied_from"] = source_model
+            dest_info["copied_at"] = datetime.datetime.now().isoformat()
+
+            with open(dest_dir / "info.json", "w") as f:
+                json.dump(dest_info, f, indent=2)
+
+            console.print(f"[bold green]Model copied successfully to {dest_path}[/bold green]")
+            return True
+        except Exception as e:
+            console.print(f"[bold red]Error copying model: {str(e)}[/bold red]")
+            # Clean up if there was an error
+            if dest_path.exists():
+                dest_path.unlink()
+            if dest_dir.exists():
+                shutil.rmtree(dest_dir)
+            return False