webscout-8.2.1-py3-none-any.whl → webscout-8.2.3-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of webscout might be problematic.

webscout/Local/llm.py CHANGED
@@ -37,29 +37,75 @@ class LLMInterface:
             raise ValueError(f"Model {model_name} not found. Please download it first.")
         self.llm = None
 
-    def load_model(self, n_gpu_layers: Optional[int] = None, n_ctx: Optional[int] = None, verbose: bool = False) -> None:
+    def load_model(
+        self,
+        n_gpu_layers: Optional[int] = None,
+        n_ctx: Optional[int] = None,
+        verbose: bool = False,
+        n_threads: Optional[int] = None,
+        n_batch: Optional[int] = None,
+        use_mlock: bool = False,
+        use_mmap: bool = True,
+        rope_freq_base: Optional[float] = None,
+        rope_freq_scale: Optional[float] = None,
+        low_vram: bool = False,
+    ) -> None:
         """
         Load the model into memory.
         Args:
             n_gpu_layers (Optional[int]): Number of layers to offload to GPU (-1 for all).
             n_ctx (Optional[int]): Context size.
             verbose (bool): Whether to show verbose output.
+            n_threads (Optional[int]): Number of threads to use.
+            n_batch (Optional[int]): Batch size for prompt processing.
+            use_mlock (bool): Whether to use mlock to keep model in memory.
+            use_mmap (bool): Whether to use memory mapping for the model.
+            rope_freq_base (Optional[float]): RoPE base frequency.
+            rope_freq_scale (Optional[float]): RoPE frequency scaling factor.
+            low_vram (bool): Whether to optimize for low VRAM usage.
         Raises:
             ValueError: If model loading fails.
         """
+        # If model is already loaded, check if we need to reload with different parameters
+        if self.llm is not None:
+            if n_ctx is not None and hasattr(self.llm, 'n_ctx') and self.llm.n_ctx != n_ctx:
+                # Need to reload with new context size
+                self.llm = None
+            else:
+                # Model already loaded with compatible parameters
+                return
+
         if n_gpu_layers is None:
             n_gpu_layers = config.get("default_gpu_layers", -1)
         if n_ctx is None:
             n_ctx = config.get("default_context_length", 4096)
+
+        # Determine number of threads if not specified
+        if n_threads is None:
+            import multiprocessing
+            n_threads = max(1, multiprocessing.cpu_count() // 2)
+
         console.print(f"[bold blue]Loading model {self.model_name}...[/bold blue]")
         try:
             self.llm = Llama(
                 model_path=self.model_path,
                 n_gpu_layers=n_gpu_layers,
                 n_ctx=n_ctx,
-                verbose=verbose
+                verbose=verbose,
+                n_threads=n_threads,
+                n_batch=n_batch or 512,
+                use_mlock=use_mlock,
+                use_mmap=use_mmap,
+                rope_freq_base=rope_freq_base,
+                rope_freq_scale=rope_freq_scale,
+                low_vram=low_vram,
             )
+
             console.print(f"[bold green]Model {self.model_name} loaded successfully[/bold green]")
+            if verbose:
+                console.print(f"[dim]Using {n_threads} threads, context size: {n_ctx}[/dim]")
+                if n_gpu_layers and n_gpu_layers > 0:
+                    console.print(f"[dim]GPU acceleration: {n_gpu_layers} layers offloaded to GPU[/dim]")
         except Exception as e:
             raise ValueError(f"Failed to load model from file: {self.model_path}\n{str(e)}")
 
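The new parameters map directly onto keyword arguments of llama-cpp-python's Llama constructor. A minimal usage sketch, assuming a model named "llama-2-7b" has already been downloaded; the model name and the LLMInterface constructor call are illustrative, not taken from this diff:

# Hypothetical usage of the expanded load_model() signature.
from webscout.Local.llm import LLMInterface

llm = LLMInterface("llama-2-7b")   # assumed: model already downloaded
llm.load_model(
    n_gpu_layers=-1,               # offload every layer to the GPU
    n_ctx=8192,                    # override the 4096 default from config
    n_threads=8,                   # default would be half the CPU cores
    use_mlock=True,                # pin the weights in RAM
)
llm.load_model(n_ctx=8192)         # intended as a no-op: same context size, model kept

Note the reload check only compares n_ctx; changing other parameters (e.g. n_gpu_layers) on an already-loaded model will not trigger a reload.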
@@ -71,6 +117,13 @@ class LLMInterface:
         top_p: float = 0.95,
         stream: bool = False,
         stop: Optional[List[str]] = None,
+        suffix: Optional[str] = None,
+        images: Optional[List[str]] = None,
+        system: Optional[str] = None,
+        template: Optional[str] = None,
+        context: Optional[List[int]] = None,
+        raw: bool = False,
+        format: Optional[Union[str, Dict[str, Any]]] = None,
     ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
         """
         Create a completion for the given prompt.
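create_completion gains several fields resembling Ollama's generate API (suffix, images, system, template, context, raw, format). Only the signature is visible in this hunk, so how each field is consumed is not shown; a hedged call sketch:

# Call sketch only; the body of create_completion is not part of this hunk,
# so whether each field is forwarded to the model is an assumption.
result = llm.create_completion(
    prompt="def fibonacci(n):",    # 'prompt' parameter assumed from the docstring
    max_tokens=128,
    suffix="\n# done",             # text expected to follow the completion
    system="You are a terse Python assistant.",
    raw=False,                     # let the prompt template be applied
    format="json",                 # request structured output
)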
@@ -107,12 +160,14 @@ class LLMInterface:
 
     def create_chat_completion(
         self,
-        messages: List[Dict[str, str]],
+        messages: List[Dict[str, Any]],
         max_tokens: int = 256,
         temperature: float = 0.7,
         top_p: float = 0.95,
         stream: bool = False,
         stop: Optional[List[str]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        format: Optional[Union[str, Dict[str, Any]]] = None,
     ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
         """
         Create a chat completion for the given messages.
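create_chat_completion now accepts tools and format, and messages widens from Dict[str, str] to Dict[str, Any], as needed once entries can carry non-string payloads such as tool calls. A sketch using the common OpenAI-style tool schema; the schema is an assumption, since this diff does not show how tools is interpreted:

# Assumed OpenAI-style tool schema; not confirmed by this hunk.
weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Return the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}
response = llm.create_chat_completion(
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=[weather_tool],
)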
@@ -156,22 +211,26 @@ class LLMInterface:
 
     def stream_chat_completion(
         self,
-        messages: List[Dict[str, str]],
+        messages: List[Dict[str, Any]],
         callback: Callable[[str], None],
         max_tokens: int = 256,
         temperature: float = 0.7,
         top_p: float = 0.95,
         stop: Optional[List[str]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        format: Optional[Union[str, Dict[str, Any]]] = None,
     ) -> None:
         """
         Stream a chat completion with a callback for each token.
         Args:
-            messages (List[Dict[str, str]]): List of chat messages.
+            messages (List[Dict[str, Any]]): List of chat messages.
             callback (Callable[[str], None]): Function to call with each token.
             max_tokens (int): Maximum number of tokens to generate.
             temperature (float): Sampling temperature.
             top_p (float): Top-p sampling.
             stop (Optional[List[str]]): List of strings to stop generation when encountered.
+            tools (Optional[List[Dict[str, Any]]]): List of tools for function calling.
+            format (Optional[Union[str, Dict[str, Any]]]): Format for structured output.
         """
         stream = self.create_chat_completion(
             messages=messages,
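Based on the signature and the delta-handling loop in the next hunk, streaming reduces to supplying a per-token callback:

# Print tokens as they arrive; flush so partial output is visible immediately.
def on_token(token: str) -> None:
    print(token, end="", flush=True)

llm.stream_chat_completion(
    messages=[{"role": "user", "content": "Tell me a short joke."}],
    callback=on_token,
    max_tokens=128,
)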
@@ -186,3 +245,43 @@ class LLMInterface:
             if "delta" in chunk["choices"][0] and "content" in chunk["choices"][0]["delta"]:
                 content = chunk["choices"][0]["delta"]["content"]
                 callback(content)
+
+    def create_embeddings(
+        self,
+        input: Union[str, List[str]],
+        truncate: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Generate embeddings for the given input.
+        Args:
+            input (Union[str, List[str]]): Text or list of texts to generate embeddings for.
+            truncate (bool): Whether to truncate the input to fit within context length.
+        Returns:
+            Dict[str, Any]: Embeddings response.
+        """
+        if self.llm is None:
+            self.load_model()
+
+        # Convert input to list if it's a string
+        if isinstance(input, str):
+            input_texts = [input]
+        else:
+            input_texts = input
+
+        # Generate embeddings for each input text
+        embeddings = []
+        for text in input_texts:
+            # Use llama-cpp-python's embedding method
+            embedding = self.llm.embed(text)
+            embeddings.append(embedding)
+
+        # Create response
+        response = {
+            "model": self.model_name,
+            "embeddings": embeddings,
+            "total_duration": 0,  # Could be improved with actual timing
+            "load_duration": 0,  # Could be improved with actual timing
+            "prompt_eval_count": len(input_texts)
+        }
+
+        return response
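The response dictionary mirrors Ollama's embeddings shape (model, embeddings, durations, prompt_eval_count). A small sketch comparing two texts; numpy is an assumption used only for the similarity math, not a dependency visible in this diff:

import numpy as np                 # assumption: numpy installed for the math

resp = llm.create_embeddings(input=["hello world", "goodbye world"])
a, b = (np.asarray(v, dtype=float) for v in resp["embeddings"])
cosine = float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))
print(f"{resp['prompt_eval_count']} inputs, cosine similarity {cosine:.3f}")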
@@ -203,3 +203,51 @@ class ModelManager:
                 return model_info.get("path")
             return None
         return info["path"]
+
+    def copy_model(self, source_model: str, destination_model: str) -> bool:
+        """
+        Copy a model to a new name.
+        Args:
+            source_model (str): Name of the source model.
+            destination_model (str): Name for the destination model.
+        Returns:
+            bool: True if copied successfully, False otherwise.
+        """
+        # Get source model info
+        source_info = self.get_model_info(source_model)
+        if not source_info or "path" not in source_info:
+            console.print(f"[bold red]Source model {source_model} not found[/bold red]")
+            return False
+
+        # Create destination directory
+        dest_dir = config.get_model_path(destination_model)
+        dest_dir.mkdir(exist_ok=True, parents=True)
+
+        # Copy the model file
+        source_path = Path(source_info["path"])
+        dest_path = dest_dir / source_path.name
+
+        try:
+            console.print(f"[bold blue]Copying model from {source_path} to {dest_path}...[/bold blue]")
+            shutil.copy2(source_path, dest_path)
+
+            # Create info file for the destination model
+            dest_info = source_info.copy()
+            dest_info["name"] = destination_model
+            dest_info["path"] = str(dest_path)
+            dest_info["copied_from"] = source_model
+            dest_info["copied_at"] = datetime.datetime.now().isoformat()
+
+            with open(dest_dir / "info.json", "w") as f:
+                json.dump(dest_info, f, indent=2)
+
+            console.print(f"[bold green]Model copied successfully to {dest_path}[/bold green]")
+            return True
+        except Exception as e:
+            console.print(f"[bold red]Error copying model: {str(e)}[/bold red]")
+            # Clean up if there was an error
+            if dest_path.exists():
+                dest_path.unlink()
+            if dest_dir.exists():
+                shutil.rmtree(dest_dir)
+            return False
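One design note grounded in the code above: on any copy failure the cleanup removes dest_dir wholesale via shutil.rmtree, so copying onto a destination name that already contains files is risky. A usage sketch; the ModelManager import path and no-argument constructor are assumptions:

from webscout.Local.model_manager import ModelManager   # module path assumed

manager = ModelManager()                                 # constructor args assumed
if manager.copy_model("llama-2-7b", "llama-2-7b-backup"):
    print("copied; info.json records copied_from and copied_at")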