webscout 8.1-py3-none-any.whl → 8.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic.
- inferno/__init__.py +6 -0
- inferno/__main__.py +9 -0
- inferno/cli.py +6 -0
- webscout/Local/__init__.py +6 -0
- webscout/Local/__main__.py +9 -0
- webscout/Local/api.py +576 -0
- webscout/Local/cli.py +338 -0
- webscout/Local/config.py +75 -0
- webscout/Local/llm.py +188 -0
- webscout/Local/model_manager.py +205 -0
- webscout/Local/server.py +187 -0
- webscout/Local/utils.py +93 -0
- webscout/Provider/AISEARCH/Perplexity.py +359 -0
- webscout/Provider/AISEARCH/__init__.py +2 -1
- webscout/Provider/AISEARCH/scira_search.py +8 -4
- webscout/Provider/ExaChat.py +18 -8
- webscout/Provider/GithubChat.py +5 -1
- webscout/Provider/Glider.py +4 -2
- webscout/Provider/OPENAI/__init__.py +8 -1
- webscout/Provider/OPENAI/chatgpt.py +549 -0
- webscout/Provider/OPENAI/exachat.py +20 -8
- webscout/Provider/OPENAI/glider.py +3 -1
- webscout/Provider/OPENAI/llmchatco.py +3 -1
- webscout/Provider/OPENAI/opkfc.py +488 -0
- webscout/Provider/OPENAI/scirachat.py +11 -7
- webscout/Provider/OPENAI/standardinput.py +425 -0
- webscout/Provider/OPENAI/textpollinations.py +285 -0
- webscout/Provider/OPENAI/toolbaz.py +405 -0
- webscout/Provider/OPENAI/uncovrAI.py +455 -0
- webscout/Provider/OPENAI/writecream.py +158 -0
- webscout/Provider/StandardInput.py +278 -0
- webscout/Provider/TextPollinationsAI.py +27 -28
- webscout/Provider/Writecream.py +211 -0
- webscout/Provider/WritingMate.py +197 -0
- webscout/Provider/Youchat.py +30 -26
- webscout/Provider/__init__.py +10 -2
- webscout/Provider/koala.py +2 -2
- webscout/Provider/llmchatco.py +5 -0
- webscout/Provider/scira_chat.py +5 -2
- webscout/Provider/scnet.py +187 -0
- webscout/Provider/toolbaz.py +320 -0
- webscout/Provider/uncovr.py +3 -3
- webscout/conversation.py +32 -32
- webscout/version.py +1 -1
- {webscout-8.1.dist-info → webscout-8.2.dist-info}/METADATA +54 -3
- {webscout-8.1.dist-info → webscout-8.2.dist-info}/RECORD +50 -25
- webscout-8.2.dist-info/entry_points.txt +5 -0
- {webscout-8.1.dist-info → webscout-8.2.dist-info}/top_level.txt +1 -0
- webscout-8.1.dist-info/entry_points.txt +0 -3
- {webscout-8.1.dist-info → webscout-8.2.dist-info}/LICENSE.md +0 -0
- {webscout-8.1.dist-info → webscout-8.2.dist-info}/WHEEL +0 -0
webscout/Local/cli.py
ADDED
@@ -0,0 +1,338 @@
"""
Command-line interface for webscout.Local
"""

import typer
from rich.console import Console
from rich.table import Table
from rich.prompt import Prompt
from typing import Optional

from .model_manager import ModelManager
from .llm import LLMInterface
from .server import start_server

app: typer.Typer = typer.Typer(help="webscout.Local - A llama-cpp-python based LLM serving tool")
console: Console = Console()

model_manager: ModelManager = ModelManager()

@app.command("serve")
def run_model(
    model_string: str = typer.Argument(..., help="Model to run (format: 'name', 'repo_id' or 'repo_id:filename')"),
    host: Optional[str] = typer.Option(None, help="Host to bind the server to"),
    port: Optional[int] = typer.Option(None, help="Port to bind the server to"),
) -> None:
    """
    Start a model server (downloads if needed).
    """
    # First check if this is a filename that already exists
    model_path = model_manager.get_model_path(model_string)
    if model_path:
        # This is a filename that exists, find the model name
        for model_info in model_manager.list_models():
            if model_info.get("filename") == model_string or model_info.get("path") == model_path:
                model_name = model_info.get("name")
                break
        else:
            # Fallback to using the string as model name
            model_name = model_string
    else:
        # Parse the model string to see if it's a repo_id:filename format
        repo_id, _ = model_manager.parse_model_string(model_string)
        model_name = repo_id.split("/")[-1] if "/" in repo_id else repo_id

    # Check if model exists, if not try to download it
    if not model_manager.get_model_path(model_name):
        console.print(f"[yellow]Model {model_name} not found locally. Attempting to download...[/yellow]")
        try:
            # We don't need to use the parsed values directly as download_model handles this
            _ = model_manager.parse_model_string(model_string)  # Just to validate the format
            # Download the model
            model_name, _ = model_manager.download_model(model_string)
            console.print(f"[bold green]Model {model_name} downloaded successfully[/bold green]")
        except Exception as e:
            console.print(f"[bold red]Error downloading model: {str(e)}[/bold red]")
            return

    # Try to load the model to verify it works
    try:
        llm = LLMInterface(model_name)
        llm.load_model(verbose=False)
        console.print(f"[bold green]Model {model_name} loaded successfully[/bold green]")
    except Exception as e:
        console.print(f"[bold red]Error loading model: {str(e)}[/bold red]")
        return

    # Start the server
    console.print(f"[bold blue]Starting webscout.Local server with model {model_name}...[/bold blue]")
    start_server(host=host, port=port)

@app.command("pull")
def pull_model(
    model_string: str = typer.Argument(..., help="Model to download (format: 'repo_id' or 'repo_id:filename')"),
) -> None:
    """
    Download a model from Hugging Face without running it.
    """
    try:
        model_name, model_path = model_manager.download_model(model_string)
        console.print(f"[bold green]Model {model_name} downloaded successfully to {model_path}[/bold green]")
    except Exception as e:
        console.print(f"[bold red]Error downloading model: {str(e)}[/bold red]")

@app.command("list")
def list_models() -> None:
    """
    List downloaded models.
    """
    models = model_manager.list_models()

    if not models:
        console.print("[yellow]No models found. Use 'webscout.Local pull' to download a model.[/yellow]")
        return

    table = Table(title="Downloaded Models")
    table.add_column("Name", style="cyan")
    table.add_column("Repository", style="green")
    table.add_column("Filename", style="blue")

    for model in models:
        table.add_row(
            model["name"],
            model.get("repo_id", "Unknown"),
            model.get("filename", "Unknown"),
        )

    console.print(table)

@app.command(name="remove", help="Remove a downloaded model")
def remove_model(
    model_string: str = typer.Argument(..., help="Name or filename of the model to remove"),
    force: bool = typer.Option(False, "--force", "-f", help="Force removal without confirmation"),
) -> None:
    """
    Remove a downloaded model.
    """
    # First check if this is a model name
    model_info = model_manager.get_model_info(model_string)

    # If not found by name, check if it's a filename
    if not model_info:
        for info in model_manager.list_models():
            if info.get("filename") == model_string:
                model_info = info
                model_string = info["name"]
                break

    if not model_info:
        console.print(f"[yellow]Model {model_string} not found.[/yellow]")
        return

    if not force:
        confirm = Prompt.ask(
            f"Are you sure you want to remove model {model_string}?",
            choices=["y", "n"],
            default="n",
        )

        if confirm.lower() != "y":
            console.print("[yellow]Operation cancelled.[/yellow]")
            return

    if model_manager.remove_model(model_string):
        console.print(f"[bold green]Model {model_string} removed successfully[/bold green]")
    else:
        console.print(f"[bold red]Error removing model {model_string}[/bold red]")

@app.command("run")
def chat(
    model_string: str = typer.Argument(..., help="Name or filename of the model to chat with"),
) -> None:
    """
    Interactive chat with a model.
    """
    # First check if this is a filename that already exists
    model_path = model_manager.get_model_path(model_string)
    if model_path:
        # This is a filename that exists, find the model name
        for model_info in model_manager.list_models():
            if model_info.get("filename") == model_string or model_info.get("path") == model_path:
                model_name = model_info.get("name")
                break
        else:
            # Fallback to using the string as model name
            model_name = model_string
    else:
        # Use the string as model name
        model_name = model_string

    # Check if model exists, if not try to download it
    if not model_manager.get_model_path(model_name):
        console.print(f"[yellow]Model {model_name} not found locally. Attempting to download...[/yellow]")
        try:
            # Parse the model string to see if it's a repo_id:filename format
            # We don't need to use the parsed values directly as download_model handles this
            _ = model_manager.parse_model_string(model_string)  # Just to validate the format
            # Download the model
            model_name, _ = model_manager.download_model(model_string)
            console.print(f"[bold green]Model {model_name} downloaded successfully[/bold green]")
        except Exception as e:
            console.print(f"[bold red]Error downloading model: {str(e)}[/bold red]")
            return

    # Load the model
    try:
        llm = LLMInterface(model_name)
        llm.load_model(verbose=False)
    except Exception as e:
        console.print(f"[bold red]Error loading model: {str(e)}[/bold red]")
        return

    console.print(f"[bold green]Chat with {model_name}. Type '/help' for available commands or '/bye' to exit.[/bold green]")

    # Chat history
    messages = []
    system_prompt = None

    # Initialize with empty system prompt
    messages.append({"role": "system", "content": ""})

    # Define help text
    help_text = """
    Available commands:
    /help or /? - Show this help message
    /bye - Exit the chat
    /set system <prompt> - Set the system prompt
    /set context <size> - Set context window size (default: 4096)
    /clear or /cls - Clear the terminal screen
    /reset - Reset all settings
    """

    while True:
        # Get user input
        user_input = input("\n> ")

        # Handle commands
        if user_input.startswith("/"):
            cmd_parts = user_input.split(maxsplit=2)
            cmd = cmd_parts[0].lower()

            if cmd == "/bye" or user_input.lower() in ["exit", "quit"]:
                console.print("[yellow]Goodbye![/yellow]")
                break

            elif cmd == "/help" or cmd == "/?":
                console.print(help_text)
                continue

            elif cmd == "/clear" or cmd == "/cls":
                # Do not clear history, just clear the terminal screen
                import os
                os.system('cls' if os.name == 'nt' else 'clear')
                console.print(f"[bold green]Chat with {model_name}. Type '/help' for available commands or '/bye' to exit.[/bold green]")
                console.print("[yellow]Screen cleared. Chat history preserved.[/yellow]")
                continue

            elif cmd == "/reset":
                messages = [{"role": "system", "content": ""}]
                system_prompt = None
                console.print("[yellow]All settings reset.[/yellow]")
                continue

            elif cmd == "/set" and len(cmd_parts) >= 2:
                if len(cmd_parts) < 3:
                    console.print("[red]Error: Missing value for setting[/red]")
                    continue

                setting = cmd_parts[1].lower()
                value = cmd_parts[2]

                if setting == "system":
                    # Remove quotes if present
                    if value.startswith('"') and value.endswith('"'):
                        value = value[1:-1]

                    system_prompt = value
                    # Update system message
                    if messages and messages[0].get("role") == "system":
                        messages[0]["content"] = system_prompt
                    else:
                        # Clear messages and add system prompt
                        messages = [{"role": "system", "content": system_prompt}]

                    # Print confirmation that it's been applied
                    console.print(f"[yellow]System prompt set to:[/yellow]")
                    console.print(f"[cyan]\"{system_prompt}\"[/cyan]")
                    console.print(f"[green]System prompt applied. Next responses will follow this instruction.[/green]")

                    # Force a test message to ensure the system prompt is applied
                    test_messages = messages.copy()
                    test_messages.append({"role": "user", "content": "Say 'System prompt active'."})

                    # Test if the system prompt is working
                    console.print("[dim]Testing system prompt...[/dim]")
                    response = llm.create_chat_completion(
                        messages=test_messages,
                        stream=False,
                        max_tokens=20
                    )
                    console.print("[dim]System prompt test complete.[/dim]")
                elif setting == "context":
                    try:
                        context_size = int(value)
                        # Reload the model with new context size
                        console.print(f"[yellow]Reloading model with context size: {context_size}...[/yellow]")
                        llm.load_model(n_ctx=context_size, verbose=False)
                        console.print(f"[green]Context size set to: {context_size}[/green]")
                    except ValueError:
                        console.print(f"[red]Invalid context size: {value}. Must be an integer.[/red]")
                else:
                    console.print(f"[red]Unknown setting: {setting}[/red]")
                continue
            else:
                console.print(f"[red]Unknown command: {cmd}[/red]")
                continue

        # Add user message to history
        messages.append({"role": "user", "content": user_input})

        # Generate response
        console.print("\n")  # Add extra spacing between user input and response

        # Use a buffer to collect the response
        response_buffer = ""

        def print_token(token):
            nonlocal response_buffer
            response_buffer += token
            console.print(token, end="", highlight=False)

        llm.stream_chat_completion(
            messages=messages,
            callback=print_token,
        )

        # Get the full response to add to history
        response = llm.create_chat_completion(
            messages=messages,
            stream=False,
        )

        assistant_message = response["choices"][0]["message"]["content"]
        messages.append({"role": "assistant", "content": assistant_message})

        # Add extra spacing after the response
        console.print("")

@app.command("version")
def version() -> None:
    """
    Show version information.
    """
    from webscout.Local import __version__
    console.print(f"[bold]webscout.Local[/bold] version [cyan]{__version__}[/cyan]")
    console.print("A llama-cpp-python based LLM serving tool")

if __name__ == "__main__":
    app()
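For orientation, here is a minimal sketch (not part of the diff) of exercising the new Typer CLI in-process, assuming webscout 8.2 is installed along with its typer and llama-cpp-python dependencies. It uses typer.testing.CliRunner; the commented-out pull argument is a placeholder, not a real model string.

# Not part of the diff: drive the new CLI programmatically via Typer's test runner.
from typer.testing import CliRunner

from webscout.Local.cli import app

runner = CliRunner()

# "list" only inspects the local models directory, so it is safe to run anywhere.
result = runner.invoke(app, ["list"])
print(result.output)

# "pull" would download a GGUF file from Hugging Face, e.g. (placeholder model string):
# runner.invoke(app, ["pull", "<repo_id>:<filename>.gguf"])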
webscout/Local/config.py
ADDED
@@ -0,0 +1,75 @@
"""
Configuration management for webscout
"""

import os
import json
from pathlib import Path
from typing import Dict, Any, Optional, List

# Default configuration
default_config: Dict[str, Any] = {
    "models_dir": "~/.webscout/models",
    "api_host": "127.0.0.1",
    "api_port": 8000,
    "default_context_length": 4096,
    "default_gpu_layers": -1,  # -1 means use all available GPU layers
}

class Config:
    """
    Configuration manager for webscout.
    Handles loading, saving, and accessing configuration values.
    """
    config_dir: Path
    config_file: Path
    models_dir: Path
    config: Dict[str, Any]

    def __init__(self) -> None:
        self.config_dir = Path(os.path.expanduser("~/.webscout"))
        self.config_file = self.config_dir / "config.json"
        self.models_dir = Path(os.path.expanduser(default_config["models_dir"]))
        self._ensure_dirs()
        self._load_config()

    def _ensure_dirs(self) -> None:
        """Ensure configuration and models directories exist."""
        self.config_dir.mkdir(exist_ok=True, parents=True)
        self.models_dir.mkdir(exist_ok=True, parents=True)

    def _load_config(self) -> None:
        """Load configuration from file or create default."""
        if not self.config_file.exists():
            self._save_config(default_config)
            self.config = default_config.copy()
        else:
            with open(self.config_file, "r") as f:
                self.config = json.load(f)

    def _save_config(self, config: Dict[str, Any]) -> None:
        """Save configuration to file."""
        with open(self.config_file, "w") as f:
            json.dump(config, f, indent=2)

    def get(self, key: str, default: Any = None) -> Any:
        """Get configuration value by key."""
        return self.config.get(key, default)

    def set(self, key: str, value: Any) -> None:
        """Set configuration value by key."""
        self.config[key] = value
        self._save_config(self.config)

    def get_model_path(self, model_name: str) -> Path:
        """Get the path to a model directory by model name."""
        return self.models_dir / model_name

    def list_models(self) -> List[str]:
        """List all downloaded model names."""
        if not self.models_dir.exists():
            return []
        return [d.name for d in self.models_dir.iterdir() if d.is_dir() and ":" not in d.name]

# Global configuration instance
config: Config = Config()
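The module creates a single Config instance at import time. A short usage sketch (not part of the diff), with values matching the defaults in default_config above:

# Not part of the diff: using the module-level Config instance created above.
from webscout.Local.config import config

print(config.get("api_host"))                # "127.0.0.1" unless overridden
print(config.get("default_context_length"))  # 4096 by default

# set() persists the updated value to ~/.webscout/config.json immediately.
config.set("api_port", 8080)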
webscout/Local/llm.py
ADDED
@@ -0,0 +1,188 @@
"""
LLM interface for webscout.Local using llama-cpp-python
"""

from typing import Dict, Any, List, Optional, Union, Generator, Callable

from llama_cpp import Llama
from rich.console import Console

from .config import config
from .model_manager import ModelManager

console = Console()

class LLMInterface:
    """
    Interface for LLM models using llama-cpp-python.
    Provides methods for loading models and generating completions or chat responses.
    """
    model_name: str
    model_manager: ModelManager
    model_path: Optional[str]
    llm: Optional[Llama]

    def __init__(self, model_name: str) -> None:
        """
        Initialize the LLM interface.
        Args:
            model_name (str): Name of the model to load.
        Raises:
            ValueError: If the model is not found locally.
        """
        self.model_name = model_name
        self.model_manager = ModelManager()
        self.model_path = self.model_manager.get_model_path(model_name)
        if not self.model_path:
            raise ValueError(f"Model {model_name} not found. Please download it first.")
        self.llm = None

    def load_model(self, n_gpu_layers: Optional[int] = None, n_ctx: Optional[int] = None, verbose: bool = False) -> None:
        """
        Load the model into memory.
        Args:
            n_gpu_layers (Optional[int]): Number of layers to offload to GPU (-1 for all).
            n_ctx (Optional[int]): Context size.
            verbose (bool): Whether to show verbose output.
        Raises:
            ValueError: If model loading fails.
        """
        if n_gpu_layers is None:
            n_gpu_layers = config.get("default_gpu_layers", -1)
        if n_ctx is None:
            n_ctx = config.get("default_context_length", 4096)
        console.print(f"[bold blue]Loading model {self.model_name}...[/bold blue]")
        try:
            self.llm = Llama(
                model_path=self.model_path,
                n_gpu_layers=n_gpu_layers,
                n_ctx=n_ctx,
                verbose=verbose
            )
            console.print(f"[bold green]Model {self.model_name} loaded successfully[/bold green]")
        except Exception as e:
            raise ValueError(f"Failed to load model from file: {self.model_path}\n{str(e)}")

    def create_completion(
        self,
        prompt: str,
        max_tokens: int = 256,
        temperature: float = 0.7,
        top_p: float = 0.95,
        stream: bool = False,
        stop: Optional[List[str]] = None,
    ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
        """
        Create a completion for the given prompt.
        Args:
            prompt (str): The prompt to complete.
            max_tokens (int): Maximum number of tokens to generate.
            temperature (float): Sampling temperature.
            top_p (float): Top-p sampling.
            stream (bool): Whether to stream the response.
            stop (Optional[List[str]]): List of strings to stop generation when encountered.
        Returns:
            Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]: Completion result or generator for streaming.
        """
        if self.llm is None:
            self.load_model()
        if stream:
            return self.llm.create_completion(
                prompt=prompt,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                stream=True,
                stop=stop or [],
            )
        else:
            return self.llm.create_completion(
                prompt=prompt,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                stream=False,
                stop=stop or [],
            )

    def create_chat_completion(
        self,
        messages: List[Dict[str, str]],
        max_tokens: int = 256,
        temperature: float = 0.7,
        top_p: float = 0.95,
        stream: bool = False,
        stop: Optional[List[str]] = None,
    ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
        """
        Create a chat completion for the given messages.
        Args:
            messages (List[Dict[str, str]]): List of chat messages.
            max_tokens (int): Maximum number of tokens to generate.
            temperature (float): Sampling temperature.
            top_p (float): Top-p sampling.
            stream (bool): Whether to stream the response.
            stop (Optional[List[str]]): List of strings to stop generation when encountered.
        Returns:
            Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]: Chat completion result or generator for streaming.
        """
        if self.llm is None:
            self.load_model()
        processed_messages: List[Dict[str, str]] = messages.copy()
        system_messages = [m for m in processed_messages if m.get("role") == "system"]
        non_system_messages = [m for m in processed_messages if m.get("role") != "system"]
        if system_messages:
            processed_messages = [system_messages[0]] + non_system_messages
        else:
            processed_messages = non_system_messages
        if stream:
            return self.llm.create_chat_completion(
                messages=processed_messages,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                stream=True,
                stop=stop or [],
            )
        else:
            return self.llm.create_chat_completion(
                messages=processed_messages,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                stream=False,
                stop=stop or [],
            )

    def stream_chat_completion(
        self,
        messages: List[Dict[str, str]],
        callback: Callable[[str], None],
        max_tokens: int = 256,
        temperature: float = 0.7,
        top_p: float = 0.95,
        stop: Optional[List[str]] = None,
    ) -> None:
        """
        Stream a chat completion with a callback for each token.
        Args:
            messages (List[Dict[str, str]]): List of chat messages.
            callback (Callable[[str], None]): Function to call with each token.
            max_tokens (int): Maximum number of tokens to generate.
            temperature (float): Sampling temperature.
            top_p (float): Top-p sampling.
            stop (Optional[List[str]]): List of strings to stop generation when encountered.
        """
        stream = self.create_chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
            stop=stop,
        )
        for chunk in stream:
            if "choices" in chunk and len(chunk["choices"]) > 0:
                if "delta" in chunk["choices"][0] and "content" in chunk["choices"][0]["delta"]:
                    content = chunk["choices"][0]["delta"]["content"]
                    callback(content)
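For context, a minimal sketch (not part of the diff) of the LLMInterface call pattern that cli.py's "run" command relies on, assuming a GGUF model has already been pulled; the model name below is purely illustrative.

# Not part of the diff: illustrative use of LLMInterface with an already-downloaded model.
from webscout.Local.llm import LLMInterface

llm = LLMInterface("qwen2.5-0.5b-instruct")  # hypothetical model name
llm.load_model(n_ctx=2048, verbose=False)

# Stream tokens to stdout via the callback, mirroring the "run" command in cli.py.
llm.stream_chat_completion(
    messages=[
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Say hello in one sentence."},
    ],
    callback=lambda token: print(token, end="", flush=True),
)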