webscout 8.2.1__py3-none-any.whl → 8.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webscout/Bard.py +5 -0
- webscout/Extra/tempmail/__init__.py +2 -0
- webscout/Extra/tempmail/base.py +6 -1
- webscout/Extra/tempmail/emailnator.py +84 -0
- webscout/Local/__init__.py +8 -2
- webscout/Local/cli.py +178 -0
- webscout/Local/llm.py +104 -5
- webscout/Local/model_manager.py +48 -0
- webscout/Local/server.py +547 -13
- webscout/Provider/Gemini.py +2 -0
- webscout/Provider/OPENAI/e2b.py +159 -1
- webscout/version.py +1 -1
- {webscout-8.2.1.dist-info → webscout-8.2.2.dist-info}/METADATA +1 -1
- {webscout-8.2.1.dist-info → webscout-8.2.2.dist-info}/RECORD +18 -17
- {webscout-8.2.1.dist-info → webscout-8.2.2.dist-info}/LICENSE.md +0 -0
- {webscout-8.2.1.dist-info → webscout-8.2.2.dist-info}/WHEEL +0 -0
- {webscout-8.2.1.dist-info → webscout-8.2.2.dist-info}/entry_points.txt +0 -0
- {webscout-8.2.1.dist-info → webscout-8.2.2.dist-info}/top_level.txt +0 -0
webscout/Local/server.py
CHANGED
|
@@ -4,12 +4,15 @@ API server with OpenAI compatibility
|
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
6
|
import time
|
|
7
|
-
|
|
7
|
+
import os
|
|
8
|
+
import datetime
|
|
9
|
+
from typing import Dict, Any, List, Optional, AsyncGenerator, Union
|
|
8
10
|
|
|
9
11
|
import uvicorn
|
|
10
|
-
from fastapi import FastAPI, HTTPException
|
|
12
|
+
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
|
11
13
|
from fastapi.responses import StreamingResponse
|
|
12
|
-
from
|
|
14
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
15
|
+
from pydantic import BaseModel, Field
|
|
13
16
|
|
|
14
17
|
from .config import config
|
|
15
18
|
from .model_manager import ModelManager
|
|
@@ -17,16 +20,55 @@ from .llm import LLMInterface
|
|
|
17
20
|
|
|
18
21
|
# FastAPI application object that every route in this module is registered on.
app = FastAPI(
    title="webscout.local API",
    description="OpenAI-compatible API for webscout.local",
)

# Cross-origin access is left fully open: any origin, any method, any header.
# NOTE(review): a wildcard origin combined with allow_credentials=True is a very
# permissive setup - confirm it is intended before exposing the server beyond
# localhost.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Models
# Shared manager the routes use to list, inspect, copy, remove and download models.
model_manager: ModelManager = ModelManager()
# Cache of model interfaces that have already been loaded, keyed by model name.
loaded_models: Dict[str, LLMInterface] = {}
|
|
23
35
|
|
|
36
|
+
class Image(BaseModel):
    """Image payload for multimodal models; both fields are optional."""

    # Location of the image when it is supplied by reference.
    url: Optional[str] = None
    # Inline image content as a string (presumably base64 - TODO confirm).
    data: Optional[str] = None
|
|
42
|
+
|
|
43
|
+
class ToolFunction(BaseModel):
    """Description of a callable function offered to the model."""

    # Identifier of the function.
    name: str
    # Optional human-readable explanation of what the function does.
    description: Optional[str] = None
    # Free-form parameter specification; each instance gets its own empty dict.
    parameters: Dict[str, Any] = Field(default_factory=dict)
|
|
50
|
+
|
|
51
|
+
class Tool(BaseModel):
    """A tool entry for function calling, wrapping one ToolFunction."""

    # Kind of tool; defaults to "function".
    type: str = "function"
    # The function this tool exposes.
    function: ToolFunction
|
|
57
|
+
|
|
58
|
+
class ToolCall(BaseModel):
    """A tool invocation attached to a chat message."""

    # Untyped description of the call (schema not enforced here).
    function: Dict[str, Any]
|
|
63
|
+
|
|
24
64
|
class ChatMessage(BaseModel):
    """One message of a conversation sent to the chat completion endpoint."""

    # Author of the message, as a free-form string.
    role: str
    # Plain text, a list of structured parts, or None; defaults to "".
    content: Union[str, List[Dict[str, Any]], None] = ""
    # Optional images accompanying the message, each given as a string.
    images: Optional[List[str]] = None
    # Optional tool calls carried by the message.
    tool_calls: Optional[List[ToolCall]] = None
|
|
30
72
|
|
|
31
73
|
class ChatCompletionRequest(BaseModel):
|
|
32
74
|
"""
|
|
@@ -39,6 +81,10 @@ class ChatCompletionRequest(BaseModel):
|
|
|
39
81
|
max_tokens: int = 256
|
|
40
82
|
stream: bool = False
|
|
41
83
|
stop: Optional[List[str]] = None
|
|
84
|
+
tools: Optional[List[Tool]] = None
|
|
85
|
+
format: Optional[Union[str, Dict[str, Any]]] = None
|
|
86
|
+
options: Optional[Dict[str, Any]] = None
|
|
87
|
+
keep_alive: Optional[str] = "5m"
|
|
42
88
|
|
|
43
89
|
class CompletionRequest(BaseModel):
|
|
44
90
|
"""
|
|
@@ -46,11 +92,20 @@ class CompletionRequest(BaseModel):
|
|
|
46
92
|
"""
|
|
47
93
|
model: str
|
|
48
94
|
prompt: str
|
|
95
|
+
suffix: Optional[str] = None
|
|
49
96
|
temperature: float = 0.7
|
|
50
97
|
top_p: float = 0.95
|
|
51
98
|
max_tokens: int = 256
|
|
52
99
|
stream: bool = False
|
|
53
100
|
stop: Optional[List[str]] = None
|
|
101
|
+
images: Optional[List[str]] = None
|
|
102
|
+
format: Optional[Union[str, Dict[str, Any]]] = None
|
|
103
|
+
options: Optional[Dict[str, Any]] = None
|
|
104
|
+
system: Optional[str] = None
|
|
105
|
+
template: Optional[str] = None
|
|
106
|
+
context: Optional[List[int]] = None
|
|
107
|
+
raw: bool = False
|
|
108
|
+
keep_alive: Optional[str] = "5m"
|
|
54
109
|
|
|
55
110
|
class ModelInfo(BaseModel):
|
|
56
111
|
"""
|
|
@@ -68,11 +123,127 @@ class ModelList(BaseModel):
|
|
|
68
123
|
object: str = "list"
|
|
69
124
|
data: List[ModelInfo]
|
|
70
125
|
|
|
71
|
-
|
|
126
|
+
class EmbeddingRequest(BaseModel):
    """Body of an embedding-generation request."""

    # Name of the model to embed with.
    model: str
    # A single text or a batch of texts.
    input: Union[str, List[str]]
    # Forwarded to the model's create_embeddings call; enabled by default.
    truncate: bool = True
    # Optional model-loading options.
    options: Optional[Dict[str, Any]] = None
    # Duration string controlling how long the model stays loaded.
    keep_alive: Optional[str] = "5m"
|
|
135
|
+
|
|
136
|
+
class CopyRequest(BaseModel):
    """Body of a model-copy request."""

    # Name of the existing model to copy from.
    source: str
    # Name to give the copy.
    destination: str
|
|
142
|
+
|
|
143
|
+
class DeleteRequest(BaseModel):
    """Body of a model-deletion request."""

    # Name of the model to delete.
    model: str
|
|
148
|
+
|
|
149
|
+
class ShowRequest(BaseModel):
    """Body of a request for information about one model."""

    # Name of the model to describe.
    model: str
    # When true, the response also includes the extended "model_info" section.
    verbose: bool = False
|
|
155
|
+
|
|
156
|
+
class PullRequest(BaseModel):
    """Body of a model-download (pull) request."""

    # Name of the model to download.
    model: str
    # Accepted in the request body; defaults to False.
    insecure: bool = False
    # Stream status lines (default) instead of returning a single result.
    stream: bool = True
|
|
163
|
+
|
|
164
|
+
class ModelDetails(BaseModel):
    """Descriptive metadata about a model file."""

    # File format; defaults to "gguf".
    format: str = "gguf"
    # Model family; defaults to "llama".
    family: str = "llama"
    # Optional list of families the model belongs to.
    families: Optional[List[str]] = None
    # Optional parameter-count label.
    parameter_size: Optional[str] = None
    # Optional quantization label.
    quantization_level: Optional[str] = None
|
|
173
|
+
|
|
174
|
+
class ModelResponse(BaseModel):
    """One entry in the list of available models."""

    # Model name.
    name: str
    # Timestamp string for the model.
    modified_at: str
    # Size as an integer (the listing route fills it with the file size in bytes).
    size: int
    # Optional content digest.
    digest: Optional[str] = None
    # Optional descriptive metadata.
    details: Optional[ModelDetails] = None
|
|
183
|
+
|
|
184
|
+
class ModelsResponse(BaseModel):
    """Envelope returned when listing available models."""

    # Every available model, one entry each.
    models: List[ModelResponse]
|
|
189
|
+
|
|
190
|
+
class RunningModel(BaseModel):
    """One entry in the list of currently loaded models."""

    # Display name of the model.
    name: str
    # Model identifier (the listing route sets it to the same value as name).
    model: str
    # Size as an integer (the listing route fills it with the file size in bytes).
    size: int
    # Optional content digest.
    digest: Optional[str] = None
    # Optional descriptive metadata.
    details: Optional[ModelDetails] = None
    # Optional expiry timestamp string.
    expires_at: Optional[str] = None
    # Optional VRAM usage figure.
    size_vram: Optional[int] = None
|
|
201
|
+
|
|
202
|
+
class RunningModelsResponse(BaseModel):
    """Envelope returned when listing currently loaded models."""

    # Every loaded model, one entry each.
    models: List[RunningModel]
|
|
207
|
+
|
|
208
|
+
class VersionResponse(BaseModel):
    """Envelope returned by the version endpoint."""

    # Version string of the package.
    version: str
|
|
213
|
+
|
|
214
|
+
def parse_keep_alive(keep_alive: Optional[Union[str, int, float]]) -> int:
    """
    Parse the keep_alive parameter to seconds.

    Accepted forms are a bare integer string ("90"), or an integer followed by
    one of the units "ms", "s", "m" or "h" ("500ms", "30s", "5m", "2h").
    A plain number is also accepted and taken as seconds. Anything that cannot
    be parsed falls back to the default instead of raising.

    Args:
        keep_alive (Optional[Union[str, int, float]]): Keep alive duration.
    Returns:
        int: Keep alive duration in seconds (300 when missing or malformed).
            Millisecond values are floored to whole seconds.
    """
    default_seconds = 300  # 5 minutes

    # Plain numbers are seconds. bool is excluded: it is an int subclass but
    # True/False is not a meaningful duration.
    if isinstance(keep_alive, (int, float)) and not isinstance(keep_alive, bool):
        try:
            return int(keep_alive)
        except (ValueError, OverflowError):  # NaN / infinity
            return default_seconds

    if not keep_alive or not isinstance(keep_alive, str):
        return default_seconds

    text = keep_alive.strip()
    try:
        # "ms" has to be tested before "s" and "m", since it ends with "s".
        if text.endswith("ms"):
            return int(text[:-2]) // 1000
        for suffix, factor in (("s", 1), ("m", 60), ("h", 3600)):
            if text.endswith(suffix):
                return int(text[:-1]) * factor
        return int(text)
    except ValueError:
        # Covers an empty or non-numeric amount with or without a unit.
        return default_seconds
|
|
240
|
+
|
|
241
|
+
def get_model(model_name: str, options: Optional[Dict[str, Any]] = None) -> LLMInterface:
|
|
72
242
|
"""
|
|
73
243
|
Get or load a model by name, using a cache for efficiency.
|
|
74
244
|
Args:
|
|
75
245
|
model_name (str): Name of the model to load.
|
|
246
|
+
options (Optional[Dict[str, Any]]): Additional options for loading the model.
|
|
76
247
|
Returns:
|
|
77
248
|
LLMInterface: Loaded model interface.
|
|
78
249
|
Raises:
|
|
@@ -81,11 +252,33 @@ def get_model(model_name: str) -> LLMInterface:
|
|
|
81
252
|
if model_name not in loaded_models:
|
|
82
253
|
try:
|
|
83
254
|
loaded_models[model_name] = LLMInterface(model_name)
|
|
84
|
-
|
|
255
|
+
|
|
256
|
+
# Extract options if provided
|
|
257
|
+
n_gpu_layers = options.get("n_gpu_layers", None) if options else None
|
|
258
|
+
n_ctx = options.get("num_ctx", None) if options else None
|
|
259
|
+
verbose = options.get("verbose", False) if options else False
|
|
260
|
+
|
|
261
|
+
loaded_models[model_name].load_model(
|
|
262
|
+
n_gpu_layers=n_gpu_layers,
|
|
263
|
+
n_ctx=n_ctx,
|
|
264
|
+
verbose=verbose
|
|
265
|
+
)
|
|
85
266
|
except Exception as e:
|
|
86
267
|
raise HTTPException(status_code=404, detail=f"Model {model_name} not found: {str(e)}")
|
|
87
268
|
return loaded_models[model_name]
|
|
88
269
|
|
|
270
|
+
def unload_model(model_name: str) -> None:
    """
    Drop a model from the loaded-model cache.

    Does nothing when the name is not currently cached.

    Args:
        model_name (str): Name of the model to unload.
    """
    if model_name not in loaded_models:
        return

    cached = loaded_models[model_name]
    # Clear the reference to the underlying model object first ...
    cached.llm = None
    # ... then forget the cache entry itself.
    loaded_models.pop(model_name)
|
|
281
|
+
|
|
89
282
|
@app.get("/v1/models", response_model=ModelList)
|
|
90
283
|
async def list_models() -> ModelList:
|
|
91
284
|
"""
|
|
@@ -105,51 +298,135 @@ async def list_models() -> ModelList:
|
|
|
105
298
|
return ModelList(object="list", data=model_list)
|
|
106
299
|
|
|
107
300
|
@app.post("/v1/chat/completions")
async def create_chat_completion(request: ChatCompletionRequest, background_tasks: BackgroundTasks) -> Any:
    """
    Create a chat completion, either streamed or as a single response.

    An empty message list only loads the model and returns a short
    status document.

    Args:
        request (ChatCompletionRequest): Chat completion request.
        background_tasks (BackgroundTasks): Used to schedule model unloading.
    Returns:
        StreamingResponse or dict: Streaming or regular response.
    """
    # A keep_alive that parses to 0 seconds means "unload after this request".
    unload_after_request = parse_keep_alive(request.keep_alive) == 0

    # No messages: load the model, report what happened, and stop.
    if not request.messages:
        get_model(request.model, request.options)
        if unload_after_request:
            background_tasks.add_task(unload_model, request.model)
        return {
            "model": request.model,
            "created_at": datetime.datetime.now().isoformat(),
            "message": {
                "role": "assistant",
                "content": ""
            },
            "done_reason": "unload" if unload_after_request else "load",
            "done": True
        }

    def to_payload(msg: ChatMessage) -> Dict[str, Any]:
        """Convert one request message into the plain dict passed to the model."""
        payload = {"role": msg.role, "content": msg.content}
        if msg.images:
            payload["images"] = msg.images
        if msg.tool_calls:
            payload["tool_calls"] = [call.model_dump() for call in msg.tool_calls]
        return payload

    processed_messages = [to_payload(msg) for msg in request.messages]

    model = get_model(request.model, request.options)

    # Arguments common to the streaming and non-streaming calls.
    # NOTE(review): request.tools is forwarded as pydantic Tool objects while
    # tool_calls above are converted to dicts - confirm which form the model
    # interface expects.
    shared_kwargs = {
        "messages": processed_messages,
        "max_tokens": request.max_tokens,
        "temperature": request.temperature,
        "top_p": request.top_p,
        "stop": request.stop,
        "tools": request.tools,
        "format": request.format,
    }

    if request.stream:
        async def generate() -> AsyncGenerator[str, None]:
            # The model call is made lazily, once the response starts streaming.
            stream = model.create_chat_completion(stream=True, **shared_kwargs)
            for chunk in stream:
                yield f"data: {json.dumps(chunk)}\n\n"
            yield "data: [DONE]\n\n"

        if unload_after_request:
            background_tasks.add_task(unload_model, request.model)

        return StreamingResponse(generate(), media_type="text/event-stream")

    response = model.create_chat_completion(stream=False, **shared_kwargs)

    if unload_after_request:
        background_tasks.add_task(unload_model, request.model)

    return response
|
|
142
393
|
|
|
143
394
|
@app.post("/v1/completions")
|
|
144
|
-
async def create_completion(request: CompletionRequest) -> Any:
|
|
395
|
+
async def create_completion(request: CompletionRequest, background_tasks: BackgroundTasks) -> Any:
|
|
145
396
|
"""
|
|
146
397
|
Create a text completion.
|
|
147
398
|
Args:
|
|
148
399
|
request (CompletionRequest): Completion request.
|
|
400
|
+
background_tasks (BackgroundTasks): Background tasks for model unloading.
|
|
149
401
|
Returns:
|
|
150
402
|
StreamingResponse or dict: Streaming or regular response.
|
|
151
403
|
"""
|
|
152
|
-
|
|
404
|
+
# Parse keep_alive parameter
|
|
405
|
+
keep_alive_seconds = parse_keep_alive(request.keep_alive)
|
|
406
|
+
|
|
407
|
+
# If prompt is empty, just load the model and return
|
|
408
|
+
if not request.prompt:
|
|
409
|
+
model = get_model(request.model, request.options)
|
|
410
|
+
|
|
411
|
+
# Schedule unloading if keep_alive is 0
|
|
412
|
+
if keep_alive_seconds == 0:
|
|
413
|
+
background_tasks.add_task(unload_model, request.model)
|
|
414
|
+
done_reason = "unload"
|
|
415
|
+
else:
|
|
416
|
+
done_reason = "load"
|
|
417
|
+
|
|
418
|
+
return {
|
|
419
|
+
"model": request.model,
|
|
420
|
+
"created_at": datetime.datetime.now().isoformat(),
|
|
421
|
+
"response": "",
|
|
422
|
+
"done": True,
|
|
423
|
+
"done_reason": done_reason
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
# Get or load the model
|
|
427
|
+
model = get_model(request.model, request.options)
|
|
428
|
+
|
|
429
|
+
# Handle streaming response
|
|
153
430
|
if request.stream:
|
|
154
431
|
async def generate() -> AsyncGenerator[str, None]:
|
|
155
432
|
stream = model.create_completion(
|
|
@@ -159,12 +436,25 @@ async def create_completion(request: CompletionRequest) -> Any:
|
|
|
159
436
|
top_p=request.top_p,
|
|
160
437
|
stream=True,
|
|
161
438
|
stop=request.stop,
|
|
439
|
+
suffix=request.suffix,
|
|
440
|
+
images=request.images,
|
|
441
|
+
system=request.system,
|
|
442
|
+
template=request.template,
|
|
443
|
+
context=request.context,
|
|
444
|
+
raw=request.raw,
|
|
445
|
+
format=request.format,
|
|
162
446
|
)
|
|
163
447
|
for chunk in stream:
|
|
164
448
|
yield f"data: {json.dumps(chunk)}\n\n"
|
|
165
449
|
yield "data: [DONE]\n\n"
|
|
450
|
+
|
|
451
|
+
# Schedule unloading if keep_alive is 0
|
|
452
|
+
if keep_alive_seconds == 0:
|
|
453
|
+
background_tasks.add_task(unload_model, request.model)
|
|
454
|
+
|
|
166
455
|
return StreamingResponse(generate(), media_type="text/event-stream")
|
|
167
456
|
else:
|
|
457
|
+
# Non-streaming response
|
|
168
458
|
response = model.create_completion(
|
|
169
459
|
prompt=request.prompt,
|
|
170
460
|
max_tokens=request.max_tokens,
|
|
@@ -172,9 +462,253 @@ async def create_completion(request: CompletionRequest) -> Any:
|
|
|
172
462
|
top_p=request.top_p,
|
|
173
463
|
stream=False,
|
|
174
464
|
stop=request.stop,
|
|
465
|
+
suffix=request.suffix,
|
|
466
|
+
images=request.images,
|
|
467
|
+
system=request.system,
|
|
468
|
+
template=request.template,
|
|
469
|
+
context=request.context,
|
|
470
|
+
raw=request.raw,
|
|
471
|
+
format=request.format,
|
|
175
472
|
)
|
|
473
|
+
|
|
474
|
+
# Schedule unloading if keep_alive is 0
|
|
475
|
+
if keep_alive_seconds == 0:
|
|
476
|
+
background_tasks.add_task(unload_model, request.model)
|
|
477
|
+
|
|
176
478
|
return response
|
|
177
479
|
|
|
480
|
+
@app.post("/api/embed")
async def create_embeddings(request: EmbeddingRequest, background_tasks: BackgroundTasks) -> Any:
    """
    Generate embeddings for the given input with the requested model.

    Args:
        request (EmbeddingRequest): Embedding request.
        background_tasks (BackgroundTasks): Used to schedule model unloading.
    Returns:
        dict: Embedding response, exactly as produced by the model interface.
    Raises:
        HTTPException: 500 when embedding generation fails.
    """
    # A keep_alive that parses to 0 seconds means "unload after this request".
    unload_after_request = parse_keep_alive(request.keep_alive) == 0

    # Loading happens outside the try block, so get_model's own errors pass through.
    model = get_model(request.model, request.options)

    try:
        result = model.create_embeddings(input=request.input, truncate=request.truncate)

        if unload_after_request:
            background_tasks.add_task(unload_model, request.model)

        return result
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to generate embeddings: {str(e)}")
|
|
510
|
+
|
|
511
|
+
# GET is the method Ollama-style clients use to list models; POST is kept so
# existing callers of the previous POST-only route keep working.
@app.get("/api/tags")
@app.post("/api/tags")
async def list_all_models() -> ModelsResponse:
    """
    List all available models.

    Returns:
        ModelsResponse: List of available models.
    """
    response_models = []

    for model in model_manager.list_models():
        # A registry entry whose file is missing or unreadable must not break
        # the whole listing; report size 0 for it instead.
        try:
            size = os.path.getsize(model["path"]) if "path" in model else 0
        except OSError:
            size = 0

        details = ModelDetails(
            format="gguf",
            family="llama",  # Default, could be improved with model metadata
            parameter_size=model.get("parameter_size", "Unknown"),
            quantization_level=model.get("quantization_level", "Unknown"),
        )

        response_models.append(
            ModelResponse(
                name=model["name"],
                # Falls back to the current time when no download time is recorded.
                modified_at=model.get("downloaded_at", datetime.datetime.now().isoformat()),
                size=size,
                digest=None,  # Could be improved with actual digest calculation
                details=details,
            )
        )

    return ModelsResponse(models=response_models)
|
|
542
|
+
|
|
543
|
+
@app.get("/api/ps")
async def list_running_models() -> RunningModelsResponse:
    """
    List all running models.

    Cached models for which the model manager has no information are skipped.

    Returns:
        RunningModelsResponse: List of running models.
    """
    running_models = []

    # Iterate over a snapshot of the names: unload_model is scheduled as a
    # background task and may remove cache entries while this handler runs.
    for name in list(loaded_models):
        model_info = model_manager.get_model_info(name)
        if not model_info:
            continue

        # A model whose file has disappeared must not fail the whole listing.
        try:
            size = os.path.getsize(model_info["path"]) if "path" in model_info else 0
        except OSError:
            size = 0

        details = ModelDetails(
            format="gguf",
            family="llama",  # Default, could be improved with model metadata
            parameter_size=model_info.get("parameter_size", "Unknown"),
            quantization_level=model_info.get("quantization_level", "Unknown"),
        )

        running_models.append(
            RunningModel(
                name=name,
                model=name,
                size=size,
                digest=None,  # Could be improved with actual digest calculation
                details=details,
                expires_at=None,  # Could be improved with actual expiration time
                size_vram=None,  # Could be improved with actual VRAM usage
            )
        )

    return RunningModelsResponse(models=running_models)
|
|
579
|
+
|
|
580
|
+
@app.post("/api/show")
async def show_model(request: ShowRequest) -> Dict[str, Any]:
    """
    Describe a single model.

    Args:
        request (ShowRequest): Show request.
    Returns:
        Dict[str, Any]: Model information.
    Raises:
        HTTPException: 404 when the model manager has no record of the model.
    """
    info = model_manager.get_model_info(request.model)
    if not info:
        raise HTTPException(status_code=404, detail=f"Model {request.model} not found")

    # Every model can do completion; vision is added only when recorded.
    capabilities = ["completion"]
    if "vision" in info.get("capabilities", []):
        capabilities.append("vision")

    response = {
        "modelfile": info.get("modelfile", ""),
        "parameters": info.get("parameters", ""),
        "template": info.get("template", ""),
        # Format and family are fixed defaults; the rest comes from the record.
        "details": {
            "parent_model": "",
            "format": "gguf",
            "family": "llama",
            "families": ["llama"],
            "parameter_size": info.get("parameter_size", "Unknown"),
            "quantization_level": info.get("quantization_level", "Unknown")
        },
        "capabilities": capabilities
    }

    # The extended section is included only on request.
    if request.verbose:
        response["model_info"] = info.get("model_info", {})

    return response
|
|
625
|
+
|
|
626
|
+
@app.post("/api/copy")
async def copy_model(request: CopyRequest) -> Dict[str, str]:
    """
    Copy a model.

    Args:
        request (CopyRequest): Copy request.
    Returns:
        Dict[str, str]: Success message.
    Raises:
        HTTPException: 404 when the source model does not exist, 500 when
            the lookup or the copy itself fails.
    """
    try:
        source_info = model_manager.get_model_info(request.source)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to copy model: {str(e)}") from e

    # Raised outside any broad handler so the 404 reaches the client as a 404
    # rather than being rewrapped as a 500.
    if not source_info:
        raise HTTPException(status_code=404, detail=f"Source model {request.source} not found")

    try:
        model_manager.copy_model(request.source, request.destination)
    except HTTPException:
        # Preserve any deliberate HTTP error coming from lower layers.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to copy model: {str(e)}") from e

    return {"status": "success"}
|
|
646
|
+
|
|
647
|
+
@app.delete("/api/delete")
async def delete_model(request: DeleteRequest) -> Dict[str, str]:
    """
    Delete a model.

    Args:
        request (DeleteRequest): Delete request.
    Returns:
        Dict[str, str]: Success message.
    Raises:
        HTTPException: 404 when the model does not exist, 500 when removal fails.
    """
    if not model_manager.get_model_info(request.model):
        raise HTTPException(status_code=404, detail=f"Model {request.model} not found")

    if not model_manager.remove_model(request.model):
        raise HTTPException(status_code=500, detail=f"Failed to delete model {request.model}")

    # Evict any cached instance too; otherwise the deleted model would still be
    # reported as running and keep serving requests. No-op when not loaded.
    unload_model(request.model)

    return {"status": "success"}
|
|
665
|
+
|
|
666
|
+
@app.post("/api/pull")
async def pull_model(request: PullRequest) -> Union[Dict[str, str], StreamingResponse]:
    """
    Pull a model from Hugging Face.

    With ``stream`` enabled the response is a sequence of newline-terminated
    JSON status objects; a failure is reported as a final
    ``{"status": "error", ...}`` line rather than an HTTP error. Without
    streaming, a single success document is returned.

    Args:
        request (PullRequest): Pull request.
    Returns:
        Union[Dict[str, str], StreamingResponse]: Success message or streaming response.
    Raises:
        HTTPException: 500 when a non-streaming pull fails.
    """
    if request.stream:
        async def generate_stream():
            yield json.dumps({"status": "pulling manifest"}) + "\n"
            # Announce the download before it starts, so the client sees this
            # status while the transfer is running, not after it finished.
            yield json.dumps({"status": "downloading model"}) + "\n"

            try:
                # NOTE(review): this call runs synchronously inside the
                # generator - presumably it blocks until the download is done.
                model_manager.download_model(request.model)
            except Exception as e:
                yield json.dumps({"status": "error", "error": str(e)}) + "\n"
                return

            # Remaining statuses are emitted once the download has returned.
            yield json.dumps({"status": "verifying sha256 digest"}) + "\n"
            yield json.dumps({"status": "writing manifest"}) + "\n"
            yield json.dumps({"status": "success"}) + "\n"

        return StreamingResponse(generate_stream(), media_type="application/json")

    try:
        model_manager.download_model(request.model)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to pull model: {str(e)}") from e

    return {"status": "success"}
|
|
701
|
+
|
|
702
|
+
@app.get("/api/version")
async def get_version() -> VersionResponse:
    """
    Report the package version.

    Returns:
        VersionResponse: Version information.
    """
    # Imported when the endpoint is called rather than at module import time.
    from webscout.Local import __version__ as package_version

    return VersionResponse(version=package_version)
|
|
711
|
+
|
|
178
712
|
def start_server(host: Optional[str] = None, port: Optional[int] = None) -> None:
|
|
179
713
|
"""
|
|
180
714
|
Start the API server.
|
webscout/Provider/Gemini.py
CHANGED
|
@@ -18,8 +18,10 @@ MODEL_ALIASES: Dict[str, Model] = {
|
|
|
18
18
|
"gemini-2.5-pro": Model.G_2_5_PRO,
|
|
19
19
|
"gemini-2.0-exp-advanced": Model.G_2_0_EXP_ADVANCED,
|
|
20
20
|
"gemini-2.5-exp-advanced": Model.G_2_5_EXP_ADVANCED,
|
|
21
|
+
"gemini-2.5-flash": Model.G_2_5_FLASH,
|
|
21
22
|
# Add shorter aliases for convenience
|
|
22
23
|
"flash": Model.G_2_0_FLASH,
|
|
24
|
+
"flash-2.5": Model.G_2_5_FLASH,
|
|
23
25
|
"thinking": Model.G_2_0_FLASH_THINKING,
|
|
24
26
|
"pro": Model.G_2_5_PRO,
|
|
25
27
|
"advanced": Model.G_2_0_EXP_ADVANCED,
|