webscout-8.2.3-py3-none-any.whl → webscout-8.2.5-py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release.
This version of webscout might be problematic.
- webscout/AIutel.py +226 -14
- webscout/Bard.py +579 -206
- webscout/DWEBS.py +78 -35
- webscout/Extra/gguf.py +2 -0
- webscout/Extra/tempmail/base.py +1 -1
- webscout/Provider/AISEARCH/hika_search.py +4 -0
- webscout/Provider/AISEARCH/scira_search.py +2 -5
- webscout/Provider/Aitopia.py +75 -51
- webscout/Provider/AllenAI.py +181 -147
- webscout/Provider/ChatGPTClone.py +97 -86
- webscout/Provider/ChatSandbox.py +342 -0
- webscout/Provider/Cloudflare.py +79 -32
- webscout/Provider/Deepinfra.py +135 -94
- webscout/Provider/ElectronHub.py +103 -39
- webscout/Provider/ExaChat.py +36 -20
- webscout/Provider/GPTWeb.py +103 -47
- webscout/Provider/GithubChat.py +52 -49
- webscout/Provider/GizAI.py +283 -0
- webscout/Provider/Glider.py +39 -28
- webscout/Provider/Groq.py +222 -91
- webscout/Provider/HeckAI.py +93 -69
- webscout/Provider/HuggingFaceChat.py +113 -106
- webscout/Provider/Hunyuan.py +94 -83
- webscout/Provider/Jadve.py +104 -79
- webscout/Provider/LambdaChat.py +142 -123
- webscout/Provider/Llama3.py +94 -39
- webscout/Provider/MCPCore.py +315 -0
- webscout/Provider/Marcus.py +95 -37
- webscout/Provider/Netwrck.py +94 -52
- webscout/Provider/OPENAI/__init__.py +4 -1
- webscout/Provider/OPENAI/ai4chat.py +286 -0
- webscout/Provider/OPENAI/chatgptclone.py +35 -14
- webscout/Provider/OPENAI/deepinfra.py +37 -0
- webscout/Provider/OPENAI/exachat.py +4 -0
- webscout/Provider/OPENAI/groq.py +354 -0
- webscout/Provider/OPENAI/heckai.py +6 -2
- webscout/Provider/OPENAI/mcpcore.py +376 -0
- webscout/Provider/OPENAI/multichat.py +368 -0
- webscout/Provider/OPENAI/netwrck.py +3 -1
- webscout/Provider/OPENAI/scirachat.py +2 -4
- webscout/Provider/OPENAI/textpollinations.py +20 -22
- webscout/Provider/OPENAI/toolbaz.py +1 -0
- webscout/Provider/OpenGPT.py +48 -38
- webscout/Provider/PI.py +178 -93
- webscout/Provider/PizzaGPT.py +66 -36
- webscout/Provider/StandardInput.py +42 -30
- webscout/Provider/TeachAnything.py +95 -52
- webscout/Provider/TextPollinationsAI.py +138 -78
- webscout/Provider/TwoAI.py +162 -81
- webscout/Provider/TypliAI.py +305 -0
- webscout/Provider/Venice.py +97 -58
- webscout/Provider/VercelAI.py +33 -14
- webscout/Provider/WiseCat.py +65 -28
- webscout/Provider/Writecream.py +37 -11
- webscout/Provider/WritingMate.py +135 -63
- webscout/Provider/__init__.py +9 -27
- webscout/Provider/ai4chat.py +6 -7
- webscout/Provider/asksteve.py +53 -44
- webscout/Provider/cerebras.py +77 -31
- webscout/Provider/chatglm.py +47 -37
- webscout/Provider/copilot.py +0 -3
- webscout/Provider/elmo.py +109 -60
- webscout/Provider/granite.py +102 -54
- webscout/Provider/hermes.py +95 -48
- webscout/Provider/koala.py +1 -1
- webscout/Provider/learnfastai.py +113 -54
- webscout/Provider/llama3mitril.py +86 -51
- webscout/Provider/llmchat.py +88 -46
- webscout/Provider/llmchatco.py +110 -115
- webscout/Provider/meta.py +41 -37
- webscout/Provider/multichat.py +67 -28
- webscout/Provider/scira_chat.py +49 -30
- webscout/Provider/scnet.py +106 -53
- webscout/Provider/searchchat.py +87 -88
- webscout/Provider/sonus.py +113 -63
- webscout/Provider/toolbaz.py +115 -82
- webscout/Provider/turboseek.py +90 -43
- webscout/Provider/tutorai.py +82 -64
- webscout/Provider/typefully.py +85 -35
- webscout/Provider/typegpt.py +118 -61
- webscout/Provider/uncovr.py +132 -76
- webscout/Provider/x0gpt.py +69 -26
- webscout/Provider/yep.py +79 -66
- webscout/cli.py +256 -0
- webscout/conversation.py +34 -22
- webscout/exceptions.py +23 -0
- webscout/prompt_manager.py +56 -42
- webscout/version.py +1 -1
- webscout/webscout_search.py +65 -47
- webscout/webscout_search_async.py +81 -126
- webscout/yep_search.py +93 -43
- {webscout-8.2.3.dist-info → webscout-8.2.5.dist-info}/METADATA +183 -50
- {webscout-8.2.3.dist-info → webscout-8.2.5.dist-info}/RECORD +97 -113
- {webscout-8.2.3.dist-info → webscout-8.2.5.dist-info}/WHEEL +1 -1
- webscout-8.2.5.dist-info/entry_points.txt +3 -0
- {webscout-8.2.3.dist-info → webscout-8.2.5.dist-info}/top_level.txt +0 -1
- inferno/__init__.py +0 -6
- inferno/__main__.py +0 -9
- inferno/cli.py +0 -6
- webscout/Local/__init__.py +0 -12
- webscout/Local/__main__.py +0 -9
- webscout/Local/api.py +0 -576
- webscout/Local/cli.py +0 -516
- webscout/Local/config.py +0 -75
- webscout/Local/llm.py +0 -287
- webscout/Local/model_manager.py +0 -253
- webscout/Local/server.py +0 -721
- webscout/Local/utils.py +0 -93
- webscout/Provider/C4ai.py +0 -432
- webscout/Provider/ChatGPTES.py +0 -237
- webscout/Provider/Chatify.py +0 -175
- webscout/Provider/DeepSeek.py +0 -196
- webscout/Provider/Llama.py +0 -200
- webscout/Provider/Phind.py +0 -535
- webscout/Provider/WebSim.py +0 -228
- webscout/Provider/askmyai.py +0 -158
- webscout/Provider/gaurish.py +0 -244
- webscout/Provider/labyrinth.py +0 -340
- webscout/Provider/lepton.py +0 -194
- webscout/Provider/llamatutor.py +0 -192
- webscout-8.2.3.dist-info/entry_points.txt +0 -5
- {webscout-8.2.3.dist-info → webscout-8.2.5.dist-info/licenses}/LICENSE.md +0 -0
webscout/Local/server.py
DELETED
@@ -1,721 +0,0 @@
"""
API server with OpenAI compatibility
"""

import json
import time
import os
import datetime
from typing import Dict, Any, List, Optional, AsyncGenerator, Union

import uvicorn
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.responses import StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field

from .config import config
from .model_manager import ModelManager
from .llm import LLMInterface

app = FastAPI(title="webscout.local API", description="OpenAI-compatible API for webscout.local")

# Add CORS middleware to allow cross-origin requests
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allow all methods
    allow_headers=["*"],  # Allow all headers
)

# Models
model_manager: ModelManager = ModelManager()
loaded_models: Dict[str, LLMInterface] = {}  # Cache for loaded models

class Image(BaseModel):
    """
    Represents an image for multimodal models.
    """
    url: Optional[str] = None
    data: Optional[str] = None

class ToolFunction(BaseModel):
    """
    Represents a function for function calling.
    """
    name: str
    description: Optional[str] = None
    parameters: Dict[str, Any] = Field(default_factory=dict)

class Tool(BaseModel):
    """
    Represents a tool for function calling.
    """
    type: str = "function"
    function: ToolFunction

class ToolCall(BaseModel):
    """
    Represents a tool call from the model.
    """
    function: Dict[str, Any]

class ChatMessage(BaseModel):
    """
    Represents a single chat message for the chat completion endpoint.
    """
    role: str
    content: Union[str, List[Dict[str, Any]], None] = ""
    images: Optional[List[str]] = None
    tool_calls: Optional[List[ToolCall]] = None

class ChatCompletionRequest(BaseModel):
    """
    Request model for chat completions.
    """
    model: str
    messages: List[ChatMessage]
    temperature: float = 0.7
    top_p: float = 0.95
    max_tokens: int = 256
    stream: bool = False
    stop: Optional[List[str]] = None
    tools: Optional[List[Tool]] = None
    format: Optional[Union[str, Dict[str, Any]]] = None
    options: Optional[Dict[str, Any]] = None
    keep_alive: Optional[str] = "5m"

class CompletionRequest(BaseModel):
    """
    Request model for text completions.
    """
    model: str
    prompt: str
    suffix: Optional[str] = None
    temperature: float = 0.7
    top_p: float = 0.95
    max_tokens: int = 256
    stream: bool = False
    stop: Optional[List[str]] = None
    images: Optional[List[str]] = None
    format: Optional[Union[str, Dict[str, Any]]] = None
    options: Optional[Dict[str, Any]] = None
    system: Optional[str] = None
    template: Optional[str] = None
    context: Optional[List[int]] = None
    raw: bool = False
    keep_alive: Optional[str] = "5m"

class ModelInfo(BaseModel):
    """
    Model information for listing available models.
    """
    id: str
    object: str = "model"
    created: int
    owned_by: str = "webscout.local"

class ModelList(BaseModel):
    """
    List of available models.
    """
    object: str = "list"
    data: List[ModelInfo]

class EmbeddingRequest(BaseModel):
    """
    Request model for embeddings.
    """
    model: str
    input: Union[str, List[str]]
    truncate: bool = True
    options: Optional[Dict[str, Any]] = None
    keep_alive: Optional[str] = "5m"

class CopyRequest(BaseModel):
    """
    Request model for copying a model.
    """
    source: str
    destination: str

class DeleteRequest(BaseModel):
    """
    Request model for deleting a model.
    """
    model: str

class ShowRequest(BaseModel):
    """
    Request model for showing model information.
    """
    model: str
    verbose: bool = False

class PullRequest(BaseModel):
    """
    Request model for pulling a model.
    """
    model: str
    insecure: bool = False
    stream: bool = True

class ModelDetails(BaseModel):
    """
    Detailed model information.
    """
    format: str = "gguf"
    family: str = "llama"
    families: Optional[List[str]] = None
    parameter_size: Optional[str] = None
    quantization_level: Optional[str] = None

class ModelResponse(BaseModel):
    """
    Response model for model information.
    """
    name: str
    modified_at: str
    size: int
    digest: Optional[str] = None
    details: Optional[ModelDetails] = None

class ModelsResponse(BaseModel):
    """
    Response model for listing models.
    """
    models: List[ModelResponse]

class RunningModel(BaseModel):
    """
    Information about a running model.
    """
    name: str
    model: str
    size: int
    digest: Optional[str] = None
    details: Optional[ModelDetails] = None
    expires_at: Optional[str] = None
    size_vram: Optional[int] = None

class RunningModelsResponse(BaseModel):
    """
    Response model for listing running models.
    """
    models: List[RunningModel]

class VersionResponse(BaseModel):
    """
    Response model for version information.
    """
    version: str

def parse_keep_alive(keep_alive: Optional[str]) -> int:
    """
    Parse the keep_alive parameter to seconds.
    Args:
        keep_alive (Optional[str]): Keep alive duration string.
    Returns:
        int: Keep alive duration in seconds.
    """
    if not keep_alive:
        return 300  # Default 5 minutes

    if keep_alive.endswith("ms"):
        return int(keep_alive[:-2]) // 1000
    elif keep_alive.endswith("s"):
        return int(keep_alive[:-1])
    elif keep_alive.endswith("m"):
        return int(keep_alive[:-1]) * 60
    elif keep_alive.endswith("h"):
        return int(keep_alive[:-1]) * 3600
    elif keep_alive == "0":
        return 0
    else:
        try:
            return int(keep_alive)
        except ValueError:
            return 300  # Default 5 minutes

def get_model(model_name: str, options: Optional[Dict[str, Any]] = None) -> LLMInterface:
    """
    Get or load a model by name, using a cache for efficiency.
    Args:
        model_name (str): Name of the model to load.
        options (Optional[Dict[str, Any]]): Additional options for loading the model.
    Returns:
        LLMInterface: Loaded model interface.
    Raises:
        HTTPException: If the model cannot be loaded.
    """
    if model_name not in loaded_models:
        try:
            loaded_models[model_name] = LLMInterface(model_name)

            # Extract options if provided
            n_gpu_layers = options.get("n_gpu_layers", None) if options else None
            n_ctx = options.get("num_ctx", None) if options else None
            verbose = options.get("verbose", False) if options else False

            loaded_models[model_name].load_model(
                n_gpu_layers=n_gpu_layers,
                n_ctx=n_ctx,
                verbose=verbose
            )
        except Exception as e:
            raise HTTPException(status_code=404, detail=f"Model {model_name} not found: {str(e)}")
    return loaded_models[model_name]

def unload_model(model_name: str) -> None:
    """
    Unload a model from memory.
    Args:
        model_name (str): Name of the model to unload.
    """
    if model_name in loaded_models:
        # Free the model resources
        loaded_models[model_name].llm = None
        # Remove from loaded models cache
        del loaded_models[model_name]

@app.get("/v1/models", response_model=ModelList)
async def list_models() -> ModelList:
    """
    List available models.
    Returns:
        ModelList: List of available models.
    """
    models = model_manager.list_models()
    model_list: List[ModelInfo] = []
    for model in models:
        model_list.append(
            ModelInfo(
                id=model["name"],
                created=int(time.time()),
            )
        )
    return ModelList(object="list", data=model_list)

@app.post("/v1/chat/completions")
async def create_chat_completion(request: ChatCompletionRequest, background_tasks: BackgroundTasks) -> Any:
    """
    Create a chat completion.
    Args:
        request (ChatCompletionRequest): Chat completion request.
        background_tasks (BackgroundTasks): Background tasks for model unloading.
    Returns:
        StreamingResponse or dict: Streaming or regular response.
    """
    # Parse keep_alive parameter
    keep_alive_seconds = parse_keep_alive(request.keep_alive)

    # If messages is empty, just load the model and return
    if not request.messages:
        model = get_model(request.model, request.options)

        # Schedule unloading if keep_alive is 0
        if keep_alive_seconds == 0:
            background_tasks.add_task(unload_model, request.model)
            done_reason = "unload"
        else:
            done_reason = "load"

        return {
            "model": request.model,
            "created_at": datetime.datetime.now().isoformat(),
            "message": {
                "role": "assistant",
                "content": ""
            },
            "done_reason": done_reason,
            "done": True
        }

    # Process messages with images if present
    processed_messages = []
    for msg in request.messages:
        message_dict = {"role": msg.role, "content": msg.content}

        # Handle images for multimodal models
        if msg.images:
            message_dict["images"] = msg.images

        # Handle tool calls
        if msg.tool_calls:
            message_dict["tool_calls"] = [tc.model_dump() for tc in msg.tool_calls]

        processed_messages.append(message_dict)

    # Get or load the model
    model = get_model(request.model, request.options)

    # Handle streaming response
    if request.stream:
        async def generate() -> AsyncGenerator[str, None]:
            stream = model.create_chat_completion(
                messages=processed_messages,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=request.top_p,
                stream=True,
                stop=request.stop,
                tools=request.tools,
                format=request.format,
            )
            for chunk in stream:
                yield f"data: {json.dumps(chunk)}\n\n"
            yield "data: [DONE]\n\n"

        # Schedule unloading if keep_alive is 0
        if keep_alive_seconds == 0:
            background_tasks.add_task(unload_model, request.model)

        return StreamingResponse(generate(), media_type="text/event-stream")
    else:
        # Non-streaming response
        response = model.create_chat_completion(
            messages=processed_messages,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
            top_p=request.top_p,
            stream=False,
            stop=request.stop,
            tools=request.tools,
            format=request.format,
        )

        # Schedule unloading if keep_alive is 0
        if keep_alive_seconds == 0:
            background_tasks.add_task(unload_model, request.model)

        return response

@app.post("/v1/completions")
async def create_completion(request: CompletionRequest, background_tasks: BackgroundTasks) -> Any:
    """
    Create a text completion.
    Args:
        request (CompletionRequest): Completion request.
        background_tasks (BackgroundTasks): Background tasks for model unloading.
    Returns:
        StreamingResponse or dict: Streaming or regular response.
    """
    # Parse keep_alive parameter
    keep_alive_seconds = parse_keep_alive(request.keep_alive)

    # If prompt is empty, just load the model and return
    if not request.prompt:
        model = get_model(request.model, request.options)

        # Schedule unloading if keep_alive is 0
        if keep_alive_seconds == 0:
            background_tasks.add_task(unload_model, request.model)
            done_reason = "unload"
        else:
            done_reason = "load"

        return {
            "model": request.model,
            "created_at": datetime.datetime.now().isoformat(),
            "response": "",
            "done": True,
            "done_reason": done_reason
        }

    # Get or load the model
    model = get_model(request.model, request.options)

    # Handle streaming response
    if request.stream:
        async def generate() -> AsyncGenerator[str, None]:
            stream = model.create_completion(
                prompt=request.prompt,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=request.top_p,
                stream=True,
                stop=request.stop,
                suffix=request.suffix,
                images=request.images,
                system=request.system,
                template=request.template,
                context=request.context,
                raw=request.raw,
                format=request.format,
            )
            for chunk in stream:
                yield f"data: {json.dumps(chunk)}\n\n"
            yield "data: [DONE]\n\n"

        # Schedule unloading if keep_alive is 0
        if keep_alive_seconds == 0:
            background_tasks.add_task(unload_model, request.model)

        return StreamingResponse(generate(), media_type="text/event-stream")
    else:
        # Non-streaming response
        response = model.create_completion(
            prompt=request.prompt,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
            top_p=request.top_p,
            stream=False,
            stop=request.stop,
            suffix=request.suffix,
            images=request.images,
            system=request.system,
            template=request.template,
            context=request.context,
            raw=request.raw,
            format=request.format,
        )

        # Schedule unloading if keep_alive is 0
        if keep_alive_seconds == 0:
            background_tasks.add_task(unload_model, request.model)

        return response

@app.post("/api/embed")
async def create_embeddings(request: EmbeddingRequest, background_tasks: BackgroundTasks) -> Any:
    """
    Generate embeddings from a model.
    Args:
        request (EmbeddingRequest): Embedding request.
        background_tasks (BackgroundTasks): Background tasks for model unloading.
    Returns:
        dict: Embedding response.
    """
    # Parse keep_alive parameter
    keep_alive_seconds = parse_keep_alive(request.keep_alive)

    # Get or load the model
    model = get_model(request.model, request.options)

    try:
        # Generate embeddings
        embeddings = model.create_embeddings(
            input=request.input,
            truncate=request.truncate
        )

        # Schedule unloading if keep_alive is 0
        if keep_alive_seconds == 0:
            background_tasks.add_task(unload_model, request.model)

        return embeddings
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to generate embeddings: {str(e)}")

@app.post("/api/tags")
async def list_all_models() -> ModelsResponse:
    """
    List all available models.
    Returns:
        ModelsResponse: List of available models.
    """
    models = model_manager.list_models()
    response_models = []

    for model in models:
        # Extract model details
        details = ModelDetails(
            format="gguf",
            family="llama",  # Default, could be improved with model metadata
            parameter_size=model.get("parameter_size", "Unknown"),
            quantization_level=model.get("quantization_level", "Unknown")
        )

        # Create model response
        response_models.append(
            ModelResponse(
                name=model["name"],
                modified_at=model.get("downloaded_at", datetime.datetime.now().isoformat()),
                size=os.path.getsize(model["path"]) if "path" in model else 0,
                digest=None,  # Could be improved with actual digest calculation
                details=details
            )
        )

    return ModelsResponse(models=response_models)

@app.get("/api/ps")
async def list_running_models() -> RunningModelsResponse:
    """
    List all running models.
    Returns:
        RunningModelsResponse: List of running models.
    """
    running_models = []

    for name, _ in loaded_models.items():
        # Get model info
        model_info = model_manager.get_model_info(name)

        if model_info:
            # Extract model details
            details = ModelDetails(
                format="gguf",
                family="llama",  # Default, could be improved with model metadata
                parameter_size=model_info.get("parameter_size", "Unknown"),
                quantization_level=model_info.get("quantization_level", "Unknown")
            )

            # Create running model response
            running_models.append(
                RunningModel(
                    name=name,
                    model=name,
                    size=os.path.getsize(model_info["path"]) if "path" in model_info else 0,
                    digest=None,  # Could be improved with actual digest calculation
                    details=details,
                    expires_at=None,  # Could be improved with actual expiration time
                    size_vram=None  # Could be improved with actual VRAM usage
                )
            )

    return RunningModelsResponse(models=running_models)

@app.post("/api/show")
async def show_model(request: ShowRequest) -> Dict[str, Any]:
    """
    Show model information.
    Args:
        request (ShowRequest): Show request.
    Returns:
        Dict[str, Any]: Model information.
    """
    model_info = model_manager.get_model_info(request.model)

    if not model_info:
        raise HTTPException(status_code=404, detail=f"Model {request.model} not found")

    # Extract model details
    details = {
        "parent_model": "",
        "format": "gguf",
        "family": "llama",
        "families": ["llama"],
        "parameter_size": model_info.get("parameter_size", "Unknown"),
        "quantization_level": model_info.get("quantization_level", "Unknown")
    }

    # Get model capabilities
    capabilities = ["completion"]

    # Add vision capability if model supports it
    if "vision" in model_info.get("capabilities", []):
        capabilities.append("vision")

    # Create response
    response = {
        "modelfile": model_info.get("modelfile", ""),
        "parameters": model_info.get("parameters", ""),
        "template": model_info.get("template", ""),
        "details": details,
        "capabilities": capabilities
    }

    # Add model_info if verbose is requested
    if request.verbose:
        response["model_info"] = model_info.get("model_info", {})

    return response

@app.post("/api/copy")
async def copy_model(request: CopyRequest) -> Dict[str, str]:
    """
    Copy a model.
    Args:
        request (CopyRequest): Copy request.
    Returns:
        Dict[str, str]: Success message.
    """
    try:
        # Check if source model exists
        if not model_manager.get_model_info(request.source):
            raise HTTPException(status_code=404, detail=f"Source model {request.source} not found")

        # Copy the model
        model_manager.copy_model(request.source, request.destination)

        return {"status": "success"}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to copy model: {str(e)}")

@app.delete("/api/delete")
async def delete_model(request: DeleteRequest) -> Dict[str, str]:
    """
    Delete a model.
    Args:
        request (DeleteRequest): Delete request.
    Returns:
        Dict[str, str]: Success message.
    """
    # Check if model exists
    if not model_manager.get_model_info(request.model):
        raise HTTPException(status_code=404, detail=f"Model {request.model} not found")

    # Delete the model
    if model_manager.remove_model(request.model):
        return {"status": "success"}
    else:
        raise HTTPException(status_code=500, detail=f"Failed to delete model {request.model}")

@app.post("/api/pull")
async def pull_model(request: PullRequest) -> Union[Dict[str, str], StreamingResponse]:
    """
    Pull a model from Hugging Face.
    Args:
        request (PullRequest): Pull request.
    Returns:
        Union[Dict[str, str], StreamingResponse]: Success message or streaming response.
    """
    if request.stream:
        async def generate_stream():
            # Initial response
            yield json.dumps({"status": "pulling manifest"}) + "\n"

            try:
                # Pull the model
                _, _ = model_manager.download_model(request.model)

                # Success responses
                yield json.dumps({"status": "downloading model"}) + "\n"
                yield json.dumps({"status": "verifying sha256 digest"}) + "\n"
                yield json.dumps({"status": "writing manifest"}) + "\n"
                yield json.dumps({"status": "success"}) + "\n"
            except Exception as e:
                yield json.dumps({"status": "error", "error": str(e)}) + "\n"

        return StreamingResponse(generate_stream(), media_type="application/json")
    else:
        try:
            # Pull the model
            model_manager.download_model(request.model)

            return {"status": "success"}
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Failed to pull model: {str(e)}")

@app.get("/api/version")
async def get_version() -> VersionResponse:
    """
    Get the version of the API.
    Returns:
        VersionResponse: Version information.
    """
    from webscout.Local import __version__
    return VersionResponse(version=__version__)

def start_server(host: Optional[str] = None, port: Optional[int] = None) -> None:
    """
    Start the API server.
    Args:
        host (Optional[str]): Host to bind the server to.
        port (Optional[int]): Port to bind the server to.
    """
    host = host or config.get("api_host", "127.0.0.1")
    port = port or config.get("api_port", 8000)
    uvicorn.run(app, host=host, port=port)
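
For context on what this removal takes away: the module above served Ollama-style (/api/...) and OpenAI-style (/v1/...) endpoints for locally loaded GGUF models. The sketch below shows roughly how a client would have talked to it, assuming a server started via start_server() on the default 127.0.0.1:8000 and an already-pulled model named "llama3"; both values are illustrative, not taken from this diff. Against 8.2.5 these endpoints no longer exist, since the whole webscout/Local package is removed.

# Minimal client sketch against the removed webscout.Local server (webscout <= 8.2.3).
# Assumes start_server() is running on 127.0.0.1:8000 and a model named "llama3" has
# been pulled; both names are illustrative placeholders.
import requests

# OpenAI-style model listing served by list_models() above.
models = requests.get("http://127.0.0.1:8000/v1/models").json()
print([m["id"] for m in models["data"]])

# Non-streaming chat completion served by create_chat_completion() above.
reply = requests.post(
    "http://127.0.0.1:8000/v1/chat/completions",
    json={
        "model": "llama3",
        "messages": [{"role": "user", "content": "Hello"}],
        "stream": False,
    },
)
print(reply.json())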