webscout-8.0-py3-none-any.whl → webscout-8.2-py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release.
- inferno/__init__.py +6 -0
- inferno/__main__.py +9 -0
- inferno/cli.py +6 -0
- webscout/Local/__init__.py +6 -0
- webscout/Local/__main__.py +9 -0
- webscout/Local/api.py +576 -0
- webscout/Local/cli.py +338 -0
- webscout/Local/config.py +75 -0
- webscout/Local/llm.py +188 -0
- webscout/Local/model_manager.py +205 -0
- webscout/Local/server.py +187 -0
- webscout/Local/utils.py +93 -0
- webscout/Provider/AISEARCH/DeepFind.py +1 -1
- webscout/Provider/AISEARCH/ISou.py +1 -1
- webscout/Provider/AISEARCH/Perplexity.py +359 -0
- webscout/Provider/AISEARCH/__init__.py +3 -1
- webscout/Provider/AISEARCH/felo_search.py +1 -1
- webscout/Provider/AISEARCH/genspark_search.py +1 -1
- webscout/Provider/AISEARCH/hika_search.py +1 -1
- webscout/Provider/AISEARCH/iask_search.py +436 -0
- webscout/Provider/AISEARCH/scira_search.py +9 -5
- webscout/Provider/AISEARCH/webpilotai_search.py +1 -1
- webscout/Provider/ExaAI.py +1 -1
- webscout/Provider/ExaChat.py +18 -8
- webscout/Provider/GithubChat.py +5 -1
- webscout/Provider/Glider.py +4 -2
- webscout/Provider/Jadve.py +2 -2
- webscout/Provider/OPENAI/__init__.py +24 -0
- webscout/Provider/OPENAI/base.py +46 -0
- webscout/Provider/OPENAI/c4ai.py +347 -0
- webscout/Provider/OPENAI/chatgpt.py +549 -0
- webscout/Provider/OPENAI/chatgptclone.py +460 -0
- webscout/Provider/OPENAI/deepinfra.py +284 -0
- webscout/Provider/OPENAI/exaai.py +419 -0
- webscout/Provider/OPENAI/exachat.py +433 -0
- webscout/Provider/OPENAI/freeaichat.py +355 -0
- webscout/Provider/OPENAI/glider.py +316 -0
- webscout/Provider/OPENAI/heckai.py +337 -0
- webscout/Provider/OPENAI/llmchatco.py +327 -0
- webscout/Provider/OPENAI/netwrck.py +348 -0
- webscout/Provider/OPENAI/opkfc.py +488 -0
- webscout/Provider/OPENAI/scirachat.py +463 -0
- webscout/Provider/OPENAI/sonus.py +294 -0
- webscout/Provider/OPENAI/standardinput.py +425 -0
- webscout/Provider/OPENAI/textpollinations.py +285 -0
- webscout/Provider/OPENAI/toolbaz.py +405 -0
- webscout/Provider/OPENAI/typegpt.py +361 -0
- webscout/Provider/OPENAI/uncovrAI.py +455 -0
- webscout/Provider/OPENAI/utils.py +211 -0
- webscout/Provider/OPENAI/venice.py +428 -0
- webscout/Provider/OPENAI/wisecat.py +381 -0
- webscout/Provider/OPENAI/writecream.py +158 -0
- webscout/Provider/OPENAI/x0gpt.py +389 -0
- webscout/Provider/OPENAI/yep.py +329 -0
- webscout/Provider/StandardInput.py +278 -0
- webscout/Provider/TextPollinationsAI.py +27 -28
- webscout/Provider/Venice.py +1 -1
- webscout/Provider/Writecream.py +211 -0
- webscout/Provider/WritingMate.py +197 -0
- webscout/Provider/Youchat.py +30 -26
- webscout/Provider/__init__.py +14 -6
- webscout/Provider/koala.py +2 -2
- webscout/Provider/llmchatco.py +5 -0
- webscout/Provider/scira_chat.py +18 -12
- webscout/Provider/scnet.py +187 -0
- webscout/Provider/toolbaz.py +320 -0
- webscout/Provider/typegpt.py +3 -184
- webscout/Provider/uncovr.py +3 -3
- webscout/conversation.py +32 -32
- webscout/prompt_manager.py +2 -1
- webscout/version.py +1 -1
- webscout-8.2.dist-info/METADATA +734 -0
- {webscout-8.0.dist-info → webscout-8.2.dist-info}/RECORD +77 -32
- webscout-8.2.dist-info/entry_points.txt +5 -0
- {webscout-8.0.dist-info → webscout-8.2.dist-info}/top_level.txt +1 -0
- webscout/Provider/flowith.py +0 -207
- webscout-8.0.dist-info/METADATA +0 -995
- webscout-8.0.dist-info/entry_points.txt +0 -3
- {webscout-8.0.dist-info → webscout-8.2.dist-info}/LICENSE.md +0 -0
- {webscout-8.0.dist-info → webscout-8.2.dist-info}/WHEEL +0 -0
inferno/__init__.py
ADDED
inferno/__main__.py
ADDED
inferno/cli.py
ADDED
webscout/Local/api.py
ADDED
@@ -0,0 +1,576 @@
+"""
+API endpoints for webscout.Local
+"""
+
+import time
+import json
+import logging
+from typing import Dict, List, Optional, Union, Any
+from datetime import datetime
+import base64
+from uuid import uuid4
+
+from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+
+from .llm import ModelManager
+from .config import Config
+
+logger = logging.getLogger(__name__)
+
+# API Models
+class GenerateRequest(BaseModel):
+    model: str
+    prompt: str = ""
+    suffix: Optional[str] = None
+    images: Optional[List[str]] = None
+    system: Optional[str] = None
+    template: Optional[str] = None
+    context: Optional[List[int]] = None
+    stream: bool = True
+    raw: bool = False
+    format: Optional[Union[str, Dict[str, Any]]] = None
+    options: Optional[Dict[str, Any]] = None
+    keep_alive: Optional[str] = "5m"
+
+class ChatMessage(BaseModel):
+    role: str
+    content: Union[str, List[Dict[str, Any]]]
+    images: Optional[List[str]] = None
+    tool_calls: Optional[List[Dict[str, Any]]] = None
+
+class ChatRequest(BaseModel):
+    model: str
+    messages: List[ChatMessage]
+    stream: bool = True
+    tools: Optional[List[Dict[str, Any]]] = None
+    format: Optional[Union[str, Dict[str, Any]]] = None
+    options: Optional[Dict[str, Any]] = None
+    keep_alive: Optional[str] = "5m"
+
+class EmbeddingRequest(BaseModel):
+    model: str
+    input: Union[str, List[str]]
+    truncate: bool = True
+    options: Optional[Dict[str, Any]] = None
+    keep_alive: Optional[str] = "5m"
+
+class PullModelRequest(BaseModel):
+    model: str
+    insecure: bool = False
+    stream: bool = True
+
+class DeleteModelRequest(BaseModel):
+    model: str
+
+class ModelResponse(BaseModel):
+    name: str
+    modified_at: str
+    size: int
+    details: Optional[Dict[str, Any]] = None
+
+class ModelsResponse(BaseModel):
+    models: List[ModelResponse]
+
+# API Router
+router = APIRouter()
+
+# Dependency to get model manager
+def get_model_manager(config: Config = Depends(lambda: Config.from_env())):
+    return ModelManager(config)
+
+@router.post("/api/generate")
+async def generate(
+    request: GenerateRequest,
+    background_tasks: BackgroundTasks,
+    model_manager: ModelManager = Depends(get_model_manager),
+):
+    """Generate a completion for a given prompt"""
+    try:
+        # Parse keep_alive
+        keep_alive_seconds = 300  # Default 5 minutes
+        if request.keep_alive:
+            if request.keep_alive.endswith("ms"):
+                keep_alive_seconds = int(request.keep_alive[:-2]) / 1000
+            elif request.keep_alive.endswith("s"):
+                keep_alive_seconds = int(request.keep_alive[:-1])
+            elif request.keep_alive.endswith("m"):
+                keep_alive_seconds = int(request.keep_alive[:-1]) * 60
+            elif request.keep_alive.endswith("h"):
+                keep_alive_seconds = int(request.keep_alive[:-1]) * 3600
+            elif request.keep_alive == "0":
+                # Special case: unload immediately after completion
+                keep_alive_seconds = 0
+            else:
+                try:
+                    keep_alive_seconds = int(request.keep_alive)
+                except ValueError:
+                    pass
+
+        # If prompt is empty, just load the model and return
+        if not request.prompt:
+            model = model_manager.load_model(
+                request.model,
+                **(request.options or {})
+            )
+
+            # Schedule unloading if keep_alive is 0
+            if keep_alive_seconds == 0:
+                background_tasks.add_task(model_manager.unload_model, request.model)
+
+            return {
+                "model": request.model,
+                "created_at": datetime.now().isoformat(),
+                "response": "",
+                "done": True,
+                "done_reason": "load" if keep_alive_seconds > 0 else "unload"
+            }
+
+        # Load the model
+        model = model_manager.load_model(
+            request.model,
+            **(request.options or {})
+        )
+
+        # Process images if provided
+        image_data = None
+        if request.images and len(request.images) > 0:
+            # For now, we only support the first image
+            image_base64 = request.images[0]
+            if image_base64.startswith("data:"):
+                # Handle data URI
+                image_base64 = image_base64.split(",", 1)[1]
+            image_data = base64.b64decode(image_base64)
+
+        # Prepare generation parameters
+        generation_params = {
+            "prompt": request.prompt,
+            "suffix": request.suffix,
+            "max_tokens": request.options.get("num_predict", 128) if request.options else 128,
+            "temperature": request.options.get("temperature", 0.8) if request.options else 0.8,
+            "top_p": request.options.get("top_p", 0.95) if request.options else 0.95,
+            "echo": False,
+        }
+
+        # Add system prompt if provided
+        if request.system:
+            generation_params["system_prompt"] = request.system
+
+        # Add format if provided
+        if request.format:
+            generation_params["response_format"] = request.format
+
+        # Add images if provided
+        if image_data:
+            generation_params["image_data"] = image_data
+
+        # Add context if provided
+        if request.context:
+            generation_params["context"] = request.context
+
+        # Stream the response
+        if request.stream:
+            async def generate_stream():
+                start_time = time.time()
+                load_time = 0  # We don't track this separately
+
+                # Start generation
+                completion_id = str(uuid4())
+
+                # Initial response
+                yield json.dumps({
+                    "model": request.model,
+                    "created_at": datetime.now().isoformat(),
+                    "response": "",
+                    "done": False
+                }) + "\n"
+
+                # Generate completion
+                completion = model.create_completion(**generation_params)
+
+                # Final response with stats
+                end_time = time.time()
+                total_duration = int((end_time - start_time) * 1e9)  # Convert to nanoseconds
+
+                yield json.dumps({
+                    "model": request.model,
+                    "created_at": datetime.now().isoformat(),
+                    "response": completion["choices"][0]["text"],
+                    "done": True,
+                    "context": completion.get("context", []),
+                    "total_duration": total_duration,
+                    "load_duration": load_time,
+                    "prompt_eval_count": completion.get("usage", {}).get("prompt_tokens", 0),
+                    "prompt_eval_duration": 0,  # Not tracked
+                    "eval_count": completion.get("usage", {}).get("completion_tokens", 0),
+                    "eval_duration": 0  # Not tracked
+                }) + "\n"
+
+            # Schedule unloading if keep_alive is 0
+            if keep_alive_seconds == 0:
+                background_tasks.add_task(model_manager.unload_model, request.model)
+
+            return StreamingResponse(generate_stream(), media_type="application/json")
+        else:
+            # Non-streaming response
+            start_time = time.time()
+
+            # Generate completion
+            completion = model.create_completion(**generation_params)
+
+            # Calculate durations
+            end_time = time.time()
+            total_duration = int((end_time - start_time) * 1e9)  # Convert to nanoseconds
+
+            # Schedule unloading if keep_alive is 0
+            if keep_alive_seconds == 0:
+                background_tasks.add_task(model_manager.unload_model, request.model)
+
+            return {
+                "model": request.model,
+                "created_at": datetime.now().isoformat(),
+                "response": completion["choices"][0]["text"],
+                "done": True,
+                "context": completion.get("context", []),
+                "total_duration": total_duration,
+                "load_duration": 0,  # Not tracked separately
+                "prompt_eval_count": completion.get("usage", {}).get("prompt_tokens", 0),
+                "prompt_eval_duration": 0,  # Not tracked
+                "eval_count": completion.get("usage", {}).get("completion_tokens", 0),
+                "eval_duration": 0  # Not tracked
+            }
+
+    except Exception as e:
+        logger.error(f"Error in generate: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+@router.post("/api/chat")
+async def chat(
+    request: ChatRequest,
+    background_tasks: BackgroundTasks,
+    model_manager: ModelManager = Depends(get_model_manager),
+):
+    """Generate a chat completion"""
+    try:
+        # Parse keep_alive
+        keep_alive_seconds = 300  # Default 5 minutes
+        if request.keep_alive:
+            if request.keep_alive.endswith("ms"):
+                keep_alive_seconds = int(request.keep_alive[:-2]) / 1000
+            elif request.keep_alive.endswith("s"):
+                keep_alive_seconds = int(request.keep_alive[:-1])
+            elif request.keep_alive.endswith("m"):
+                keep_alive_seconds = int(request.keep_alive[:-1]) * 60
+            elif request.keep_alive.endswith("h"):
+                keep_alive_seconds = int(request.keep_alive[:-1]) * 3600
+            elif request.keep_alive == "0":
+                # Special case: unload immediately after completion
+                keep_alive_seconds = 0
+            else:
+                try:
+                    keep_alive_seconds = int(request.keep_alive)
+                except ValueError:
+                    pass
+
+        # If messages is empty, just load the model and return
+        if not request.messages:
+            model = model_manager.load_model(
+                request.model,
+                **(request.options or {})
+            )
+
+            # Schedule unloading if keep_alive is 0
+            if keep_alive_seconds == 0:
+                background_tasks.add_task(model_manager.unload_model, request.model)
+
+            return {
+                "model": request.model,
+                "created_at": datetime.now().isoformat(),
+                "message": {
+                    "role": "assistant",
+                    "content": ""
+                },
+                "done_reason": "load" if keep_alive_seconds > 0 else "unload",
+                "done": True
+            }
+
+        # Load the model
+        model = model_manager.load_model(
+            request.model,
+            **(request.options or {})
+        )
+
+        # Convert messages to the format expected by llama-cpp-python
+        messages = []
+        for msg in request.messages:
+            if isinstance(msg.content, str):
+                messages.append({
+                    "role": msg.role,
+                    "content": msg.content
+                })
+            else:
+                # Handle multimodal content
+                messages.append({
+                    "role": msg.role,
+                    "content": msg.content
+                })
+
+        # Prepare chat parameters
+        chat_params = {
+            "messages": messages,
+            "temperature": request.options.get("temperature", 0.8) if request.options else 0.8,
+            "top_p": request.options.get("top_p", 0.95) if request.options else 0.95,
+        }
+
+        # Add tools if provided
+        if request.tools:
+            chat_params["tools"] = request.tools
+
+        # Add format if provided
+        if request.format:
+            chat_params["response_format"] = request.format
+
+        # Stream the response
+        if request.stream:
+            async def generate_stream():
+                start_time = time.time()
+
+                # Start generation
+                completion_id = str(uuid4())
+
+                # Initial response
+                yield json.dumps({
+                    "model": request.model,
+                    "created_at": datetime.now().isoformat(),
+                    "message": {
+                        "role": "assistant",
+                        "content": ""
+                    },
+                    "done": False
+                }) + "\n"
+
+                # Generate chat completion
+                completion = model.create_chat_completion(**chat_params)
+
+                # Final response with stats
+                end_time = time.time()
+                total_duration = int((end_time - start_time) * 1e9)  # Convert to nanoseconds
+
+                response_message = completion["choices"][0]["message"]
+
+                yield json.dumps({
+                    "model": request.model,
+                    "created_at": datetime.now().isoformat(),
+                    "message": response_message,
+                    "done": True,
+                    "done_reason": "stop",
+                    "total_duration": total_duration,
+                    "load_duration": 0,  # Not tracked separately
+                    "prompt_eval_count": completion.get("usage", {}).get("prompt_tokens", 0),
+                    "prompt_eval_duration": 0,  # Not tracked
+                    "eval_count": completion.get("usage", {}).get("completion_tokens", 0),
+                    "eval_duration": 0  # Not tracked
+                }) + "\n"
+
+            # Schedule unloading if keep_alive is 0
+            if keep_alive_seconds == 0:
+                background_tasks.add_task(model_manager.unload_model, request.model)
+
+            return StreamingResponse(generate_stream(), media_type="application/json")
+        else:
+            # Non-streaming response
+            start_time = time.time()
+
+            # Generate chat completion
+            completion = model.create_chat_completion(**chat_params)
+
+            # Calculate durations
+            end_time = time.time()
+            total_duration = int((end_time - start_time) * 1e9)  # Convert to nanoseconds
+
+            response_message = completion["choices"][0]["message"]
+
+            # Schedule unloading if keep_alive is 0
+            if keep_alive_seconds == 0:
+                background_tasks.add_task(model_manager.unload_model, request.model)
+
+            return {
+                "model": request.model,
+                "created_at": datetime.now().isoformat(),
+                "message": response_message,
+                "done": True,
+                "done_reason": "stop",
+                "total_duration": total_duration,
+                "load_duration": 0,  # Not tracked separately
+                "prompt_eval_count": completion.get("usage", {}).get("prompt_tokens", 0),
+                "prompt_eval_duration": 0,  # Not tracked
+                "eval_count": completion.get("usage", {}).get("completion_tokens", 0),
+                "eval_duration": 0  # Not tracked
+            }
+
+    except Exception as e:
+        logger.error(f"Error in chat: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+@router.post("/api/embed")
+async def embed(
+    request: EmbeddingRequest,
+    background_tasks: BackgroundTasks,
+    model_manager: ModelManager = Depends(get_model_manager),
+):
+    """Generate embeddings from a model"""
+    try:
+        # Parse keep_alive
+        keep_alive_seconds = 300  # Default 5 minutes
+        if request.keep_alive:
+            if request.keep_alive.endswith("ms"):
+                keep_alive_seconds = int(request.keep_alive[:-2]) / 1000
+            elif request.keep_alive.endswith("s"):
+                keep_alive_seconds = int(request.keep_alive[:-1])
+            elif request.keep_alive.endswith("m"):
+                keep_alive_seconds = int(request.keep_alive[:-1]) * 60
+            elif request.keep_alive.endswith("h"):
+                keep_alive_seconds = int(request.keep_alive[:-1]) * 3600
+            elif request.keep_alive == "0":
+                # Special case: unload immediately after completion
+                keep_alive_seconds = 0
+            else:
+                try:
+                    keep_alive_seconds = int(request.keep_alive)
+                except ValueError:
+                    pass
+
+        # Load the model with embedding=True
+        model_options = {**(request.options or {}), "embedding": True}
+        model = model_manager.load_model(
+            request.model,
+            **model_options
+        )
+
+        # Generate embeddings
+        start_time = time.time()
+
+        if isinstance(request.input, str):
+            # Single input
+            embedding = model.create_embedding(request.input)
+            embeddings = [embedding["embedding"]]
+        else:
+            # Multiple inputs
+            embeddings = []
+            for text in request.input:
+                embedding = model.create_embedding(text)
+                embeddings.append(embedding["embedding"])
+
+        # Calculate durations
+        end_time = time.time()
+        total_duration = int((end_time - start_time) * 1e9)  # Convert to nanoseconds
+
+        # Schedule unloading if keep_alive is 0
+        if keep_alive_seconds == 0:
+            background_tasks.add_task(model_manager.unload_model, request.model)
+
+        return {
+            "model": request.model,
+            "embeddings": embeddings,
+            "total_duration": total_duration,
+            "load_duration": 0,  # Not tracked separately
+            "prompt_eval_count": 0  # Not tracked
+        }
+
+    except Exception as e:
+        logger.error(f"Error in embed: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+@router.post("/api/pull")
+async def pull_model(
+    request: PullModelRequest,
+    model_manager: ModelManager = Depends(get_model_manager),
+):
+    """Pull a model from Hugging Face Hub"""
+    try:
+        if request.stream:
+            async def generate_stream():
+                # Initial response
+                yield json.dumps({"status": "pulling manifest"}) + "\n"
+
+                # Pull the model
+                result = await model_manager.pull_model(request.model, request.insecure)
+
+                if result["status"] == "error":
+                    yield json.dumps({"status": "error", "error": result["message"]}) + "\n"
+                    return
+
+                # Success response
+                yield json.dumps({"status": "downloading model"}) + "\n"
+                yield json.dumps({"status": "verifying sha256 digest"}) + "\n"
+                yield json.dumps({"status": "writing manifest"}) + "\n"
+                yield json.dumps({"status": "success"}) + "\n"
+
+            return StreamingResponse(generate_stream(), media_type="application/json")
+        else:
+            # Non-streaming response
+            result = await model_manager.pull_model(request.model, request.insecure)
+
+            if result["status"] == "error":
+                return {"status": "error", "error": result["message"]}
+
+            return {"status": "success"}
+
+    except Exception as e:
+        logger.error(f"Error in pull_model: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+@router.delete("/api/delete")
+async def delete_model(
+    request: DeleteModelRequest,
+    model_manager: ModelManager = Depends(get_model_manager),
+):
+    """Delete a model"""
+    try:
+        success = model_manager.delete_model(request.model)
+
+        if not success:
+            raise HTTPException(status_code=404, detail=f"Model '{request.model}' not found")
+
+        return {"status": "success"}
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Error in delete_model: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+@router.get("/api/tags")
+async def list_models(
+    model_manager: ModelManager = Depends(get_model_manager),
+):
+    """List all available models"""
+    try:
+        models = model_manager.list_models()
+
+        # Convert to response format
+        response_models = []
+        for model in models:
+            response_models.append(ModelResponse(
+                name=model["name"],
+                modified_at=model["modified_at"],
+                size=model["size"],
+                details={
+                    "format": "gguf",
+                    "family": "llama",  # Default, could be improved with model metadata
+                    "parameter_size": "Unknown",
+                    "quantization_level": "Unknown"
+                }
+            ))
+
+        return ModelsResponse(models=response_models)
+
+    except Exception as e:
+        logger.error(f"Error in list_models: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+@router.get("/api/version")
+async def version():
+    """Get the version of webscout.Local"""
+    return {"version": "0.1.0"}
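The file above gives webscout.Local an Ollama-style HTTP surface (/api/generate, /api/chat, /api/embed, /api/pull, /api/delete, /api/tags, /api/version). As a rough client sketch only: the host and port (localhost:11434) and the model name below are assumptions, since the server startup code in webscout/Local/server.py is not shown in this diff; the request fields and the response shape match the ChatRequest model and the non-streaming return value of the /api/chat handler above.

# Hypothetical client call against a locally running webscout.Local server.
# Assumptions: server reachable at http://localhost:11434, model "qwen2.5-0.5b" already pulled.
import requests

resp = requests.post(
    "http://localhost:11434/api/chat",
    json={
        "model": "qwen2.5-0.5b",  # hypothetical model name
        "messages": [
            {"role": "user", "content": "Say hello in one sentence."}
        ],
        "stream": False,  # request a single JSON object instead of newline-delimited chunks
        "options": {"temperature": 0.7, "top_p": 0.95},
        "keep_alive": "5m",  # matches the endpoint's default
    },
    timeout=300,
)
resp.raise_for_status()
data = resp.json()
# Non-streaming responses carry the assistant reply under "message"
print(data["message"]["content"])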