xinference 0.10.3__py3-none-any.whl → 0.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +1 -1
- xinference/api/restful_api.py +53 -61
- xinference/client/restful/restful_client.py +52 -57
- xinference/conftest.py +1 -1
- xinference/core/cache_tracker.py +1 -1
- xinference/core/chat_interface.py +10 -4
- xinference/core/event.py +1 -1
- xinference/core/model.py +17 -6
- xinference/core/status_guard.py +1 -1
- xinference/core/supervisor.py +58 -72
- xinference/core/worker.py +68 -101
- xinference/deploy/cmdline.py +166 -1
- xinference/deploy/test/test_cmdline.py +2 -0
- xinference/deploy/utils.py +1 -1
- xinference/device_utils.py +29 -3
- xinference/fields.py +7 -1
- xinference/model/audio/whisper.py +88 -12
- xinference/model/core.py +2 -2
- xinference/model/image/__init__.py +29 -0
- xinference/model/image/core.py +6 -0
- xinference/model/image/custom.py +109 -0
- xinference/model/llm/__init__.py +92 -32
- xinference/model/llm/core.py +57 -102
- xinference/model/llm/ggml/chatglm.py +98 -13
- xinference/model/llm/ggml/llamacpp.py +49 -2
- xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py +2 -2
- xinference/model/llm/llm_family.json +438 -7
- xinference/model/llm/llm_family.py +45 -41
- xinference/model/llm/llm_family_modelscope.json +258 -5
- xinference/model/llm/pytorch/chatglm.py +48 -0
- xinference/model/llm/pytorch/core.py +23 -6
- xinference/model/llm/pytorch/deepseek_vl.py +115 -33
- xinference/model/llm/pytorch/internlm2.py +32 -1
- xinference/model/llm/pytorch/qwen_vl.py +94 -12
- xinference/model/llm/pytorch/utils.py +38 -1
- xinference/model/llm/pytorch/yi_vl.py +96 -51
- xinference/model/llm/sglang/core.py +31 -9
- xinference/model/llm/utils.py +54 -20
- xinference/model/llm/vllm/core.py +101 -7
- xinference/thirdparty/omnilmm/chat.py +2 -1
- xinference/thirdparty/omnilmm/model/omnilmm.py +2 -1
- xinference/types.py +11 -0
- xinference/web/ui/build/asset-manifest.json +6 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.54bca460.css +2 -0
- xinference/web/ui/build/static/css/main.54bca460.css.map +1 -0
- xinference/web/ui/build/static/js/main.551aa479.js +3 -0
- xinference/web/ui/build/static/js/{main.26fdbfbe.js.LICENSE.txt → main.551aa479.js.LICENSE.txt} +7 -0
- xinference/web/ui/build/static/js/main.551aa479.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0b11a5339468c13b2d31ac085e7effe4303259b2071abd46a0a8eb8529233a5e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1fa824d82b2af519de7700c594e50bde4bbca60d13bd3fabff576802e4070304.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/23caf6f1e52c43e983ca3bfd4189f41dbd645fa78f2dfdcd7f6b69bc41678665.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/29dda700ab913cf7f2cfabe450ddabfb283e96adfa3ec9d315b2fa6c63cd375c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2c63e940b945fd5817157e08a42b889b30d668ea4c91332f48ef2b1b9d26f520.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4135fe8745434cbce6438d1ebfa47422e0c77d884db4edc75c8bf32ea1d50621.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/46b6dd1f6d1109cd0e2455a0ea0be3e9bda1097cd4ebec9c4040070372671cfc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4de0a71074f9cbe1e7862750dcdd08cbc1bae7d9d9849a78b1783ca670017b3c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/53f6c0c0afb51265cd8fb940daeb65523501879ac2a8c03a1ead22b9793c5041.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8ccbb839002bc5bc03e0a0e7612362bf92f6ae64f87e094f8682d6a6fe4619bb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/97ed30d6e22cf76f0733651e2c18364689a01665d0b5fe811c1b7ca3eb713c82.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9c0c70f1838913aaa792a0d2260f17f90fd177b95698ed46b7bc3050eb712c1c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9cfd33238ca43e5bf9fc7e442690e8cc6027c73553db36de87e3597ed524ee4b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a6da6bc3d0d2191adebee87fb58ecebe82d071087bd2f7f3a9c7fdd2ada130f2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ada71518a429f821a9b1dea38bc951447f03c8db509887e0980b893acac938f3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b6c9558d28b5972bb8b2691c5a76a2c8814a815eb3443126da9f49f7d6a0c118.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bb0f721c084a4d85c09201c984f02ee8437d3b6c5c38a57cb4a101f653daef1b.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +33 -0
- xinference/web/ui/node_modules/clipboard/.babelrc.json +11 -0
- xinference/web/ui/node_modules/clipboard/.eslintrc.json +24 -0
- xinference/web/ui/node_modules/clipboard/.prettierrc.json +9 -0
- xinference/web/ui/node_modules/clipboard/bower.json +18 -0
- xinference/web/ui/node_modules/clipboard/composer.json +25 -0
- xinference/web/ui/node_modules/clipboard/package.json +63 -0
- xinference/web/ui/node_modules/delegate/package.json +31 -0
- xinference/web/ui/node_modules/good-listener/bower.json +11 -0
- xinference/web/ui/node_modules/good-listener/package.json +35 -0
- xinference/web/ui/node_modules/select/bower.json +13 -0
- xinference/web/ui/node_modules/select/package.json +29 -0
- xinference/web/ui/node_modules/tiny-emitter/package.json +53 -0
- xinference/web/ui/package-lock.json +34 -0
- xinference/web/ui/package.json +1 -0
- {xinference-0.10.3.dist-info → xinference-0.11.1.dist-info}/METADATA +13 -12
- {xinference-0.10.3.dist-info → xinference-0.11.1.dist-info}/RECORD +88 -67
- xinference/client/oscar/__init__.py +0 -13
- xinference/client/oscar/actor_client.py +0 -611
- xinference/model/llm/pytorch/spec_decoding_utils.py +0 -531
- xinference/model/llm/pytorch/spec_model.py +0 -186
- xinference/web/ui/build/static/js/main.26fdbfbe.js +0 -3
- xinference/web/ui/build/static/js/main.26fdbfbe.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1870cd6f7054d04e049e363c0a85526584fe25519378609d2838e28d7492bbf1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f4d5d1a41892a754c1ee0237450d804b20612d1b657945b59e564161ea47aa7a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fad4cd70de36ef6e6d5f8fd74a10ded58d964a8a91ef7681693fbb8376552da7.json +0 -1
- {xinference-0.10.3.dist-info → xinference-0.11.1.dist-info}/LICENSE +0 -0
- {xinference-0.10.3.dist-info → xinference-0.11.1.dist-info}/WHEEL +0 -0
- {xinference-0.10.3.dist-info → xinference-0.11.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.10.3.dist-info → xinference-0.11.1.dist-info}/top_level.txt +0 -0

xinference/model/llm/vllm/core.py CHANGED

@@ -37,6 +37,7 @@ from ....types import (
     CompletionChoice,
     CompletionChunk,
     CompletionUsage,
+    LoRA,
     ToolCallFunction,
     ToolCalls,
 )
@@ -64,16 +65,19 @@ class VLLMModelConfig(TypedDict, total=False):
 
 
 class VLLMGenerateConfig(TypedDict, total=False):
+    lora_name: Optional[str]
     n: int
     best_of: Optional[int]
     presence_penalty: float
     frequency_penalty: float
     temperature: float
     top_p: float
+    top_k: int
     max_tokens: int
     stop_token_ids: Optional[List[int]]
     stop: Optional[Union[str, List[str]]]
     stream: bool  # non-sampling param, should not be passed to the engine.
+    stream_options: Optional[Union[dict, None]]
 
 
 try:
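
The three new keys above surface directly in the per-request generate config. A minimal sketch of a config dict exercising them; the values are illustrative choices, not defaults taken from this diff:

```python
# Illustrative VLLMGenerateConfig-style dict using the keys added in 0.11.1:
# lora_name, top_k and stream_options.
generate_config = {
    "lora_name": "my-adapter",                  # select a LoRA adapter registered at load time
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 40,                                # new: top-k sampling (-1 keeps it disabled)
    "max_tokens": 512,
    "stream": True,
    "stream_options": {"include_usage": True},  # new: ask for a trailing usage-only chunk
}
```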
@@ -90,6 +94,7 @@ VLLM_SUPPORTED_MODELS = [
     "internlm-16k",
     "mistral-v0.1",
     "Yi",
+    "Yi-1.5",
     "code-llama",
     "code-llama-python",
 ]
@@ -106,10 +111,12 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "internlm2-chat",
     "qwen-chat",
     "Yi-chat",
+    "Yi-1.5-chat",
     "code-llama-instruct",
     "mistral-instruct-v0.1",
     "mistral-instruct-v0.2",
     "mixtral-instruct-v0.1",
+    "mixtral-8x22B-instruct-v0.1",
     "chatglm3",
     "chatglm3-32k",
     "chatglm3-128k",
@@ -142,16 +149,30 @@ class VLLMModel(LLM):
         quantization: str,
         model_path: str,
         model_config: Optional[VLLMModelConfig],
+        peft_model: Optional[List[LoRA]] = None,
     ):
+        try:
+            from vllm.lora.request import LoRARequest
+        except ImportError:
+            error_message = "Failed to import module 'vllm'"
+            installation_guide = [
+                "Please make sure 'vllm' is installed. ",
+                "You can install it by `pip install vllm`\n",
+            ]
+
+            raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
         super().__init__(model_uid, model_family, model_spec, quantization, model_path)
         self._model_config = model_config
         self._engine = None
+        self.lora_modules = peft_model
+        self.lora_requests: List[LoRARequest] = []
 
     def load(self):
         try:
             import vllm
             from vllm.engine.arg_utils import AsyncEngineArgs
             from vllm.engine.async_llm_engine import AsyncLLMEngine
+            from vllm.lora.request import LoRARequest
         except ImportError:
             error_message = "Failed to import module 'vllm'"
             installation_guide = [
@@ -170,11 +191,33 @@ class VLLMModel(LLM):
             multiprocessing.set_start_method("fork", force=True)
 
         self._model_config = self._sanitize_model_config(self._model_config)
+
+        if self.lora_modules is None:
+            self.lora_requests = []
+        else:
+            self.lora_requests = [
+                LoRARequest(
+                    lora_name=lora.lora_name,
+                    lora_int_id=i,
+                    lora_local_path=lora.local_path,
+                )
+                for i, lora in enumerate(self.lora_modules, start=1)
+            ]
+
+        enable_lora = len(self.lora_requests) > 0
+        max_loras = len(self.lora_requests)
+
         logger.info(
             f"Loading {self.model_uid} with following model config: {self._model_config}"
+            f"Enable lora: {enable_lora}. Lora count: {max_loras}."
         )
 
-        engine_args = AsyncEngineArgs(model=self.model_path, **self._model_config)
+        engine_args = AsyncEngineArgs(
+            model=self.model_path,
+            enable_lora=enable_lora,
+            max_loras=max_loras,
+            **self._model_config,
+        )
         self._engine = AsyncLLMEngine.from_engine_args(engine_args)
 
     def _sanitize_model_config(
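
For context, the `peft_model` argument threaded through `__init__` and `load()` is a list of `LoRA` entries, and each entry's `lora_name` is what a request later references via `generate_config["lora_name"]`. A hedged sketch of such a list; the names and paths are placeholders, and it assumes `LoRA` accepts these fields as keywords, as the attribute access in the hunk above suggests:

```python
from xinference.types import LoRA

# Hypothetical adapters; local_path values are placeholders.
peft_model = [
    LoRA(lora_name="sql-adapter", local_path="/path/to/sql_lora"),
    LoRA(lora_name="chat-adapter", local_path="/path/to/chat_lora"),
]
# During load(), these become vllm LoRARequest objects with lora_int_id 1 and 2;
# a request that sets generate_config["lora_name"] = "sql-adapter" is routed to
# the matching LoRARequest.
```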
@@ -205,6 +248,7 @@ class VLLMModel(LLM):
             generate_config = {}
 
         sanitized = VLLMGenerateConfig()
+        sanitized.setdefault("lora_name", generate_config.get("lora_name", None))
         sanitized.setdefault("n", generate_config.get("n", 1))
         sanitized.setdefault("best_of", generate_config.get("best_of", None))
         sanitized.setdefault(
@@ -215,12 +259,16 @@
         )
         sanitized.setdefault("temperature", generate_config.get("temperature", 1.0))
         sanitized.setdefault("top_p", generate_config.get("top_p", 1.0))
+        sanitized.setdefault("top_k", generate_config.get("top_k", -1))
         sanitized.setdefault("max_tokens", generate_config.get("max_tokens", 1024))
         sanitized.setdefault("stop", generate_config.get("stop", None))
         sanitized.setdefault(
             "stop_token_ids", generate_config.get("stop_token_ids", None)
         )
-        sanitized.setdefault("stream", generate_config.get("stream",
+        sanitized.setdefault("stream", generate_config.get("stream", False))
+        sanitized.setdefault(
+            "stream_options", generate_config.get("stream_options", None)
+        )
 
         return sanitized
 
@@ -239,10 +287,17 @@ class VLLMModel(LLM):
         if llm_spec.model_format == "pytorch":
             if quantization != "none" and not (quantization is None):
                 return False
-        if llm_spec.model_format
-            # Currently, only 4-bit weight quantization is supported for
+        if llm_spec.model_format == "awq":
+            # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
             if "4" not in quantization:
                 return False
+        if llm_spec.model_format == "gptq":
+            if VLLM_INSTALLED and vllm.__version__ >= "0.3.3":
+                if not any(q in quantization for q in ("3", "4", "8")):
+                    return False
+            else:
+                if "4" not in quantization:
+                    return False
         if isinstance(llm_family, CustomLLMFamilyV1):
             if llm_family.model_family not in VLLM_SUPPORTED_MODELS:
                 return False
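
The same quantization gate appears again in `VLLMChatModel.match` further below. Restated as a standalone predicate, purely as a rough illustration of the rule rather than the actual `match()` signature:

```python
# Rough restatement of the quantization gate added above; not the real method.
def vllm_quant_ok(model_format: str, quantization: str, vllm_version: str) -> bool:
    if model_format == "awq":
        return "4" in quantization                   # AWQ: 4-bit only
    if model_format == "gptq":
        if vllm_version >= "0.3.3":                  # plain string comparison, as in the diff
            return any(q in quantization for q in ("3", "4", "8"))
        return "4" in quantization                   # older vLLM: 4-bit GPTQ only
    return True                                      # other formats are handled elsewhere
```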
@@ -330,16 +385,34 @@ class VLLMModel(LLM):
             "Enter generate, prompt: %s, generate config: %s", prompt, generate_config
         )
 
+        lora_model = sanitized_generate_config.pop("lora_name")
+
+        lora_request = None
+        if lora_model is not None:
+            for lora in self.lora_requests:
+                if lora_model == lora.lora_name:
+                    lora_request = lora
+                    break
+
         stream = sanitized_generate_config.pop("stream")
+        stream_options = sanitized_generate_config.pop("stream_options", None)
+        include_usage = (
+            stream_options["include_usage"]
+            if isinstance(stream_options, dict)
+            else False
+        )
         sampling_params = SamplingParams(**sanitized_generate_config)
         request_id = str(uuid.uuid1())
 
         assert self._engine is not None
-        results_generator = self._engine.generate(prompt, sampling_params, request_id)
+        results_generator = self._engine.generate(
+            prompt, sampling_params, request_id, lora_request=lora_request
+        )
 
         async def stream_results() -> AsyncGenerator[CompletionChunk, None]:
             previous_texts = [""] * sanitized_generate_config["n"]
             tools_token_filter = ChatModelMixin._tools_token_filter(self.model_family)
+            prompt_tokens, completion_tokens, total_tokens = 0, 0, 0
             async for _request_output in results_generator:
                 chunk = self._convert_request_output_to_completion_chunk(
                     request_id=request_id,
@@ -390,6 +463,20 @@ class VLLMModel(LLM):
                     total_tokens=total_tokens,
                 )
                 yield chunk
+            if include_usage:
+                chunk = CompletionChunk(
+                    id=request_id,
+                    object="text_completion",
+                    created=int(time.time()),
+                    model=self.model_uid,
+                    choices=[],
+                )
+                chunk["usage"] = CompletionUsage(
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=total_tokens,
+                )
+                yield chunk
 
         if stream:
             return stream_results()
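
With `stream=True` and `stream_options={"include_usage": True}`, the generator now ends with one extra chunk whose `choices` list is empty and whose `usage` carries the final token counts. A minimal consumer sketch, assuming only the chunk shape shown above; the function name is illustrative:

```python
# Sketch: reading the async chunk stream returned when stream=True.
async def read_stream(chunk_stream):
    async for chunk in chunk_stream:
        if not chunk["choices"]:
            # trailing usage-only chunk emitted when include_usage is set
            usage = chunk["usage"]
            print(f"\n[usage] prompt={usage['prompt_tokens']} "
                  f"completion={usage['completion_tokens']} "
                  f"total={usage['total_tokens']}")
        else:
            print(chunk["choices"][0].get("text", ""), end="")
```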
@@ -416,10 +503,17 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         if llm_spec.model_format == "pytorch":
             if quantization != "none" and not (quantization is None):
                 return False
-        if llm_spec.model_format
-            # Currently, only 4-bit weight quantization is supported for
+        if llm_spec.model_format == "awq":
+            # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
             if "4" not in quantization:
                 return False
+        if llm_spec.model_format == "gptq":
+            if VLLM_INSTALLED and vllm.__version__ >= "0.3.3":
+                if not any(q in quantization for q in ("3", "4", "8")):
+                    return False
+            else:
+                if "4" not in quantization:
+                    return False
         if isinstance(llm_family, CustomLLMFamilyV1):
             if llm_family.model_family not in VLLM_SUPPORTED_CHAT_MODELS:
                 return False

xinference/thirdparty/omnilmm/chat.py CHANGED

@@ -4,7 +4,6 @@ import json
 import os
 
 import torch
-from accelerate import init_empty_weights, load_checkpoint_and_dispatch
 from PIL import Image
 from transformers import AutoModel, AutoTokenizer
 
@@ -20,6 +19,8 @@ DEFAULT_IM_END_TOKEN = "<im_end>"
 
 
 def init_omni_lmm(model_path, device_map):
+    from accelerate import init_empty_weights, load_checkpoint_and_dispatch
+
     torch.backends.cuda.matmul.allow_tf32 = True
     disable_torch_init()
     model_name = os.path.expanduser(model_path)

xinference/thirdparty/omnilmm/model/omnilmm.py CHANGED

@@ -2,7 +2,6 @@ import gc
 import math
 from typing import List, Optional, Tuple, Union
 
-import timm
 import torch
 import torch.nn as nn
 from torch import Tensor
@@ -37,6 +36,8 @@ class Identity(torch.nn.Identity):
 
 
 def create_vision_module(config):
+    import timm
+
     vision_tower = timm.create_model(
         "eva02_enormous_patch14_clip_224.laion2b_plus",
         pretrained=False,
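
Both omnilmm hunks apply the same change: `accelerate` and `timm` move from module-level imports into the functions that actually use them, so importing the module no longer hard-requires those optional dependencies. The pattern in isolation, as a generic sketch rather than the xinference code itself:

```python
# Deferred-import pattern: the heavy optional dependency is only imported when
# the function runs, so "import this_module" succeeds without timm installed.
def create_vision_module(model_name: str):
    import timm  # raises ImportError only if/when a vision module is actually built

    return timm.create_model(model_name, pretrained=False)
```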
xinference/types.py CHANGED

@@ -33,6 +33,7 @@ from .fields import (
     stop_field,
     stream_field,
     stream_interval_field,
+    stream_option_field,
     temperature_field,
     top_k_field,
     top_p_field,
@@ -186,6 +187,8 @@ class ChatglmCppGenerateConfig(TypedDict, total=False):
     top_p: float
     temperature: float
     stream: bool
+    lora_name: Optional[str]
+    stream_options: Optional[Union[dict, None]]
 
 
 class QWenCppModelConfig(TypedDict, total=False):
@@ -230,6 +233,7 @@ class LlamaCppGenerateConfig(TypedDict, total=False):
     repetition_penalty: float
     top_k: int
     stream: bool
+    stream_options: Optional[Union[dict, None]]
     tfs_z: float
     mirostat_mode: int
     mirostat_tau: float
@@ -278,6 +282,8 @@ class PytorchGenerateConfig(TypedDict, total=False):
     stream_interval: int
     model: Optional[str]
     tools: Optional[List[Dict]]
+    lora_name: Optional[str]
+    stream_options: Optional[Union[dict, None]]
 
 
 class PytorchModelConfig(TypedDict, total=False):
@@ -349,10 +355,12 @@ class CreateCompletionTorch(BaseModel):
     stop: Optional[Union[str, List[str]]] = stop_field
     stop_token_ids: Optional[Union[int, List[int]]] = none_field
     stream: bool = stream_field
+    stream_options: Optional[Union[dict, None]] = stream_option_field
     stream_interval: int = stream_interval_field
     temperature: float = temperature_field
     top_p: float = top_p_field
     top_k: int = top_k_field
+    lora_name: Optional[str]
 
 
 CreateCompletionLlamaCpp: BaseModel
@@ -365,6 +373,8 @@ try:
         include_fields={
             "grammar": (Optional[Any], None),
             "max_tokens": (Optional[int], max_tokens_field),
+            "lora_name": (Optional[str], None),
+            "stream_options": (Optional[Union[dict, None]], None),
         },
     )
 except ImportError:
@@ -392,6 +402,7 @@ class _CreateCompletionOpenAIFallback(BaseModel):
     seed: Optional[int] = none_field
     stop: Optional[Union[str, List[str]]] = stop_field
     stream: bool = stream_field
+    stream_options: Optional[Union[dict, None]] = stream_option_field
     suffix: Optional[str] = none_field
     temperature: float = temperature_field
     top_p: float = top_p_field
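
These schema additions mean the completion request models now accept `stream_options` (and, for the Torch and llama.cpp schemas, `lora_name`) in the request body. An illustrative payload; the model uid, adapter name, and endpoint wiring are assumptions, not taken from this diff:

```python
# Hypothetical request body for an OpenAI-style completion call against a
# running xinference server; field names mirror the pydantic models above.
payload = {
    "model": "my-model-uid",
    "prompt": "Write a haiku about GPUs.",
    "stream": True,
    "stream_options": {"include_usage": True},
    "lora_name": "sql-adapter",
}
```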

xinference/web/ui/build/asset-manifest.json CHANGED

@@ -1,11 +1,14 @@
 {
   "files": {
-    "main.js": "./static/js/main.26fdbfbe.js",
+    "main.css": "./static/css/main.54bca460.css",
+    "main.js": "./static/js/main.551aa479.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
-    "main.26fdbfbe.js.map": "./static/js/main.26fdbfbe.js.map"
+    "main.54bca460.css.map": "./static/css/main.54bca460.css.map",
+    "main.551aa479.js.map": "./static/js/main.551aa479.js.map"
   },
   "entrypoints": [
-    "static/js/main.26fdbfbe.js"
+    "static/css/main.54bca460.css",
+    "static/js/main.551aa479.js"
   ]
 }

xinference/web/ui/build/index.html CHANGED

@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.26fdbfbe.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.551aa479.js"></script><link href="./static/css/main.54bca460.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>

xinference/web/ui/build/static/css/main.54bca460.css ADDED

@@ -0,0 +1,2 @@
+.formBox{max-height:80vh;max-width:50vw;min-width:50vw;overflow:auto;padding:40px 20px 0 0;position:relative;transition:all .4s ease-in-out}.broaden{max-width:100%;min-width:100%;padding-right:0}.show-json{align-items:center;color:#444;display:flex;position:fixed;right:60px;top:90px}.icon{cursor:pointer;margin-left:20px;position:absolute;right:-40px}.icon:hover{color:#1976d2}.arrow{font-size:24px!important}.jsonBox{min-height:80vh;position:relative;transition:all .4s ease-in-out;width:100%}.hide{overflow:hidden;-webkit-transform:translate(30vw);transform:translate(30vw);width:0}.jsonBox-header{font-weight:700;line-height:40px}.textarea{border:1px solid #ddd;border-radius:5px;color:#444;height:calc(100% - 40px);padding:5px 10px;resize:none;width:100%}.copyIcon{color:#555;cursor:pointer;font-size:16px!important;position:absolute;right:5px;top:13px}.copyIcon:hover{color:#1976d2}.addBtn{margin-left:20px!important}.item{background-color:#eee;border-radius:10px;margin:10px 50px 0;overflow:hidden;padding:20px;position:relative}.item:hover .deleteBtn{-webkit-transform:translateX(-50px);transform:translateX(-50px)}.deleteBtn{background-color:#1976d2;border-radius:25px;height:50px;line-height:70px;position:absolute;right:20px;text-align:center;top:calc(50% - 25px);-webkit-transform:translateX(80px);transform:translateX(80px);transition:all .3s ease-in-out;width:50px}.deleteBtn:hover{box-shadow:0 0 10px #aaa;cursor:pointer}.deleteIcon{color:#fff;font-size:28px!important}
+/*# sourceMappingURL=main.54bca460.css.map*/

xinference/web/ui/build/static/css/main.54bca460.css.map ADDED

@@ -0,0 +1 @@
+{"version":3,"file":"static/css/main.54bca460.css","mappings":"AAAA,SAIE,eAAgB,CAFhB,cAAe,CACf,cAAe,CAEf,aAAc,CACd,qBAAsB,CALtB,iBAAkB,CAMlB,8BACF,CAEA,SACE,cAAe,CACf,cAAe,CACf,eACF,CAEA,WAEE,kBAAmB,CAInB,UAAW,CALX,YAAa,CAEb,cAAe,CAEf,UAAW,CADX,QAGF,CAEA,MAGE,cAAe,CACf,gBAAiB,CAHjB,iBAAkB,CAClB,WAGF,CAEA,YACE,aACF,CAEA,OACE,wBACF,CAEA,SAEE,eAAgB,CADhB,iBAAkB,CAGlB,8BAAgC,CADhC,UAEF,CAEA,MAGE,eAAgB,CADhB,iCAA6B,CAA7B,yBAA6B,CAD7B,OAGF,CAEA,gBAEE,eAAgB,CADhB,gBAEF,CAEA,UAIE,qBAAsB,CACtB,iBAAkB,CAElB,UAAW,CALX,wBAAyB,CACzB,gBAAiB,CAGjB,WAAY,CALZ,UAOF,CAEA,UAME,UAAW,CALX,cAAe,CAIf,wBAA0B,CAH1B,iBAAkB,CAElB,SAAU,CADV,QAIF,CAEA,gBACE,aACF,CAEA,QACE,0BACF,CAEA,MAEE,qBAAsB,CAGtB,kBAAmB,CAFnB,kBAAmB,CAGnB,eAAgB,CAFhB,YAAa,CAHb,iBAMF,CAEA,uBACE,mCAA4B,CAA5B,2BACF,CAEA,WAUE,wBAAyB,CADzB,kBAAmB,CAJnB,WAAY,CAGZ,gBAAiB,CAPjB,iBAAkB,CAClB,UAAW,CAKX,iBAAkB,CAJlB,oBAAqB,CAGrB,kCAA2B,CAA3B,0BAA2B,CAK3B,8BAAgC,CAPhC,UAQF,CAEA,iBAEE,wBAAyB,CADzB,cAEF,CAEA,YAEE,UAAW,CADX,wBAEF","sources":["scenes/register_model/styles/registerModelStyle.css"],"sourcesContent":[".formBox {\n  position: relative;\n  max-width: 50vw;\n  min-width: 50vw;\n  max-height: 80vh;\n  overflow: auto;\n  padding: 40px 20px 0 0;\n  transition: all 0.4s ease-in-out;\n}\n\n.broaden {\n  max-width: 100%;\n  min-width: 100%;\n  padding-right: 0;\n}\n\n.show-json {\n  display: flex;\n  align-items: center;\n  position: fixed;\n  top: 90px;\n  right: 60px;\n  color: #444;\n}\n\n.icon {\n  position: absolute;\n  right: -40px;\n  cursor: pointer;\n  margin-left: 20px;\n}\n\n.icon:hover {\n  color: #1976d2;\n}\n\n.arrow {\n  font-size: 24px !important;\n}\n\n.jsonBox {\n  position: relative;\n  min-height: 80vh;\n  width: 100%;\n  transition: all 0.4s ease-in-out;\n}\n\n.hide {\n  width: 0;\n  transform: translate(30vw, 0);\n  overflow: hidden;\n}\n\n.jsonBox-header {\n  line-height: 40px;\n  font-weight: 700;\n}\n\n.textarea {\n  width: 100%;\n  height: calc(100% - 40px);\n  padding: 5px 10px;\n  border: 1px solid #ddd;\n  border-radius: 5px;\n  resize: none;\n  color: #444;\n}\n\n.copyIcon {\n  cursor: pointer;\n  position: absolute;\n  top: 13px;\n  right: 5px;\n  font-size: 16px !important;\n  color: #555;\n}\n\n.copyIcon:hover {\n  color: #1976d2;\n}\n\n.addBtn {\n  margin-left: 20px !important;\n}\n\n.item {\n  position: relative;\n  background-color: #eee;\n  margin: 10px 50px 0;\n  padding: 20px;\n  border-radius: 10px;\n  overflow: hidden;\n}\n\n.item:hover .deleteBtn {\n  transform: translateX(-50px);\n}\n\n.deleteBtn {\n  position: absolute;\n  right: 20px;\n  top: calc(50% - 25px);\n  width: 50px;\n  height: 50px;\n  transform: translateX(80px);\n  text-align: center;\n  line-height: 70px;\n  border-radius: 25px;\n  background-color: #1976d2;\n  transition: all 0.3s ease-in-out;\n}\n\n.deleteBtn:hover {\n  cursor: pointer;\n  box-shadow: 0 0 10px #aaa;\n}\n\n.deleteIcon {\n  font-size: 28px !important;\n  color: #fff;\n}\n"],"names":[],"sourceRoot":""}