xinference 1.5.1__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +97 -8
- xinference/client/restful/restful_client.py +51 -11
- xinference/core/media_interface.py +758 -0
- xinference/core/model.py +49 -9
- xinference/core/worker.py +31 -37
- xinference/deploy/utils.py +0 -3
- xinference/model/audio/__init__.py +16 -27
- xinference/model/audio/core.py +1 -0
- xinference/model/audio/cosyvoice.py +4 -2
- xinference/model/audio/model_spec.json +20 -3
- xinference/model/audio/model_spec_modelscope.json +18 -1
- xinference/model/embedding/__init__.py +16 -24
- xinference/model/image/__init__.py +15 -25
- xinference/model/llm/__init__.py +37 -110
- xinference/model/llm/core.py +15 -6
- xinference/model/llm/llama_cpp/core.py +25 -353
- xinference/model/llm/llm_family.json +613 -89
- xinference/model/llm/llm_family.py +9 -1
- xinference/model/llm/llm_family_modelscope.json +540 -90
- xinference/model/llm/mlx/core.py +6 -3
- xinference/model/llm/reasoning_parser.py +281 -5
- xinference/model/llm/sglang/core.py +16 -3
- xinference/model/llm/transformers/chatglm.py +2 -2
- xinference/model/llm/transformers/cogagent.py +1 -1
- xinference/model/llm/transformers/cogvlm2.py +1 -1
- xinference/model/llm/transformers/core.py +9 -3
- xinference/model/llm/transformers/glm4v.py +1 -1
- xinference/model/llm/transformers/minicpmv26.py +1 -1
- xinference/model/llm/transformers/qwen-omni.py +6 -0
- xinference/model/llm/transformers/qwen_vl.py +1 -1
- xinference/model/llm/utils.py +68 -45
- xinference/model/llm/vllm/core.py +38 -18
- xinference/model/llm/vllm/xavier/test/test_xavier.py +1 -10
- xinference/model/rerank/__init__.py +13 -24
- xinference/model/video/__init__.py +15 -25
- xinference/model/video/core.py +3 -3
- xinference/model/video/diffusers.py +133 -16
- xinference/model/video/model_spec.json +54 -0
- xinference/model/video/model_spec_modelscope.json +56 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
- xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
- xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
- xinference/thirdparty/cosyvoice/bin/train.py +7 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
- xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
- xinference/thirdparty/cosyvoice/cli/model.py +140 -155
- xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
- xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
- xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
- xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
- xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
- xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
- xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
- xinference/thirdparty/cosyvoice/utils/common.py +1 -1
- xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
- xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
- xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
- xinference/types.py +0 -71
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
- xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
- xinference/web/ui/src/locales/en.json +6 -4
- xinference/web/ui/src/locales/zh.json +6 -4
- {xinference-1.5.1.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
- {xinference-1.5.1.dist-info → xinference-1.6.0.dist-info}/RECORD +87 -87
- {xinference-1.5.1.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
- xinference/core/image_interface.py +0 -377
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
- xinference/web/ui/build/static/js/main.91e77b5c.js +0 -3
- xinference/web/ui/build/static/js/main.91e77b5c.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5e6edb0fb87e3798f142e9abf8dd2dc46bab33a60d31dff525797c0c99887097.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/6087820be1bd5c02c42dff797e7df365448ef35ab26dd5d6bd33e967e05cbfd4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
- /xinference/web/ui/build/static/js/{main.91e77b5c.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
- {xinference-1.5.1.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.1.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.1.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
|
xinference/thirdparty/matcha/hifigan/xutils.py
CHANGED
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
import glob
|
|
4
4
|
import os
|
|
5
5
|
|
|
6
|
-
import matplotlib
|
|
6
|
+
# import matplotlib
|
|
7
7
|
import torch
|
|
8
8
|
from torch.nn.utils import weight_norm
|
|
9
9
|
|
|
10
|
-
matplotlib.use("Agg")
|
|
11
|
-
import matplotlib.pylab as plt
|
|
10
|
+
# matplotlib.use("Agg")
|
|
11
|
+
# import matplotlib.pylab as plt
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def plot_spectrogram(spectrogram):
|
xinference/types.py
CHANGED
|
@@ -247,56 +247,6 @@ class LogitsProcessorList(List[LogitsProcessor]):
|
|
|
247
247
|
return scores
|
|
248
248
|
|
|
249
249
|
|
|
250
|
-
class LlamaCppGenerateConfig(TypedDict, total=False):
|
|
251
|
-
suffix: Optional[str]
|
|
252
|
-
max_tokens: int
|
|
253
|
-
temperature: float
|
|
254
|
-
top_p: float
|
|
255
|
-
logprobs: Optional[int]
|
|
256
|
-
echo: bool
|
|
257
|
-
stop: Optional[Union[str, List[str]]]
|
|
258
|
-
frequency_penalty: float
|
|
259
|
-
presence_penalty: float
|
|
260
|
-
repetition_penalty: float
|
|
261
|
-
top_k: int
|
|
262
|
-
stream: bool
|
|
263
|
-
stream_options: Optional[Union[dict, None]]
|
|
264
|
-
tfs_z: float
|
|
265
|
-
mirostat_mode: int
|
|
266
|
-
mirostat_tau: float
|
|
267
|
-
mirostat_eta: float
|
|
268
|
-
model: Optional[str]
|
|
269
|
-
grammar: Optional[Any]
|
|
270
|
-
stopping_criteria: Optional["StoppingCriteriaList"]
|
|
271
|
-
logits_processor: Optional["LogitsProcessorList"]
|
|
272
|
-
tools: Optional[List[Dict]]
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
class LlamaCppModelConfig(TypedDict, total=False):
|
|
276
|
-
n_ctx: int
|
|
277
|
-
n_parts: int
|
|
278
|
-
n_gpu_layers: int
|
|
279
|
-
split_mode: int
|
|
280
|
-
main_gpu: int
|
|
281
|
-
seed: int
|
|
282
|
-
f16_kv: bool
|
|
283
|
-
logits_all: bool
|
|
284
|
-
vocab_only: bool
|
|
285
|
-
use_mmap: bool
|
|
286
|
-
use_mlock: bool
|
|
287
|
-
n_threads: Optional[int]
|
|
288
|
-
n_parallel: Optional[int]
|
|
289
|
-
n_batch: int
|
|
290
|
-
last_n_tokens_size: int
|
|
291
|
-
lora_base: Optional[str]
|
|
292
|
-
lora_path: Optional[str]
|
|
293
|
-
low_vram: bool
|
|
294
|
-
n_gqa: Optional[int] # (TEMPORARY) must be 8 for llama2 70b
|
|
295
|
-
rms_norm_eps: Optional[float] # (TEMPORARY)
|
|
296
|
-
verbose: bool
|
|
297
|
-
reasoning_content: bool
|
|
298
|
-
|
|
299
|
-
|
|
300
250
|
class PytorchGenerateConfig(TypedDict, total=False):
|
|
301
251
|
temperature: float
|
|
302
252
|
repetition_penalty: float
|
|
@@ -415,24 +365,6 @@ class CreateCompletionTorch(BaseModel):
|
|
|
415
365
|
chat_template_kwargs: Optional[Union[str, Dict[str, Any]]]
|
|
416
366
|
|
|
417
367
|
|
|
418
|
-
CreateCompletionLlamaCpp: BaseModel
|
|
419
|
-
try:
|
|
420
|
-
from llama_cpp import Llama
|
|
421
|
-
|
|
422
|
-
CreateCompletionLlamaCpp = get_pydantic_model_from_method(
|
|
423
|
-
Llama.create_completion,
|
|
424
|
-
exclude_fields=["model", "prompt", "grammar", "max_tokens"],
|
|
425
|
-
include_fields={
|
|
426
|
-
"grammar": (Optional[Any], None),
|
|
427
|
-
"max_tokens": (Optional[int], max_tokens_field),
|
|
428
|
-
"lora_name": (Optional[str], None),
|
|
429
|
-
"stream_options": (Optional[Union[dict, None]], None),
|
|
430
|
-
},
|
|
431
|
-
)
|
|
432
|
-
except ImportError:
|
|
433
|
-
CreateCompletionLlamaCpp = create_model("CreateCompletionLlamaCpp")
|
|
434
|
-
|
|
435
|
-
|
|
436
368
|
# This type is for openai API compatibility
|
|
437
369
|
CreateCompletionOpenAI: BaseModel
|
|
438
370
|
|
|
@@ -448,7 +380,6 @@ CreateCompletionOpenAI = fix_forward_ref(CreateCompletionOpenAI)
|
|
|
448
380
|
class CreateCompletion(
|
|
449
381
|
ModelAndPrompt,
|
|
450
382
|
CreateCompletionTorch,
|
|
451
|
-
CreateCompletionLlamaCpp,
|
|
452
383
|
CreateCompletionOpenAI,
|
|
453
384
|
):
|
|
454
385
|
pass
|
|
@@ -460,7 +391,6 @@ class CreateChatModel(BaseModel):
|
|
|
460
391
|
|
|
461
392
|
# Currently, chat calls generates, so the params share the same one.
|
|
462
393
|
CreateChatCompletionTorch = CreateCompletionTorch
|
|
463
|
-
CreateChatCompletionLlamaCpp: BaseModel = CreateCompletionLlamaCpp
|
|
464
394
|
|
|
465
395
|
|
|
466
396
|
from ._compat import CreateChatCompletionOpenAI
|
|
@@ -469,7 +399,6 @@ from ._compat import CreateChatCompletionOpenAI
|
|
|
469
399
|
class CreateChatCompletion( # type: ignore
|
|
470
400
|
CreateChatModel,
|
|
471
401
|
CreateChatCompletionTorch,
|
|
472
|
-
CreateChatCompletionLlamaCpp,
|
|
473
402
|
CreateChatCompletionOpenAI,
|
|
474
403
|
):
|
|
475
404
|
pass
|
|
xinference/web/ui/build/asset-manifest.json
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"files": {
|
|
3
3
|
"main.css": "./static/css/main.337afe76.css",
|
|
4
|
-
"main.js": "./static/js/main.91e77b5c.js",
|
|
4
|
+
"main.js": "./static/js/main.ae579a97.js",
|
|
5
5
|
"static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
|
|
6
6
|
"index.html": "./index.html",
|
|
7
7
|
"main.337afe76.css.map": "./static/css/main.337afe76.css.map",
|
|
8
|
-
"main.91e77b5c.js.map": "./static/js/main.91e77b5c.js.map"
|
|
8
|
+
"main.ae579a97.js.map": "./static/js/main.ae579a97.js.map"
|
|
9
9
|
},
|
|
10
10
|
"entrypoints": [
|
|
11
11
|
"static/css/main.337afe76.css",
|
|
12
|
-
"static/js/main.91e77b5c.js"
|
|
12
|
+
"static/js/main.ae579a97.js"
|
|
13
13
|
]
|
|
14
14
|
}
|
|
xinference/web/ui/build/index.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.91e77b5c.js"></script><link href="./static/css/main.337afe76.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|
|
1
|
+
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.ae579a97.js"></script><link href="./static/css/main.337afe76.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|