xinference 1.5.1__py3-none-any.whl → 1.6.0.post1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.

Potentially problematic release: this version of xinference might be problematic.

Files changed (96)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +97 -8
  3. xinference/client/restful/restful_client.py +51 -11
  4. xinference/core/media_interface.py +758 -0
  5. xinference/core/model.py +49 -9
  6. xinference/core/worker.py +31 -37
  7. xinference/deploy/utils.py +0 -3
  8. xinference/model/audio/__init__.py +16 -27
  9. xinference/model/audio/core.py +1 -0
  10. xinference/model/audio/cosyvoice.py +4 -2
  11. xinference/model/audio/model_spec.json +20 -3
  12. xinference/model/audio/model_spec_modelscope.json +18 -1
  13. xinference/model/embedding/__init__.py +16 -24
  14. xinference/model/image/__init__.py +15 -25
  15. xinference/model/llm/__init__.py +37 -110
  16. xinference/model/llm/core.py +15 -6
  17. xinference/model/llm/llama_cpp/core.py +25 -353
  18. xinference/model/llm/llm_family.json +613 -89
  19. xinference/model/llm/llm_family.py +9 -1
  20. xinference/model/llm/llm_family_modelscope.json +540 -90
  21. xinference/model/llm/mlx/core.py +6 -3
  22. xinference/model/llm/reasoning_parser.py +281 -5
  23. xinference/model/llm/sglang/core.py +16 -3
  24. xinference/model/llm/transformers/chatglm.py +2 -2
  25. xinference/model/llm/transformers/cogagent.py +1 -1
  26. xinference/model/llm/transformers/cogvlm2.py +1 -1
  27. xinference/model/llm/transformers/core.py +9 -3
  28. xinference/model/llm/transformers/glm4v.py +1 -1
  29. xinference/model/llm/transformers/minicpmv26.py +1 -1
  30. xinference/model/llm/transformers/qwen-omni.py +6 -0
  31. xinference/model/llm/transformers/qwen_vl.py +1 -1
  32. xinference/model/llm/utils.py +68 -45
  33. xinference/model/llm/vllm/core.py +38 -18
  34. xinference/model/llm/vllm/xavier/test/test_xavier.py +1 -10
  35. xinference/model/rerank/__init__.py +13 -24
  36. xinference/model/video/__init__.py +15 -25
  37. xinference/model/video/core.py +3 -3
  38. xinference/model/video/diffusers.py +133 -16
  39. xinference/model/video/model_spec.json +54 -0
  40. xinference/model/video/model_spec_modelscope.json +56 -0
  41. xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
  42. xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
  43. xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
  44. xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
  45. xinference/thirdparty/cosyvoice/bin/train.py +7 -2
  46. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
  47. xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
  48. xinference/thirdparty/cosyvoice/cli/model.py +140 -155
  49. xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
  50. xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
  51. xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
  52. xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
  53. xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
  54. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
  55. xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
  56. xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
  57. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
  58. xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
  59. xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
  60. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
  61. xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
  62. xinference/thirdparty/cosyvoice/utils/common.py +1 -1
  63. xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
  64. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
  65. xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
  66. xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
  67. xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
  68. xinference/types.py +0 -71
  69. xinference/web/ui/build/asset-manifest.json +3 -3
  70. xinference/web/ui/build/index.html +1 -1
  71. xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
  72. xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
  73. xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
  74. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
  75. xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
  79. xinference/web/ui/src/locales/en.json +6 -4
  80. xinference/web/ui/src/locales/zh.json +6 -4
  81. {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/METADATA +59 -39
  82. {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/RECORD +87 -87
  83. {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/WHEEL +1 -1
  84. xinference/core/image_interface.py +0 -377
  85. xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
  86. xinference/web/ui/build/static/js/main.91e77b5c.js +0 -3
  87. xinference/web/ui/build/static/js/main.91e77b5c.js.map +0 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
  89. xinference/web/ui/node_modules/.cache/babel-loader/5e6edb0fb87e3798f142e9abf8dd2dc46bab33a60d31dff525797c0c99887097.json +0 -1
  90. xinference/web/ui/node_modules/.cache/babel-loader/6087820be1bd5c02c42dff797e7df365448ef35ab26dd5d6bd33e967e05cbfd4.json +0 -1
  91. xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
  92. xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
  93. /xinference/web/ui/build/static/js/{main.91e77b5c.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
  94. {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/entry_points.txt +0 -0
  95. {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/licenses/LICENSE +0 -0
  96. {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/top_level.txt +0 -0
xinference/thirdparty/matcha/hifigan/xutils.py CHANGED
@@ -3,12 +3,12 @@
 import glob
 import os

-import matplotlib
+# import matplotlib
 import torch
 from torch.nn.utils import weight_norm

-matplotlib.use("Agg")
-import matplotlib.pylab as plt
+# matplotlib.use("Agg")
+# import matplotlib.pylab as plt


 def plot_spectrogram(spectrogram):
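The hunk above only comments out the module-level matplotlib imports, removing the hard dependency at import time; as written, plot_spectrogram would now fail if called. A minimal sketch of the lazy-import alternative this points toward, assuming matplotlib is installed whenever plotting is actually requested (hypothetical code, not taken from the package):

def plot_spectrogram(spectrogram):
    # Import lazily so that importing this module never requires
    # matplotlib; only calling the plotting helper does.
    import matplotlib

    matplotlib.use("Agg")  # non-interactive backend, safe for headless servers
    import matplotlib.pylab as plt

    fig, ax = plt.subplots(figsize=(10, 2))
    im = ax.imshow(spectrogram, aspect="auto", origin="lower", interpolation="none")
    plt.colorbar(im, ax=ax)
    fig.canvas.draw()
    plt.close(fig)
    return fig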
xinference/types.py CHANGED
@@ -247,56 +247,6 @@ class LogitsProcessorList(List[LogitsProcessor]):
         return scores


-class LlamaCppGenerateConfig(TypedDict, total=False):
-    suffix: Optional[str]
-    max_tokens: int
-    temperature: float
-    top_p: float
-    logprobs: Optional[int]
-    echo: bool
-    stop: Optional[Union[str, List[str]]]
-    frequency_penalty: float
-    presence_penalty: float
-    repetition_penalty: float
-    top_k: int
-    stream: bool
-    stream_options: Optional[Union[dict, None]]
-    tfs_z: float
-    mirostat_mode: int
-    mirostat_tau: float
-    mirostat_eta: float
-    model: Optional[str]
-    grammar: Optional[Any]
-    stopping_criteria: Optional["StoppingCriteriaList"]
-    logits_processor: Optional["LogitsProcessorList"]
-    tools: Optional[List[Dict]]
-
-
-class LlamaCppModelConfig(TypedDict, total=False):
-    n_ctx: int
-    n_parts: int
-    n_gpu_layers: int
-    split_mode: int
-    main_gpu: int
-    seed: int
-    f16_kv: bool
-    logits_all: bool
-    vocab_only: bool
-    use_mmap: bool
-    use_mlock: bool
-    n_threads: Optional[int]
-    n_parallel: Optional[int]
-    n_batch: int
-    last_n_tokens_size: int
-    lora_base: Optional[str]
-    lora_path: Optional[str]
-    low_vram: bool
-    n_gqa: Optional[int]  # (TEMPORARY) must be 8 for llama2 70b
-    rms_norm_eps: Optional[float]  # (TEMPORARY)
-    verbose: bool
-    reasoning_content: bool
-
-
 class PytorchGenerateConfig(TypedDict, total=False):
     temperature: float
     repetition_penalty: float
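The removed llama-cpp-python option types were TypedDicts declared with total=False, which makes every key optional: callers may pass any subset of the options. A small usage sketch of that pattern (GenerateConfig here is a hypothetical stand-in, not a name from the package):

from typing import Optional, TypedDict


class GenerateConfig(TypedDict, total=False):
    max_tokens: int
    temperature: float
    stop: Optional[str]
    stream: bool


# Any subset of keys type-checks; absent keys are simply missing.
cfg: GenerateConfig = {"temperature": 0.7, "stream": True}
cfg.setdefault("max_tokens", 256)  # fill a default at call time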
@@ -415,24 +365,6 @@ class CreateCompletionTorch(BaseModel):
     chat_template_kwargs: Optional[Union[str, Dict[str, Any]]]


-CreateCompletionLlamaCpp: BaseModel
-try:
-    from llama_cpp import Llama
-
-    CreateCompletionLlamaCpp = get_pydantic_model_from_method(
-        Llama.create_completion,
-        exclude_fields=["model", "prompt", "grammar", "max_tokens"],
-        include_fields={
-            "grammar": (Optional[Any], None),
-            "max_tokens": (Optional[int], max_tokens_field),
-            "lora_name": (Optional[str], None),
-            "stream_options": (Optional[Union[dict, None]], None),
-        },
-    )
-except ImportError:
-    CreateCompletionLlamaCpp = create_model("CreateCompletionLlamaCpp")
-
-
 # This type is for openai API compatibility
 CreateCompletionOpenAI: BaseModel

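The block removed above followed a common optional-dependency pattern: when the extra package imports cleanly, derive a full pydantic model from its API; otherwise fall back to an empty placeholder so the surrounding class hierarchy still imports. A hedged sketch of that pattern with hypothetical names (some_extra, ExtraParams):

from pydantic import BaseModel, create_model

try:
    import some_extra  # hypothetical optional dependency

    class ExtraParams(BaseModel):
        # Fields mirroring the optional backend's parameters.
        n_ctx: int = 2048
        use_mmap: bool = True

except ImportError:
    # Empty model: contributes no fields when the extra is missing.
    ExtraParams = create_model("ExtraParams")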
@@ -448,7 +380,6 @@ CreateCompletionOpenAI = fix_forward_ref(CreateCompletionOpenAI)
 class CreateCompletion(
     ModelAndPrompt,
     CreateCompletionTorch,
-    CreateCompletionLlamaCpp,
     CreateCompletionOpenAI,
 ):
     pass
@@ -460,7 +391,6 @@ class CreateChatModel(BaseModel):

 # Currently, chat calls generates, so the params share the same one.
 CreateChatCompletionTorch = CreateCompletionTorch
-CreateChatCompletionLlamaCpp: BaseModel = CreateCompletionLlamaCpp


 from ._compat import CreateChatCompletionOpenAI
@@ -469,7 +399,6 @@ from ._compat import CreateChatCompletionOpenAI
 class CreateChatCompletion(  # type: ignore
     CreateChatModel,
     CreateChatCompletionTorch,
-    CreateChatCompletionLlamaCpp,
     CreateChatCompletionOpenAI,
 ):
     pass
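In the three hunks above, CreateCompletion and CreateChatCompletion each drop the llama.cpp mixin; what remains is the pattern of composing a request schema from several pydantic models via multiple inheritance, with each backend contributing its own parameter set. An illustrative sketch with hypothetical names:

from pydantic import BaseModel


class TorchParams(BaseModel):
    temperature: float = 1.0
    top_p: float = 0.95


class OpenAIParams(BaseModel):
    user: str = "default"


class CreateCompletionRequest(TorchParams, OpenAIParams):
    # Aggregates the fields of every base into one request schema.
    pass


req = CreateCompletionRequest(temperature=0.5, user="demo")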
xinference/web/ui/build/asset-manifest.json CHANGED
@@ -1,14 +1,14 @@
 {
   "files": {
     "main.css": "./static/css/main.337afe76.css",
-    "main.js": "./static/js/main.91e77b5c.js",
+    "main.js": "./static/js/main.ae579a97.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
     "main.337afe76.css.map": "./static/css/main.337afe76.css.map",
-    "main.91e77b5c.js.map": "./static/js/main.91e77b5c.js.map"
+    "main.ae579a97.js.map": "./static/js/main.ae579a97.js.map"
   },
   "entrypoints": [
     "static/css/main.337afe76.css",
-    "static/js/main.91e77b5c.js"
+    "static/js/main.ae579a97.js"
   ]
 }
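The manifest maps stable asset names to content-hashed build outputs, so the rest of the app can resolve the bundle without hard-coding the hash in main.ae579a97.js. A sketch of looking up the bundle path (the file path is the one shown in this diff):

import json

# Resolve the hashed bundle name through the build manifest.
with open("xinference/web/ui/build/asset-manifest.json") as f:
    manifest = json.load(f)

main_js = manifest["files"]["main.js"]
print(main_js)  # ./static/js/main.ae579a97.js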
xinference/web/ui/build/index.html CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.91e77b5c.js"></script><link href="./static/css/main.337afe76.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.ae579a97.js"></script><link href="./static/css/main.337afe76.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>