xinference 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (137)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +107 -11
  3. xinference/client/restful/restful_client.py +51 -11
  4. xinference/constants.py +5 -1
  5. xinference/core/media_interface.py +758 -0
  6. xinference/core/model.py +49 -9
  7. xinference/core/supervisor.py +1 -1
  8. xinference/core/utils.py +1 -1
  9. xinference/core/worker.py +33 -39
  10. xinference/deploy/cmdline.py +17 -0
  11. xinference/deploy/utils.py +0 -3
  12. xinference/model/audio/__init__.py +16 -27
  13. xinference/model/audio/core.py +2 -1
  14. xinference/model/audio/cosyvoice.py +4 -2
  15. xinference/model/audio/model_spec.json +63 -46
  16. xinference/model/audio/model_spec_modelscope.json +31 -14
  17. xinference/model/embedding/__init__.py +16 -24
  18. xinference/model/image/__init__.py +15 -25
  19. xinference/model/llm/__init__.py +40 -115
  20. xinference/model/llm/core.py +29 -6
  21. xinference/model/llm/llama_cpp/core.py +30 -347
  22. xinference/model/llm/llm_family.json +1674 -2203
  23. xinference/model/llm/llm_family.py +71 -7
  24. xinference/model/llm/llm_family_csghub.json +0 -32
  25. xinference/model/llm/llm_family_modelscope.json +1838 -2016
  26. xinference/model/llm/llm_family_openmind_hub.json +19 -325
  27. xinference/model/llm/lmdeploy/core.py +7 -2
  28. xinference/model/llm/mlx/core.py +23 -7
  29. xinference/model/llm/reasoning_parser.py +281 -5
  30. xinference/model/llm/sglang/core.py +39 -11
  31. xinference/model/llm/transformers/chatglm.py +9 -2
  32. xinference/model/llm/transformers/cogagent.py +10 -12
  33. xinference/model/llm/transformers/cogvlm2.py +6 -3
  34. xinference/model/llm/transformers/cogvlm2_video.py +3 -6
  35. xinference/model/llm/transformers/core.py +58 -60
  36. xinference/model/llm/transformers/deepseek_v2.py +4 -2
  37. xinference/model/llm/transformers/deepseek_vl.py +10 -4
  38. xinference/model/llm/transformers/deepseek_vl2.py +9 -4
  39. xinference/model/llm/transformers/gemma3.py +4 -5
  40. xinference/model/llm/transformers/glm4v.py +3 -21
  41. xinference/model/llm/transformers/glm_edge_v.py +3 -20
  42. xinference/model/llm/transformers/intern_vl.py +3 -6
  43. xinference/model/llm/transformers/internlm2.py +1 -1
  44. xinference/model/llm/transformers/minicpmv25.py +4 -2
  45. xinference/model/llm/transformers/minicpmv26.py +5 -3
  46. xinference/model/llm/transformers/omnilmm.py +1 -1
  47. xinference/model/llm/transformers/opt.py +1 -1
  48. xinference/model/llm/transformers/ovis2.py +302 -0
  49. xinference/model/llm/transformers/qwen-omni.py +8 -1
  50. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  51. xinference/model/llm/transformers/qwen2_vl.py +5 -1
  52. xinference/model/llm/transformers/qwen_vl.py +5 -2
  53. xinference/model/llm/utils.py +96 -45
  54. xinference/model/llm/vllm/core.py +108 -24
  55. xinference/model/llm/vllm/distributed_executor.py +8 -7
  56. xinference/model/llm/vllm/xavier/allocator.py +1 -1
  57. xinference/model/llm/vllm/xavier/block_manager.py +1 -1
  58. xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
  59. xinference/model/llm/vllm/xavier/executor.py +1 -1
  60. xinference/model/llm/vllm/xavier/test/test_xavier.py +2 -11
  61. xinference/model/rerank/__init__.py +13 -24
  62. xinference/model/video/__init__.py +15 -25
  63. xinference/model/video/core.py +3 -3
  64. xinference/model/video/diffusers.py +157 -13
  65. xinference/model/video/model_spec.json +100 -0
  66. xinference/model/video/model_spec_modelscope.json +104 -0
  67. xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
  68. xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
  69. xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
  70. xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
  71. xinference/thirdparty/cosyvoice/bin/train.py +7 -2
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
  74. xinference/thirdparty/cosyvoice/cli/model.py +140 -155
  75. xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
  76. xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
  77. xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
  78. xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
  79. xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
  80. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
  81. xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
  84. xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
  85. xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
  86. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
  87. xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
  88. xinference/thirdparty/cosyvoice/utils/common.py +1 -1
  89. xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
  90. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
  91. xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
  92. xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
  93. xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
  94. xinference/types.py +2 -71
  95. xinference/web/ui/build/asset-manifest.json +6 -6
  96. xinference/web/ui/build/index.html +1 -1
  97. xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
  98. xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
  99. xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
  100. xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
  101. xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
  102. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
  103. xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
  104. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
  105. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
  106. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
  107. xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
  108. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
  109. xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
  112. xinference/web/ui/src/locales/en.json +7 -4
  113. xinference/web/ui/src/locales/zh.json +7 -4
  114. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
  115. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/RECORD +120 -121
  116. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
  117. xinference/core/image_interface.py +0 -377
  118. xinference/model/llm/transformers/compression.py +0 -258
  119. xinference/model/llm/transformers/yi_vl.py +0 -239
  120. xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
  121. xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
  122. xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
  123. xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
  124. xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
  125. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
  126. xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
  129. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
  130. xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
  131. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
  132. xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
  133. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
  134. /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
  135. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
  136. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
  137. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.py
@@ -72,8 +72,10 @@ class LlamaCppLLMSpecV1(BaseModel):
     model_hub: str = "huggingface"
     model_uri: Optional[str]
     model_revision: Optional[str]
+    # for MOE model, illustrates the activated model size
+    activated_size_in_billions: Optional[Union[str, int]]
 
-    @validator("model_size_in_billions", pre=False)
+    @validator("model_size_in_billions", "activated_size_in_billions", pre=False)
     def validate_model_size_with_radix(cls, v: object) -> object:
         if isinstance(v, str):
             if (
@@ -94,8 +96,10 @@ class PytorchLLMSpecV1(BaseModel):
     model_hub: str = "huggingface"
     model_uri: Optional[str]
     model_revision: Optional[str]
+    # for MOE model, illustrates the activated model size
+    activated_size_in_billions: Optional[Union[str, int]]
 
-    @validator("model_size_in_billions", pre=False)
+    @validator("model_size_in_billions", "activated_size_in_billions", pre=False)
     def validate_model_size_with_radix(cls, v: object) -> object:
         if isinstance(v, str):
             if (
@@ -116,8 +120,10 @@ class MLXLLMSpecV1(BaseModel):
     model_hub: str = "huggingface"
     model_uri: Optional[str]
     model_revision: Optional[str]
+    # for MOE model, illustrates the activated model size
+    activated_size_in_billions: Optional[Union[str, int]]
 
-    @validator("model_size_in_billions", pre=False)
+    @validator("model_size_in_billions", "activated_size_in_billions", pre=False)
     def validate_model_size_with_radix(cls, v: object) -> object:
         if isinstance(v, str):
             if (
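All three spec classes above (llama.cpp, PyTorch, MLX) gain the same optional `activated_size_in_billions` field for MoE models, and the existing radix validator is widened to cover it. The underscore notation matters because a fractional size such as 1.8B is spelled `"1_8"`, and naively coercing that string would silently become 18 (Python treats `_` as a digit separator in `int()`). A minimal sketch of the pattern, assuming pydantic v1-style validators as used in `llm_family.py`; the class name here is hypothetical:

```python
from typing import Optional, Union

from pydantic import BaseModel, validator  # pydantic v1-style API


class SpecSketch(BaseModel):
    # Hypothetical stand-in for the LLMSpecV1 classes above.
    model_size_in_billions: Union[str, int]
    # For MoE models: the activated parameter count.
    activated_size_in_billions: Optional[Union[str, int]] = None

    @validator("model_size_in_billions", "activated_size_in_billions", pre=False)
    def validate_model_size_with_radix(cls, v: object) -> object:
        if isinstance(v, str):
            # "1_8" means 1.8B and must stay a string:
            # int("1_8") == 18 because "_" is a digit separator in Python.
            return v if "_" in v else int(v)
        return v


print(SpecSketch(model_size_in_billions="1_8").model_size_in_billions)  # "1_8"
print(SpecSketch(model_size_in_billions="14").model_size_in_billions)   # 14
```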
@@ -136,7 +142,15 @@ class LLMFamilyV1(BaseModel):
     model_lang: List[str]
     model_ability: List[
         Literal[
-            "embed", "generate", "chat", "tools", "vision", "audio", "omni", "reasoning"
+            "embed",
+            "generate",
+            "chat",
+            "tools",
+            "vision",
+            "audio",
+            "omni",
+            "reasoning",
+            "hybrid",
         ]
     ]
     model_description: Optional[str]
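`LLMFamilyV1.model_ability` is reflowed one literal per line and gains a new `"hybrid"` ability. Because the field is a `List[Literal[...]]`, pydantic rejects any ability outside this closed set; a reduced sketch (the class name is hypothetical):

```python
from typing import List, Literal

from pydantic import BaseModel, ValidationError

Ability = Literal[
    "embed", "generate", "chat", "tools", "vision",
    "audio", "omni", "reasoning", "hybrid",
]


class FamilySketch(BaseModel):  # hypothetical reduced form of LLMFamilyV1
    model_ability: List[Ability]


FamilySketch(model_ability=["chat", "hybrid"])  # accepted as of 1.6.0
try:
    FamilySketch(model_ability=["chat", "telepathy"])
except ValidationError as e:
    print(e)  # "telepathy" is not a permitted value
```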
@@ -370,6 +384,53 @@ def cache_from_uri(
         raise ValueError(f"Unsupported URL scheme: {src_scheme}")
 
 
+def cache_model_tokenizer_and_config(
+    llm_family: LLMFamilyV1,
+    llm_spec: "LLMSpecV1",
+) -> str:
+    """
+    Download model config.json and tokenizers only
+    """
+    cache_dir = _get_cache_dir_for_model_mem(llm_family, llm_spec, "tokenizer_config")
+    os.makedirs(cache_dir, exist_ok=True)
+    if llm_spec.model_hub == "huggingface":
+        from huggingface_hub import snapshot_download
+
+        download_dir = retry_download(
+            snapshot_download,
+            llm_family.model_name,
+            {
+                "model_size": llm_spec.model_size_in_billions,
+                "model_format": llm_spec.model_format,
+            },
+            llm_spec.model_id,
+            revision=llm_spec.model_revision,
+            allow_patterns=["tokenizer*", "config.json"],
+            local_dir=cache_dir,
+        )
+    elif llm_spec.model_hub == "modelscope":
+        from modelscope.hub.snapshot_download import snapshot_download
+
+        download_dir = retry_download(
+            snapshot_download,
+            llm_family.model_name,
+            {
+                "model_size": llm_spec.model_size_in_billions,
+                "model_format": llm_spec.model_format,
+            },
+            llm_spec.model_id,
+            revision=llm_spec.model_revision,
+            allow_patterns=["tokenizer*", "config.json"],
+            local_dir=cache_dir,
+        )
+    else:
+        raise NotImplementedError(
+            f"Does not support download config.json and "
+            f"tokenizer related files via {llm_spec.model_hub}"
+        )
+    return download_dir
+
+
 def cache_model_config(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
@@ -377,7 +438,7 @@ def cache_model_config(
     """Download model config.json into cache_dir,
     returns local filepath
     """
-    cache_dir = _get_cache_dir_for_model_mem(llm_family, llm_spec)
+    cache_dir = _get_cache_dir_for_model_mem(llm_family, llm_spec, "model_mem")
     config_file = os.path.join(cache_dir, "config.json")
     if not os.path.islink(config_file) and not os.path.exists(config_file):
         os.makedirs(cache_dir, exist_ok=True)
@@ -400,10 +461,13 @@
 def _get_cache_dir_for_model_mem(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
+    category: str,
     create_if_not_exist=True,
 ):
     """
-    For cal-model-mem only. (might called from supervisor / cli)
+    Get file dir for special usage, like `cal-model-mem` and download partial files for
+
+    e.g. for cal-model-mem, (might called from supervisor / cli)
     Temporary use separate dir from worker's cache_dir, due to issue of different style of symlink.
     """
     quant_suffix = ""
@@ -418,7 +482,7 @@
     if quant_suffix:
         cache_dir_name += f"-{quant_suffix}"
     cache_dir = os.path.realpath(
-        os.path.join(XINFERENCE_CACHE_DIR, "model_mem", cache_dir_name)
+        os.path.join(XINFERENCE_CACHE_DIR, category, cache_dir_name)
     )
     if create_if_not_exist and not os.path.exists(cache_dir):
         os.makedirs(cache_dir, exist_ok=True)
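With the `category` argument threaded through, the same helper now fans out under `XINFERENCE_CACHE_DIR` per purpose: `model_mem/<name>` for `cal-model-mem` and `tokenizer_config/<name>` for the partial downloads above. A sketch of the resulting path computation; the cache root and the directory name below are illustrative assumptions, not values from the diff:

```python
import os

# Illustrative cache root; the real value is defined in xinference/constants.py.
XINFERENCE_CACHE_DIR = os.path.expanduser("~/.xinference/cache")


def cache_dir_for(category: str, cache_dir_name: str) -> str:
    # Mirrors the os.path.join(XINFERENCE_CACHE_DIR, category, ...) change above.
    return os.path.realpath(
        os.path.join(XINFERENCE_CACHE_DIR, category, cache_dir_name)
    )


print(cache_dir_for("model_mem", "qwen2-instruct-pytorch-0_5b"))
print(cache_dir_for("tokenizer_config", "qwen2-instruct-pytorch-0_5b"))
```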
xinference/model/llm/llm_family_csghub.json
@@ -17,8 +17,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "0_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "Qwen/Qwen2-0.5B-Instruct",
@@ -54,35 +52,5 @@
       "<|im_start|>",
       "<|im_end|>"
     ]
-  },
-  {
-    "version": 1,
-    "context_length": 32768,
-    "model_name": "csg-wukong-chat-v0.1",
-    "model_lang": [
-      "en"
-    ],
-    "model_ability": [
-      "chat"
-    ],
-    "model_description": "csg-wukong-1B is a 1 billion-parameter small language model(SLM) pretrained on 1T tokens.",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 1,
-        "quantizations": [
-          "none"
-        ],
-        "model_id": "OpenCSG/csg-wukong-1B-chat-v0.1",
-        "model_hub": "csghub"
-      }
-    ],
-    "chat_template": "{% for item in messages %}{% if loop.first and item['role'] == 'system' %}{{ item['content'] + '\n' }}{% elif loop.first %}{{ '<|system|>\nYou are a creative super artificial intelligence assistant, possessing all the knowledge of humankind. Your name is csg-wukong, developed by OpenCSG. You need to understand and infer the true intentions of users based on the topics discussed in the chat history, and respond to user questions correctly as required. You enjoy responding to users with accurate and insightful answers. Please pay attention to the appropriate style and format when replying, try to avoid repetitive words and sentences, and keep your responses as concise and profound as possible. You carefully consider the context of the discussion when replying to users. When the user says \"continue,\" please proceed with the continuation of the previous assistant\\'s response.</s>\n' }}{% endif %}{% if item['role'] == 'user' %}{{ '<|user|>\n' + item['content'] + '</s>\n' }}{% elif item['role'] == 'assistant' %}{{ '<|assistant|>\n' + item['content'] + '</s>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}",
-    "stop_token_ids": [
-      2
-    ],
-    "stop": [
-      "</s>"
-    ]
   }
 ]