xinference 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (104):
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +50 -1
  3. xinference/client/restful/restful_client.py +82 -2
  4. xinference/constants.py +3 -0
  5. xinference/core/chat_interface.py +297 -83
  6. xinference/core/model.py +1 -0
  7. xinference/core/progress_tracker.py +16 -8
  8. xinference/core/supervisor.py +45 -1
  9. xinference/core/worker.py +262 -37
  10. xinference/deploy/cmdline.py +33 -1
  11. xinference/model/audio/core.py +11 -1
  12. xinference/model/audio/megatts.py +105 -0
  13. xinference/model/audio/model_spec.json +24 -1
  14. xinference/model/audio/model_spec_modelscope.json +26 -1
  15. xinference/model/core.py +14 -0
  16. xinference/model/embedding/core.py +6 -1
  17. xinference/model/flexible/core.py +6 -1
  18. xinference/model/image/core.py +6 -1
  19. xinference/model/image/model_spec.json +17 -1
  20. xinference/model/image/model_spec_modelscope.json +17 -1
  21. xinference/model/llm/__init__.py +0 -4
  22. xinference/model/llm/core.py +4 -0
  23. xinference/model/llm/llama_cpp/core.py +40 -16
  24. xinference/model/llm/llm_family.json +413 -84
  25. xinference/model/llm/llm_family.py +24 -1
  26. xinference/model/llm/llm_family_modelscope.json +447 -0
  27. xinference/model/llm/mlx/core.py +16 -2
  28. xinference/model/llm/transformers/__init__.py +14 -0
  29. xinference/model/llm/transformers/core.py +30 -6
  30. xinference/model/llm/transformers/gemma3.py +17 -2
  31. xinference/model/llm/transformers/intern_vl.py +28 -18
  32. xinference/model/llm/transformers/minicpmv26.py +21 -2
  33. xinference/model/llm/transformers/qwen-omni.py +308 -0
  34. xinference/model/llm/transformers/qwen2_audio.py +1 -1
  35. xinference/model/llm/transformers/qwen2_vl.py +20 -4
  36. xinference/model/llm/utils.py +11 -1
  37. xinference/model/llm/vllm/core.py +35 -0
  38. xinference/model/llm/vllm/distributed_executor.py +8 -2
  39. xinference/model/rerank/core.py +6 -1
  40. xinference/model/utils.py +118 -1
  41. xinference/model/video/core.py +6 -1
  42. xinference/thirdparty/megatts3/__init__.py +0 -0
  43. xinference/thirdparty/megatts3/tts/frontend_function.py +175 -0
  44. xinference/thirdparty/megatts3/tts/gradio_api.py +93 -0
  45. xinference/thirdparty/megatts3/tts/infer_cli.py +277 -0
  46. xinference/thirdparty/megatts3/tts/modules/aligner/whisper_small.py +318 -0
  47. xinference/thirdparty/megatts3/tts/modules/ar_dur/ar_dur_predictor.py +362 -0
  48. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/layers.py +64 -0
  49. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/nar_tts_modules.py +73 -0
  50. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rel_transformer.py +403 -0
  51. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rot_transformer.py +649 -0
  52. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/seq_utils.py +342 -0
  53. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/transformer.py +767 -0
  54. xinference/thirdparty/megatts3/tts/modules/llm_dit/cfm.py +309 -0
  55. xinference/thirdparty/megatts3/tts/modules/llm_dit/dit.py +180 -0
  56. xinference/thirdparty/megatts3/tts/modules/llm_dit/time_embedding.py +44 -0
  57. xinference/thirdparty/megatts3/tts/modules/llm_dit/transformer.py +230 -0
  58. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/diag_gaussian.py +67 -0
  59. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/hifigan_modules.py +283 -0
  60. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/seanet_encoder.py +38 -0
  61. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/wavvae_v3.py +60 -0
  62. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/conv.py +154 -0
  63. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/lstm.py +51 -0
  64. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/seanet.py +126 -0
  65. xinference/thirdparty/megatts3/tts/utils/audio_utils/align.py +36 -0
  66. xinference/thirdparty/megatts3/tts/utils/audio_utils/io.py +95 -0
  67. xinference/thirdparty/megatts3/tts/utils/audio_utils/plot.py +90 -0
  68. xinference/thirdparty/megatts3/tts/utils/commons/ckpt_utils.py +171 -0
  69. xinference/thirdparty/megatts3/tts/utils/commons/hparams.py +215 -0
  70. xinference/thirdparty/megatts3/tts/utils/text_utils/dict.json +1 -0
  71. xinference/thirdparty/megatts3/tts/utils/text_utils/ph_tone_convert.py +94 -0
  72. xinference/thirdparty/megatts3/tts/utils/text_utils/split_text.py +90 -0
  73. xinference/thirdparty/megatts3/tts/utils/text_utils/text_encoder.py +280 -0
  74. xinference/types.py +10 -0
  75. xinference/utils.py +54 -0
  76. xinference/web/ui/build/asset-manifest.json +6 -6
  77. xinference/web/ui/build/index.html +1 -1
  78. xinference/web/ui/build/static/css/main.0f6523be.css +2 -0
  79. xinference/web/ui/build/static/css/main.0f6523be.css.map +1 -0
  80. xinference/web/ui/build/static/js/main.58bd483c.js +3 -0
  81. xinference/web/ui/build/static/js/main.58bd483c.js.map +1 -0
  82. xinference/web/ui/node_modules/.cache/babel-loader/3bff8cbe9141f937f4d98879a9771b0f48e0e4e0dbee8e647adbfe23859e7048.json +1 -0
  83. xinference/web/ui/node_modules/.cache/babel-loader/4500b1a622a031011f0a291701e306b87e08cbc749c50e285103536b85b6a914.json +1 -0
  84. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +1 -0
  85. xinference/web/ui/node_modules/.cache/babel-loader/69081049f0c7447544b7cfd73dd13d8846c02fe5febe4d81587e95c89a412d5b.json +1 -0
  86. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +1 -0
  87. xinference/web/ui/node_modules/.cache/babel-loader/bf2b211b0d1b6465eff512d64c869d748f803c5651a7c24e48de6ea3484a7bfe.json +1 -0
  88. xinference/web/ui/src/locales/en.json +2 -1
  89. xinference/web/ui/src/locales/zh.json +2 -1
  90. {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/METADATA +127 -114
  91. {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/RECORD +96 -60
  92. {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/WHEEL +1 -1
  93. xinference/web/ui/build/static/css/main.b494ae7e.css +0 -2
  94. xinference/web/ui/build/static/css/main.b494ae7e.css.map +0 -1
  95. xinference/web/ui/build/static/js/main.5ca4eea1.js +0 -3
  96. xinference/web/ui/build/static/js/main.5ca4eea1.js.map +0 -1
  97. xinference/web/ui/node_modules/.cache/babel-loader/0f0967acaec5df1d45b80010949c258d64297ebbb0f44b8bb3afcbd45c6f0ec4.json +0 -1
  98. xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +0 -1
  99. xinference/web/ui/node_modules/.cache/babel-loader/68249645124f37d01eef83b1d897e751f895bea919b6fb466f907c1f87cebc84.json +0 -1
  100. xinference/web/ui/node_modules/.cache/babel-loader/e547bbb18abb4a474b675a8d5782d25617566bea0af8caa9b836ce5649e2250a.json +0 -1
  101. /xinference/web/ui/build/static/js/{main.5ca4eea1.js.LICENSE.txt → main.58bd483c.js.LICENSE.txt} +0 -0
  102. {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/entry_points.txt +0 -0
  103. {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info/licenses}/LICENSE +0 -0
  104. {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/top_level.txt +0 -0
@@ -47,6 +47,22 @@
47
47
  "merge_length_s": 15
48
48
  }
49
49
  },
50
+ {
51
+ "model_name": "paraformer-zh",
52
+ "model_family": "funasr",
53
+ "model_hub": "modelscope",
54
+ "model_id": "iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn",
55
+ "model_revision": "master",
56
+ "model_ability": "audio-to-text",
57
+ "multilingual": false,
58
+ "default_model_config": {
59
+ "vad_model": "fsmn-vad",
60
+ "punc_model": "ct-punc"
61
+ },
62
+ "default_transcription_config": {
63
+ "batch_size_s": 300
64
+ }
65
+ },
50
66
  {
51
67
  "model_name": "ChatTTS",
52
68
  "model_family": "ChatTTS",
@@ -62,7 +78,7 @@
62
78
  "model_hub": "modelscope",
63
79
  "model_id": "iic/CosyVoice-300M",
64
80
  "model_revision": "master",
65
- "model_ability": "audio-to-audio",
81
+ "model_ability": "text-to-audio",
66
82
  "multilingual": true
67
83
  },
68
84
  {
@@ -109,5 +125,14 @@
109
125
  "model_revision": "master",
110
126
  "model_ability": "text-to-audio",
111
127
  "multilingual": true
128
+ },
129
+ {
130
+ "model_name": "MegaTTS3",
131
+ "model_family": "MegaTTS",
132
+ "model_hub": "modelscope",
133
+ "model_id": "ByteDance/MegaTTS3",
134
+ "model_revision": "master",
135
+ "model_ability": "text-to-audio",
136
+ "multilingual": true
112
137
  }
113
138
  ]
xinference/model/core.py CHANGED
@@ -30,6 +30,11 @@ class ModelDescription(ABC):
30
30
  self.devices = devices
31
31
  self._model_path = model_path
32
32
 
33
+ @property
34
+ @abstractmethod
35
+ def spec(self):
36
+ pass
37
+
33
38
  def to_dict(self):
34
39
  """
35
40
  Return a dict to describe some information about model.
@@ -155,3 +160,12 @@ class CacheableModelSpec(BaseModel):
155
160
  model_id: str
156
161
  model_revision: Optional[str]
157
162
  model_hub: str = "huggingface"
163
+
164
+
165
+ class VirtualEnvSettings(BaseModel):
166
+ packages: List[str]
167
+ inherit_pip_config: bool = True
168
+ index_url: Optional[str] = None
169
+ extra_index_url: Optional[str] = None
170
+ find_links: Optional[str] = None
171
+ trusted_host: Optional[str] = None
@@ -24,7 +24,7 @@ import torch
24
24
  from ..._compat import ROOT_KEY, ErrorWrapper, ValidationError
25
25
  from ...device_utils import empty_cache
26
26
  from ...types import Embedding, EmbeddingData, EmbeddingUsage
27
- from ..core import CacheableModelSpec, ModelDescription
27
+ from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
28
28
  from ..utils import get_cache_dir, is_model_cached
29
29
 
30
30
  logger = logging.getLogger(__name__)
@@ -57,6 +57,7 @@ class EmbeddingModelSpec(CacheableModelSpec):
57
57
  model_id: str
58
58
  model_revision: Optional[str]
59
59
  model_hub: str = "huggingface"
60
+ virtualenv: Optional[VirtualEnvSettings]
60
61
 
61
62
 
62
63
  class EmbeddingModelDescription(ModelDescription):
@@ -70,6 +71,10 @@ class EmbeddingModelDescription(ModelDescription):
70
71
  super().__init__(address, devices, model_path=model_path)
71
72
  self._model_spec = model_spec
72
73
 
74
+ @property
75
+ def spec(self):
76
+ return self._model_spec
77
+
73
78
  def to_dict(self):
74
79
  return {
75
80
  "model_type": "embedding",
@@ -20,7 +20,7 @@ from threading import Lock
20
20
  from typing import Dict, List, Optional, Tuple
21
21
 
22
22
  from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
23
- from ..core import CacheableModelSpec, ModelDescription
23
+ from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
24
24
  from .utils import get_launcher
25
25
 
26
26
  logger = logging.getLogger(__name__)
@@ -34,6 +34,7 @@ class FlexibleModelSpec(CacheableModelSpec):
34
34
  model_uri: Optional[str]
35
35
  launcher: str
36
36
  launcher_args: Optional[str]
37
+ virtualenv: Optional[VirtualEnvSettings]
37
38
 
38
39
  def parser_args(self):
39
40
  return json.loads(self.launcher_args)
@@ -50,6 +51,10 @@ class FlexibleModelDescription(ModelDescription):
50
51
  super().__init__(address, devices, model_path=model_path)
51
52
  self._model_spec = model_spec
52
53
 
54
+ @property
55
+ def spec(self):
56
+ return self._model_spec
57
+
53
58
  def to_dict(self):
54
59
  return {
55
60
  "model_type": "flexible",
@@ -21,7 +21,7 @@ from typing import Dict, List, Literal, Optional, Tuple, Union
21
21
 
22
22
  from ...constants import XINFERENCE_CACHE_DIR
23
23
  from ...types import PeftModelConfig
24
- from ..core import CacheableModelSpec, ModelDescription
24
+ from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
25
25
  from ..utils import (
26
26
  IS_NEW_HUGGINGFACE_HUB,
27
27
  retry_download,
@@ -59,6 +59,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
59
59
  gguf_model_id: Optional[str]
60
60
  gguf_quantizations: Optional[List[str]]
61
61
  gguf_model_file_name_template: Optional[str]
62
+ virtualenv: Optional[VirtualEnvSettings]
62
63
 
63
64
 
64
65
  class ImageModelDescription(ModelDescription):
@@ -72,6 +73,10 @@ class ImageModelDescription(ModelDescription):
72
73
  super().__init__(address, devices, model_path=model_path)
73
74
  self._model_spec = model_spec
74
75
 
76
+ @property
77
+ def spec(self):
78
+ return self._model_spec
79
+
75
80
  def to_dict(self):
76
81
  if self._model_spec.controlnet is not None:
77
82
  controlnet = [cn.dict() for cn in self._model_spec.controlnet]
@@ -339,6 +339,22 @@
339
339
  "model_revision": "cf6b7386bc89a54f09785612ba74cb12de6fa17c",
340
340
  "model_ability": [
341
341
  "ocr"
342
- ]
342
+ ],
343
+ "virtualenv": {
344
+ "packages": [
345
+ "transformers==4.37.2",
346
+ "httpx==0.24.0",
347
+ "deepspeed==0.12.3",
348
+ "peft==0.4.0",
349
+ "tiktoken==0.6.0",
350
+ "bitsandbytes==0.41.0",
351
+ "scikit-learn==1.2.2",
352
+ "sentencepiece==0.1.99",
353
+ "einops==0.6.1",
354
+ "einops-exts==0.0.4",
355
+ "timm==0.6.13",
356
+ "numpy==1.26.4"
357
+ ]
358
+ }
343
359
  }
344
360
  ]
@@ -315,6 +315,22 @@
315
315
  "model_hub": "modelscope",
316
316
  "model_ability": [
317
317
  "ocr"
318
- ]
318
+ ],
319
+ "virtualenv": {
320
+ "packages": [
321
+ "transformers==4.37.2",
322
+ "httpx==0.24.0",
323
+ "deepspeed==0.12.3",
324
+ "peft==0.4.0",
325
+ "tiktoken==0.6.0",
326
+ "bitsandbytes==0.41.0",
327
+ "scikit-learn==1.2.2",
328
+ "sentencepiece==0.1.99",
329
+ "einops==0.6.1",
330
+ "einops-exts==0.0.4",
331
+ "timm==0.6.13",
332
+ "numpy==1.26.4"
333
+ ]
334
+ }
319
335
  }
320
336
  ]
@@ -147,13 +147,11 @@ def _install():
147
147
  from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
148
148
  from .transformers.glm4v import Glm4VModel
149
149
  from .transformers.glm_edge_v import GlmEdgeVModel
150
- from .transformers.intern_vl import InternVLChatModel
151
150
  from .transformers.internlm2 import Internlm2PytorchChatModel
152
151
  from .transformers.minicpmv25 import MiniCPMV25Model
153
152
  from .transformers.minicpmv26 import MiniCPMV26Model
154
153
  from .transformers.opt import OptPytorchModel
155
154
  from .transformers.qwen2_audio import Qwen2AudioChatModel
156
- from .transformers.qwen2_vl import Qwen2VLChatModel
157
155
  from .transformers.qwen_vl import QwenVLChatModel
158
156
  from .transformers.yi_vl import YiVLChatModel
159
157
  from .vllm.core import VLLMChatModel, VLLMModel, VLLMVisionModel
@@ -184,12 +182,10 @@ def _install():
184
182
  PytorchChatModel,
185
183
  Internlm2PytorchChatModel,
186
184
  QwenVLChatModel,
187
- Qwen2VLChatModel,
188
185
  Qwen2AudioChatModel,
189
186
  YiVLChatModel,
190
187
  DeepSeekVLChatModel,
191
188
  DeepSeekVL2ChatModel,
192
- InternVLChatModel,
193
189
  PytorchModel,
194
190
  CogVLM2Model,
195
191
  CogVLM2VideoModel,
@@ -144,6 +144,10 @@ class LLMDescription(ModelDescription):
144
144
  self._llm_spec = llm_spec
145
145
  self._quantization = quantization
146
146
 
147
+ @property
148
+ def spec(self):
149
+ return self._llm_family
150
+
147
151
  def to_dict(self):
148
152
  return {
149
153
  "model_type": "LLM",
@@ -36,7 +36,7 @@ from ..utils import DEEPSEEK_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelM
36
36
 
37
37
  logger = logging.getLogger(__name__)
38
38
 
39
- USE_XLLAMACPP = bool(int(os.environ.get("USE_XLLAMACPP", 0)))
39
+ USE_XLLAMACPP = bool(int(os.environ.get("USE_XLLAMACPP", 1)))
40
40
 
41
41
 
42
42
  class _Done:
@@ -142,24 +142,38 @@ class XllamaCppModel(LLM, ChatModelMixin):
142
142
 
143
143
  if os.path.isfile(self.model_path):
144
144
  # mostly passed from --model_path
145
- model_path = os.path.realpath(self.model_path)
145
+ model_path = self.model_path
146
146
  else:
147
147
  # handle legacy cache.
148
- model_path = os.path.realpath(
149
- os.path.join(
148
+ if (
149
+ self.model_spec.model_file_name_split_template
150
+ and self.model_spec.quantization_parts
151
+ ):
152
+ part = self.model_spec.quantization_parts[self.quantization]
153
+ model_path = os.path.join(
154
+ self.model_path,
155
+ self.model_spec.model_file_name_split_template.format(
156
+ quantization=self.quantization, part=part[0]
157
+ ),
158
+ )
159
+ else:
160
+ model_path = os.path.join(
150
161
  self.model_path,
151
162
  self.model_spec.model_file_name_template.format(
152
163
  quantization=self.quantization
153
164
  ),
154
165
  )
155
- )
156
- legacy_model_file_path = os.path.join(self.model_path, "model.bin")
157
- if os.path.exists(legacy_model_file_path):
158
- model_path = legacy_model_file_path
166
+ legacy_model_file_path = os.path.join(self.model_path, "model.bin")
167
+ if os.path.exists(legacy_model_file_path):
168
+ model_path = legacy_model_file_path
159
169
 
160
170
  try:
161
171
  params = CommonParams()
162
- params.model = model_path
172
+ # Compatible with xllamacpp changes
173
+ try:
174
+ params.model = model_path
175
+ except Exception:
176
+ params.model.path = model_path
163
177
  if self.model_family.chat_template:
164
178
  params.chat_template = self.model_family.chat_template
165
179
  # This is the default value, could be overwritten by _llamacpp_model_config
@@ -415,20 +429,30 @@ class LlamaCppModel(LLM):
415
429
 
416
430
  if os.path.isfile(self.model_path):
417
431
  # mostly passed from --model_path
418
- model_path = os.path.realpath(self.model_path)
432
+ model_path = self.model_path
419
433
  else:
420
434
  # handle legacy cache.
421
- model_path = os.path.realpath(
422
- os.path.join(
435
+ if (
436
+ self.model_spec.model_file_name_split_template
437
+ and self.model_spec.quantization_parts
438
+ ):
439
+ part = self.model_spec.quantization_parts[self.quantization]
440
+ model_path = os.path.join(
441
+ self.model_path,
442
+ self.model_spec.model_file_name_split_template.format(
443
+ quantization=self.quantization, part=part[0]
444
+ ),
445
+ )
446
+ else:
447
+ model_path = os.path.join(
423
448
  self.model_path,
424
449
  self.model_spec.model_file_name_template.format(
425
450
  quantization=self.quantization
426
451
  ),
427
452
  )
428
- )
429
- legacy_model_file_path = os.path.join(self.model_path, "model.bin")
430
- if os.path.exists(legacy_model_file_path):
431
- model_path = legacy_model_file_path
453
+ legacy_model_file_path = os.path.join(self.model_path, "model.bin")
454
+ if os.path.exists(legacy_model_file_path):
455
+ model_path = legacy_model_file_path
432
456
 
433
457
  try:
434
458
  self._llm = Llama(