xinference 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.
Files changed (132)
  1. xinference/_compat.py +1 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +54 -1
  4. xinference/client/restful/restful_client.py +82 -2
  5. xinference/constants.py +3 -0
  6. xinference/core/chat_interface.py +297 -83
  7. xinference/core/model.py +24 -3
  8. xinference/core/progress_tracker.py +16 -8
  9. xinference/core/supervisor.py +51 -1
  10. xinference/core/worker.py +315 -47
  11. xinference/deploy/cmdline.py +33 -1
  12. xinference/model/audio/core.py +11 -1
  13. xinference/model/audio/megatts.py +105 -0
  14. xinference/model/audio/model_spec.json +24 -1
  15. xinference/model/audio/model_spec_modelscope.json +26 -1
  16. xinference/model/core.py +14 -0
  17. xinference/model/embedding/core.py +6 -1
  18. xinference/model/flexible/core.py +6 -1
  19. xinference/model/image/core.py +6 -1
  20. xinference/model/image/model_spec.json +17 -1
  21. xinference/model/image/model_spec_modelscope.json +17 -1
  22. xinference/model/llm/__init__.py +4 -6
  23. xinference/model/llm/core.py +5 -0
  24. xinference/model/llm/llama_cpp/core.py +46 -17
  25. xinference/model/llm/llm_family.json +530 -85
  26. xinference/model/llm/llm_family.py +24 -1
  27. xinference/model/llm/llm_family_modelscope.json +572 -1
  28. xinference/model/llm/mlx/core.py +16 -2
  29. xinference/model/llm/reasoning_parser.py +3 -3
  30. xinference/model/llm/sglang/core.py +111 -13
  31. xinference/model/llm/transformers/__init__.py +14 -0
  32. xinference/model/llm/transformers/core.py +31 -6
  33. xinference/model/llm/transformers/deepseek_vl.py +1 -1
  34. xinference/model/llm/transformers/deepseek_vl2.py +287 -0
  35. xinference/model/llm/transformers/gemma3.py +17 -2
  36. xinference/model/llm/transformers/intern_vl.py +28 -18
  37. xinference/model/llm/transformers/minicpmv26.py +21 -2
  38. xinference/model/llm/transformers/qwen-omni.py +308 -0
  39. xinference/model/llm/transformers/qwen2_audio.py +1 -1
  40. xinference/model/llm/transformers/qwen2_vl.py +20 -4
  41. xinference/model/llm/utils.py +37 -15
  42. xinference/model/llm/vllm/core.py +184 -8
  43. xinference/model/llm/vllm/distributed_executor.py +320 -0
  44. xinference/model/rerank/core.py +22 -12
  45. xinference/model/utils.py +118 -1
  46. xinference/model/video/core.py +6 -1
  47. xinference/thirdparty/deepseek_vl2/__init__.py +31 -0
  48. xinference/thirdparty/deepseek_vl2/models/__init__.py +26 -0
  49. xinference/thirdparty/deepseek_vl2/models/configuration_deepseek.py +210 -0
  50. xinference/thirdparty/deepseek_vl2/models/conversation.py +310 -0
  51. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek.py +1975 -0
  52. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek_vl_v2.py +697 -0
  53. xinference/thirdparty/deepseek_vl2/models/processing_deepseek_vl_v2.py +675 -0
  54. xinference/thirdparty/deepseek_vl2/models/siglip_vit.py +661 -0
  55. xinference/thirdparty/deepseek_vl2/serve/__init__.py +0 -0
  56. xinference/thirdparty/deepseek_vl2/serve/app_modules/__init__.py +0 -0
  57. xinference/thirdparty/deepseek_vl2/serve/app_modules/gradio_utils.py +83 -0
  58. xinference/thirdparty/deepseek_vl2/serve/app_modules/overwrites.py +81 -0
  59. xinference/thirdparty/deepseek_vl2/serve/app_modules/presets.py +115 -0
  60. xinference/thirdparty/deepseek_vl2/serve/app_modules/utils.py +333 -0
  61. xinference/thirdparty/deepseek_vl2/serve/assets/Kelpy-Codos.js +100 -0
  62. xinference/thirdparty/deepseek_vl2/serve/assets/avatar.png +0 -0
  63. xinference/thirdparty/deepseek_vl2/serve/assets/custom.css +355 -0
  64. xinference/thirdparty/deepseek_vl2/serve/assets/custom.js +22 -0
  65. xinference/thirdparty/deepseek_vl2/serve/assets/favicon.ico +0 -0
  66. xinference/thirdparty/deepseek_vl2/serve/assets/simsun.ttc +0 -0
  67. xinference/thirdparty/deepseek_vl2/serve/inference.py +197 -0
  68. xinference/thirdparty/deepseek_vl2/utils/__init__.py +18 -0
  69. xinference/thirdparty/deepseek_vl2/utils/io.py +80 -0
  70. xinference/thirdparty/megatts3/__init__.py +0 -0
  71. xinference/thirdparty/megatts3/tts/frontend_function.py +175 -0
  72. xinference/thirdparty/megatts3/tts/gradio_api.py +93 -0
  73. xinference/thirdparty/megatts3/tts/infer_cli.py +277 -0
  74. xinference/thirdparty/megatts3/tts/modules/aligner/whisper_small.py +318 -0
  75. xinference/thirdparty/megatts3/tts/modules/ar_dur/ar_dur_predictor.py +362 -0
  76. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/layers.py +64 -0
  77. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/nar_tts_modules.py +73 -0
  78. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rel_transformer.py +403 -0
  79. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rot_transformer.py +649 -0
  80. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/seq_utils.py +342 -0
  81. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/transformer.py +767 -0
  82. xinference/thirdparty/megatts3/tts/modules/llm_dit/cfm.py +309 -0
  83. xinference/thirdparty/megatts3/tts/modules/llm_dit/dit.py +180 -0
  84. xinference/thirdparty/megatts3/tts/modules/llm_dit/time_embedding.py +44 -0
  85. xinference/thirdparty/megatts3/tts/modules/llm_dit/transformer.py +230 -0
  86. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/diag_gaussian.py +67 -0
  87. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/hifigan_modules.py +283 -0
  88. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/seanet_encoder.py +38 -0
  89. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/wavvae_v3.py +60 -0
  90. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/conv.py +154 -0
  91. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/lstm.py +51 -0
  92. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/seanet.py +126 -0
  93. xinference/thirdparty/megatts3/tts/utils/audio_utils/align.py +36 -0
  94. xinference/thirdparty/megatts3/tts/utils/audio_utils/io.py +95 -0
  95. xinference/thirdparty/megatts3/tts/utils/audio_utils/plot.py +90 -0
  96. xinference/thirdparty/megatts3/tts/utils/commons/ckpt_utils.py +171 -0
  97. xinference/thirdparty/megatts3/tts/utils/commons/hparams.py +215 -0
  98. xinference/thirdparty/megatts3/tts/utils/text_utils/dict.json +1 -0
  99. xinference/thirdparty/megatts3/tts/utils/text_utils/ph_tone_convert.py +94 -0
  100. xinference/thirdparty/megatts3/tts/utils/text_utils/split_text.py +90 -0
  101. xinference/thirdparty/megatts3/tts/utils/text_utils/text_encoder.py +280 -0
  102. xinference/types.py +10 -0
  103. xinference/utils.py +54 -0
  104. xinference/web/ui/build/asset-manifest.json +6 -6
  105. xinference/web/ui/build/index.html +1 -1
  106. xinference/web/ui/build/static/css/main.0f6523be.css +2 -0
  107. xinference/web/ui/build/static/css/main.0f6523be.css.map +1 -0
  108. xinference/web/ui/build/static/js/main.58bd483c.js +3 -0
  109. xinference/web/ui/build/static/js/main.58bd483c.js.map +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/3bff8cbe9141f937f4d98879a9771b0f48e0e4e0dbee8e647adbfe23859e7048.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/4500b1a622a031011f0a291701e306b87e08cbc749c50e285103536b85b6a914.json +1 -0
  112. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +1 -0
  113. xinference/web/ui/node_modules/.cache/babel-loader/69081049f0c7447544b7cfd73dd13d8846c02fe5febe4d81587e95c89a412d5b.json +1 -0
  114. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/bf2b211b0d1b6465eff512d64c869d748f803c5651a7c24e48de6ea3484a7bfe.json +1 -0
  116. xinference/web/ui/src/locales/en.json +2 -1
  117. xinference/web/ui/src/locales/zh.json +2 -1
  118. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/METADATA +128 -115
  119. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/RECORD +124 -63
  120. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/WHEEL +1 -1
  121. xinference/web/ui/build/static/css/main.b494ae7e.css +0 -2
  122. xinference/web/ui/build/static/css/main.b494ae7e.css.map +0 -1
  123. xinference/web/ui/build/static/js/main.3cea968e.js +0 -3
  124. xinference/web/ui/build/static/js/main.3cea968e.js.map +0 -1
  125. xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +0 -1
  126. xinference/web/ui/node_modules/.cache/babel-loader/7f59e45e3f268ab8a4788b6fb024cf8dab088736dff22f5a3a39c122a83ab930.json +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/dcd60488509450bfff37bfff56de2c096d51de17dd00ec60d4db49c8b483ada1.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/e547bbb18abb4a474b675a8d5782d25617566bea0af8caa9b836ce5649e2250a.json +0 -1
  129. /xinference/web/ui/build/static/js/{main.3cea968e.js.LICENSE.txt → main.58bd483c.js.LICENSE.txt} +0 -0
  130. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/entry_points.txt +0 -0
  131. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info/licenses}/LICENSE +0 -0
  132. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/top_level.txt +0 -0
xinference/model/audio/core.py CHANGED
@@ -17,7 +17,7 @@ from collections import defaultdict
 from typing import Any, Dict, List, Literal, Optional, Tuple, Union
 
 from ...constants import XINFERENCE_CACHE_DIR
-from ..core import CacheableModelSpec, ModelDescription
+from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
 from ..utils import valid_model_revision
 from .chattts import ChatTTSModel
 from .cosyvoice import CosyVoiceModel
@@ -26,6 +26,7 @@ from .f5tts_mlx import F5TTSMLXModel
 from .fish_speech import FishSpeechModel
 from .funasr import FunASRModel
 from .kokoro import KokoroModel
+from .megatts import MegaTTSModel
 from .melotts import MeloTTSModel
 from .whisper import WhisperModel
 from .whisper_mlx import WhisperMLXModel
@@ -55,6 +56,7 @@ class AudioModelFamilyV1(CacheableModelSpec):
     default_model_config: Optional[Dict[str, Any]]
     default_transcription_config: Optional[Dict[str, Any]]
     engine: Optional[str]
+    virtualenv: Optional[VirtualEnvSettings]
 
 
 class AudioModelDescription(ModelDescription):
@@ -68,6 +70,10 @@ class AudioModelDescription(ModelDescription):
         super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec
 
+    @property
+    def spec(self):
+        return self._model_spec
+
     def to_dict(self):
         return {
             "model_type": "audio",
@@ -178,6 +184,7 @@ def create_audio_model_instance(
         F5TTSMLXModel,
         MeloTTSModel,
         KokoroModel,
+        MegaTTSModel,
     ],
     AudioModelDescription,
 ]:
@@ -195,6 +202,7 @@ def create_audio_model_instance(
         F5TTSMLXModel,
        MeloTTSModel,
        KokoroModel,
+        MegaTTSModel,
    ]
    if model_spec.model_family == "whisper":
        if not model_spec.engine:
@@ -217,6 +225,8 @@ def create_audio_model_instance(
         model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "Kokoro":
         model = KokoroModel(model_uid, model_path, model_spec, **kwargs)
+    elif model_spec.model_family == "MegaTTS":
+        model = MegaTTSModel(model_uid, model_path, model_spec, **kwargs)
     else:
         raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
     model_description = AudioModelDescription(
xinference/model/audio/megatts.py ADDED
@@ -0,0 +1,105 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import io
+import logging
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional
+
+if TYPE_CHECKING:
+    from .core import AudioModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class MegaTTSModel:
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "AudioModelFamilyV1",
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._device = device
+        self._model = None
+        self._vocoder = None
+        self._kwargs = kwargs
+
+    @property
+    def model_ability(self):
+        return self._model_spec.model_ability
+
+    def load(self):
+        import os
+        import sys
+
+        # The yaml config loaded from model has hard-coded the import paths. please refer to: load_hyperpyyaml
+        sys.path.insert(
+            0, os.path.join(os.path.dirname(__file__), "../../thirdparty/megatts3")
+        )
+        # For whisper
+        sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../thirdparty"))
+
+        from tts.infer_cli import MegaTTS3DiTInfer
+
+        self._model = MegaTTS3DiTInfer(ckpt_root=self._model_path)
+
+    def speech(
+        self,
+        input: str,
+        voice: str,
+        response_format: str = "mp3",
+        speed: float = 1.0,
+        stream: bool = False,
+        **kwargs,
+    ):
+        import soundfile
+
+        if stream:
+            raise Exception("MegaTTS3 does not support stream generation.")
+        if voice:
+            raise Exception(
+                "MegaTTS3 does not support voice, please specify prompt_speech and prompt_latent."
+            )
+
+        prompt_speech: Optional[bytes] = kwargs.pop("prompt_speech", None)
+        prompt_latent: Optional[bytes] = kwargs.pop("prompt_latent", None)
+        if not prompt_speech:
+            raise Exception("Please set prompt_speech for MegaTTS3.")
+        if not prompt_latent:
+            raise Exception("Please set prompt_latent for MegaTTS3.")
+
+        assert self._model is not None
+        with io.BytesIO(prompt_latent) as prompt_latent_io:
+            resource_context = self._model.preprocess(
+                prompt_speech, latent_file=prompt_latent_io
+            )
+            wav_bytes = self._model.forward(
+                resource_context,
+                input,
+                time_step=kwargs.get("time_step", 32),
+                p_w=kwargs.get("p_w", 1.6),
+                t_w=kwargs.get("t_w", 2.5),
+            )
+
+        # Save the generated audio
+        with BytesIO() as out:
+            with soundfile.SoundFile(
+                out, "w", self._model.sr, 1, format=response_format.upper()
+            ) as f:
+                f.write(wav_bytes)
+            return out.getvalue()
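A usage sketch (annotation, not part of the diff): driving this new MegaTTS path through the RESTful client, assuming a locally running endpoint and that extra kwargs such as prompt_speech and prompt_latent are forwarded to MegaTTSModel.speech() as the handler above expects; the file names are placeholders.

from xinference.client import Client

client = Client("http://localhost:9997")
model_uid = client.launch_model(model_name="MegaTTS3", model_type="audio")
model = client.get_model(model_uid)

# MegaTTS3 rejects named voices; it clones from a reference wav plus a
# pre-extracted latent file, both passed as raw bytes.
with open("ref_voice.wav", "rb") as f:
    prompt_speech = f.read()
with open("ref_voice.npy", "rb") as f:
    prompt_latent = f.read()

audio = model.speech(
    input="Hello from MegaTTS3.",
    voice="",
    response_format="wav",
    prompt_speech=prompt_speech,
    prompt_latent=prompt_latent,
)
with open("out.wav", "wb") as f:
    f.write(audio)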
xinference/model/audio/model_spec.json CHANGED
@@ -203,6 +203,21 @@
       "merge_length_s": 15
     }
   },
+  {
+    "model_name": "paraformer-zh",
+    "model_family": "funasr",
+    "model_id": "funasr/paraformer-zh",
+    "model_revision": "5ed094cdfc8f6a9b6b022bd08bc904ef862bc79e",
+    "model_ability": "audio-to-text",
+    "multilingual": false,
+    "default_model_config": {
+      "vad_model": "fsmn-vad",
+      "punc_model": "ct-punc"
+    },
+    "default_transcription_config": {
+      "batch_size_s": 300
+    }
+  },
   {
     "model_name": "ChatTTS",
     "model_family": "ChatTTS",
@@ -216,7 +231,7 @@
     "model_family": "CosyVoice",
     "model_id": "FunAudioLLM/CosyVoice-300M",
     "model_revision": "39c4e13d46bd4dfb840d214547623e5fcd2428e2",
-    "model_ability": "audio-to-audio",
+    "model_ability": "text-to-audio",
     "multilingual": true
   },
   {
@@ -346,5 +361,13 @@
     "model_revision": "7884269d6fd3f9beabc271b6f1308e5699281fa9",
     "model_ability": "text-to-audio",
     "multilingual": true
+  },
+  {
+    "model_name": "MegaTTS3",
+    "model_family": "MegaTTS",
+    "model_id": "ByteDance/MegaTTS3",
+    "model_revision": "409a7002b006d80f0730fca6f80441b08c10e738",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]
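As with MegaTTS3, the new paraformer-zh entry can be exercised end to end. A hedged sketch against the RESTful client, assuming transcriptions() returns the usual {"text": ...} payload and that the default_model_config above (fsmn-vad segmentation, ct-punc punctuation) is applied server-side; the audio file is a placeholder.

from xinference.client import Client

client = Client("http://localhost:9997")
uid = client.launch_model(model_name="paraformer-zh", model_type="audio")
model = client.get_model(uid)

# Mandarin speech in, punctuated text out.
with open("speech_zh.wav", "rb") as f:
    result = model.transcriptions(f.read())
print(result["text"])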
xinference/model/audio/model_spec_modelscope.json CHANGED
@@ -47,6 +47,22 @@
       "merge_length_s": 15
     }
   },
+  {
+    "model_name": "paraformer-zh",
+    "model_family": "funasr",
+    "model_hub": "modelscope",
+    "model_id": "iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn",
+    "model_revision": "master",
+    "model_ability": "audio-to-text",
+    "multilingual": false,
+    "default_model_config": {
+      "vad_model": "fsmn-vad",
+      "punc_model": "ct-punc"
+    },
+    "default_transcription_config": {
+      "batch_size_s": 300
+    }
+  },
   {
     "model_name": "ChatTTS",
     "model_family": "ChatTTS",
@@ -62,7 +78,7 @@
     "model_hub": "modelscope",
     "model_id": "iic/CosyVoice-300M",
     "model_revision": "master",
-    "model_ability": "audio-to-audio",
+    "model_ability": "text-to-audio",
     "multilingual": true
   },
   {
@@ -109,5 +125,14 @@
     "model_revision": "master",
     "model_ability": "text-to-audio",
     "multilingual": true
+  },
+  {
+    "model_name": "MegaTTS3",
+    "model_family": "MegaTTS",
+    "model_hub": "modelscope",
+    "model_id": "ByteDance/MegaTTS3",
+    "model_revision": "master",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]
xinference/model/core.py CHANGED
@@ -30,6 +30,11 @@ class ModelDescription(ABC):
         self.devices = devices
         self._model_path = model_path
 
+    @property
+    @abstractmethod
+    def spec(self):
+        pass
+
     def to_dict(self):
         """
         Return a dict to describe some information about model.
@@ -155,3 +160,12 @@ class CacheableModelSpec(BaseModel):
     model_id: str
     model_revision: Optional[str]
     model_hub: str = "huggingface"
+
+
+class VirtualEnvSettings(BaseModel):
+    packages: List[str]
+    inherit_pip_config: bool = True
+    index_url: Optional[str] = None
+    extra_index_url: Optional[str] = None
+    find_links: Optional[str] = None
+    trusted_host: Optional[str] = None
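The new VirtualEnvSettings model is what the "virtualenv" blocks in the spec JSON files elsewhere in this diff deserialize into. A small sketch of how its fields map onto pip flags; to_pip_args is a hypothetical helper written for illustration, not part of xinference.

from typing import List

from xinference.model.core import VirtualEnvSettings

settings = VirtualEnvSettings(
    packages=["transformers==4.37.2", "timm==0.6.13"],
    index_url="https://pypi.org/simple",
)

def to_pip_args(s: VirtualEnvSettings) -> List[str]:
    # Render the settings as arguments for a `pip install` run inside
    # the model's dedicated virtual environment.
    args = list(s.packages)
    if s.index_url:
        args += ["--index-url", s.index_url]
    if s.extra_index_url:
        args += ["--extra-index-url", s.extra_index_url]
    if s.find_links:
        args += ["--find-links", s.find_links]
    if s.trusted_host:
        args += ["--trusted-host", s.trusted_host]
    return args

print(to_pip_args(settings))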
xinference/model/embedding/core.py CHANGED
@@ -24,7 +24,7 @@ import torch
 from ..._compat import ROOT_KEY, ErrorWrapper, ValidationError
 from ...device_utils import empty_cache
 from ...types import Embedding, EmbeddingData, EmbeddingUsage
-from ..core import CacheableModelSpec, ModelDescription
+from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
 from ..utils import get_cache_dir, is_model_cached
 
 logger = logging.getLogger(__name__)
@@ -57,6 +57,7 @@ class EmbeddingModelSpec(CacheableModelSpec):
     model_id: str
     model_revision: Optional[str]
     model_hub: str = "huggingface"
+    virtualenv: Optional[VirtualEnvSettings]
 
 
 class EmbeddingModelDescription(ModelDescription):
@@ -70,6 +71,10 @@ class EmbeddingModelDescription(ModelDescription):
         super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec
 
+    @property
+    def spec(self):
+        return self._model_spec
+
     def to_dict(self):
         return {
             "model_type": "embedding",
xinference/model/flexible/core.py CHANGED
@@ -20,7 +20,7 @@ from threading import Lock
 from typing import Dict, List, Optional, Tuple
 
 from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
-from ..core import CacheableModelSpec, ModelDescription
+from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
 from .utils import get_launcher
 
 logger = logging.getLogger(__name__)
@@ -34,6 +34,7 @@ class FlexibleModelSpec(CacheableModelSpec):
     model_uri: Optional[str]
     launcher: str
     launcher_args: Optional[str]
+    virtualenv: Optional[VirtualEnvSettings]
 
     def parser_args(self):
         return json.loads(self.launcher_args)
@@ -50,6 +51,10 @@ class FlexibleModelDescription(ModelDescription):
         super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec
 
+    @property
+    def spec(self):
+        return self._model_spec
+
     def to_dict(self):
         return {
             "model_type": "flexible",
xinference/model/image/core.py CHANGED
@@ -21,7 +21,7 @@ from typing import Dict, List, Literal, Optional, Tuple, Union
 
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import PeftModelConfig
-from ..core import CacheableModelSpec, ModelDescription
+from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
 from ..utils import (
     IS_NEW_HUGGINGFACE_HUB,
     retry_download,
@@ -59,6 +59,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
     gguf_model_id: Optional[str]
     gguf_quantizations: Optional[List[str]]
     gguf_model_file_name_template: Optional[str]
+    virtualenv: Optional[VirtualEnvSettings]
 
 
 class ImageModelDescription(ModelDescription):
@@ -72,6 +73,10 @@ class ImageModelDescription(ModelDescription):
         super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec
 
+    @property
+    def spec(self):
+        return self._model_spec
+
     def to_dict(self):
         if self._model_spec.controlnet is not None:
             controlnet = [cn.dict() for cn in self._model_spec.controlnet]
xinference/model/image/model_spec.json CHANGED
@@ -339,6 +339,22 @@
     "model_revision": "cf6b7386bc89a54f09785612ba74cb12de6fa17c",
     "model_ability": [
       "ocr"
-    ]
+    ],
+    "virtualenv": {
+      "packages": [
+        "transformers==4.37.2",
+        "httpx==0.24.0",
+        "deepspeed==0.12.3",
+        "peft==0.4.0",
+        "tiktoken==0.6.0",
+        "bitsandbytes==0.41.0",
+        "scikit-learn==1.2.2",
+        "sentencepiece==0.1.99",
+        "einops==0.6.1",
+        "einops-exts==0.0.4",
+        "timm==0.6.13",
+        "numpy==1.26.4"
+      ]
+    }
   }
 ]
xinference/model/image/model_spec_modelscope.json CHANGED
@@ -315,6 +315,22 @@
     "model_hub": "modelscope",
     "model_ability": [
       "ocr"
-    ]
+    ],
+    "virtualenv": {
+      "packages": [
+        "transformers==4.37.2",
+        "httpx==0.24.0",
+        "deepspeed==0.12.3",
+        "peft==0.4.0",
+        "tiktoken==0.6.0",
+        "bitsandbytes==0.41.0",
+        "scikit-learn==1.2.2",
+        "sentencepiece==0.1.99",
+        "einops==0.6.1",
+        "einops-exts==0.0.4",
+        "timm==0.6.13",
+        "numpy==1.26.4"
+      ]
+    }
   }
 ]
xinference/model/llm/__init__.py CHANGED
@@ -132,7 +132,7 @@ def _install():
     from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel, XllamaCppModel
     from .lmdeploy.core import LMDeployChatModel, LMDeployModel
     from .mlx.core import MLXChatModel, MLXModel, MLXVisionModel
-    from .sglang.core import SGLANGChatModel, SGLANGModel
+    from .sglang.core import SGLANGChatModel, SGLANGModel, SGLANGVisionModel
     from .transformers.chatglm import ChatglmPytorchChatModel
     from .transformers.cogagent import CogAgentChatModel
     from .transformers.cogvlm2 import CogVLM2Model
@@ -143,16 +143,15 @@ def _install():
         DeepSeekV2PytorchModel,
     )
     from .transformers.deepseek_vl import DeepSeekVLChatModel
+    from .transformers.deepseek_vl2 import DeepSeekVL2ChatModel
     from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
     from .transformers.glm4v import Glm4VModel
     from .transformers.glm_edge_v import GlmEdgeVModel
-    from .transformers.intern_vl import InternVLChatModel
     from .transformers.internlm2 import Internlm2PytorchChatModel
     from .transformers.minicpmv25 import MiniCPMV25Model
     from .transformers.minicpmv26 import MiniCPMV26Model
     from .transformers.opt import OptPytorchModel
     from .transformers.qwen2_audio import Qwen2AudioChatModel
-    from .transformers.qwen2_vl import Qwen2VLChatModel
     from .transformers.qwen_vl import QwenVLChatModel
     from .transformers.yi_vl import YiVLChatModel
     from .vllm.core import VLLMChatModel, VLLMModel, VLLMVisionModel
@@ -173,7 +172,7 @@ def _install():
             XllamaCppModel,
         ]
     )
-    SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
+    SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel, SGLANGVisionModel])
     VLLM_CLASSES.extend([VLLMModel, VLLMChatModel, VLLMVisionModel])
     MLX_CLASSES.extend([MLXModel, MLXChatModel, MLXVisionModel])
     LMDEPLOY_CLASSES.extend([LMDeployModel, LMDeployChatModel])
@@ -183,11 +182,10 @@ def _install():
             PytorchChatModel,
             Internlm2PytorchChatModel,
             QwenVLChatModel,
-            Qwen2VLChatModel,
             Qwen2AudioChatModel,
             YiVLChatModel,
             DeepSeekVLChatModel,
-            InternVLChatModel,
+            DeepSeekVL2ChatModel,
             PytorchModel,
             CogVLM2Model,
             CogVLM2VideoModel,
xinference/model/llm/core.py CHANGED
@@ -54,6 +54,7 @@ class LLM(abc.ABC):
         **kwargs,
     ):
         self.model_uid, self.rep_id = parse_replica_model_uid(replica_model_uid)
+        self.raw_model_uid = replica_model_uid
         self.model_family = model_family
         self.model_spec = model_spec
         self.quantization = quantization
@@ -143,6 +144,10 @@ class LLMDescription(ModelDescription):
         self._llm_spec = llm_spec
         self._quantization = quantization
 
+    @property
+    def spec(self):
+        return self._llm_family
+
     def to_dict(self):
         return {
             "model_type": "LLM",
xinference/model/llm/llama_cpp/core.py CHANGED
@@ -36,7 +36,7 @@ from ..utils import DEEPSEEK_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelM
 
 logger = logging.getLogger(__name__)
 
-USE_XLLAMACPP = bool(int(os.environ.get("USE_XLLAMACPP", 0)))
+USE_XLLAMACPP = bool(int(os.environ.get("USE_XLLAMACPP", 1)))
 
 
 class _Done:
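Note the default flip: the xllamacpp backend is now on unless the variable says otherwise. Since USE_XLLAMACPP is evaluated at module import time (it is a module-level assignment above), a deployment that still needs the llama-cpp-python path must set the variable before the module loads; a minimal sketch:

import os

# Must run before xinference.model.llm.llama_cpp.core is imported.
os.environ["USE_XLLAMACPP"] = "0"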
@@ -142,24 +142,38 @@ class XllamaCppModel(LLM, ChatModelMixin):
 
         if os.path.isfile(self.model_path):
             # mostly passed from --model_path
-            model_path = os.path.realpath(self.model_path)
+            model_path = self.model_path
         else:
             # handle legacy cache.
-            model_path = os.path.realpath(
-                os.path.join(
+            if (
+                self.model_spec.model_file_name_split_template
+                and self.model_spec.quantization_parts
+            ):
+                part = self.model_spec.quantization_parts[self.quantization]
+                model_path = os.path.join(
+                    self.model_path,
+                    self.model_spec.model_file_name_split_template.format(
+                        quantization=self.quantization, part=part[0]
+                    ),
+                )
+            else:
+                model_path = os.path.join(
                     self.model_path,
                     self.model_spec.model_file_name_template.format(
                         quantization=self.quantization
                     ),
                 )
-            )
-            legacy_model_file_path = os.path.join(self.model_path, "model.bin")
-            if os.path.exists(legacy_model_file_path):
-                model_path = legacy_model_file_path
+                legacy_model_file_path = os.path.join(self.model_path, "model.bin")
+                if os.path.exists(legacy_model_file_path):
+                    model_path = legacy_model_file_path
 
         try:
             params = CommonParams()
-            params.model = model_path
+            # Compatible with xllamacpp changes
+            try:
+                params.model = model_path
+            except Exception:
+                params.model.path = model_path
             if self.model_family.chat_template:
                 params.chat_template = self.model_family.chat_template
             # This is the default value, could be overwritten by _llamacpp_model_config
@@ -302,7 +316,12 @@ class XllamaCppModel(LLM, ChatModelMixin):
             while (r := q.get()) is not _Done:
                 if type(r) is _Error:
                     raise Exception("Got error in chat stream: %s", r.msg)
-                yield r
+                # Get valid keys (O(1) lookup)
+                chunk_keys = ChatCompletionChunk.__annotations__
+                # The chunk may contain additional keys (e.g., system_fingerprint),
+                # which might not conform to OpenAI/DeepSeek formats.
+                # Filter out keys that are not part of ChatCompletionChunk.
+                yield {key: r[key] for key in chunk_keys if key in r}
 
         return self._to_chat_completion_chunks(
             _to_iterator(), self.reasoning_parser
@@ -410,20 +429,30 @@ class LlamaCppModel(LLM):
 
         if os.path.isfile(self.model_path):
             # mostly passed from --model_path
-            model_path = os.path.realpath(self.model_path)
+            model_path = self.model_path
         else:
             # handle legacy cache.
-            model_path = os.path.realpath(
-                os.path.join(
+            if (
+                self.model_spec.model_file_name_split_template
+                and self.model_spec.quantization_parts
+            ):
+                part = self.model_spec.quantization_parts[self.quantization]
+                model_path = os.path.join(
+                    self.model_path,
+                    self.model_spec.model_file_name_split_template.format(
+                        quantization=self.quantization, part=part[0]
+                    ),
+                )
+            else:
+                model_path = os.path.join(
                     self.model_path,
                     self.model_spec.model_file_name_template.format(
                         quantization=self.quantization
                     ),
                 )
-            )
-            legacy_model_file_path = os.path.join(self.model_path, "model.bin")
-            if os.path.exists(legacy_model_file_path):
-                model_path = legacy_model_file_path
+                legacy_model_file_path = os.path.join(self.model_path, "model.bin")
+                if os.path.exists(legacy_model_file_path):
+                    model_path = legacy_model_file_path
 
         try:
             self._llm = Llama(
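The streaming hunk above filters each chunk through ChatCompletionChunk.__annotations__. A standalone sketch of the same allow-list trick, with a stand-in TypedDict so it runs without xinference installed:

from typing import List, TypedDict

class ChatCompletionChunk(TypedDict):
    # Stand-in for xinference.types.ChatCompletionChunk; only the key set matters.
    id: str
    model: str
    object: str
    created: int
    choices: List[dict]

raw = {
    "id": "chatcmpl-1",
    "model": "qwen2.5",
    "object": "chat.completion.chunk",
    "created": 1700000000,
    "choices": [],
    "system_fingerprint": "b1234",  # extra key a backend may emit
}

# __annotations__ is a plain dict, so `key in chunk_keys` is an O(1) lookup.
chunk_keys = ChatCompletionChunk.__annotations__
clean = {key: raw[key] for key in chunk_keys if key in raw}
assert "system_fingerprint" not in clean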