xinference 0.15.4__py3-none-any.whl → 0.16.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (67)
  1. xinference/__init__.py +0 -4
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +48 -0
  4. xinference/client/restful/restful_client.py +19 -0
  5. xinference/constants.py +4 -4
  6. xinference/core/chat_interface.py +5 -1
  7. xinference/core/image_interface.py +5 -1
  8. xinference/core/model.py +195 -34
  9. xinference/core/scheduler.py +10 -7
  10. xinference/core/utils.py +9 -0
  11. xinference/model/__init__.py +4 -0
  12. xinference/model/audio/chattts.py +25 -14
  13. xinference/model/audio/model_spec.json +1 -1
  14. xinference/model/audio/model_spec_modelscope.json +1 -1
  15. xinference/model/embedding/model_spec.json +1 -1
  16. xinference/model/image/core.py +59 -4
  17. xinference/model/image/model_spec.json +24 -3
  18. xinference/model/image/model_spec_modelscope.json +25 -3
  19. xinference/model/image/ocr/__init__.py +13 -0
  20. xinference/model/image/ocr/got_ocr2.py +76 -0
  21. xinference/model/image/scheduler/__init__.py +13 -0
  22. xinference/model/image/scheduler/flux.py +533 -0
  23. xinference/model/image/stable_diffusion/core.py +8 -34
  24. xinference/model/image/stable_diffusion/mlx.py +221 -0
  25. xinference/model/image/utils.py +39 -3
  26. xinference/model/llm/__init__.py +2 -0
  27. xinference/model/llm/llm_family.json +178 -1
  28. xinference/model/llm/llm_family_modelscope.json +119 -0
  29. xinference/model/llm/transformers/chatglm.py +104 -0
  30. xinference/model/llm/transformers/core.py +37 -111
  31. xinference/model/llm/transformers/deepseek_v2.py +0 -226
  32. xinference/model/llm/transformers/internlm2.py +3 -95
  33. xinference/model/llm/transformers/opt.py +68 -0
  34. xinference/model/llm/transformers/utils.py +4 -284
  35. xinference/model/llm/utils.py +2 -2
  36. xinference/model/llm/vllm/core.py +16 -1
  37. xinference/thirdparty/mlx/__init__.py +13 -0
  38. xinference/thirdparty/mlx/flux/__init__.py +15 -0
  39. xinference/thirdparty/mlx/flux/autoencoder.py +357 -0
  40. xinference/thirdparty/mlx/flux/clip.py +154 -0
  41. xinference/thirdparty/mlx/flux/datasets.py +75 -0
  42. xinference/thirdparty/mlx/flux/flux.py +247 -0
  43. xinference/thirdparty/mlx/flux/layers.py +302 -0
  44. xinference/thirdparty/mlx/flux/lora.py +76 -0
  45. xinference/thirdparty/mlx/flux/model.py +134 -0
  46. xinference/thirdparty/mlx/flux/sampler.py +56 -0
  47. xinference/thirdparty/mlx/flux/t5.py +244 -0
  48. xinference/thirdparty/mlx/flux/tokenizers.py +185 -0
  49. xinference/thirdparty/mlx/flux/trainer.py +98 -0
  50. xinference/thirdparty/mlx/flux/utils.py +179 -0
  51. xinference/utils.py +2 -3
  52. xinference/web/ui/build/asset-manifest.json +3 -3
  53. xinference/web/ui/build/index.html +1 -1
  54. xinference/web/ui/build/static/js/{main.e51a356d.js → main.b76aeeb7.js} +3 -3
  55. xinference/web/ui/build/static/js/main.b76aeeb7.js.map +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/32ea2c04cf0bba2761b4883d2c40cc259952c94d2d6bb774e510963ca37aac0a.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +1 -0
  58. {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/METADATA +49 -10
  59. {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/RECORD +64 -44
  60. xinference/web/ui/build/static/js/main.e51a356d.js.map +0 -1
  61. xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +0 -1
  62. xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +0 -1
  63. /xinference/web/ui/build/static/js/{main.e51a356d.js.LICENSE.txt → main.b76aeeb7.js.LICENSE.txt} +0 -0
  64. {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/LICENSE +0 -0
  65. {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/WHEEL +0 -0
  66. {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/entry_points.txt +0 -0
  67. {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/top_level.txt +0 -0
xinference/model/audio/chattts.py

@@ -54,7 +54,11 @@ class ChatTTSModel:
         torch.set_float32_matmul_precision("high")
         self._model = ChatTTS.Chat()
         logger.info("Load ChatTTS model with kwargs: %s", self._kwargs)
-        self._model.load(source="custom", custom_path=self._model_path, **self._kwargs)
+        ok = self._model.load(
+            source="custom", custom_path=self._model_path, **self._kwargs
+        )
+        if not ok:
+            raise Exception(f"The ChatTTS model is not correct: {self._model_path}")
 
     def speech(
         self,
@@ -114,16 +118,15 @@ class ChatTTSModel:
             last_pos = 0
             with writer.open():
                 for it in iter:
-                    for itt in it:
-                        for chunk in itt:
-                            chunk = np.array([chunk]).transpose()
-                            writer.write_audio_chunk(i, torch.from_numpy(chunk))
-                            new_last_pos = out.tell()
-                            if new_last_pos != last_pos:
-                                out.seek(last_pos)
-                                encoded_bytes = out.read()
-                                yield encoded_bytes
-                                last_pos = new_last_pos
+                    for chunk in it:
+                        chunk = np.array([chunk]).transpose()
+                        writer.write_audio_chunk(i, torch.from_numpy(chunk))
+                        new_last_pos = out.tell()
+                        if new_last_pos != last_pos:
+                            out.seek(last_pos)
+                            encoded_bytes = out.read()
+                            yield encoded_bytes
+                            last_pos = new_last_pos
 
         return _generator()
     else:
@@ -131,7 +134,15 @@ class ChatTTSModel:
 
         # Save the generated audio
         with BytesIO() as out:
-            torchaudio.save(
-                out, torch.from_numpy(wavs[0]), 24000, format=response_format
-            )
+            try:
+                torchaudio.save(
+                    out,
+                    torch.from_numpy(wavs[0]).unsqueeze(0),
+                    24000,
+                    format=response_format,
+                )
+            except:
+                torchaudio.save(
+                    out, torch.from_numpy(wavs[0]), 24000, format=response_format
+                )
             return out.getvalue()
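
The unsqueeze(0) in the new code reflects that torchaudio.save expects a 2-D
(channels, frames) tensor, while ChatTTS emits a 1-D mono waveform; the bare
except falls back to the old call for torchaudio versions that still accept the
1-D form. A minimal shape sketch (illustrative only, not part of the diff):

    import torch

    wav = torch.zeros(24000)    # 1-D mono waveform: one second at 24 kHz
    wav2d = wav.unsqueeze(0)    # shape (1, 24000), i.e. (channels, frames)
    assert wav2d.shape == (1, 24000)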
xinference/model/audio/model_spec.json

@@ -127,7 +127,7 @@
     "model_name": "ChatTTS",
     "model_family": "ChatTTS",
     "model_id": "2Noise/ChatTTS",
-    "model_revision": "ce5913842aebd78e4a01a02d47244b8d62ac4ee3",
+    "model_revision": "3b34118f6d25850440b8901cef3e71c6ef8619c8",
     "model_ability": "text-to-audio",
     "multilingual": true
   },
xinference/model/audio/model_spec_modelscope.json

@@ -42,7 +42,7 @@
     "model_name": "ChatTTS",
     "model_family": "ChatTTS",
     "model_hub": "modelscope",
-    "model_id": "pzc163/chatTTS",
+    "model_id": "AI-ModelScope/ChatTTS",
     "model_revision": "master",
     "model_ability": "text-to-audio",
     "multilingual": true
xinference/model/embedding/model_spec.json

@@ -233,7 +233,7 @@
   },
   {
     "model_name": "gte-Qwen2",
-    "dimensions": 3584,
+    "dimensions": 4096,
     "max_tokens": 32000,
     "language": ["zh", "en"],
     "model_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
xinference/model/image/core.py

@@ -11,17 +11,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import collections.abc
 import logging
 import os
+import platform
 from collections import defaultdict
-from typing import Dict, List, Literal, Optional, Tuple
+from typing import Dict, List, Literal, Optional, Tuple, Union
 
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import PeftModelConfig
 from ..core import CacheableModelSpec, ModelDescription
 from ..utils import valid_model_revision
+from .ocr.got_ocr2 import GotOCR2Model
 from .stable_diffusion.core import DiffusionModel
+from .stable_diffusion.mlx import MLXDiffusionModel
 
 logger = logging.getLogger(__name__)
 
@@ -45,6 +49,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
     model_hub: str = "huggingface"
     model_ability: Optional[List[str]]
     controlnet: Optional[List["ImageModelFamilyV1"]]
+    default_model_config: Optional[dict] = {}
     default_generate_config: Optional[dict] = {}
 
 
@@ -180,6 +185,28 @@ def get_cache_status(
     return valid_model_revision(meta_path, model_spec.model_revision)
 
 
+def create_ocr_model_instance(
+    subpool_addr: str,
+    devices: List[str],
+    model_uid: str,
+    model_spec: ImageModelFamilyV1,
+    model_path: Optional[str] = None,
+    **kwargs,
+) -> Tuple[GotOCR2Model, ImageModelDescription]:
+    if not model_path:
+        model_path = cache(model_spec)
+    model = GotOCR2Model(
+        model_uid,
+        model_path,
+        model_spec=model_spec,
+        **kwargs,
+    )
+    model_description = ImageModelDescription(
+        subpool_addr, devices, model_spec, model_path=model_path
+    )
+    return model, model_description
+
+
 def create_image_model_instance(
     subpool_addr: str,
     devices: List[str],
@@ -189,8 +216,26 @@ def create_image_model_instance(
     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
     model_path: Optional[str] = None,
     **kwargs,
-) -> Tuple[DiffusionModel, ImageModelDescription]:
+) -> Tuple[
+    Union[DiffusionModel, MLXDiffusionModel, GotOCR2Model], ImageModelDescription
+]:
     model_spec = match_diffusion(model_name, download_hub)
+    if model_spec.model_ability and "ocr" in model_spec.model_ability:
+        return create_ocr_model_instance(
+            subpool_addr=subpool_addr,
+            devices=devices,
+            model_uid=model_uid,
+            model_name=model_name,
+            model_spec=model_spec,
+            model_path=model_path,
+            **kwargs,
+        )
+
+    # use default model config
+    model_default_config = (model_spec.default_model_config or {}).copy()
+    model_default_config.update(kwargs)
+    kwargs = model_default_config
+
     controlnet = kwargs.get("controlnet")
     # Handle controlnet
     if controlnet is not None:
@@ -232,10 +277,20 @@
         lora_load_kwargs = None
         lora_fuse_kwargs = None
 
-    model = DiffusionModel(
+    if (
+        platform.system() == "Darwin"
+        and "arm" in platform.machine().lower()
+        and model_name in MLXDiffusionModel.supported_models
+    ):
+        # Mac with M series silicon chips
+        model_cls = MLXDiffusionModel
+    else:
+        model_cls = DiffusionModel  # type: ignore
+
+    model = model_cls(
         model_uid,
         model_path,
-        lora_model_paths=lora_model,
+        lora_model=lora_model,
         lora_load_kwargs=lora_load_kwargs,
         lora_fuse_kwargs=lora_fuse_kwargs,
         model_spec=model_spec,
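
Two behaviors in this hunk are worth spelling out: the spec-level
default_model_config seeds the kwargs and anything the caller passes wins, and
the MLX backend is chosen only on Apple Silicon (platform.system() == "Darwin"
with a machine string such as "arm64"). A small sketch of the merge precedence,
with made-up values:

    # Illustrative only, not from the diff: caller kwargs override spec defaults.
    spec_defaults = {"quantize": True, "quantize_text_encoder": "text_encoder_2"}
    caller_kwargs = {"quantize": False}  # user explicitly disables quantization

    merged = spec_defaults.copy()
    merged.update(caller_kwargs)
    assert merged == {"quantize": False, "quantize_text_encoder": "text_encoder_2"}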
xinference/model/image/model_spec.json

@@ -8,7 +8,11 @@
       "text2image",
       "image2image",
       "inpainting"
-    ]
+    ],
+    "default_model_config": {
+      "quantize": true,
+      "quantize_text_encoder": "text_encoder_2"
+    }
   },
   {
     "model_name": "FLUX.1-dev",
@@ -19,7 +23,11 @@
       "text2image",
       "image2image",
       "inpainting"
-    ]
+    ],
+    "default_model_config": {
+      "quantize": true,
+      "quantize_text_encoder": "text_encoder_2"
+    }
   },
   {
     "model_name": "sd3-medium",
@@ -30,7 +38,11 @@
       "text2image",
       "image2image",
       "inpainting"
-    ]
+    ],
+    "default_model_config": {
+      "quantize": true,
+      "quantize_text_encoder": "text_encoder_3"
+    }
   },
   {
     "model_name": "sd-turbo",
@@ -178,5 +190,14 @@
     "model_ability": [
       "inpainting"
     ]
+  },
+  {
+    "model_name": "GOT-OCR2_0",
+    "model_family": "ocr",
+    "model_id": "stepfun-ai/GOT-OCR2_0",
+    "model_revision": "cf6b7386bc89a54f09785612ba74cb12de6fa17c",
+    "model_ability": [
+      "ocr"
+    ]
   }
 ]
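
With these defaults in place, the FLUX.1 and sd3-medium models load quantized
out of the box, and a caller can still override any default at launch time
because user kwargs win in the merge shown in core.py above. A hedged sketch
using the RESTful client (the endpoint URL is an assumption, and quantize
reaches the model via launch_model's extra kwargs):

    # Illustrative sketch, not from the diff; assumes a local xinference server.
    from xinference.client import Client

    client = Client("http://localhost:9997")
    model_uid = client.launch_model(
        model_name="FLUX.1-schnell",
        model_type="image",
        quantize=False,  # overrides the spec default "quantize": true
    )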
xinference/model/image/model_spec_modelscope.json

@@ -9,7 +9,11 @@
       "text2image",
       "image2image",
       "inpainting"
-    ]
+    ],
+    "default_model_config": {
+      "quantize": true,
+      "quantize_text_encoder": "text_encoder_2"
+    }
   },
   {
     "model_name": "FLUX.1-dev",
@@ -21,7 +25,11 @@
       "text2image",
       "image2image",
       "inpainting"
-    ]
+    ],
+    "default_model_config": {
+      "quantize": true,
+      "quantize_text_encoder": "text_encoder_2"
+    }
   },
   {
     "model_name": "sd3-medium",
@@ -33,7 +41,11 @@
       "text2image",
       "image2image",
       "inpainting"
-    ]
+    ],
+    "default_model_config": {
+      "quantize": true,
+      "quantize_text_encoder": "text_encoder_3"
+    }
   },
   {
     "model_name": "sd-turbo",
@@ -148,5 +160,15 @@
         "model_revision": "62134b9d8e703b5d6f74f1534457287a8bba77ef"
       }
     ]
+  },
+  {
+    "model_name": "GOT-OCR2_0",
+    "model_family": "ocr",
+    "model_id": "stepfun-ai/GOT-OCR2_0",
+    "model_revision": "master",
+    "model_hub": "modelscope",
+    "model_ability": [
+      "ocr"
+    ]
   }
 ]
xinference/model/image/ocr/__init__.py (new file)

@@ -0,0 +1,13 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
xinference/model/image/ocr/got_ocr2.py (new file)

@@ -0,0 +1,76 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import TYPE_CHECKING, Optional
+
+import PIL.Image
+
+if TYPE_CHECKING:
+    from ..core import ImageModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class GotOCR2Model:
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: Optional[str] = None,
+        device: Optional[str] = None,
+        model_spec: Optional["ImageModelFamilyV1"] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._device = device
+        # model info when loading
+        self._model = None
+        self._tokenizer = None
+        # info
+        self._model_spec = model_spec
+        self._abilities = model_spec.model_ability or []  # type: ignore
+        self._kwargs = kwargs
+
+    @property
+    def model_ability(self):
+        return self._abilities
+
+    def load(self):
+        from transformers import AutoModel, AutoTokenizer
+
+        self._tokenizer = AutoTokenizer.from_pretrained(
+            self._model_path, trust_remote_code=True
+        )
+        model = AutoModel.from_pretrained(
+            self._model_path,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True,
+            device_map="cuda",
+            use_safetensors=True,
+            pad_token_id=self._tokenizer.eos_token_id,
+        )
+        self._model = model.eval().cuda()
+
+    def ocr(
+        self,
+        image: PIL.Image,
+        **kwargs,
+    ):
+        logger.info("Got OCR 2.0 kwargs: %s", kwargs)
+        if "ocr_type" not in kwargs:
+            kwargs["ocr_type"] = "ocr"
+        assert self._model is not None
+        # This chat API limits the max new tokens inside.
+        return self._model.chat(self._tokenizer, image, gradio_input=True, **kwargs)
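
For reference, a minimal usage sketch of the class above (the model path and
image file are hypothetical; load() hard-codes device_map="cuda", so a CUDA GPU
is required):

    # Hypothetical local usage of GotOCR2Model as defined in this diff.
    from PIL import Image

    model = GotOCR2Model("got-ocr2-demo", "/path/to/GOT-OCR2_0")
    model.load()  # expects an already-downloaded checkpoint at model_path
    text = model.ocr(Image.open("sample.png"))  # defaults to ocr_type="ocr"
    print(text)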
xinference/model/image/scheduler/__init__.py (new file)

@@ -0,0 +1,13 @@
+# Copyright 2022-2024 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.