xinference 0.15.4__py3-none-any.whl → 0.16.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/__init__.py +0 -4
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +48 -0
- xinference/client/restful/restful_client.py +19 -0
- xinference/constants.py +4 -4
- xinference/core/chat_interface.py +5 -1
- xinference/core/image_interface.py +5 -1
- xinference/core/model.py +195 -34
- xinference/core/scheduler.py +10 -7
- xinference/core/utils.py +9 -0
- xinference/model/__init__.py +4 -0
- xinference/model/audio/chattts.py +25 -14
- xinference/model/audio/model_spec.json +1 -1
- xinference/model/audio/model_spec_modelscope.json +1 -1
- xinference/model/embedding/model_spec.json +1 -1
- xinference/model/image/core.py +59 -4
- xinference/model/image/model_spec.json +24 -3
- xinference/model/image/model_spec_modelscope.json +25 -3
- xinference/model/image/ocr/__init__.py +13 -0
- xinference/model/image/ocr/got_ocr2.py +76 -0
- xinference/model/image/scheduler/__init__.py +13 -0
- xinference/model/image/scheduler/flux.py +533 -0
- xinference/model/image/stable_diffusion/core.py +8 -34
- xinference/model/image/stable_diffusion/mlx.py +221 -0
- xinference/model/image/utils.py +39 -3
- xinference/model/llm/__init__.py +2 -0
- xinference/model/llm/llm_family.json +178 -1
- xinference/model/llm/llm_family_modelscope.json +119 -0
- xinference/model/llm/transformers/chatglm.py +104 -0
- xinference/model/llm/transformers/core.py +37 -111
- xinference/model/llm/transformers/deepseek_v2.py +0 -226
- xinference/model/llm/transformers/internlm2.py +3 -95
- xinference/model/llm/transformers/opt.py +68 -0
- xinference/model/llm/transformers/utils.py +4 -284
- xinference/model/llm/utils.py +2 -2
- xinference/model/llm/vllm/core.py +16 -1
- xinference/thirdparty/mlx/__init__.py +13 -0
- xinference/thirdparty/mlx/flux/__init__.py +15 -0
- xinference/thirdparty/mlx/flux/autoencoder.py +357 -0
- xinference/thirdparty/mlx/flux/clip.py +154 -0
- xinference/thirdparty/mlx/flux/datasets.py +75 -0
- xinference/thirdparty/mlx/flux/flux.py +247 -0
- xinference/thirdparty/mlx/flux/layers.py +302 -0
- xinference/thirdparty/mlx/flux/lora.py +76 -0
- xinference/thirdparty/mlx/flux/model.py +134 -0
- xinference/thirdparty/mlx/flux/sampler.py +56 -0
- xinference/thirdparty/mlx/flux/t5.py +244 -0
- xinference/thirdparty/mlx/flux/tokenizers.py +185 -0
- xinference/thirdparty/mlx/flux/trainer.py +98 -0
- xinference/thirdparty/mlx/flux/utils.py +179 -0
- xinference/utils.py +2 -3
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.e51a356d.js → main.b76aeeb7.js} +3 -3
- xinference/web/ui/build/static/js/main.b76aeeb7.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/32ea2c04cf0bba2761b4883d2c40cc259952c94d2d6bb774e510963ca37aac0a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +1 -0
- {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/METADATA +49 -10
- {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/RECORD +64 -44
- xinference/web/ui/build/static/js/main.e51a356d.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +0 -1
- /xinference/web/ui/build/static/js/{main.e51a356d.js.LICENSE.txt → main.b76aeeb7.js.LICENSE.txt} +0 -0
- {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/LICENSE +0 -0
- {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/WHEEL +0 -0
- {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/top_level.txt +0 -0
xinference/model/audio/chattts.py CHANGED

@@ -54,7 +54,11 @@ class ChatTTSModel:
         torch.set_float32_matmul_precision("high")
         self._model = ChatTTS.Chat()
         logger.info("Load ChatTTS model with kwargs: %s", self._kwargs)
-        self._model.load(
+        ok = self._model.load(
+            source="custom", custom_path=self._model_path, **self._kwargs
+        )
+        if not ok:
+            raise Exception(f"The ChatTTS model is not correct: {self._model_path}")
 
     def speech(
         self,
@@ -114,16 +118,15 @@
                 last_pos = 0
                 with writer.open():
                     for it in iter:
-                        for
-
-
-
-
-
-
-
-                        last_pos = new_last_pos
+                        for chunk in it:
+                            chunk = np.array([chunk]).transpose()
+                            writer.write_audio_chunk(i, torch.from_numpy(chunk))
+                        new_last_pos = out.tell()
+                        if new_last_pos != last_pos:
+                            out.seek(last_pos)
+                            encoded_bytes = out.read()
+                            yield encoded_bytes
+                            last_pos = new_last_pos
 
             return _generator()
         else:
@@ -131,7 +134,15 @@
 
             # Save the generated audio
             with BytesIO() as out:
-
-
-
+                try:
+                    torchaudio.save(
+                        out,
+                        torch.from_numpy(wavs[0]).unsqueeze(0),
+                        24000,
+                        format=response_format,
+                    )
+                except:
+                    torchaudio.save(
+                        out, torch.from_numpy(wavs[0]), 24000, format=response_format
+                    )
                 return out.getvalue()
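Two behavioral changes here: load() now raises instead of silently continuing when the ChatTTS weights fail to load, and the streaming path yields encoded bytes incrementally as each chunk is written, instead of buffering. A minimal sketch of consuming the streaming output through the Python client, assuming a ChatTTS model is already launched and that the speech() handle accepts a stream flag as in earlier releases (the model uid below is illustrative):

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")  # default endpoint; adjust as needed
    model = client.get_model("chattts")       # hypothetical model uid
    # with stream=True, chunks arrive as they are encoded rather than as one blob
    with open("out.mp3", "wb") as f:
        for chunk in model.speech("Hello, world", response_format="mp3", stream=True):
            f.write(chunk)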
xinference/model/audio/model_spec.json CHANGED

@@ -127,7 +127,7 @@
         "model_name": "ChatTTS",
         "model_family": "ChatTTS",
         "model_id": "2Noise/ChatTTS",
-        "model_revision": "
+        "model_revision": "3b34118f6d25850440b8901cef3e71c6ef8619c8",
         "model_ability": "text-to-audio",
         "multilingual": true
     },
xinference/model/image/core.py CHANGED

@@ -11,17 +11,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import collections.abc
 import logging
 import os
+import platform
 from collections import defaultdict
-from typing import Dict, List, Literal, Optional, Tuple
+from typing import Dict, List, Literal, Optional, Tuple, Union
 
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import PeftModelConfig
 from ..core import CacheableModelSpec, ModelDescription
 from ..utils import valid_model_revision
+from .ocr.got_ocr2 import GotOCR2Model
 from .stable_diffusion.core import DiffusionModel
+from .stable_diffusion.mlx import MLXDiffusionModel
 
 logger = logging.getLogger(__name__)
 
@@ -45,6 +49,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
     model_hub: str = "huggingface"
     model_ability: Optional[List[str]]
     controlnet: Optional[List["ImageModelFamilyV1"]]
+    default_model_config: Optional[dict] = {}
     default_generate_config: Optional[dict] = {}
 
 
@@ -180,6 +185,28 @@ def get_cache_status(
     return valid_model_revision(meta_path, model_spec.model_revision)
 
 
+def create_ocr_model_instance(
+    subpool_addr: str,
+    devices: List[str],
+    model_uid: str,
+    model_spec: ImageModelFamilyV1,
+    model_path: Optional[str] = None,
+    **kwargs,
+) -> Tuple[GotOCR2Model, ImageModelDescription]:
+    if not model_path:
+        model_path = cache(model_spec)
+    model = GotOCR2Model(
+        model_uid,
+        model_path,
+        model_spec=model_spec,
+        **kwargs,
+    )
+    model_description = ImageModelDescription(
+        subpool_addr, devices, model_spec, model_path=model_path
+    )
+    return model, model_description
+
+
 def create_image_model_instance(
     subpool_addr: str,
     devices: List[str],
@@ -189,8 +216,26 @@ def create_image_model_instance(
     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
     model_path: Optional[str] = None,
     **kwargs,
-) -> Tuple[
+) -> Tuple[
+    Union[DiffusionModel, MLXDiffusionModel, GotOCR2Model], ImageModelDescription
+]:
     model_spec = match_diffusion(model_name, download_hub)
+    if model_spec.model_ability and "ocr" in model_spec.model_ability:
+        return create_ocr_model_instance(
+            subpool_addr=subpool_addr,
+            devices=devices,
+            model_uid=model_uid,
+            model_name=model_name,
+            model_spec=model_spec,
+            model_path=model_path,
+            **kwargs,
+        )
+
+    # use default model config
+    model_default_config = (model_spec.default_model_config or {}).copy()
+    model_default_config.update(kwargs)
+    kwargs = model_default_config
+
     controlnet = kwargs.get("controlnet")
     # Handle controlnet
     if controlnet is not None:
@@ -232,10 +277,20 @@
     lora_load_kwargs = None
     lora_fuse_kwargs = None
 
-
+    if (
+        platform.system() == "Darwin"
+        and "arm" in platform.machine().lower()
+        and model_name in MLXDiffusionModel.supported_models
+    ):
+        # Mac with M series silicon chips
+        model_cls = MLXDiffusionModel
+    else:
+        model_cls = DiffusionModel  # type: ignore
+
+    model = model_cls(
         model_uid,
         model_path,
-
+        lora_model=lora_model,
         lora_load_kwargs=lora_load_kwargs,
         lora_fuse_kwargs=lora_fuse_kwargs,
         model_spec=model_spec,
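Note the merge order in the new default_model_config plumbing: spec-level defaults are copied first and launch-time kwargs are applied on top, so caller-supplied values win because dict.update overwrites existing keys. A self-contained sketch of the merge semantics, with illustrative values:

    # spec defaults lose to caller kwargs, since update() overwrites
    spec_defaults = {"quantize": True, "quantize_text_encoder": "text_encoder_2"}
    caller_kwargs = {"quantize": False}

    merged = spec_defaults.copy()
    merged.update(caller_kwargs)
    assert merged == {"quantize": False, "quantize_text_encoder": "text_encoder_2"}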
xinference/model/image/model_spec.json CHANGED

@@ -8,7 +8,11 @@
             "text2image",
             "image2image",
             "inpainting"
-        ]
+        ],
+        "default_model_config": {
+            "quantize": true,
+            "quantize_text_encoder": "text_encoder_2"
+        }
     },
     {
         "model_name": "FLUX.1-dev",
@@ -19,7 +23,11 @@
             "text2image",
             "image2image",
             "inpainting"
-        ]
+        ],
+        "default_model_config": {
+            "quantize": true,
+            "quantize_text_encoder": "text_encoder_2"
+        }
     },
     {
         "model_name": "sd3-medium",
@@ -30,7 +38,11 @@
             "text2image",
             "image2image",
             "inpainting"
-        ]
+        ],
+        "default_model_config": {
+            "quantize": true,
+            "quantize_text_encoder": "text_encoder_3"
+        }
     },
     {
         "model_name": "sd-turbo",
@@ -178,5 +190,14 @@
         "model_ability": [
             "inpainting"
         ]
+    },
+    {
+        "model_name": "GOT-OCR2_0",
+        "model_family": "ocr",
+        "model_id": "stepfun-ai/GOT-OCR2_0",
+        "model_revision": "cf6b7386bc89a54f09785612ba74cb12de6fa17c",
+        "model_ability": [
+            "ocr"
+        ]
     }
 ]
xinference/model/image/model_spec_modelscope.json CHANGED

@@ -9,7 +9,11 @@
             "text2image",
             "image2image",
             "inpainting"
-        ]
+        ],
+        "default_model_config": {
+            "quantize": true,
+            "quantize_text_encoder": "text_encoder_2"
+        }
     },
     {
         "model_name": "FLUX.1-dev",
@@ -21,7 +25,11 @@
             "text2image",
             "image2image",
             "inpainting"
-        ]
+        ],
+        "default_model_config": {
+            "quantize": true,
+            "quantize_text_encoder": "text_encoder_2"
+        }
     },
     {
         "model_name": "sd3-medium",
@@ -33,7 +41,11 @@
             "text2image",
             "image2image",
             "inpainting"
-        ]
+        ],
+        "default_model_config": {
+            "quantize": true,
+            "quantize_text_encoder": "text_encoder_3"
+        }
     },
     {
         "model_name": "sd-turbo",
@@ -148,5 +160,15 @@
             "model_revision": "62134b9d8e703b5d6f74f1534457287a8bba77ef"
         }
     ]
+    },
+    {
+        "model_name": "GOT-OCR2_0",
+        "model_family": "ocr",
+        "model_id": "stepfun-ai/GOT-OCR2_0",
+        "model_revision": "master",
+        "model_hub": "modelscope",
+        "model_ability": [
+            "ocr"
+        ]
     }
 ]
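With these spec defaults in place, FLUX.1 and sd3-medium quantize their large text encoders out of the box. Because launch-time kwargs override default_model_config (see the merge in core.py above), a user can still opt out. A hedged sketch, assuming extra kwargs passed to launch_model flow through to the image model as in prior releases:

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")
    # passing quantize=False should override the spec's default_model_config
    uid = client.launch_model(
        model_name="FLUX.1-schnell",
        model_type="image",
        quantize=False,
    )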
xinference/model/image/ocr/__init__.py ADDED

@@ -0,0 +1,13 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
xinference/model/image/ocr/got_ocr2.py ADDED

@@ -0,0 +1,76 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import TYPE_CHECKING, Optional
+
+import PIL.Image
+
+if TYPE_CHECKING:
+    from ..core import ImageModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class GotOCR2Model:
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: Optional[str] = None,
+        device: Optional[str] = None,
+        model_spec: Optional["ImageModelFamilyV1"] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._device = device
+        # model info when loading
+        self._model = None
+        self._tokenizer = None
+        # info
+        self._model_spec = model_spec
+        self._abilities = model_spec.model_ability or []  # type: ignore
+        self._kwargs = kwargs
+
+    @property
+    def model_ability(self):
+        return self._abilities
+
+    def load(self):
+        from transformers import AutoModel, AutoTokenizer
+
+        self._tokenizer = AutoTokenizer.from_pretrained(
+            self._model_path, trust_remote_code=True
+        )
+        model = AutoModel.from_pretrained(
+            self._model_path,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True,
+            device_map="cuda",
+            use_safetensors=True,
+            pad_token_id=self._tokenizer.eos_token_id,
+        )
+        self._model = model.eval().cuda()
+
+    def ocr(
+        self,
+        image: PIL.Image,
+        **kwargs,
+    ):
+        logger.info("Got OCR 2.0 kwargs: %s", kwargs)
+        if "ocr_type" not in kwargs:
+            kwargs["ocr_type"] = "ocr"
+        assert self._model is not None
+        # This chat API limits the max new tokens inside.
+        return self._model.chat(self._tokenizer, image, gradio_input=True, **kwargs)
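A minimal sketch of driving the new class directly, using only what the file above defines. Note that load() hard-codes device_map="cuda", so a CUDA device is required; the uid and paths below are illustrative:

    import PIL.Image
    from xinference.model.image.ocr.got_ocr2 import GotOCR2Model

    model = GotOCR2Model("got-ocr2", "/path/to/GOT-OCR2_0")  # hypothetical uid/path
    model.load()
    # ocr_type defaults to "ocr"; the upstream GOT-OCR2.0 chat() also accepts "format"
    image = PIL.Image.open("page.png")
    print(model.ocr(image))

In normal operation this class is constructed for you: create_image_model_instance in core.py routes any spec whose model_ability contains "ocr" to create_ocr_model_instance.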
xinference/model/image/scheduler/__init__.py ADDED

@@ -0,0 +1,13 @@
+# Copyright 2022-2024 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.