xinference 1.11.0__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/__init__.py +8 -0
- xinference/_version.py +3 -3
- xinference/api/oauth2/utils.py +26 -5
- xinference/core/model.py +1 -10
- xinference/device_utils.py +11 -1
- xinference/model/embedding/model_spec.json +70 -0
- xinference/model/image/core.py +20 -10
- xinference/model/image/model_spec.json +55 -3
- xinference/model/image/ocr/__init__.py +5 -0
- xinference/model/image/ocr/deepseek_ocr.py +958 -0
- xinference/model/llm/core.py +2 -0
- xinference/model/llm/llama_cpp/core.py +2 -0
- xinference/model/llm/llm_family.json +319 -6
- xinference/model/llm/lmdeploy/core.py +2 -0
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/core.py +22 -36
- xinference/model/llm/transformers/multimodal/qwen-omni.py +60 -11
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
- xinference/model/llm/transformers/utils.py +0 -20
- xinference/model/llm/vllm/core.py +2 -0
- xinference/model/rerank/model_spec.json +368 -252
- xinference/model/rerank/sentence_transformers/core.py +10 -2
- xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +71 -5
- xinference/thirdparty/indextts/gpt/transformers_gpt2.py +51 -1
- xinference/ui/gradio/media_interface.py +469 -4
- xinference/ui/gradio/utils/__init__.py +19 -0
- xinference/ui/gradio/utils/latex.py +342 -0
- xinference/ui/web/ui/build/asset-manifest.json +3 -3
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/js/{main.45e78536.js → main.87d6859b.js} +3 -3
- xinference/ui/web/ui/build/static/js/main.87d6859b.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/412a6b414a8267c7a349d9beda4593cdf218abf32edaaf339e6a230df40397b8.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e6770a05771952175c9fbf48fce283c9bb1bc8b5763e39edc36d099d1fe16b4a.json +1 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/METADATA +11 -11
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/RECORD +40 -37
- xinference/ui/web/ui/build/static/js/main.45e78536.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +0 -1
- xinference/ui/web/ui/build/static/js/{main.45e78536.js.LICENSE.txt → main.87d6859b.js.LICENSE.txt} +0 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/WHEEL +0 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/top_level.txt +0 -0
xinference/__init__.py
CHANGED
|
@@ -12,6 +12,14 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
# Configure MPS memory management to avoid "invalid low watermark ratio" error in PyTorch 3.13+
|
|
18
|
+
if os.environ.get("PYTORCH_MPS_HIGH_WATERMARK_RATIO") is None:
|
|
19
|
+
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "1.0"
|
|
20
|
+
if os.environ.get("PYTORCH_MPS_LOW_WATERMARK_RATIO") is None:
|
|
21
|
+
os.environ["PYTORCH_MPS_LOW_WATERMARK_RATIO"] = "0.2"
|
|
22
|
+
|
|
15
23
|
from . import _version
|
|
16
24
|
|
|
17
25
|
__version__ = _version.get_versions()["version"]
|
xinference/_version.py
CHANGED
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-
|
|
11
|
+
"date": "2025-11-02T20:34:18+0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "1.11.0"
|
|
14
|
+
"full-revisionid": "117ba29e07a77a7534496f18b9ced1d567c0673f",
|
|
15
|
+
"version": "1.12.0"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
xinference/api/oauth2/utils.py
CHANGED
|
@@ -14,10 +14,8 @@
|
|
|
14
14
|
from datetime import datetime, timedelta
|
|
15
15
|
from typing import Union
|
|
16
16
|
|
|
17
|
+
import bcrypt
|
|
17
18
|
from jose import jwt
|
|
18
|
-
from passlib.context import CryptContext
|
|
19
|
-
|
|
20
|
-
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
|
21
19
|
|
|
22
20
|
|
|
23
21
|
def create_access_token(
|
|
@@ -37,8 +35,31 @@ def create_access_token(
|
|
|
37
35
|
|
|
38
36
|
|
|
39
37
|
def verify_password(plain_password, hashed_password):
|
|
40
|
-
|
|
38
|
+
if isinstance(plain_password, str):
|
|
39
|
+
plain_password = plain_password.encode("utf-8")
|
|
40
|
+
if isinstance(hashed_password, str):
|
|
41
|
+
hashed_password = hashed_password.encode("utf-8")
|
|
42
|
+
|
|
43
|
+
if len(plain_password) > 72:
|
|
44
|
+
import hashlib
|
|
45
|
+
|
|
46
|
+
password_hash = hashlib.sha256(plain_password).digest()
|
|
47
|
+
plain_password = password_hash[:72]
|
|
48
|
+
|
|
49
|
+
return bcrypt.checkpw(plain_password, hashed_password)
|
|
41
50
|
|
|
42
51
|
|
|
43
52
|
def get_password_hash(password):
|
|
44
|
-
|
|
53
|
+
if isinstance(password, str):
|
|
54
|
+
password = password.encode("utf-8")
|
|
55
|
+
|
|
56
|
+
if len(password) > 72:
|
|
57
|
+
import hashlib
|
|
58
|
+
|
|
59
|
+
password_hash = hashlib.sha256(password).digest()
|
|
60
|
+
password = password_hash[:72]
|
|
61
|
+
|
|
62
|
+
salt = bcrypt.gensalt()
|
|
63
|
+
hashed = bcrypt.hashpw(password, salt)
|
|
64
|
+
|
|
65
|
+
return hashed.decode("utf-8")
|
xinference/core/model.py
CHANGED
|
@@ -206,10 +206,6 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
206
206
|
):
|
|
207
207
|
super().__init__()
|
|
208
208
|
|
|
209
|
-
from ..model.llm.llama_cpp.core import XllamaCppModel
|
|
210
|
-
from ..model.llm.lmdeploy.core import LMDeployModel
|
|
211
|
-
from ..model.llm.sglang.core import SGLANGModel
|
|
212
|
-
from ..model.llm.transformers.core import PytorchModel
|
|
213
209
|
from ..model.llm.vllm.core import VLLMModel
|
|
214
210
|
|
|
215
211
|
self._supervisor_address = supervisor_address
|
|
@@ -223,12 +219,7 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
223
219
|
self._pending_requests: asyncio.Queue = asyncio.Queue()
|
|
224
220
|
self._handle_pending_requests_task = None
|
|
225
221
|
self._lock = (
|
|
226
|
-
None
|
|
227
|
-
if isinstance(
|
|
228
|
-
self._model,
|
|
229
|
-
(PytorchModel, VLLMModel, SGLANGModel, LMDeployModel, XllamaCppModel),
|
|
230
|
-
)
|
|
231
|
-
else asyncio.locks.Lock()
|
|
222
|
+
None if getattr(self._model, "allow_batch", False) else asyncio.locks.Lock()
|
|
232
223
|
)
|
|
233
224
|
self._worker_ref = None
|
|
234
225
|
self._progress_tracker_ref = None
|
xinference/device_utils.py
CHANGED
|
@@ -108,7 +108,17 @@ def empty_cache():
|
|
|
108
108
|
if torch.cuda.is_available():
|
|
109
109
|
torch.cuda.empty_cache()
|
|
110
110
|
if torch.backends.mps.is_available():
|
|
111
|
-
|
|
111
|
+
try:
|
|
112
|
+
torch.mps.empty_cache()
|
|
113
|
+
except RuntimeError as e:
|
|
114
|
+
# Handle known MPS memory management issues in PyTorch 3.13+
|
|
115
|
+
if "invalid low watermark ratio" in str(e):
|
|
116
|
+
# This is a known issue with PyTorch 3.13+ on macOS.
|
|
117
|
+
# We can safely ignore this error as it doesn't affect functionality.
|
|
118
|
+
pass
|
|
119
|
+
else:
|
|
120
|
+
# Re-raise other RuntimeErrors
|
|
121
|
+
raise
|
|
112
122
|
if is_xpu_available():
|
|
113
123
|
torch.xpu.empty_cache()
|
|
114
124
|
if is_npu_available():
|
|
xinference/model/embedding/model_spec.json
CHANGED
|
@@ -1190,5 +1190,75 @@
|
|
|
1190
1190
|
],
|
|
1191
1191
|
"no_build_isolation": true
|
|
1192
1192
|
}
|
|
1193
|
+
},
|
|
1194
|
+
{
|
|
1195
|
+
"version": 2,
|
|
1196
|
+
"model_name": "gme-Qwen2-VL-2B-Instruct",
|
|
1197
|
+
"dimensions": 1536,
|
|
1198
|
+
"max_tokens": 32768,
|
|
1199
|
+
"language": [
|
|
1200
|
+
"en",
|
|
1201
|
+
"zh"
|
|
1202
|
+
],
|
|
1203
|
+
"virtualenv": {
|
|
1204
|
+
"packages": [
|
|
1205
|
+
"sentence_transformers",
|
|
1206
|
+
"transformers==4.51.3"
|
|
1207
|
+
]
|
|
1208
|
+
},
|
|
1209
|
+
"model_specs": [
|
|
1210
|
+
{
|
|
1211
|
+
"model_format": "pytorch",
|
|
1212
|
+
"model_src": {
|
|
1213
|
+
"huggingface": {
|
|
1214
|
+
"model_id": "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct",
|
|
1215
|
+
"quantizations": [
|
|
1216
|
+
"none"
|
|
1217
|
+
]
|
|
1218
|
+
},
|
|
1219
|
+
"modelscope": {
|
|
1220
|
+
"model_id": "iic/gme-Qwen2-VL-2B-Instruct",
|
|
1221
|
+
"quantizations": [
|
|
1222
|
+
"none"
|
|
1223
|
+
]
|
|
1224
|
+
}
|
|
1225
|
+
}
|
|
1226
|
+
}
|
|
1227
|
+
]
|
|
1228
|
+
},
|
|
1229
|
+
{
|
|
1230
|
+
"version": 2,
|
|
1231
|
+
"model_name": "gme-Qwen2-VL-7B-Instruct",
|
|
1232
|
+
"dimensions": 3584,
|
|
1233
|
+
"max_tokens": 32768,
|
|
1234
|
+
"language": [
|
|
1235
|
+
"en",
|
|
1236
|
+
"zh"
|
|
1237
|
+
],
|
|
1238
|
+
"virtualenv": {
|
|
1239
|
+
"packages": [
|
|
1240
|
+
"sentence_transformers",
|
|
1241
|
+
"transformers==4.51.3"
|
|
1242
|
+
]
|
|
1243
|
+
},
|
|
1244
|
+
"model_specs": [
|
|
1245
|
+
{
|
|
1246
|
+
"model_format": "pytorch",
|
|
1247
|
+
"model_src": {
|
|
1248
|
+
"huggingface": {
|
|
1249
|
+
"model_id": "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct",
|
|
1250
|
+
"quantizations": [
|
|
1251
|
+
"none"
|
|
1252
|
+
]
|
|
1253
|
+
},
|
|
1254
|
+
"modelscope": {
|
|
1255
|
+
"model_id": "iic/gme-Qwen2-VL-7B-Instruct",
|
|
1256
|
+
"quantizations": [
|
|
1257
|
+
"none"
|
|
1258
|
+
]
|
|
1259
|
+
}
|
|
1260
|
+
}
|
|
1261
|
+
}
|
|
1262
|
+
]
|
|
1193
1263
|
}
|
|
1194
1264
|
]
|
xinference/model/image/core.py
CHANGED
|
@@ -21,6 +21,7 @@ from typing import Dict, List, Literal, Optional, Union
|
|
|
21
21
|
from ...types import PeftModelConfig
|
|
22
22
|
from ..core import CacheableModelSpec, VirtualEnvSettings
|
|
23
23
|
from ..utils import ModelInstanceInfoMixin
|
|
24
|
+
from .ocr.deepseek_ocr import DeepSeekOCRModel
|
|
24
25
|
from .ocr.got_ocr2 import GotOCR2Model
|
|
25
26
|
from .stable_diffusion.core import DiffusionModel
|
|
26
27
|
from .stable_diffusion.mlx import MLXDiffusionModel
|
|
@@ -159,19 +160,29 @@ def create_ocr_model_instance(
|
|
|
159
160
|
model_spec: ImageModelFamilyV2,
|
|
160
161
|
model_path: Optional[str] = None,
|
|
161
162
|
**kwargs,
|
|
162
|
-
) -> GotOCR2Model:
|
|
163
|
+
) -> Union[DeepSeekOCRModel, GotOCR2Model]:
|
|
163
164
|
from .cache_manager import ImageCacheManager
|
|
164
165
|
|
|
165
166
|
if not model_path:
|
|
166
167
|
cache_manager = ImageCacheManager(model_spec)
|
|
167
168
|
model_path = cache_manager.cache()
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
169
|
+
|
|
170
|
+
# Choose OCR model based on model_name
|
|
171
|
+
if model_spec.model_name == "DeepSeek-OCR":
|
|
172
|
+
return DeepSeekOCRModel(
|
|
173
|
+
model_uid,
|
|
174
|
+
model_path,
|
|
175
|
+
model_spec=model_spec,
|
|
176
|
+
**kwargs,
|
|
177
|
+
)
|
|
178
|
+
else:
|
|
179
|
+
# Default to GOT-OCR2 for other OCR models
|
|
180
|
+
return GotOCR2Model(
|
|
181
|
+
model_uid,
|
|
182
|
+
model_path,
|
|
183
|
+
model_spec=model_spec,
|
|
184
|
+
**kwargs,
|
|
185
|
+
)
|
|
175
186
|
|
|
176
187
|
|
|
177
188
|
def create_image_model_instance(
|
|
@@ -187,14 +198,13 @@ def create_image_model_instance(
|
|
|
187
198
|
lightning_version: Optional[str] = None,
|
|
188
199
|
lightning_model_path: Optional[str] = None,
|
|
189
200
|
**kwargs,
|
|
190
|
-
) -> Union[DiffusionModel, MLXDiffusionModel, GotOCR2Model]:
|
|
201
|
+
) -> Union[DiffusionModel, MLXDiffusionModel, GotOCR2Model, DeepSeekOCRModel]:
|
|
191
202
|
from .cache_manager import ImageCacheManager
|
|
192
203
|
|
|
193
204
|
model_spec = match_diffusion(model_name, download_hub)
|
|
194
205
|
if model_spec.model_ability and "ocr" in model_spec.model_ability:
|
|
195
206
|
return create_ocr_model_instance(
|
|
196
207
|
model_uid=model_uid,
|
|
197
|
-
model_name=model_name,
|
|
198
208
|
model_spec=model_spec,
|
|
199
209
|
model_path=model_path,
|
|
200
210
|
**kwargs,
|
|
xinference/model/image/model_spec.json
CHANGED
|
@@ -734,7 +734,7 @@
|
|
|
734
734
|
],
|
|
735
735
|
"virtualenv": {
|
|
736
736
|
"packages": [
|
|
737
|
-
"transformers==4.
|
|
737
|
+
"transformers==4.47.1",
|
|
738
738
|
"httpx==0.24.0",
|
|
739
739
|
"deepspeed==0.12.3",
|
|
740
740
|
"peft==0.4.0",
|
|
@@ -870,7 +870,15 @@
|
|
|
870
870
|
"Q6_K",
|
|
871
871
|
"Q8_0"
|
|
872
872
|
],
|
|
873
|
-
"gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf"
|
|
873
|
+
"gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf",
|
|
874
|
+
"lightning_model_id": "lightx2v/Qwen-Image-Lightning",
|
|
875
|
+
"lightning_versions": [
|
|
876
|
+
"4steps-V1.0-bf16",
|
|
877
|
+
"4steps-V1.0-fp32",
|
|
878
|
+
"8steps-V1.0-bf16",
|
|
879
|
+
"8steps-V1.0-fp32"
|
|
880
|
+
],
|
|
881
|
+
"lightning_model_file_name_template": "Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-{lightning_version}.safetensors"
|
|
874
882
|
},
|
|
875
883
|
"modelscope": {
|
|
876
884
|
"model_id": "Qwen/Qwen-Image-Edit-2509",
|
|
@@ -891,7 +899,15 @@
|
|
|
891
899
|
"Q6_K",
|
|
892
900
|
"Q8_0"
|
|
893
901
|
],
|
|
894
|
-
"gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf"
|
|
902
|
+
"gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf",
|
|
903
|
+
"lightning_model_id": "lightx2v/Qwen-Image-Lightning",
|
|
904
|
+
"lightning_versions": [
|
|
905
|
+
"4steps-V1.0-bf16",
|
|
906
|
+
"4steps-V1.0-fp32",
|
|
907
|
+
"8steps-V1.0-bf16",
|
|
908
|
+
"8steps-V1.0-fp32"
|
|
909
|
+
],
|
|
910
|
+
"lightning_model_file_name_template": "Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-{lightning_version}.safetensors"
|
|
895
911
|
}
|
|
896
912
|
},
|
|
897
913
|
"default_model_config": {
|
|
@@ -911,5 +927,41 @@
|
|
|
911
927
|
],
|
|
912
928
|
"no_build_isolation": true
|
|
913
929
|
}
|
|
930
|
+
},
|
|
931
|
+
{
|
|
932
|
+
"version": 2,
|
|
933
|
+
"model_name": "DeepSeek-OCR",
|
|
934
|
+
"model_family": "ocr",
|
|
935
|
+
"model_ability": [
|
|
936
|
+
"ocr"
|
|
937
|
+
],
|
|
938
|
+
"virtualenv": {
|
|
939
|
+
"packages": [
|
|
940
|
+
"torch==2.6.0",
|
|
941
|
+
"torchvision==0.21.0",
|
|
942
|
+
"torchaudio==2.6.0",
|
|
943
|
+
"transformers==4.46.3",
|
|
944
|
+
"tokenizers==0.20.3",
|
|
945
|
+
"PyMuPDF",
|
|
946
|
+
"img2pdf",
|
|
947
|
+
"einops",
|
|
948
|
+
"easydict",
|
|
949
|
+
"addict",
|
|
950
|
+
"Pillow",
|
|
951
|
+
"numpy",
|
|
952
|
+
"flash-attn==2.7.3"
|
|
953
|
+
],
|
|
954
|
+
"no_build_isolation": true
|
|
955
|
+
},
|
|
956
|
+
"model_src": {
|
|
957
|
+
"huggingface": {
|
|
958
|
+
"model_id": "deepseek-ai/DeepSeek-OCR",
|
|
959
|
+
"model_revision": "main"
|
|
960
|
+
},
|
|
961
|
+
"modelscope": {
|
|
962
|
+
"model_id": "deepseek-ai/DeepSeek-OCR",
|
|
963
|
+
"model_revision": "master"
|
|
964
|
+
}
|
|
965
|
+
}
|
|
914
966
|
}
|
|
915
967
|
]
|
|
xinference/model/image/ocr/__init__.py
CHANGED
|
@@ -11,3 +11,8 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .deepseek_ocr import DeepSeekOCRModel
|
|
16
|
+
from .got_ocr2 import GotOCR2Model
|
|
17
|
+
|
|
18
|
+
__all__ = ["DeepSeekOCRModel", "GotOCR2Model"]
|