xinference 1.11.0__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

Files changed (43)
  1. xinference/__init__.py +8 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/oauth2/utils.py +26 -5
  4. xinference/core/model.py +1 -10
  5. xinference/device_utils.py +11 -1
  6. xinference/model/embedding/model_spec.json +70 -0
  7. xinference/model/image/core.py +20 -10
  8. xinference/model/image/model_spec.json +55 -3
  9. xinference/model/image/ocr/__init__.py +5 -0
  10. xinference/model/image/ocr/deepseek_ocr.py +958 -0
  11. xinference/model/llm/core.py +2 -0
  12. xinference/model/llm/llama_cpp/core.py +2 -0
  13. xinference/model/llm/llm_family.json +319 -6
  14. xinference/model/llm/lmdeploy/core.py +2 -0
  15. xinference/model/llm/sglang/core.py +2 -0
  16. xinference/model/llm/transformers/core.py +22 -36
  17. xinference/model/llm/transformers/multimodal/qwen-omni.py +60 -11
  18. xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
  19. xinference/model/llm/transformers/utils.py +0 -20
  20. xinference/model/llm/vllm/core.py +2 -0
  21. xinference/model/rerank/model_spec.json +368 -252
  22. xinference/model/rerank/sentence_transformers/core.py +10 -2
  23. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +71 -5
  24. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +51 -1
  25. xinference/ui/gradio/media_interface.py +469 -4
  26. xinference/ui/gradio/utils/__init__.py +19 -0
  27. xinference/ui/gradio/utils/latex.py +342 -0
  28. xinference/ui/web/ui/build/asset-manifest.json +3 -3
  29. xinference/ui/web/ui/build/index.html +1 -1
  30. xinference/ui/web/ui/build/static/js/{main.45e78536.js → main.87d6859b.js} +3 -3
  31. xinference/ui/web/ui/build/static/js/main.87d6859b.js.map +1 -0
  32. xinference/ui/web/ui/node_modules/.cache/babel-loader/412a6b414a8267c7a349d9beda4593cdf218abf32edaaf339e6a230df40397b8.json +1 -0
  33. xinference/ui/web/ui/node_modules/.cache/babel-loader/e6770a05771952175c9fbf48fce283c9bb1bc8b5763e39edc36d099d1fe16b4a.json +1 -0
  34. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/METADATA +11 -11
  35. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/RECORD +40 -37
  36. xinference/ui/web/ui/build/static/js/main.45e78536.js.map +0 -1
  37. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +0 -1
  38. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +0 -1
  39. /xinference/ui/web/ui/build/static/js/{main.45e78536.js.LICENSE.txt → main.87d6859b.js.LICENSE.txt} +0 -0
  40. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/WHEEL +0 -0
  41. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/entry_points.txt +0 -0
  42. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/licenses/LICENSE +0 -0
  43. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/top_level.txt +0 -0
xinference/__init__.py CHANGED
@@ -12,6 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
+
+# Configure MPS memory management to avoid "invalid low watermark ratio" error in PyTorch 3.13+
+if os.environ.get("PYTORCH_MPS_HIGH_WATERMARK_RATIO") is None:
+    os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "1.0"
+if os.environ.get("PYTORCH_MPS_LOW_WATERMARK_RATIO") is None:
+    os.environ["PYTORCH_MPS_LOW_WATERMARK_RATIO"] = "0.2"
+
 from . import _version
 
 __version__ = _version.get_versions()["version"]
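
PyTorch reads these watermark variables when the MPS allocator initializes, so placing the defaults in the package __init__ ensures they are set before any torch import that xinference triggers, while still respecting values the user exported. A minimal standalone sketch of the same pattern (the tensor allocation at the end is only an illustrative trigger):

    import os

    # Seed the MPS allocator watermarks before torch is imported;
    # setdefault leaves any user-provided value untouched.
    os.environ.setdefault("PYTORCH_MPS_HIGH_WATERMARK_RATIO", "1.0")
    os.environ.setdefault("PYTORCH_MPS_LOW_WATERMARK_RATIO", "0.2")

    import torch  # imported after the env vars on purpose

    if torch.backends.mps.is_available():
        x = torch.ones(4, device="mps")  # first MPS allocation uses the ratios above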
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-10-19T20:53:12+0800",
+ "date": "2025-11-02T20:34:18+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "baaa40b463e4948762b078f5995d67775df53704",
- "version": "1.11.0"
+ "full-revisionid": "117ba29e07a77a7534496f18b9ced1d567c0673f",
+ "version": "1.12.0"
 }
 '''  # END VERSION_JSON
 
xinference/api/oauth2/utils.py CHANGED
@@ -14,10 +14,8 @@
 from datetime import datetime, timedelta
 from typing import Union
 
+import bcrypt
 from jose import jwt
-from passlib.context import CryptContext
-
-pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
 
 
 def create_access_token(
@@ -37,8 +35,31 @@ def create_access_token(
 
 
 def verify_password(plain_password, hashed_password):
-    return pwd_context.verify(plain_password, hashed_password)
+    if isinstance(plain_password, str):
+        plain_password = plain_password.encode("utf-8")
+    if isinstance(hashed_password, str):
+        hashed_password = hashed_password.encode("utf-8")
+
+    if len(plain_password) > 72:
+        import hashlib
+
+        password_hash = hashlib.sha256(plain_password).digest()
+        plain_password = password_hash[:72]
+
+    return bcrypt.checkpw(plain_password, hashed_password)
 
 
 def get_password_hash(password):
-    return pwd_context.hash(password)
+    if isinstance(password, str):
+        password = password.encode("utf-8")
+
+    if len(password) > 72:
+        import hashlib
+
+        password_hash = hashlib.sha256(password).digest()
+        password = password_hash[:72]
+
+    salt = bcrypt.gensalt()
+    hashed = bcrypt.hashpw(password, salt)
+
+    return hashed.decode("utf-8")
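
The passlib dependency is dropped in favor of calling bcrypt directly. bcrypt only considers the first 72 bytes of a password (newer bcrypt releases reject longer inputs outright), so both helpers pre-hash longer passwords with SHA-256; since a SHA-256 digest is 32 bytes, the [:72] slice is a defensive no-op. A small sketch of the underlying bcrypt calls, assuming the bcrypt package is installed:

    import bcrypt

    password = "correct horse battery staple".encode("utf-8")

    # gensalt() embeds a random salt and cost factor into the resulting hash.
    hashed = bcrypt.hashpw(password, bcrypt.gensalt())  # e.g. b"$2b$12$..."

    assert bcrypt.checkpw(password, hashed)       # matching password verifies
    assert not bcrypt.checkpw(b"wrong", hashed)   # anything else fails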
xinference/core/model.py CHANGED
@@ -206,10 +206,6 @@ class ModelActor(xo.StatelessActor, CancelMixin):
     ):
         super().__init__()
 
-        from ..model.llm.llama_cpp.core import XllamaCppModel
-        from ..model.llm.lmdeploy.core import LMDeployModel
-        from ..model.llm.sglang.core import SGLANGModel
-        from ..model.llm.transformers.core import PytorchModel
         from ..model.llm.vllm.core import VLLMModel
 
         self._supervisor_address = supervisor_address
@@ -223,12 +219,7 @@ class ModelActor(xo.StatelessActor, CancelMixin):
         self._pending_requests: asyncio.Queue = asyncio.Queue()
         self._handle_pending_requests_task = None
         self._lock = (
-            None
-            if isinstance(
-                self._model,
-                (PytorchModel, VLLMModel, SGLANGModel, LMDeployModel, XllamaCppModel),
-            )
-            else asyncio.locks.Lock()
+            None if getattr(self._model, "allow_batch", False) else asyncio.locks.Lock()
         )
         self._worker_ref = None
         self._progress_tracker_ref = None
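
The batching check is now duck-typed on an allow_batch attribute instead of an isinstance test, which removes the heavyweight backend imports from the actor. Presumably the two-line additions to the llama_cpp, lmdeploy, sglang and vllm cores elsewhere in this release set that flag on each class; that is an inference from the file list, not shown in this excerpt. A minimal sketch of the pattern with hypothetical backend classes:

    import asyncio

    class BatchingBackend:
        allow_batch = True  # hypothetical backend that schedules its own requests

    class LegacyBackend:
        pass  # no attribute, so getattr falls back to False

    def make_lock(model):
        # Backends that batch internally skip the per-actor serialization lock.
        return None if getattr(model, "allow_batch", False) else asyncio.Lock()

    assert make_lock(BatchingBackend()) is None
    assert isinstance(make_lock(LegacyBackend()), asyncio.Lock)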
xinference/device_utils.py CHANGED
@@ -108,7 +108,17 @@ def empty_cache():
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
     if torch.backends.mps.is_available():
-        torch.mps.empty_cache()
+        try:
+            torch.mps.empty_cache()
+        except RuntimeError as e:
+            # Handle known MPS memory management issues in PyTorch 3.13+
+            if "invalid low watermark ratio" in str(e):
+                # This is a known issue with PyTorch 3.13+ on macOS.
+                # We can safely ignore this error as it doesn't affect functionality.
+                pass
+            else:
+                # Re-raise other RuntimeErrors
+                raise
     if is_xpu_available():
         torch.xpu.empty_cache()
     if is_npu_available():
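
The except clause matches on the error message so only the known watermark failure is swallowed; any other RuntimeError from torch.mps.empty_cache() still propagates. Condensed into a hypothetical standalone helper:

    import torch

    def safe_mps_empty_cache() -> None:
        if torch.backends.mps.is_available():
            try:
                torch.mps.empty_cache()
            except RuntimeError as e:
                # Swallow only the known watermark error; re-raise everything else.
                if "invalid low watermark ratio" not in str(e):
                    raise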
xinference/model/embedding/model_spec.json CHANGED
@@ -1190,5 +1190,75 @@
       ],
       "no_build_isolation": true
     }
+  },
+  {
+    "version": 2,
+    "model_name": "gme-Qwen2-VL-2B-Instruct",
+    "dimensions": 1536,
+    "max_tokens": 32768,
+    "language": [
+      "en",
+      "zh"
+    ],
+    "virtualenv": {
+      "packages": [
+        "sentence_transformers",
+        "transformers==4.51.3"
+      ]
+    },
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_src": {
+          "huggingface": {
+            "model_id": "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct",
+            "quantizations": [
+              "none"
+            ]
+          },
+          "modelscope": {
+            "model_id": "iic/gme-Qwen2-VL-2B-Instruct",
+            "quantizations": [
+              "none"
+            ]
+          }
+        }
+      }
+    ]
+  },
+  {
+    "version": 2,
+    "model_name": "gme-Qwen2-VL-7B-Instruct",
+    "dimensions": 3584,
+    "max_tokens": 32768,
+    "language": [
+      "en",
+      "zh"
+    ],
+    "virtualenv": {
+      "packages": [
+        "sentence_transformers",
+        "transformers==4.51.3"
+      ]
+    },
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_src": {
+          "huggingface": {
+            "model_id": "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct",
+            "quantizations": [
+              "none"
+            ]
+          },
+          "modelscope": {
+            "model_id": "iic/gme-Qwen2-VL-7B-Instruct",
+            "quantizations": [
+              "none"
+            ]
+          }
+        }
+      }
+    ]
   }
 ]
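
With these specs registered, the gme models launch like any other built-in embedding model. A usage sketch with the xinference Python client (the endpoint URL is a placeholder for a running local server):

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")
    model_uid = client.launch_model(
        model_name="gme-Qwen2-VL-2B-Instruct",
        model_type="embedding",
    )
    model = client.get_model(model_uid)

    result = model.create_embedding("What is Xinference?")
    # The 2B spec above declares 1536 dimensions.
    print(len(result["data"][0]["embedding"]))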
xinference/model/image/core.py CHANGED
@@ -21,6 +21,7 @@ from typing import Dict, List, Literal, Optional, Union
 from ...types import PeftModelConfig
 from ..core import CacheableModelSpec, VirtualEnvSettings
 from ..utils import ModelInstanceInfoMixin
+from .ocr.deepseek_ocr import DeepSeekOCRModel
 from .ocr.got_ocr2 import GotOCR2Model
 from .stable_diffusion.core import DiffusionModel
 from .stable_diffusion.mlx import MLXDiffusionModel
@@ -159,19 +160,29 @@ def create_ocr_model_instance(
     model_spec: ImageModelFamilyV2,
     model_path: Optional[str] = None,
     **kwargs,
-) -> GotOCR2Model:
+) -> Union[DeepSeekOCRModel, GotOCR2Model]:
     from .cache_manager import ImageCacheManager
 
     if not model_path:
         cache_manager = ImageCacheManager(model_spec)
         model_path = cache_manager.cache()
-    model = GotOCR2Model(
-        model_uid,
-        model_path,
-        model_spec=model_spec,
-        **kwargs,
-    )
-    return model
+
+    # Choose OCR model based on model_name
+    if model_spec.model_name == "DeepSeek-OCR":
+        return DeepSeekOCRModel(
+            model_uid,
+            model_path,
+            model_spec=model_spec,
+            **kwargs,
+        )
+    else:
+        # Default to GOT-OCR2 for other OCR models
+        return GotOCR2Model(
+            model_uid,
+            model_path,
+            model_spec=model_spec,
+            **kwargs,
+        )
 
 
 def create_image_model_instance(
@@ -187,14 +198,13 @@ def create_image_model_instance(
     lightning_version: Optional[str] = None,
     lightning_model_path: Optional[str] = None,
     **kwargs,
-) -> Union[DiffusionModel, MLXDiffusionModel, GotOCR2Model]:
+) -> Union[DiffusionModel, MLXDiffusionModel, GotOCR2Model, DeepSeekOCRModel]:
     from .cache_manager import ImageCacheManager
 
     model_spec = match_diffusion(model_name, download_hub)
     if model_spec.model_ability and "ocr" in model_spec.model_ability:
         return create_ocr_model_instance(
             model_uid=model_uid,
-            model_name=model_name,
             model_spec=model_spec,
             model_path=model_path,
             **kwargs,
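
The factory now dispatches on the exact spec name "DeepSeek-OCR" and falls back to GOT-OCR2 for every other OCR-capable spec; create_image_model_instance also stops forwarding model_name, since the matched spec already carries it. The branch is equivalent to a small lookup table, sketched here hypothetically:

    from xinference.model.image.ocr.deepseek_ocr import DeepSeekOCRModel
    from xinference.model.image.ocr.got_ocr2 import GotOCR2Model

    # Hypothetical condensation of the if/else above: name -> class,
    # defaulting to GOT-OCR2 for any other OCR-capable spec.
    _OCR_CLASSES = {"DeepSeek-OCR": DeepSeekOCRModel}

    def pick_ocr_class(model_name: str):
        return _OCR_CLASSES.get(model_name, GotOCR2Model)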
xinference/model/image/model_spec.json CHANGED
@@ -734,7 +734,7 @@
     ],
     "virtualenv": {
       "packages": [
-        "transformers==4.37.2",
+        "transformers==4.47.1",
         "httpx==0.24.0",
        "deepspeed==0.12.3",
         "peft==0.4.0",
@@ -870,7 +870,15 @@
           "Q6_K",
           "Q8_0"
         ],
-        "gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf"
+        "gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf",
+        "lightning_model_id": "lightx2v/Qwen-Image-Lightning",
+        "lightning_versions": [
+          "4steps-V1.0-bf16",
+          "4steps-V1.0-fp32",
+          "8steps-V1.0-bf16",
+          "8steps-V1.0-fp32"
+        ],
+        "lightning_model_file_name_template": "Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-{lightning_version}.safetensors"
       },
       "modelscope": {
         "model_id": "Qwen/Qwen-Image-Edit-2509",
@@ -891,7 +899,15 @@
           "Q6_K",
           "Q8_0"
         ],
-        "gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf"
+        "gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf",
+        "lightning_model_id": "lightx2v/Qwen-Image-Lightning",
+        "lightning_versions": [
+          "4steps-V1.0-bf16",
+          "4steps-V1.0-fp32",
+          "8steps-V1.0-bf16",
+          "8steps-V1.0-fp32"
+        ],
+        "lightning_model_file_name_template": "Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-{lightning_version}.safetensors"
       }
     },
     "default_model_config": {
@@ -911,5 +927,41 @@
       ],
       "no_build_isolation": true
     }
+  },
+  {
+    "version": 2,
+    "model_name": "DeepSeek-OCR",
+    "model_family": "ocr",
+    "model_ability": [
+      "ocr"
+    ],
+    "virtualenv": {
+      "packages": [
+        "torch==2.6.0",
+        "torchvision==0.21.0",
+        "torchaudio==2.6.0",
+        "transformers==4.46.3",
+        "tokenizers==0.20.3",
+        "PyMuPDF",
+        "img2pdf",
+        "einops",
+        "easydict",
+        "addict",
+        "Pillow",
+        "numpy",
+        "flash-attn==2.7.3"
+      ],
+      "no_build_isolation": true
+    },
+    "model_src": {
+      "huggingface": {
+        "model_id": "deepseek-ai/DeepSeek-OCR",
+        "model_revision": "main"
+      },
+      "modelscope": {
+        "model_id": "deepseek-ai/DeepSeek-OCR",
+        "model_revision": "master"
+      }
+    }
   }
 ]
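
The new lightning_* fields let the Qwen-Image-Edit-2509 spec pull distilled Lightning checkpoints alongside the GGUF weights; the file name template expands {lightning_version} against the listed versions. A short illustration of how the template composes (the real resolution lives in xinference's image cache manager, not shown here):

    template = (
        "Qwen-Image-Edit-2509/"
        "Qwen-Image-Edit-2509-Lightning-{lightning_version}.safetensors"
    )
    print(template.format(lightning_version="4steps-V1.0-bf16"))
    # Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16.safetensors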
xinference/model/image/ocr/__init__.py CHANGED
@@ -11,3 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from .deepseek_ocr import DeepSeekOCRModel
+from .got_ocr2 import GotOCR2Model
+
+__all__ = ["DeepSeekOCRModel", "GotOCR2Model"]
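
Exporting both classes from the ocr package keeps the import in image/core.py tidy and makes DeepSeek-OCR reachable through the regular image-model launch path. A hedged client sketch; the ocr() call mirrors how GOT-OCR2 is invoked, so treat the exact method surface as an assumption:

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")
    model_uid = client.launch_model(model_name="DeepSeek-OCR", model_type="image")
    model = client.get_model(model_uid)

    with open("page.png", "rb") as f:
        text = model.ocr(f.read())  # assumption: same ocr() entry point as GOT-OCR2
    print(text)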