xinference 1.11.0__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

Files changed (43)
  1. xinference/__init__.py +8 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/oauth2/utils.py +26 -5
  4. xinference/core/model.py +1 -10
  5. xinference/device_utils.py +11 -1
  6. xinference/model/embedding/model_spec.json +70 -0
  7. xinference/model/image/core.py +20 -10
  8. xinference/model/image/model_spec.json +55 -3
  9. xinference/model/image/ocr/__init__.py +5 -0
  10. xinference/model/image/ocr/deepseek_ocr.py +958 -0
  11. xinference/model/llm/core.py +2 -0
  12. xinference/model/llm/llama_cpp/core.py +2 -0
  13. xinference/model/llm/llm_family.json +319 -6
  14. xinference/model/llm/lmdeploy/core.py +2 -0
  15. xinference/model/llm/sglang/core.py +2 -0
  16. xinference/model/llm/transformers/core.py +22 -36
  17. xinference/model/llm/transformers/multimodal/qwen-omni.py +60 -11
  18. xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
  19. xinference/model/llm/transformers/utils.py +0 -20
  20. xinference/model/llm/vllm/core.py +2 -0
  21. xinference/model/rerank/model_spec.json +368 -252
  22. xinference/model/rerank/sentence_transformers/core.py +10 -2
  23. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +71 -5
  24. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +51 -1
  25. xinference/ui/gradio/media_interface.py +469 -4
  26. xinference/ui/gradio/utils/__init__.py +19 -0
  27. xinference/ui/gradio/utils/latex.py +342 -0
  28. xinference/ui/web/ui/build/asset-manifest.json +3 -3
  29. xinference/ui/web/ui/build/index.html +1 -1
  30. xinference/ui/web/ui/build/static/js/{main.45e78536.js → main.87d6859b.js} +3 -3
  31. xinference/ui/web/ui/build/static/js/main.87d6859b.js.map +1 -0
  32. xinference/ui/web/ui/node_modules/.cache/babel-loader/412a6b414a8267c7a349d9beda4593cdf218abf32edaaf339e6a230df40397b8.json +1 -0
  33. xinference/ui/web/ui/node_modules/.cache/babel-loader/e6770a05771952175c9fbf48fce283c9bb1bc8b5763e39edc36d099d1fe16b4a.json +1 -0
  34. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/METADATA +11 -11
  35. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/RECORD +40 -37
  36. xinference/ui/web/ui/build/static/js/main.45e78536.js.map +0 -1
  37. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +0 -1
  38. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +0 -1
  39. /xinference/ui/web/ui/build/static/js/{main.45e78536.js.LICENSE.txt → main.87d6859b.js.LICENSE.txt} +0 -0
  40. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/WHEEL +0 -0
  41. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/entry_points.txt +0 -0
  42. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/licenses/LICENSE +0 -0
  43. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/top_level.txt +0 -0
xinference/__init__.py CHANGED
@@ -12,6 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
+
+# Configure MPS memory management to avoid "invalid low watermark ratio" error in PyTorch 3.13+
+if os.environ.get("PYTORCH_MPS_HIGH_WATERMARK_RATIO") is None:
+    os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "1.0"
+if os.environ.get("PYTORCH_MPS_LOW_WATERMARK_RATIO") is None:
+    os.environ["PYTORCH_MPS_LOW_WATERMARK_RATIO"] = "0.2"
+
 from . import _version
 
 __version__ = _version.get_versions()["version"]
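
PyTorch reads these watermark variables when the MPS allocator initializes, so placing the defaults in the package __init__ ensures they are set before any torch import that xinference triggers, while still respecting values the user exported. A minimal standalone sketch of the same pattern (the tensor allocation at the end is only an illustrative trigger):

    import os

    # Seed the MPS allocator watermarks before torch is imported;
    # setdefault leaves any user-provided value untouched.
    os.environ.setdefault("PYTORCH_MPS_HIGH_WATERMARK_RATIO", "1.0")
    os.environ.setdefault("PYTORCH_MPS_LOW_WATERMARK_RATIO", "0.2")

    import torch  # imported after the env vars on purpose

    if torch.backends.mps.is_available():
        x = torch.ones(4, device="mps")  # first MPS allocation uses the ratios above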
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-10-19T20:53:12+0800",
+ "date": "2025-11-02T20:34:18+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "baaa40b463e4948762b078f5995d67775df53704",
- "version": "1.11.0"
+ "full-revisionid": "117ba29e07a77a7534496f18b9ced1d567c0673f",
+ "version": "1.12.0"
 }
 '''  # END VERSION_JSON
 
xinference/api/oauth2/utils.py CHANGED
@@ -14,10 +14,8 @@
 from datetime import datetime, timedelta
 from typing import Union
 
+import bcrypt
 from jose import jwt
-from passlib.context import CryptContext
-
-pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
 
 
 def create_access_token(
@@ -37,8 +35,31 @@ def create_access_token(
 
 
 def verify_password(plain_password, hashed_password):
-    return pwd_context.verify(plain_password, hashed_password)
+    if isinstance(plain_password, str):
+        plain_password = plain_password.encode("utf-8")
+    if isinstance(hashed_password, str):
+        hashed_password = hashed_password.encode("utf-8")
+
+    if len(plain_password) > 72:
+        import hashlib
+
+        password_hash = hashlib.sha256(plain_password).digest()
+        plain_password = password_hash[:72]
+
+    return bcrypt.checkpw(plain_password, hashed_password)
 
 
 def get_password_hash(password):
-    return pwd_context.hash(password)
+    if isinstance(password, str):
+        password = password.encode("utf-8")
+
+    if len(password) > 72:
+        import hashlib
+
+        password_hash = hashlib.sha256(password).digest()
+        password = password_hash[:72]
+
+    salt = bcrypt.gensalt()
+    hashed = bcrypt.hashpw(password, salt)
+
+    return hashed.decode("utf-8")
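
The passlib dependency is dropped in favor of calling bcrypt directly. bcrypt only considers the first 72 bytes of a password (newer bcrypt releases reject longer inputs outright), so both helpers pre-hash longer passwords with SHA-256; since a SHA-256 digest is 32 bytes, the [:72] slice is a defensive no-op. A small sketch of the underlying bcrypt calls, assuming the bcrypt package is installed:

    import bcrypt

    password = "correct horse battery staple".encode("utf-8")

    # gensalt() embeds a random salt and cost factor into the resulting hash.
    hashed = bcrypt.hashpw(password, bcrypt.gensalt())  # e.g. b"$2b$12$..."

    assert bcrypt.checkpw(password, hashed)       # matching password verifies
    assert not bcrypt.checkpw(b"wrong", hashed)   # anything else fails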
xinference/core/model.py CHANGED
@@ -206,10 +206,6 @@ class ModelActor(xo.StatelessActor, CancelMixin):
     ):
         super().__init__()
 
-        from ..model.llm.llama_cpp.core import XllamaCppModel
-        from ..model.llm.lmdeploy.core import LMDeployModel
-        from ..model.llm.sglang.core import SGLANGModel
-        from ..model.llm.transformers.core import PytorchModel
         from ..model.llm.vllm.core import VLLMModel
 
         self._supervisor_address = supervisor_address
@@ -223,12 +219,7 @@ class ModelActor(xo.StatelessActor, CancelMixin):
         self._pending_requests: asyncio.Queue = asyncio.Queue()
         self._handle_pending_requests_task = None
         self._lock = (
-            None
-            if isinstance(
-                self._model,
-                (PytorchModel, VLLMModel, SGLANGModel, LMDeployModel, XllamaCppModel),
-            )
-            else asyncio.locks.Lock()
+            None if getattr(self._model, "allow_batch", False) else asyncio.locks.Lock()
         )
         self._worker_ref = None
         self._progress_tracker_ref = None
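
The batching check is now duck-typed on an allow_batch attribute instead of an isinstance test, which removes the heavyweight backend imports from the actor. Presumably the two-line additions to the llama_cpp, lmdeploy, sglang and vllm cores elsewhere in this release set that flag on each class; that is an inference from the file list, not shown in this excerpt. A minimal sketch of the pattern with hypothetical backend classes:

    import asyncio

    class BatchingBackend:
        allow_batch = True  # hypothetical backend that schedules its own requests

    class LegacyBackend:
        pass  # no attribute, so getattr falls back to False

    def make_lock(model):
        # Backends that batch internally skip the per-actor serialization lock.
        return None if getattr(model, "allow_batch", False) else asyncio.Lock()

    assert make_lock(BatchingBackend()) is None
    assert isinstance(make_lock(LegacyBackend()), asyncio.Lock)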
xinference/device_utils.py CHANGED
@@ -108,7 +108,17 @@ def empty_cache():
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
     if torch.backends.mps.is_available():
-        torch.mps.empty_cache()
+        try:
+            torch.mps.empty_cache()
+        except RuntimeError as e:
+            # Handle known MPS memory management issues in PyTorch 3.13+
+            if "invalid low watermark ratio" in str(e):
+                # This is a known issue with PyTorch 3.13+ on macOS.
+                # We can safely ignore this error as it doesn't affect functionality.
+                pass
+            else:
+                # Re-raise other RuntimeErrors
+                raise
     if is_xpu_available():
         torch.xpu.empty_cache()
     if is_npu_available():
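
The except clause matches on the error message so only the known watermark failure is swallowed; any other RuntimeError from torch.mps.empty_cache() still propagates. Condensed into a hypothetical standalone helper:

    import torch

    def safe_mps_empty_cache() -> None:
        if torch.backends.mps.is_available():
            try:
                torch.mps.empty_cache()
            except RuntimeError as e:
                # Swallow only the known watermark error; re-raise everything else.
                if "invalid low watermark ratio" not in str(e):
                    raise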
xinference/model/embedding/model_spec.json CHANGED
@@ -1190,5 +1190,75 @@
       ],
       "no_build_isolation": true
     }
+  },
+  {
+    "version": 2,
+    "model_name": "gme-Qwen2-VL-2B-Instruct",
+    "dimensions": 1536,
+    "max_tokens": 32768,
+    "language": [
+      "en",
+      "zh"
+    ],
+    "virtualenv": {
+      "packages": [
+        "sentence_transformers",
+        "transformers==4.51.3"
+      ]
+    },
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_src": {
+          "huggingface": {
+            "model_id": "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct",
+            "quantizations": [
+              "none"
+            ]
+          },
+          "modelscope": {
+            "model_id": "iic/gme-Qwen2-VL-2B-Instruct",
+            "quantizations": [
+              "none"
+            ]
+          }
+        }
+      }
+    ]
+  },
+  {
+    "version": 2,
+    "model_name": "gme-Qwen2-VL-7B-Instruct",
+    "dimensions": 3584,
+    "max_tokens": 32768,
+    "language": [
+      "en",
+      "zh"
+    ],
+    "virtualenv": {
+      "packages": [
+        "sentence_transformers",
+        "transformers==4.51.3"
+      ]
+    },
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_src": {
+          "huggingface": {
+            "model_id": "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct",
+            "quantizations": [
+              "none"
+            ]
+          },
+          "modelscope": {
+            "model_id": "iic/gme-Qwen2-VL-7B-Instruct",
+            "quantizations": [
+              "none"
+            ]
+          }
+        }
+      }
+    ]
   }
 ]
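
With these specs registered, the gme models launch like any other built-in embedding model. A usage sketch with the xinference Python client (the endpoint URL is a placeholder for a running local server):

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")
    model_uid = client.launch_model(
        model_name="gme-Qwen2-VL-2B-Instruct",
        model_type="embedding",
    )
    model = client.get_model(model_uid)

    result = model.create_embedding("What is Xinference?")
    # The 2B spec above declares 1536 dimensions.
    print(len(result["data"][0]["embedding"]))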
xinference/model/image/core.py CHANGED
@@ -21,6 +21,7 @@ from typing import Dict, List, Literal, Optional, Union
 from ...types import PeftModelConfig
 from ..core import CacheableModelSpec, VirtualEnvSettings
 from ..utils import ModelInstanceInfoMixin
+from .ocr.deepseek_ocr import DeepSeekOCRModel
 from .ocr.got_ocr2 import GotOCR2Model
 from .stable_diffusion.core import DiffusionModel
 from .stable_diffusion.mlx import MLXDiffusionModel
@@ -159,19 +160,29 @@ def create_ocr_model_instance(
     model_spec: ImageModelFamilyV2,
     model_path: Optional[str] = None,
     **kwargs,
-) -> GotOCR2Model:
+) -> Union[DeepSeekOCRModel, GotOCR2Model]:
     from .cache_manager import ImageCacheManager
 
     if not model_path:
         cache_manager = ImageCacheManager(model_spec)
         model_path = cache_manager.cache()
-    model = GotOCR2Model(
-        model_uid,
-        model_path,
-        model_spec=model_spec,
-        **kwargs,
-    )
-    return model
+
+    # Choose OCR model based on model_name
+    if model_spec.model_name == "DeepSeek-OCR":
+        return DeepSeekOCRModel(
+            model_uid,
+            model_path,
+            model_spec=model_spec,
+            **kwargs,
+        )
+    else:
+        # Default to GOT-OCR2 for other OCR models
+        return GotOCR2Model(
+            model_uid,
+            model_path,
+            model_spec=model_spec,
+            **kwargs,
+        )
 
 
 def create_image_model_instance(
@@ -187,14 +198,13 @@ def create_image_model_instance(
     lightning_version: Optional[str] = None,
     lightning_model_path: Optional[str] = None,
     **kwargs,
-) -> Union[DiffusionModel, MLXDiffusionModel, GotOCR2Model]:
+) -> Union[DiffusionModel, MLXDiffusionModel, GotOCR2Model, DeepSeekOCRModel]:
     from .cache_manager import ImageCacheManager
 
     model_spec = match_diffusion(model_name, download_hub)
     if model_spec.model_ability and "ocr" in model_spec.model_ability:
         return create_ocr_model_instance(
             model_uid=model_uid,
-            model_name=model_name,
             model_spec=model_spec,
             model_path=model_path,
             **kwargs,
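
The factory now dispatches on the exact spec name "DeepSeek-OCR" and falls back to GOT-OCR2 for every other OCR-capable spec; create_image_model_instance also stops forwarding model_name, since the matched spec already carries it. The branch is equivalent to a small lookup table, sketched here hypothetically:

    from xinference.model.image.ocr.deepseek_ocr import DeepSeekOCRModel
    from xinference.model.image.ocr.got_ocr2 import GotOCR2Model

    # Hypothetical condensation of the if/else above: name -> class,
    # defaulting to GOT-OCR2 for any other OCR-capable spec.
    _OCR_CLASSES = {"DeepSeek-OCR": DeepSeekOCRModel}

    def pick_ocr_class(model_name: str):
        return _OCR_CLASSES.get(model_name, GotOCR2Model)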
xinference/model/image/model_spec.json CHANGED
@@ -734,7 +734,7 @@
     ],
     "virtualenv": {
       "packages": [
-        "transformers==4.37.2",
+        "transformers==4.47.1",
         "httpx==0.24.0",
        "deepspeed==0.12.3",
         "peft==0.4.0",
@@ -870,7 +870,15 @@
           "Q6_K",
           "Q8_0"
         ],
-        "gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf"
+        "gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf",
+        "lightning_model_id": "lightx2v/Qwen-Image-Lightning",
+        "lightning_versions": [
+          "4steps-V1.0-bf16",
+          "4steps-V1.0-fp32",
+          "8steps-V1.0-bf16",
+          "8steps-V1.0-fp32"
+        ],
+        "lightning_model_file_name_template": "Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-{lightning_version}.safetensors"
       },
       "modelscope": {
         "model_id": "Qwen/Qwen-Image-Edit-2509",
@@ -891,7 +899,15 @@
           "Q6_K",
           "Q8_0"
         ],
-        "gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf"
+        "gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf",
+        "lightning_model_id": "lightx2v/Qwen-Image-Lightning",
+        "lightning_versions": [
+          "4steps-V1.0-bf16",
+          "4steps-V1.0-fp32",
+          "8steps-V1.0-bf16",
+          "8steps-V1.0-fp32"
+        ],
+        "lightning_model_file_name_template": "Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-{lightning_version}.safetensors"
       }
     },
     "default_model_config": {
@@ -911,5 +927,41 @@
       ],
       "no_build_isolation": true
     }
+  },
+  {
+    "version": 2,
+    "model_name": "DeepSeek-OCR",
+    "model_family": "ocr",
+    "model_ability": [
+      "ocr"
+    ],
+    "virtualenv": {
+      "packages": [
+        "torch==2.6.0",
+        "torchvision==0.21.0",
+        "torchaudio==2.6.0",
+        "transformers==4.46.3",
+        "tokenizers==0.20.3",
+        "PyMuPDF",
+        "img2pdf",
+        "einops",
+        "easydict",
+        "addict",
+        "Pillow",
+        "numpy",
+        "flash-attn==2.7.3"
+      ],
+      "no_build_isolation": true
+    },
+    "model_src": {
+      "huggingface": {
+        "model_id": "deepseek-ai/DeepSeek-OCR",
+        "model_revision": "main"
+      },
+      "modelscope": {
+        "model_id": "deepseek-ai/DeepSeek-OCR",
+        "model_revision": "master"
+      }
+    }
   }
 ]
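
The new lightning_* fields let the Qwen-Image-Edit-2509 spec pull distilled Lightning checkpoints alongside the GGUF weights; the file name template expands {lightning_version} against the listed versions. A short illustration of how the template composes (the real resolution lives in xinference's image cache manager, not shown here):

    template = (
        "Qwen-Image-Edit-2509/"
        "Qwen-Image-Edit-2509-Lightning-{lightning_version}.safetensors"
    )
    print(template.format(lightning_version="4steps-V1.0-bf16"))
    # Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16.safetensors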
xinference/model/image/ocr/__init__.py CHANGED
@@ -11,3 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from .deepseek_ocr import DeepSeekOCRModel
+from .got_ocr2 import GotOCR2Model
+
+__all__ = ["DeepSeekOCRModel", "GotOCR2Model"]
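
Exporting both classes from the ocr package keeps the import in image/core.py tidy and makes DeepSeek-OCR reachable through the regular image-model launch path. A hedged client sketch; the ocr() call mirrors how GOT-OCR2 is invoked, so treat the exact method surface as an assumption:

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")
    model_uid = client.launch_model(model_name="DeepSeek-OCR", model_type="image")
    model = client.get_model(model_uid)

    with open("page.png", "rb") as f:
        text = model.ocr(f.read())  # assumption: same ocr() entry point as GOT-OCR2
    print(text)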