PyPI - xinference - Versions diffs - 1.8.1rc1__py3-none-any.whl → 1.9.0__py3-none-any.whl - Mend

xinference 1.8.1rc1__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (64) hide show

xinference/model/rerank/custom.py CHANGED Viewed

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import List, Literal, Optional
+from typing import List, Literal
 from ..custom import ModelRegistry
 from .core import RerankModelFamilyV2
@@ -22,10 +22,6 @@ logger = logging.getLogger(__name__)
 class CustomRerankModelFamilyV2(RerankModelFamilyV2):
     version: Literal[2] = 2
-    model_id: Optional[str]  # type: ignore
-    model_revision: Optional[str]  # type: ignore
-    model_uri: Optional[str]
-    model_type: Literal["rerank"] = "rerank"  # for frontend
 UD_RERANKS: List[CustomRerankModelFamilyV2] = []
@@ -35,12 +31,41 @@ class RerankModelRegistry(ModelRegistry):
     model_type = "rerank"
     def __init__(self):
-        from . import BUILTIN_RERANK_MODELS
+        from .rerank_family import BUILTIN_RERANK_MODELS
         super().__init__()
         self.models = UD_RERANKS
         self.builtin_models = list(BUILTIN_RERANK_MODELS.keys())
+    def add_ud_model(self, model_spec):  # Record a user-defined rerank family, then generate its engine config.
+        from . import generate_engine_config_by_model_name  # imported at call time rather than at module load
+        UD_RERANKS.append(model_spec)  # UD_RERANKS is the module-level list of user-defined families
+        generate_engine_config_by_model_name(model_spec)
+    def check_model_uri(self, model_family: "RerankModelFamilyV2"):  # Validate model_uri on every spec; raises ValueError on the first invalid one.
+        from ..utils import is_valid_model_uri
+        for spec in model_family.model_specs:
+            model_uri = spec.model_uri
+            if model_uri and not is_valid_model_uri(model_uri):  # a falsy URI (None or "") is skipped rather than rejected
+                raise ValueError(f"Invalid model URI {model_uri}.")
+    def remove_ud_model(self, model_family: "CustomRerankModelFamilyV2"):  # Drop the family from UD_RERANKS and its entry from RERANK_ENGINES.
+        from .rerank_family import RERANK_ENGINES
+        UD_RERANKS.remove(model_family)  # list.remove raises ValueError when the family is not in the list
+        del RERANK_ENGINES[model_family.model_name]  # raises KeyError when no engine entry exists under this name
+    def remove_ud_model_files(self, model_family: "CustomRerankModelFamilyV2"):  # Call RerankCacheManager.unregister_custom_model once per spec of the family.
+        from .cache_manager import RerankCacheManager
+        _model_family = model_family.copy()  # model_specs is reassigned on this copy, not on the argument
+        for spec in model_family.model_specs:
+            _model_family.model_specs = [spec]  # narrow the copy to a single spec before building the cache manager
+            cache_manager = RerankCacheManager(_model_family)
+            cache_manager.unregister_custom_model(self.model_type)  # model_type is the class attribute "rerank"
 def get_user_defined_reranks() -> List[CustomRerankModelFamilyV2]:
     from ..custom import RegistryManager
@@ -49,11 +74,11 @@ def get_user_defined_reranks() -> List[CustomRerankModelFamilyV2]:
     return registry.get_custom_models()
-def register_rerank(model_spec: CustomRerankModelFamilyV2, persist: bool):
+def register_rerank(model_family: CustomRerankModelFamilyV2, persist: bool):
     from ..custom import RegistryManager
     registry = RegistryManager.get_registry("rerank")
-    registry.register(model_spec, persist)
+    registry.register(model_family, persist)
 def unregister_rerank(model_name: str, raise_error: bool = True):

xinference/model/rerank/model_spec.json CHANGED Viewed

@@ -1,215 +1,254 @@
 [
-  {
-    "version": 2,
-    "model_name": "bge-reranker-large",
-    "type": "normal",
-    "language": [
-      "en",
-      "zh"
-    ],
-    "max_tokens": 512,
-    "model_src": {
-      "huggingface": {
-        "model_id": "BAAI/bge-reranker-large",
-        "model_revision": "27c9168d479987529781de8474dff94d69beca11"
-      },
-      "modelscope": {
-        "model_id": "Xorbits/bge-reranker-large",
-        "model_revision": "v0.0.1"
-      }
+    {
+        "version": 2,
+        "model_name": "bge-reranker-large",
+        "type": "normal",
+        "language": ["en", "zh"],
+        "max_tokens": 512,
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_src": {
+                    "huggingface": {
+                        "model_id": "BAAI/bge-reranker-large",
+                        "model_revision": "27c9168d479987529781de8474dff94d69beca11",
+                        "quantizations": ["none"]
+                    },
+                    "modelscope": {
+                        "model_id": "Xorbits/bge-reranker-large",
+                        "model_revision": "v0.0.1",
+                        "quantizations": ["none"]
+                    }
+                }
+            }
+        ]
+    },
+    {
+        "version": 2,
+        "model_name": "bge-reranker-base",
+        "type": "normal",
+        "language": ["en", "zh"],
+        "max_tokens": 512,
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_src": {
+                    "huggingface": {
+                        "model_id": "BAAI/bge-reranker-base",
+                        "model_revision": "465b4b7ddf2be0a020c8ad6e525b9bb1dbb708ae",
+                        "quantizations": ["none"]
+                    },
+                    "modelscope": {
+                        "model_id": "Xorbits/bge-reranker-base",
+                        "model_revision": "v0.0.1",
+                        "quantizations": ["none"]
+                    }
+                }
+            }
+        ]
+    },
+    {
+        "version": 2,
+        "model_name": "bce-reranker-base_v1",
+        "type": "normal",
+        "language": ["en", "zh"],
+        "max_tokens": 512,
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_src": {
+                    "huggingface": {
+                        "model_id": "maidalun1020/bce-reranker-base_v1",
+                        "model_revision": "eaa31a577a0574e87a08959bd229ca14ce1b5496",
+                        "quantizations": ["none"]
+                    },
+                    "modelscope": {
+                        "model_id": "maidalun/bce-reranker-base_v1",
+                        "model_revision": "v0.0.1",
+                        "quantizations": ["none"]
+                    }
+                }
+            }
+        ]
+    },
+    {
+        "version": 2,
+        "model_name": "bge-reranker-v2-m3",
+        "type": "normal",
+        "language": ["en", "zh", "multilingual"],
+        "max_tokens": 8192,
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_src": {
+                    "huggingface": {
+                        "model_id": "BAAI/bge-reranker-v2-m3",
+                        "model_revision": "12e974610ba9083ed95f3edf08d7e899581f4de4",
+                        "quantizations": ["none"]
+                    },
+                    "modelscope": {
+                        "model_id": "AI-ModelScope/bge-reranker-v2-m3",
+                        "quantizations": ["none"]
+                    }
+                }
+            }
+        ]
+    },
+    {
+        "version": 2,
+        "model_name": "bge-reranker-v2-gemma",
+        "type": "LLM-based",
+        "language": ["en", "zh", "multilingual"],
+        "max_tokens": 8192,
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_src": {
+                    "huggingface": {
+                        "model_id": "BAAI/bge-reranker-v2-gemma",
+                        "model_revision": "1787044f8b6fb740a9de4557c3a12377f84d9e17",
+                        "quantizations": ["none"]
+                    },
+                    "modelscope": {
+                        "model_id": "AI-ModelScope/bge-reranker-v2-gemma",
+                        "quantizations": ["none"]
+                    }
+                }
+            }
+        ]
+    },
+    {
+        "version": 2,
+        "model_name": "bge-reranker-v2-minicpm-layerwise",
+        "type": "LLM-based layerwise",
+        "language": ["en", "zh", "multilingual"],
+        "max_tokens": 2048,
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_src": {
+                    "huggingface": {
+                        "model_id": "BAAI/bge-reranker-v2-minicpm-layerwise",
+                        "model_revision": "47b5332b296c4d8cb6ee2c60502cc62a0d708881",
+                        "quantizations": ["none"]
+                    },
+                    "modelscope": {
+                        "model_id": "mirror013/bge-reranker-v2-minicpm-layerwise",
+                        "quantizations": ["none"]
+                    }
+                }
+            }
+        ]
+    },
+    {
+        "version": 2,
+        "model_name": "jina-reranker-v2",
+        "type": "normal",
+        "language": ["en", "zh", "multilingual"],
+        "max_tokens": 1024,
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_src": {
+                    "huggingface": {
+                        "model_id": "jinaai/jina-reranker-v2-base-multilingual",
+                        "model_revision": "298e48cada4a9318650d7fbd795f63827f884087",
+                        "quantizations": ["none"]
+                    }
+                }
+            }
+        ]
+    },
+    {
+        "version": 2,
+        "model_name": "minicpm-reranker",
+        "type": "normal",
+        "language": ["en", "zh"],
+        "max_tokens": 1024,
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_src": {
+                    "huggingface": {
+                        "model_id": "openbmb/MiniCPM-Reranker",
+                        "model_revision": "5d2fd7345b6444c89d4c0fa59c92272888f3f2d0",
+                        "quantizations": ["none"]
+                    },
+                    "modelscope": {
+                        "model_id": "OpenBMB/MiniCPM-Reranker",
+                        "quantizations": ["none"]
+                    }
+                }
+            }
+        ]
+    },
+    {
+        "version": 2,
+        "model_name": "Qwen3-Reranker-0.6B",
+        "type": "normal",
+        "language": ["en", "zh"],
+        "max_tokens": 32768,
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_src": {
+                    "huggingface": {
+                        "model_id": "Qwen/Qwen3-Reranker-0.6B",
+                        "model_revision": "6e9e69830b95c52b5fd889b7690dda3329508de3",
+                        "quantizations": ["none"]
+                    },
+                    "modelscope": {
+                        "model_id": "Qwen/Qwen3-Reranker-0.6B",
+                        "quantizations": ["none"]
+                    }
+                }
+            }
+        ]
+    },
+    {
+        "version": 2,
+        "model_name": "Qwen3-Reranker-4B",
+        "type": "normal",
+        "language": ["en", "zh"],
+        "max_tokens": 32768,
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_src": {
+                    "huggingface": {
+                        "model_id": "Qwen/Qwen3-Reranker-4B",
+                        "model_revision": "f16fc5d5d2b9b1d0db8280929242745d79794ef5",
+                        "quantizations": ["none"]
+                    },
+                    "modelscope": {
+                        "model_id": "Qwen/Qwen3-Reranker-4B",
+                        "quantizations": ["none"]
+                    }
+                }
+            }
+        ]
+    },
+    {
+        "version": 2,
+        "model_name": "Qwen3-Reranker-8B",
+        "type": "normal",
+        "language": ["en", "zh"],
+        "max_tokens": 32768,
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_src": {
+                    "huggingface": {
+                        "model_id": "Qwen/Qwen3-Reranker-8B",
+                        "model_revision": "5fa94080caafeaa45a15d11f969d7978e087a3db",
+                        "quantizations": ["none"]
+                    },
+                    "modelscope": {
+                        "model_id": "Qwen/Qwen3-Reranker-8B",
+                        "quantizations": ["none"]
+                    }
+                }
+            }
+        ]
     }
-  },
-  {
-    "version": 2,
-    "model_name": "bge-reranker-base",
-    "type": "normal",
-    "language": [
-      "en",
-      "zh"
-    ],
-    "max_tokens": 512,
-    "model_src": {
-      "huggingface": {
-        "model_id": "BAAI/bge-reranker-base",
-        "model_revision": "465b4b7ddf2be0a020c8ad6e525b9bb1dbb708ae"
-      },
-      "modelscope": {
-        "model_id": "Xorbits/bge-reranker-base",
-        "model_revision": "v0.0.1"
-      }
-    }
-  },
-  {
-    "version": 2,
-    "model_name": "bce-reranker-base_v1",
-    "type": "normal",
-    "language": [
-      "en",
-      "zh"
-    ],
-    "max_tokens": 512,
-    "model_src": {
-      "huggingface": {
-        "model_id": "maidalun1020/bce-reranker-base_v1",
-        "model_revision": "eaa31a577a0574e87a08959bd229ca14ce1b5496"
-      },
-      "modelscope": {
-        "model_id": "maidalun/bce-reranker-base_v1",
-        "model_revision": "v0.0.1"
-      }
-    }
-  },
-  {
-    "version": 2,
-    "model_name": "bge-reranker-v2-m3",
-    "type": "normal",
-    "language": [
-      "en",
-      "zh",
-      "multilingual"
-    ],
-    "max_tokens": 8192,
-    "model_src": {
-      "huggingface": {
-        "model_id": "BAAI/bge-reranker-v2-m3",
-        "model_revision": "12e974610ba9083ed95f3edf08d7e899581f4de4"
-      },
-      "modelscope": {
-        "model_id": "AI-ModelScope/bge-reranker-v2-m3"
-      }
-    }
-  },
-  {
-    "version": 2,
-    "model_name": "bge-reranker-v2-gemma",
-    "type": "LLM-based",
-    "language": [
-      "en",
-      "zh",
-      "multilingual"
-    ],
-    "max_tokens": 8192,
-    "model_src": {
-      "huggingface": {
-        "model_id": "BAAI/bge-reranker-v2-gemma",
-        "model_revision": "1787044f8b6fb740a9de4557c3a12377f84d9e17"
-      },
-      "modelscope": {
-        "model_id": "AI-ModelScope/bge-reranker-v2-gemma"
-      }
-    }
-  },
-  {
-    "version": 2,
-    "model_name": "bge-reranker-v2-minicpm-layerwise",
-    "type": "LLM-based layerwise",
-    "language": [
-      "en",
-      "zh",
-      "multilingual"
-    ],
-    "max_tokens": 2048,
-    "model_src": {
-      "huggingface": {
-        "model_id": "BAAI/bge-reranker-v2-minicpm-layerwise",
-        "model_revision": "47b5332b296c4d8cb6ee2c60502cc62a0d708881"
-      },
-      "modelscope": {
-        "model_id": "mirror013/bge-reranker-v2-minicpm-layerwise"
-      }
-    }
-  },
-  {
-    "version": 2,
-    "model_name": "jina-reranker-v2",
-    "type": "normal",
-    "language": [
-      "en",
-      "zh",
-      "multilingual"
-    ],
-    "max_tokens": 1024,
-    "model_src": {
-      "huggingface": {
-        "model_id": "jinaai/jina-reranker-v2-base-multilingual",
-        "model_revision": "298e48cada4a9318650d7fbd795f63827f884087"
-      }
-    }
-  },
-  {
-    "version": 2,
-    "model_name": "minicpm-reranker",
-    "type": "normal",
-    "language": [
-      "en",
-      "zh"
-    ],
-    "max_tokens": 1024,
-    "model_src": {
-      "huggingface": {
-        "model_id": "openbmb/MiniCPM-Reranker",
-        "model_revision": "5d2fd7345b6444c89d4c0fa59c92272888f3f2d0"
-      },
-      "modelscope": {
-        "model_id": "OpenBMB/MiniCPM-Reranker"
-      }
-    }
-  },
-  {
-    "version": 2,
-    "model_name": "Qwen3-Reranker-0.6B",
-    "type": "normal",
-    "language": [
-      "en",
-      "zh"
-    ],
-    "max_tokens": 32768,
-    "model_src": {
-      "huggingface": {
-        "model_id": "Qwen/Qwen3-Reranker-0.6B",
-        "model_revision": "6e9e69830b95c52b5fd889b7690dda3329508de3"
-      },
-      "modelscope": {
-        "model_id": "Qwen/Qwen3-Reranker-0.6B"
-      }
-    }
-  },
-  {
-    "version": 2,
-    "model_name": "Qwen3-Reranker-4B",
-    "type": "normal",
-    "language": [
-      "en",
-      "zh"
-    ],
-    "max_tokens": 32768,
-    "model_src": {
-      "huggingface": {
-        "model_id": "Qwen/Qwen3-Reranker-4B",
-        "model_revision": "f16fc5d5d2b9b1d0db8280929242745d79794ef5"
-      },
-      "modelscope": {
-        "model_id": "Qwen/Qwen3-Reranker-4B"
-      }
-    }
-  },
-  {
-    "version": 2,
-    "model_name": "Qwen3-Reranker-8B",
-    "type": "normal",
-    "language": [
-      "en",
-      "zh"
-    ],
-    "max_tokens": 32768,
-    "model_src": {
-      "huggingface": {
-        "model_id": "Qwen/Qwen3-Reranker-8B",
-        "model_revision": "5fa94080caafeaa45a15d11f969d7978e087a3db"
-      },
-      "modelscope": {
-        "model_id": "Qwen/Qwen3-Reranker-8B"
-      }
-    }
-  }
 ]

xinference/model/rerank/rerank_family.py ADDED Viewed

@@ -0,0 +1,137 @@
+# Copyright 2022-2025 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Type, Union
+if TYPE_CHECKING:
+    from .core import RerankModel, RerankModelFamilyV2, RerankSpecV1
+FLAG_RERANKER_CLASSES: List[Type["RerankModel"]] = []  # starts empty; not populated in the code shown here — presumably filled elsewhere, confirm
+SENTENCE_TRANSFORMER_CLASSES: List[Type["RerankModel"]] = []  # starts empty; same caveat as above
+VLLM_CLASSES: List[Type["RerankModel"]] = []  # starts empty; same caveat as above
+BUILTIN_RERANK_MODELS: Dict[str, "RerankModelFamilyV2"] = {}  # model name -> family; match_rerank consults this before user-defined families
+logger = logging.getLogger(__name__)
+def match_rerank(  # Return a copy of the named rerank family whose model_specs holds only the first matching spec.
+    model_name: str,  # looked up in BUILTIN_RERANK_MODELS first, then among user-defined families
+    model_format: Optional[str] = None,  # falsy value accepts any spec.model_format
+    quantization: Optional[str] = None,  # falsy value accepts any quantization; otherwise compared case-insensitively
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,  # only the value "modelscope" changes spec selection inside this function
+) -> "RerankModelFamilyV2":
+    from ..utils import download_from_modelscope
+    from .custom import get_user_defined_reranks
+    target_family = None
+    if model_name in BUILTIN_RERANK_MODELS:  # a built-in name wins over a user-defined family with the same name
+        target_family = BUILTIN_RERANK_MODELS[model_name]
+    else:
+        for model_family in get_user_defined_reranks():
+            if model_name == model_family.model_name:
+                target_family = model_family
+                break
+    if target_family is None:
+        raise ValueError(  # the message lists built-in model names only
+            f"Rerank model {model_name} not found, available models: {BUILTIN_RERANK_MODELS.keys()}"
+        )
+    if download_hub == "modelscope" or download_from_modelscope():  # modelscope specs are tried first, huggingface specs after them
+        specs = [
+            x for x in target_family.model_specs if x.model_hub == "modelscope"
+        ] + [x for x in target_family.model_specs if x.model_hub == "huggingface"]
+    else:
+        specs = [x for x in target_family.model_specs if x.model_hub == "huggingface"]  # specs from any other hub are never candidates
+    def _match_quantization(q: Union[str, None], _quantization: str):  # returns the spec's own spelling on a match, else None
+        # Currently, the quantization name could include both uppercase and lowercase letters,
+        # so it is necessary to ensure that the case sensitivity does not
+        # affect the matching results.
+        if q is None:
+            return None
+        return _quantization if q.lower() == _quantization.lower() else None
+    def _apply_format_to_model_id(_spec: "RerankSpecV1", q: str) -> "RerankSpecV1":  # mutates _spec in place and returns it
+        # Different quantized versions of some models use different model ids,
+        # Here we check the `{}` in the model id to format the id.
+        if _spec.model_id and "{" in _spec.model_id:
+            _spec.model_id = _spec.model_id.format(quantization=q)
+        return _spec
+    for spec in specs:  # the first spec that passes both filters is returned
+        matched_quantization = _match_quantization(quantization, spec.quantization)
+        if (
+            model_format
+            and model_format != spec.model_format
+            or quantization
+            and matched_quantization is None
+        ):
+            continue  # spec rejected by an explicitly requested format or quantization
+        # Copy spec to avoid _apply_format_to_model_id modify the original spec.
+        spec = spec.copy()
+        _family = target_family.copy()  # the result is built on a copy of the family
+        if quantization:
+            _family.model_specs = [
+                _apply_format_to_model_id(spec, matched_quantization)
+            ]
+            return _family
+        else:
+            # TODO: If user does not specify quantization, just use the first one
+            _q = "none" if spec.model_format == "pytorch" else spec.quantization  # pytorch specs always format the id with "none"
+            _family.model_specs = [_apply_format_to_model_id(spec, _q)]
+            return _family
+    raise ValueError(  # reached when the family exists but no candidate spec passed the filters
+        f"Rerank model {model_name} with format {model_format} and quantization {quantization} not found."
+    )
+# { rerank model name -> { engine name -> engine params } }
+RERANK_ENGINES: Dict[str, Dict[str, List[Dict[str, Type["RerankModel"]]]]] = {}  # NOTE(review): check_engine_by_model_name_and_engine reads keys "model_name", "model_format", "quantization" and "rerank_class" from each params dict, so not every value is a RerankModel class — confirm annotation
+SUPPORTED_ENGINES: Dict[str, List[Type["RerankModel"]]] = {}  # starts empty; not populated in the code shown here
+def check_engine_by_model_name_and_engine(  # Return the rerank class registered in RERANK_ENGINES for this model/engine pair; raises ValueError otherwise.
+    model_engine: str,  # matched against the registered engine names ignoring case
+    model_name: str,  # must be a key of RERANK_ENGINES
+    model_format: Optional[str],  # falsy value skips the format filter
+    quantization: Optional[str],  # falsy value skips the quantization filter
+) -> Type["RerankModel"]:
+    def get_model_engine_from_spell(engine_str: str) -> str:  # map engine_str to the registered spelling, ignoring case
+        for engine in RERANK_ENGINES[model_name].keys():
+            if engine.lower() == engine_str.lower():
+                return engine
+        return engine_str  # no match: returned unchanged, so the membership check below raises
+    if model_name not in RERANK_ENGINES:  # checked before the helper indexes RERANK_ENGINES[model_name]
+        raise ValueError(f"Model {model_name} not found.")
+    model_engine = get_model_engine_from_spell(model_engine)
+    if model_engine not in RERANK_ENGINES[model_name]:
+        raise ValueError(f"Model {model_name} cannot be run on engine {model_engine}.")
+    match_params = RERANK_ENGINES[model_name][model_engine]
+    for param in match_params:  # the first entry that passes every filter wins
+        if model_name != param["model_name"]:
+            continue
+        if (model_format and model_format != param["model_format"]) or (
+            quantization and quantization != param["quantization"]  # NOTE(review): exact, case-sensitive comparison, unlike _match_quantization in match_rerank — confirm intended
+        ):
+            continue
+        return param["rerank_class"]
+    raise ValueError(f"Model {model_name} cannot be run on engine {model_engine}.")  # engine is registered, but no entry matched the requested format/quantization

xinference/model/rerank/sentence_transformers/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+# Copyright 2022-2025 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

xinference 1.8.1rc1__py3-none-any.whl → 1.9.0__py3-none-any.whl

Potentially problematic release.

xinference 1.8.1rc1__py3-none-any.whl → 1.9.0__py3-none-any.whl