xinference 0.16.1__py3-none-any.whl → 0.16.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/conftest.py +0 -8
- xinference/constants.py +2 -0
- xinference/core/model.py +34 -2
- xinference/core/supervisor.py +5 -5
- xinference/core/utils.py +9 -10
- xinference/core/worker.py +8 -5
- xinference/deploy/cmdline.py +5 -0
- xinference/deploy/utils.py +7 -4
- xinference/model/audio/core.py +6 -2
- xinference/model/audio/model_spec.json +1 -1
- xinference/model/core.py +3 -1
- xinference/model/embedding/core.py +6 -2
- xinference/model/image/core.py +6 -2
- xinference/model/image/ocr/got_ocr2.py +3 -0
- xinference/model/llm/__init__.py +33 -0
- xinference/model/llm/core.py +4 -4
- xinference/model/llm/llm_family.json +87 -0
- xinference/model/llm/llm_family.py +68 -2
- xinference/model/llm/llm_family_modelscope.json +91 -0
- xinference/model/llm/llm_family_openmind_hub.json +1359 -0
- xinference/model/llm/vllm/core.py +2 -1
- xinference/model/rerank/core.py +9 -1
- xinference/model/utils.py +7 -0
- xinference/model/video/core.py +6 -2
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.b76aeeb7.js → main.2f269bb3.js} +3 -3
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +1 -0
- {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/METADATA +5 -4
- {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/RECORD +37 -36
- xinference/web/ui/build/static/js/main.b76aeeb7.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/32ea2c04cf0bba2761b4883d2c40cc259952c94d2d6bb774e510963ca37aac0a.json +0 -1
- /xinference/web/ui/build/static/js/{main.b76aeeb7.js.LICENSE.txt → main.2f269bb3.js.LICENSE.txt} +0 -0
- {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/LICENSE +0 -0
- {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/WHEEL +0 -0
- {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/entry_points.txt +0 -0
- {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.py

@@ -41,6 +41,7 @@ from ..utils import (
     create_symlink,
     download_from_csghub,
     download_from_modelscope,
+    download_from_openmind_hub,
     is_valid_model_uri,
     parse_uri,
     retry_download,
@@ -239,6 +240,7 @@ LLAMA_CLASSES: List[Type[LLM]] = []
 
 BUILTIN_LLM_FAMILIES: List["LLMFamilyV1"] = []
 BUILTIN_MODELSCOPE_LLM_FAMILIES: List["LLMFamilyV1"] = []
+BUILTIN_OPENMIND_HUB_LLM_FAMILIES: List["LLMFamilyV1"] = []
 BUILTIN_CSGHUB_LLM_FAMILIES: List["LLMFamilyV1"] = []
 
 SGLANG_CLASSES: List[Type[LLM]] = []
@@ -301,6 +303,9 @@ def cache(
     elif llm_spec.model_hub == "modelscope":
         logger.info(f"Caching from Modelscope: {llm_spec.model_id}")
         return cache_from_modelscope(llm_family, llm_spec, quantization)
+    elif llm_spec.model_hub == "openmind_hub":
+        logger.info(f"Caching from openmind_hub: {llm_spec.model_id}")
+        return cache_from_openmind_hub(llm_family, llm_spec, quantization)
     elif llm_spec.model_hub == "csghub":
         logger.info(f"Caching from CSGHub: {llm_spec.model_id}")
         return cache_from_csghub(llm_family, llm_spec, quantization)
@@ -474,7 +479,7 @@ def _skip_download(
     model_revision: Optional[str],
     quantization: Optional[str] = None,
 ) -> bool:
-    if model_format == "pytorch":
+    if model_format in ["pytorch", "mindspore"]:
         model_hub_to_meta_path = {
             "huggingface": _get_meta_path(
                 cache_dir, model_format, "huggingface", quantization
@@ -482,6 +487,9 @@ def _skip_download(
             "modelscope": _get_meta_path(
                 cache_dir, model_format, "modelscope", quantization
             ),
+            "openmind_hub": _get_meta_path(
+                cache_dir, model_format, "openmind_hub", quantization
+            ),
             "csghub": _get_meta_path(cache_dir, model_format, "csghub", quantization),
         }
         if valid_model_revision(model_hub_to_meta_path[model_hub], model_revision):
@@ -702,6 +710,50 @@ def cache_from_modelscope(
     return cache_dir
 
 
+def cache_from_openmind_hub(
+    llm_family: LLMFamilyV1,
+    llm_spec: "LLMSpecV1",
+    quantization: Optional[str] = None,
+) -> str:
+    """
+    Cache model from openmind_hub. Return the cache directory.
+    """
+    from openmind_hub import snapshot_download
+
+    cache_dir = _get_cache_dir(llm_family, llm_spec)
+    if _skip_download(
+        cache_dir,
+        llm_spec.model_format,
+        llm_spec.model_hub,
+        llm_spec.model_revision,
+        quantization,
+    ):
+        return cache_dir
+
+    if llm_spec.model_format in ["pytorch", "mindspore"]:
+        download_dir = retry_download(
+            snapshot_download,
+            llm_family.model_name,
+            {
+                "model_size": llm_spec.model_size_in_billions,
+                "model_format": llm_spec.model_format,
+            },
+            llm_spec.model_id,
+            revision=llm_spec.model_revision,
+        )
+        create_symlink(download_dir, cache_dir)
+
+    else:
+        raise ValueError(f"Unsupported format: {llm_spec.model_format}")
+
+    meta_path = _get_meta_path(
+        cache_dir, llm_spec.model_format, llm_spec.model_hub, quantization
+    )
+    _generate_meta_file(meta_path, llm_family, llm_spec, quantization)
+
+    return cache_dir
+
+
 def cache_from_huggingface(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
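For orientation, a minimal sketch of what the new cache_from_openmind_hub branch boils down to: fetch a snapshot from openmind_hub, then symlink it into the xinference cache. Only the snapshot_download call and its revision argument are taken from the hunk above; the repository id below is purely illustrative (real ids come from the new llm_family_openmind_hub.json), and the retry/meta-file bookkeeping is omitted.

    # Illustrative only: download a model snapshot the way the new code path does,
    # without xinference's retry_download wrapper or meta-file generation.
    from openmind_hub import snapshot_download

    download_dir = snapshot_download(
        "some-org/some-model",  # hypothetical repo id, not from this diff
        revision=None,          # a real call pins llm_spec.model_revision
    )
    print(download_dir)         # xinference symlinks this directory into its cache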
@@ -893,7 +945,9 @@ def match_llm(
     model_format: Optional[str] = None,
     model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
 ) -> Optional[Tuple[LLMFamilyV1, LLMSpecV1, str]]:
     """
     Find an LLM family, spec, and quantization that satisfy given criteria.
@@ -924,6 +978,12 @@ def match_llm(
             + BUILTIN_LLM_FAMILIES
             + user_defined_llm_families
         )
+    elif download_hub == "openmind_hub":
+        all_families = (
+            BUILTIN_OPENMIND_HUB_LLM_FAMILIES
+            + BUILTIN_LLM_FAMILIES
+            + user_defined_llm_families
+        )
     elif download_hub == "csghub":
         all_families = (
             BUILTIN_CSGHUB_LLM_FAMILIES
@@ -938,6 +998,12 @@ def match_llm(
             + BUILTIN_LLM_FAMILIES
             + user_defined_llm_families
         )
+    elif download_from_openmind_hub():
+        all_families = (
+            BUILTIN_OPENMIND_HUB_LLM_FAMILIES
+            + BUILTIN_LLM_FAMILIES
+            + user_defined_llm_families
+        )
     elif download_from_csghub():
         all_families = (
             BUILTIN_CSGHUB_LLM_FAMILIES
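Taken together, the two match_llm hunks make openmind_hub selectable in both ways the other hubs already support: explicitly via the new download_hub literal, or implicitly via the download_from_openmind_hub() helper imported at the top of the file (added to xinference/model/utils.py in this release). A hedged sketch of the explicit path follows; the model name and format are illustrative choices, and the call simply returns None if no registered family/spec/quantization matches.

    # Illustrative only: ask the registry for a (family, spec, quantization)
    # triple, preferring specs hosted on openmind_hub.
    from xinference.model.llm.llm_family import match_llm

    matched = match_llm(
        "llama-3.2-vision-instruct",  # model name, illustrative choice
        model_format="pytorch",
        download_hub="openmind_hub",  # literal added in this release
    )
    if matched is not None:
        llm_family, llm_spec, quantization = matched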
xinference/model/llm/llm_family_modelscope.json

@@ -363,6 +363,97 @@
             "<|eom_id|>"
         ]
     },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision-instruct",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-11B-Vision-Instruct",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-90B-Vision-Instruct",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+        "stop_token_ids": [
+            128001,
+            128008,
+            128009
+        ],
+        "stop": [
+            "<|end_of_text|>",
+            "<|eot_id|>",
+            "<|eom_id|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "generate",
+            "vision"
+        ],
+        "model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-11B-Vision",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-90B-Vision",
+                "model_hub": "modelscope"
+            }
+        ]
+    },
     {
         "version": 1,
         "context_length": 2048,