xinference 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (48)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +111 -13
  3. xinference/client/restful/restful_client.py +2 -1
  4. xinference/conftest.py +18 -15
  5. xinference/constants.py +2 -0
  6. xinference/core/image_interface.py +252 -0
  7. xinference/core/supervisor.py +3 -10
  8. xinference/deploy/cmdline.py +69 -4
  9. xinference/deploy/local.py +1 -1
  10. xinference/deploy/supervisor.py +1 -1
  11. xinference/model/image/__init__.py +13 -7
  12. xinference/model/image/core.py +17 -1
  13. xinference/model/llm/__init__.py +2 -0
  14. xinference/model/llm/ggml/llamacpp.py +1 -5
  15. xinference/model/llm/llm_family.json +98 -13
  16. xinference/model/llm/llm_family_modelscope.json +98 -7
  17. xinference/model/llm/pytorch/chatglm.py +2 -1
  18. xinference/model/llm/pytorch/internlm2.py +2 -1
  19. xinference/model/llm/sglang/__init__.py +13 -0
  20. xinference/model/llm/sglang/core.py +365 -0
  21. xinference/model/llm/utils.py +35 -12
  22. xinference/model/llm/vllm/core.py +17 -0
  23. xinference/web/ui/build/asset-manifest.json +3 -3
  24. xinference/web/ui/build/index.html +1 -1
  25. xinference/web/ui/build/static/js/{main.78829790.js → main.66b1c4fb.js} +3 -3
  26. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +1 -0
  27. xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +1 -0
  28. xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +1 -0
  29. xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +1 -0
  30. xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +1 -0
  31. xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +1 -0
  32. xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +1 -0
  33. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +1 -0
  34. {xinference-0.9.2.dist-info → xinference-0.9.4.dist-info}/METADATA +8 -5
  35. {xinference-0.9.2.dist-info → xinference-0.9.4.dist-info}/RECORD +40 -37
  36. {xinference-0.9.2.dist-info → xinference-0.9.4.dist-info}/WHEEL +1 -1
  37. xinference/web/ui/build/static/js/main.78829790.js.map +0 -1
  38. xinference/web/ui/node_modules/.cache/babel-loader/22858de5265f2d279fca9f2f54dfb147e4b2704200dfb5d2ad3ec9769417328f.json +0 -1
  39. xinference/web/ui/node_modules/.cache/babel-loader/30670751f55508ef3b861e13dd71b9e5a10d2561373357a12fc3831a0b77fd93.json +0 -1
  40. xinference/web/ui/node_modules/.cache/babel-loader/396f7ce6ae6900bfdb00e369ade8a05045dc1df025610057ff7436d9e58af81c.json +0 -1
  41. xinference/web/ui/node_modules/.cache/babel-loader/5282ee05e064b3a80bc991e9003ddef6a4958471d8f4fc65589dc64553365cdd.json +0 -1
  42. xinference/web/ui/node_modules/.cache/babel-loader/83beb31daa7169fb0057453d4f86411f1effd3e3f7af97472cbd22accbfc65bb.json +0 -1
  43. xinference/web/ui/node_modules/.cache/babel-loader/ddf597663270471b31251b2abb36e3fa093efe20489387d996f993d2c61be112.json +0 -1
  44. xinference/web/ui/node_modules/.cache/babel-loader/e8687f75d2adacd34852b71c41ca17203d6fb4c8999ea55325bb2939f9d9ea90.json +0 -1
  45. /xinference/web/ui/build/static/js/{main.78829790.js.LICENSE.txt → main.66b1c4fb.js.LICENSE.txt} +0 -0
  46. {xinference-0.9.2.dist-info → xinference-0.9.4.dist-info}/LICENSE +0 -0
  47. {xinference-0.9.2.dist-info → xinference-0.9.4.dist-info}/entry_points.txt +0 -0
  48. {xinference-0.9.2.dist-info → xinference-0.9.4.dist-info}/top_level.txt +0 -0
@@ -360,7 +360,7 @@ def worker(
  )


- @cli.command("register", help="Registers a new model with Xinference for deployment.")
+ @cli.command("register", help="Register a new model with Xinference for deployment.")
  @click.option("--endpoint", "-e", type=str, help="Xinference endpoint.")
  @click.option(
  "--model-type",
@@ -397,7 +397,7 @@ def register_model(

  @cli.command(
  "unregister",
- help="Unregisters a model from Xinference, removing it from deployment.",
+ help="Unregister a model from Xinference, removing it from deployment.",
  )
  @click.option("--endpoint", "-e", type=str, help="Xinference endpoint.")
  @click.option(
@@ -423,7 +423,7 @@ def unregister_model(
  )


- @cli.command("registrations", help="Lists all registered models in Xinference.")
+ @cli.command("registrations", help="List all registered models in Xinference.")
  @click.option(
  "--endpoint",
  "-e",
@@ -488,6 +488,22 @@ def list_model_registrations(
  ),
  file=sys.stderr,
  )
+ elif model_type == "rerank":
+ for registration in registrations:
+ model_name = registration["model_name"]
+ model_family = client.get_model_registration(model_type, model_name)
+ table.append(
+ [
+ model_type,
+ model_family["model_name"],
+ model_family["language"],
+ registration["is_builtin"],
+ ]
+ )
+ print(
+ tabulate(table, headers=["Type", "Name", "Language", "Is-built-in"]),
+ file=sys.stderr,
+ )
  elif model_type == "image":
  for registration in registrations:
  model_name = registration["model_name"]
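
For reference, the new rerank branch can also be exercised through the Python client rather than the CLI. A hedged sketch, assuming a local server; the endpoint is a placeholder, and list_model_registrations/get_model_registration are the same client calls the CLI code above uses:

    # Hypothetical usage of the new rerank listing via the RESTful client.
    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    for registration in client.list_model_registrations("rerank"):
        family = client.get_model_registration("rerank", registration["model_name"])
        print(family["model_name"], family["language"], registration["is_builtin"])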
@@ -711,6 +727,9 @@ def model_list(endpoint: Optional[str]):

  llm_table = []
  embedding_table = []
+ rerank_table = []
+ image_table = []
+ audio_table = []
  models = client.list_models()
  for model_uid, model_spec in models.items():
  if model_spec["model_type"] == "LLM":
@@ -733,6 +752,23 @@ def model_list(endpoint: Optional[str]):
  model_spec["dimensions"],
  ]
  )
+ elif model_spec["model_type"] == "rerank":
+ rerank_table.append(
+ [model_uid, model_spec["model_type"], model_spec["model_name"]]
+ )
+ elif model_spec["model_type"] == "image":
+ image_table.append(
+ [
+ model_uid,
+ model_spec["model_type"],
+ model_spec["model_name"],
+ str(model_spec["controlnet"]),
+ ]
+ )
+ elif model_spec["model_type"] == "audio":
+ audio_table.append(
+ [model_uid, model_spec["model_type"], model_spec["model_name"]]
+ )
  if llm_table:
  print(
  tabulate(
@@ -748,6 +784,7 @@ def model_list(endpoint: Optional[str]):
  ),
  file=sys.stderr,
  )
+ print() # add a blank line for better visual experience
  if embedding_table:
  print(
  tabulate(
@@ -761,6 +798,34 @@ def model_list(endpoint: Optional[str]):
  ),
  file=sys.stderr,
  )
+ print()
+ if rerank_table:
+ print(
+ tabulate(
+ rerank_table,
+ headers=["UID", "Type", "Name"],
+ ),
+ file=sys.stderr,
+ )
+ print()
+ if image_table:
+ print(
+ tabulate(
+ image_table,
+ headers=["UID", "Type", "Name", "Controlnet"],
+ ),
+ file=sys.stderr,
+ )
+ print()
+ if audio_table:
+ print(
+ tabulate(
+ audio_table,
+ headers=["UID", "Type", "Name"],
+ ),
+ file=sys.stderr,
+ )
+ print()


  @cli.command(
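
All of the new per-type tables are rendered with the same tabulate call as the existing LLM and embedding tables. A minimal standalone sketch; the UID and model name below are made-up examples:

    from tabulate import tabulate

    # One illustrative image-model row, matching the headers in the diff.
    image_table = [["my-sd-uid", "image", "stable-diffusion-v1.5", "None"]]
    print(tabulate(image_table, headers=["UID", "Type", "Name", "Controlnet"]))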
@@ -981,7 +1046,7 @@ def model_chat(
  )


- @cli.command("vllm-models", help="Query and display models compatible with VLLM.")
+ @cli.command("vllm-models", help="Query and display models compatible with vLLM.")
  @click.option("--endpoint", "-e", type=str, help="Xinference endpoint.")
  def vllm_models(endpoint: Optional[str]):
  endpoint = get_endpoint(endpoint)
@@ -132,4 +132,4 @@ def main(
  auth_config_file=auth_config_file,
  )
  finally:
- local_cluster.terminate()
+ local_cluster.kill()
@@ -98,4 +98,4 @@ def main(
  auth_config_file=auth_config_file,
  )
  finally:
- local_cluster.terminate()
+ local_cluster.kill()
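
Both deploy entry points now call kill() instead of terminate() on the cluster subprocess. Assuming local_cluster is a multiprocessing.Process (as it appears to be here), the change swaps SIGTERM for SIGKILL on POSIX. A minimal sketch of the difference:

    # terminate() sends SIGTERM, which a child can catch or ignore;
    # kill() sends SIGKILL, which cannot be trapped, so shutdown is immediate.
    import multiprocessing
    import time

    def serve():
        while True:
            time.sleep(1)

    if __name__ == "__main__":
        p = multiprocessing.Process(target=serve)
        p.start()
        p.kill()  # untrappable, unlike p.terminate()
        p.join()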
@@ -18,7 +18,9 @@ import os
  from itertools import chain

  from .core import (
+ BUILTIN_IMAGE_MODELS,
  IMAGE_MODEL_DESCRIPTIONS,
+ MODELSCOPE_IMAGE_MODELS,
  ImageModelFamilyV1,
  generate_image_description,
  get_cache_status,
@@ -29,14 +31,18 @@ _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
  _model_spec_modelscope_json = os.path.join(
  os.path.dirname(__file__), "model_spec_modelscope.json"
  )
- BUILTIN_IMAGE_MODELS = dict(
- (spec["model_name"], ImageModelFamilyV1(**spec))
- for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
+ BUILTIN_IMAGE_MODELS.update(
+ dict(
+ (spec["model_name"], ImageModelFamilyV1(**spec))
+ for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
+ )
  )
- MODELSCOPE_IMAGE_MODELS = dict(
- (spec["model_name"], ImageModelFamilyV1(**spec))
- for spec in json.load(
- codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
+ MODELSCOPE_IMAGE_MODELS.update(
+ dict(
+ (spec["model_name"], ImageModelFamilyV1(**spec))
+ for spec in json.load(
+ codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
+ )
  )
  )

@@ -27,6 +27,8 @@ MAX_ATTEMPTS = 3
  logger = logging.getLogger(__name__)

  IMAGE_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
+ BUILTIN_IMAGE_MODELS: Dict[str, "ImageModelFamilyV1"] = {}
+ MODELSCOPE_IMAGE_MODELS: Dict[str, "ImageModelFamilyV1"] = {}


  def get_image_model_descriptions():
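
Defining the dicts once in core.py and filling them with update() in __init__.py (the previous hunk) keeps a single shared object: any module that already did `from .core import BUILTIN_IMAGE_MODELS` sees the populated mapping, whereas rebinding the name would leave importers holding an empty dict. A toy sketch of the pattern, with stand-in names:

    # registry stands in for BUILTIN_IMAGE_MODELS as defined in core.py.
    registry: dict = {}

    def load_specs(specs):
        # update() mutates the shared dict in place; `registry = {...}`
        # would rebind only the local name.
        registry.update({spec["model_name"]: spec for spec in specs})

    load_specs([{"model_name": "stable-diffusion-v1.5"}])
    print("stable-diffusion-v1.5" in registry)  # True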
@@ -151,7 +153,21 @@ def get_cache_status(
  ) -> bool:
  cache_dir = get_cache_dir(model_spec)
  meta_path = os.path.join(cache_dir, "__valid_download")
- return valid_model_revision(meta_path, model_spec.model_revision)
+
+ model_name = model_spec.model_name
+ if model_name in BUILTIN_IMAGE_MODELS and model_name in MODELSCOPE_IMAGE_MODELS:
+ hf_spec = BUILTIN_IMAGE_MODELS[model_name]
+ ms_spec = MODELSCOPE_IMAGE_MODELS[model_name]
+
+ return any(
+ [
+ valid_model_revision(meta_path, hf_spec.model_revision),
+ valid_model_revision(meta_path, ms_spec.model_revision),
+ ]
+ )
+ else: # Usually for UT
+ logger.warning(f"Cannot find builtin image model spec: {model_name}")
+ return valid_model_revision(meta_path, model_spec.model_revision)


  def create_image_model_instance(
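
The rewritten get_cache_status treats a cached image model as valid if its recorded revision matches either the HuggingFace spec or the ModelScope spec for the same model name. A condensed restatement of that logic; valid_model_revision follows the diff, the other names are stand-ins:

    def is_cached(meta_path, hf_revision, ms_revision, valid_model_revision):
        # Valid if the stored revision matches either hub's expected revision.
        return any(
            valid_model_revision(meta_path, rev)
            for rev in (hf_revision, ms_revision)
        )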
@@ -60,6 +60,7 @@ def _install():
  from .pytorch.qwen_vl import QwenVLChatModel
  from .pytorch.vicuna import VicunaPytorchChatModel
  from .pytorch.yi_vl import YiVLChatModel
+ from .sglang.core import SGLANGChatModel, SGLANGModel
  from .vllm.core import VLLMChatModel, VLLMModel

  # register llm classes.
@@ -79,6 +80,7 @@ def _install():
  CtransformersModel,
  ]
  )
+ LLM_CLASSES.extend([SGLANGModel, SGLANGChatModel])
  LLM_CLASSES.extend([VLLMModel, VLLMChatModel])
  LLM_CLASSES.extend(
  [
@@ -52,9 +52,6 @@ class LlamaCppModel(LLM):
  )
  self._llm = None

- def _can_apply_metal(self):
- return self.quantization.lower() in ["q4_0", "q4_1", "q4_k_s", "q4_k_m"]
-
  def _can_apply_cublas(self):
  # TODO: figure out the quantizations supported.
  return True
@@ -78,8 +75,7 @@ class LlamaCppModel(LLM):
  llamacpp_model_config["use_mlock"] = False
  llamacpp_model_config["n_gqa"] = 8

- if self._is_darwin_and_apple_silicon() and self._can_apply_metal():
- # TODO: platform.processor() is not safe, need to be replaced to other method.
+ if self._is_darwin_and_apple_silicon():
  llamacpp_model_config.setdefault("n_gpu_layers", -1)
  elif self._is_linux() and self._can_apply_cublas():
  llamacpp_model_config.setdefault("n_gpu_layers", -1)
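
With the quantization allowlist removed, every quantization now gets full GPU offload on Apple silicon. In llama-cpp-python, n_gpu_layers=-1 offloads all layers (Metal on macOS, CUDA on Linux). An illustrative call; the model path is a placeholder:

    from llama_cpp import Llama

    # n_gpu_layers=-1 asks llama.cpp to offload every layer to the GPU.
    llm = Llama(model_path="/path/to/model.gguf", n_gpu_layers=-1)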
@@ -98,6 +98,72 @@
  ]
  }
  },
+ {
+ "version": 1,
+ "context_length": 8194,
+ "model_name": "codeshell",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University. ",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "WisdomShell/CodeShell-7B",
+ "model_revision": "1c79ab7fd316a62ab41d764facd3548a23fa5dee"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 8194,
+ "model_name": "codeshell-chat",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "WisdomShell/CodeShell-7B-Chat",
+ "model_revision": "3cb06f589b7b1e2f8e728c77280b1114191d24de"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "CodeShell",
+ "system_prompt": "",
+ "roles": [
+ "## human:",
+ "## assistant: "
+ ],
+ "intra_message_sep": "",
+ "inter_message_sep": "",
+ "stop_token_ids": [
+ 70000
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "|||",
+ "|<end>|"
+ ]
+ }
+ },
  {
  "version": 1,
  "context_length": 2048,
@@ -573,7 +639,7 @@
  64797,
  2
  ],
- "stop":[
+ "stop": [
  "<|user|>",
  "<|observation|>"
  ]
@@ -616,7 +682,7 @@
  64797,
  2
  ],
- "stop":[
+ "stop": [
  "<|user|>",
  "<|observation|>"
  ]
@@ -667,7 +733,6 @@
  ]
  }
  },
-
  {
  "version": 1,
  "context_length": 2048,
@@ -715,8 +780,7 @@
  "model_revision": "7f1b7394f74c630f50612a19ba90bd021c373989"
  }
  ]
- }
- ,
+ },
  {
  "version": 1,
  "context_length": 4096,
@@ -1606,7 +1670,10 @@
  "model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf",
  "model_file_name_split_template": "qwen1_5-72b-chat-{quantization}.gguf.{part}",
  "quantization_parts": {
- "q4_k_m": ["a", "b"]
+ "q4_k_m": [
+ "a",
+ "b"
+ ]
  }
  }
  ],
@@ -2658,7 +2725,11 @@
  "context_length": 32768,
  "model_name": "mixtral-v0.1",
  "model_lang": [
- "en", "fr", "it", "de", "es"
+ "en",
+ "fr",
+ "it",
+ "de",
+ "es"
  ],
  "model_ability": [
  "generate"
@@ -2699,7 +2770,11 @@
  "context_length": 32768,
  "model_name": "mixtral-instruct-v0.1",
  "model_lang": [
- "en", "fr", "it", "de", "es"
+ "en",
+ "fr",
+ "it",
+ "de",
+ "es"
  ],
  "model_ability": [
  "chat"
@@ -2798,6 +2873,17 @@
  "model_id": "01-ai/Yi-6B",
  "model_revision": "25beebcb1166b9f49458459eb7b68130b9f9cf4d"
  },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "01-ai/Yi-9B",
+ "model_revision": "f70a5ff8b2e51c5d5b20e649d7b5f4238ffe6d5b"
+ },
  {
  "model_format": "pytorch",
  "model_size_in_billions": 34,
@@ -3264,10 +3350,8 @@
  ],
  "intra_message_sep": "\n",
  "inter_message_sep": "\n",
- "stop_token_ids": [
- ],
- "stop": [
- ]
+ "stop_token_ids": [],
+ "stop": []
  }
  },
  {
@@ -3365,7 +3449,8 @@
  "context_length": 4096,
  "model_name": "deepseek-coder-instruct",
  "model_lang": [
- "en", "zh"
+ "en",
+ "zh"
  ],
  "model_ability": [
  "chat"
@@ -338,7 +338,7 @@
  64797,
  2
  ],
- "stop":[
+ "stop": [
  "<|user|>",
  "<|observation|>"
  ]
@@ -382,13 +382,12 @@
  64797,
  2
  ],
- "stop":[
+ "stop": [
  "<|user|>",
  "<|observation|>"
  ]
  }
  },
-
  {
  "version": 1,
  "context_length": 2048,
@@ -728,6 +727,74 @@
  }
  ]
  },
+ {
+ "version": 1,
+ "context_length": 8194,
+ "model_name": "codeshell",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University. ",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "WisdomShell/CodeShell-7B",
+ "model_revision": "master",
+ "model_hub": "modelscope"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 8194,
+ "model_name": "codeshell-chat",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "WisdomShell/CodeShell-7B-Chat",
+ "model_revision": "master",
+ "model_hub": "modelscope"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "CodeShell",
+ "system_prompt": "",
+ "roles": [
+ "## human:",
+ "## assistant: "
+ ],
+ "intra_message_sep": "",
+ "inter_message_sep": "",
+ "stop_token_ids": [
+ 70000
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "|||",
+ "|<end>|"
+ ]
+ }
+ },
  {
  "version": 1,
  "context_length": 100000,
@@ -970,7 +1037,11 @@
  "context_length": 32768,
  "model_name": "mixtral-v0.1",
  "model_lang": [
- "en", "fr", "it", "de", "es"
+ "en",
+ "fr",
+ "it",
+ "de",
+ "es"
  ],
  "model_ability": [
  "generate"
@@ -996,7 +1067,11 @@
  "context_length": 32768,
  "model_name": "mixtral-instruct-v0.1",
  "model_lang": [
- "en", "fr", "it", "de", "es"
+ "en",
+ "fr",
+ "it",
+ "de",
+ "es"
  ],
  "model_ability": [
  "chat"
@@ -1052,6 +1127,18 @@
  "model_id": "01ai/Yi-6B",
  "model_revision": "master"
  },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "01ai/Yi-9B",
+ "model_revision": "master"
+ },
  {
  "model_format": "pytorch",
  "model_size_in_billions": 34,
@@ -1917,7 +2004,10 @@
  "model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf",
  "model_file_name_split_template": "qwen1_5-72b-chat-{quantization}.gguf.{part}",
  "quantization_parts": {
- "q4_k_m": ["a", "b"]
+ "q4_k_m": [
+ "a",
+ "b"
+ ]
  }
  }
  ],
@@ -1996,7 +2086,8 @@
  "context_length": 4096,
  "model_name": "deepseek-coder-instruct",
  "model_lang": [
- "en", "zh"
+ "en",
+ "zh"
  ],
  "model_ability": [
  "chat"
@@ -148,6 +148,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):

  def _stream_generator():
  last_chunk_text_length = 0
+ chunk_id = "chat-" + str(uuid.uuid1())
  for chunk_text, _ in self._model.stream_chat(
  self._tokenizer, prompt, chat_history, **kwargs
  ):
@@ -157,7 +158,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):
  text=chunk_text, index=0, logprobs=None, finish_reason=None
  )
  yield CompletionChunk(
- id=str(uuid.uuid1()),
+ id=chunk_id,
  object="text_completion",
  created=int(time.time()),
  model=self.model_uid,
@@ -118,6 +118,7 @@ class Internlm2PytorchChatModel(PytorchChatModel):

  def _stream_generator():
  last_chunk_text_length = 0
+ chunk_id = "chat-" + str(uuid.uuid1())
  for chunk_text, _ in self._model.stream_chat(
  self._tokenizer, prompt, input_history, **kwargs
  ):
@@ -127,7 +128,7 @@ class Internlm2PytorchChatModel(PytorchChatModel):
  text=chunk_text, index=0, logprobs=None, finish_reason=None
  )
  yield CompletionChunk(
- id=str(uuid.uuid1()),
+ id=chunk_id,
  object="text_completion",
  created=int(time.time()),
  model=self.model_uid,
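
Both stream fixes (chatglm and internlm2) hoist the uuid out of the generator loop so that every chunk of one streamed completion carries the same id, matching the OpenAI streaming convention. A minimal sketch of the intent:

    import uuid

    def stream_chunks(pieces):
        # Generate the id once per stream, not once per chunk.
        chunk_id = "chat-" + str(uuid.uuid1())
        for text in pieces:
            yield {"id": chunk_id, "object": "text_completion", "text": text}

    assert len({c["id"] for c in stream_chunks(["Hel", "lo"])}) == 1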
@@ -0,0 +1,13 @@
+ # Copyright 2022-2024 XProbe Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.