xinference 0.14.0.post1__py3-none-any.whl → 0.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +54 -0
- xinference/client/handlers.py +0 -3
- xinference/client/restful/restful_client.py +51 -134
- xinference/constants.py +1 -0
- xinference/core/chat_interface.py +1 -4
- xinference/core/image_interface.py +33 -5
- xinference/core/model.py +28 -2
- xinference/core/supervisor.py +37 -0
- xinference/core/worker.py +128 -84
- xinference/deploy/cmdline.py +1 -4
- xinference/model/audio/core.py +11 -3
- xinference/model/audio/funasr.py +114 -0
- xinference/model/audio/model_spec.json +20 -0
- xinference/model/audio/model_spec_modelscope.json +21 -0
- xinference/model/audio/whisper.py +1 -1
- xinference/model/core.py +12 -0
- xinference/model/image/core.py +3 -4
- xinference/model/image/model_spec.json +41 -13
- xinference/model/image/model_spec_modelscope.json +30 -10
- xinference/model/image/stable_diffusion/core.py +53 -2
- xinference/model/llm/__init__.py +2 -0
- xinference/model/llm/llm_family.json +83 -1
- xinference/model/llm/llm_family_modelscope.json +85 -1
- xinference/model/llm/pytorch/core.py +1 -0
- xinference/model/llm/pytorch/minicpmv26.py +247 -0
- xinference/model/llm/sglang/core.py +72 -34
- xinference/model/llm/vllm/core.py +38 -0
- xinference/model/video/__init__.py +62 -0
- xinference/model/video/core.py +178 -0
- xinference/model/video/diffusers.py +180 -0
- xinference/model/video/model_spec.json +11 -0
- xinference/model/video/model_spec_modelscope.json +12 -0
- xinference/types.py +10 -24
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.ef2a203a.js → main.17ca0398.js} +3 -3
- xinference/web/ui/build/static/js/main.17ca0398.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/71684495d995c7e266eecc6a0ad8ea0284cc785f80abddf863789c57a6134969.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/80acd1edf31542ab1dcccfad02cb4b38f3325cff847a781fcce97500cfd6f878.json +1 -0
- {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/METADATA +14 -8
- {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/RECORD +47 -40
- xinference/web/ui/build/static/js/main.ef2a203a.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +0 -1
- /xinference/web/ui/build/static/js/{main.ef2a203a.js.LICENSE.txt → main.17ca0398.js.LICENSE.txt} +0 -0
- {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/LICENSE +0 -0
- {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/WHEEL +0 -0
- {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/top_level.txt +0 -0
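Most of this release is incremental, but the file list shows one brand-new surface: an `xinference/model/video` package (core, a diffusers backend, and model spec files), plus matching plumbing in `restful_api.py` and the client. A minimal sketch of how the new model type would be driven through the existing Python client follows; the model name and the `text_to_video` handle method are assumptions inferred from the new modules, not confirmed by this diff, so treat them as placeholders.

# Sketch only: "CogVideoX-2b" and text_to_video() are assumptions inferred
# from the new xinference/model/video/* files; check the bundled
# model_spec.json and the client handlers for the real names.
from xinference.client import Client

client = Client("http://127.0.0.1:9997")

# model_type="video" should route through the new video model registry.
model_uid = client.launch_model(
    model_name="CogVideoX-2b",  # placeholder name
    model_type="video",
)

model = client.get_model(model_uid)
result = model.text_to_video(prompt="A sailboat gliding across a calm lake")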
xinference/model/image/model_spec.json CHANGED
@@ -1,11 +1,29 @@
 [
+    {
+        "model_name": "FLUX.1-schnell",
+        "model_family": "stable_diffusion",
+        "model_id": "black-forest-labs/FLUX.1-schnell",
+        "model_revision": "768d12a373ed5cc9ef9a9dea7504dc09fcc14842",
+        "model_ability": [
+            "text2image"
+        ]
+    },
+    {
+        "model_name": "FLUX.1-dev",
+        "model_family": "stable_diffusion",
+        "model_id": "black-forest-labs/FLUX.1-dev",
+        "model_revision": "01aa605f2c300568dd6515476f04565a954fcb59",
+        "model_ability": [
+            "text2image"
+        ]
+    },
     {
         "model_name": "sd3-medium",
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/stable-diffusion-3-medium-diffusers",
         "model_revision": "ea42f8cef0f178587cf766dc8129abd379c90671",
-        "
-        "
+        "model_ability": [
+            "text2image",
             "image2image"
         ]
     },
@@ -14,8 +32,8 @@
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/sd-turbo",
         "model_revision": "1681ed09e0cff58eeb41e878a49893228b78b94c",
-        "
-        "
+        "model_ability": [
+            "text2image"
         ]
     },
     {
@@ -23,8 +41,8 @@
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/sdxl-turbo",
         "model_revision": "f4b0486b498f84668e828044de1d0c8ba486e05b",
-        "
-        "
+        "model_ability": [
+            "text2image"
         ]
     },
     {
@@ -32,8 +50,8 @@
         "model_family": "stable_diffusion",
         "model_id": "runwayml/stable-diffusion-v1-5",
         "model_revision": "1d0c4ebf6ff58a5caecab40fa1406526bca4b5b9",
-        "
-        "
+        "model_ability": [
+            "text2image",
             "image2image"
         ],
         "controlnet": [
@@ -86,8 +104,8 @@
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/stable-diffusion-xl-base-1.0",
         "model_revision": "f898a3e026e802f68796b95e9702464bac78d76f",
-        "
-        "
+        "model_ability": [
+            "text2image",
             "image2image"
         ],
         "controlnet": [
@@ -111,12 +129,22 @@
             }
         ]
     },
+    {
+        "model_name": "kolors",
+        "model_family": "stable_diffusion",
+        "model_id": "Kwai-Kolors/Kolors-diffusers",
+        "model_revision": "7e091c75199e910a26cd1b51ed52c28de5db3711",
+        "model_ability": [
+            "text2image",
+            "image2image"
+        ]
+    },
     {
         "model_name": "stable-diffusion-inpainting",
         "model_family": "stable_diffusion",
         "model_id": "runwayml/stable-diffusion-inpainting",
         "model_revision": "51388a731f57604945fddd703ecb5c50e8e7b49d",
-        "
+        "model_ability": [
             "inpainting"
         ]
     },
@@ -125,7 +153,7 @@
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/stable-diffusion-2-inpainting",
         "model_revision": "81a84f49b15956b60b4272a405ad3daef3da4590",
-        "
+        "model_ability": [
            "inpainting"
         ]
     },
@@ -134,7 +162,7 @@
         "model_family": "stable_diffusion",
         "model_id": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
         "model_revision": "115134f363124c53c7d878647567d04daf26e41e",
-        "
+        "model_ability": [
             "inpainting"
         ]
     }
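Two takeaways from this spec change: the black-forest-labs FLUX.1 models and Kwai's kolors are newly registered, and every entry now declares a `model_ability` list in place of the old (truncated) field. A short sketch of launching one of the new entries through the Python client, assuming a running local server; `text_to_image` follows the existing RESTful image handle:

# Sketch, assuming a local xinference server on the default port.
from xinference.client import Client

client = Client("http://127.0.0.1:9997")
model_uid = client.launch_model(model_name="FLUX.1-schnell", model_type="image")
model = client.get_model(model_uid)

# FLUX.1-schnell declares only "text2image", so the ability check added in
# stable_diffusion/core.py (see that diff below) would reject image2image.
image = model.text_to_image(
    prompt="a watercolor fox in a snowy forest",
    size="1024*1024",
)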
xinference/model/image/model_spec_modelscope.json CHANGED
@@ -1,12 +1,32 @@
 [
+    {
+        "model_name": "FLUX.1-schnell",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/FLUX.1-schnell",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image"
+        ]
+    },
+    {
+        "model_name": "FLUX.1-dev",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/FLUX.1-dev",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image"
+        ]
+    },
     {
         "model_name": "sd3-medium",
         "model_family": "stable_diffusion",
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/stable-diffusion-3-medium-diffusers",
         "model_revision": "master",
-        "
-        "
+        "model_ability": [
+            "text2image",
             "image2image"
         ]
     },
@@ -16,8 +36,8 @@
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/sd-turbo",
         "model_revision": "master",
-        "
-        "
+        "model_ability": [
+            "text2image"
         ]
     },
     {
@@ -26,8 +46,8 @@
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/sdxl-turbo",
         "model_revision": "master",
-        "
-        "
+        "model_ability": [
+            "text2image"
         ]
     },
     {
@@ -36,8 +56,8 @@
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/stable-diffusion-v1-5",
         "model_revision": "master",
-        "
-        "
+        "model_ability": [
+            "text2image",
             "image2image"
         ],
         "controlnet": [
@@ -91,8 +111,8 @@
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/stable-diffusion-xl-base-1.0",
         "model_revision": "master",
-        "
-        "
+        "model_ability": [
+            "text2image",
             "image2image"
         ],
         "controlnet": [
xinference/model/image/stable_diffusion/core.py CHANGED
@@ -51,7 +51,7 @@ class DiffusionModel:
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
-        self._abilities = abilities
+        self._abilities = abilities or []
         self._kwargs = kwargs
 
     def _apply_lora(self):
@@ -88,7 +88,48 @@ class DiffusionModel:
         if sys.platform != "darwin" and torch_dtype is None:
             # The following params crashes on Mac M2
             self._kwargs["torch_dtype"] = torch.float16
+            self._kwargs["variant"] = "fp16"
             self._kwargs["use_safetensors"] = True
+        if isinstance(torch_dtype, str):
+            self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)
+
+        quantize_text_encoder = self._kwargs.pop("quantize_text_encoder", None)
+        if quantize_text_encoder:
+            try:
+                from transformers import BitsAndBytesConfig, T5EncoderModel
+            except ImportError:
+                error_message = "Failed to import module 'transformers'"
+                installation_guide = [
+                    "Please make sure 'transformers' is installed. ",
+                    "You can install it by `pip install transformers`\n",
+                ]
+
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+            try:
+                import bitsandbytes  # noqa: F401
+            except ImportError:
+                error_message = "Failed to import module 'bitsandbytes'"
+                installation_guide = [
+                    "Please make sure 'bitsandbytes' is installed. ",
+                    "You can install it by `pip install bitsandbytes`\n",
+                ]
+
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+            for text_encoder_name in quantize_text_encoder.split(","):
+                quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+                quantization_kwargs = {}
+                if torch_dtype:
+                    quantization_kwargs["torch_dtype"] = torch_dtype
+                text_encoder = T5EncoderModel.from_pretrained(
+                    self._model_path,
+                    subfolder=text_encoder_name,
+                    quantization_config=quantization_config,
+                    **quantization_kwargs,
+                )
+                self._kwargs[text_encoder_name] = text_encoder
+                self._kwargs["device_map"] = "balanced"
 
         logger.debug("Loading model %s", AutoPipelineModel)
         self._model = AutoPipelineModel.from_pretrained(
@@ -98,7 +139,7 @@ class DiffusionModel:
         if self._kwargs.get("cpu_offload", False):
             logger.debug("CPU offloading model")
             self._model.enable_model_cpu_offload()
-        else:
+        elif not self._kwargs.get("device_map"):
             logger.debug("Loading model to available device")
             self._model = move_model_to_available_device(self._model)
         # Recommended if your computer has < 64 GB of RAM
@@ -141,6 +182,12 @@ class DiffusionModel:
         else:
             raise ValueError(f"Unsupported response format: {response_format}")
 
+    @classmethod
+    def _filter_kwargs(cls, kwargs: dict):
+        for arg in ["negative_prompt", "num_inference_steps"]:
+            if not kwargs.get(arg):
+                kwargs.pop(arg, None)
+
     def text_to_image(
         self,
         prompt: str,
@@ -152,6 +199,7 @@ class DiffusionModel:
         # References:
        # https://huggingface.co/docs/diffusers/main/en/api/pipelines/controlnet_sdxl
         width, height = map(int, re.split(r"[^\d]+", size))
+        self._filter_kwargs(kwargs)
         return self._call_model(
             prompt=prompt,
             height=height,
@@ -174,6 +222,8 @@ class DiffusionModel:
         if "controlnet" in self._kwargs:
             model = self._model
         else:
+            if "image2image" not in self._abilities:
+                raise RuntimeError(f"{self._model_uid} does not support image2image")
             if self._i2i_model is not None:
                 model = self._i2i_model
             else:
@@ -186,6 +236,7 @@ class DiffusionModel:
         width, height = map(int, re.split(r"[^\d]+", size))
         kwargs["width"] = width
         kwargs["height"] = height
+        self._filter_kwargs(kwargs)
         return self._call_model(
             image=image,
             prompt=prompt,
xinference/model/llm/__init__.py CHANGED
@@ -125,6 +125,7 @@ def _install():
     from .pytorch.internlm2 import Internlm2PytorchChatModel
     from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
     from .pytorch.minicpmv25 import MiniCPMV25Model
+    from .pytorch.minicpmv26 import MiniCPMV26Model
     from .pytorch.qwen_vl import QwenVLChatModel
     from .pytorch.vicuna import VicunaPytorchChatModel
     from .pytorch.yi_vl import YiVLChatModel
@@ -167,6 +168,7 @@ def _install():
             PytorchModel,
             CogVLM2Model,
             MiniCPMV25Model,
+            MiniCPMV26Model,
             Glm4VModel,
         ]
     )
xinference/model/llm/llm_family.json CHANGED
@@ -1797,6 +1797,16 @@
                     "none"
                 ],
                 "model_id": "meta-llama/Meta-Llama-3.1-70B"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3.1-405B"
             }
         ]
     },
@@ -1975,6 +1985,32 @@
                     "none"
                 ],
                 "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-bf16"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3.1-405B-Instruct"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "hugging-quants/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4"
             }
         ],
         "prompt_style": {
@@ -6771,7 +6807,7 @@
     },
     {
         "version":1,
-        "context_length":
+        "context_length":8192,
         "model_name":"MiniCPM-Llama3-V-2_5",
         "model_lang":[
             "en",
@@ -6811,6 +6847,52 @@
             ]
         }
     },
+    {
+        "version":1,
+        "context_length":32768,
+        "model_name":"MiniCPM-V-2.6",
+        "model_lang":[
+            "en",
+            "zh"
+        ],
+        "model_ability":[
+            "chat",
+            "vision"
+        ],
+        "model_description":"MiniCPM-V 2.6 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters.",
+        "model_specs":[
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":8,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"openbmb/MiniCPM-V-2_6",
+                "model_revision":"3f7a8da1b7a8b928b5ee229fae33cf43fd64cf31"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":8,
+                "quantizations":[
+                    "4-bit"
+                ],
+                "model_id":"openbmb/MiniCPM-V-2_6-int4",
+                "model_revision":"051e2df6505f1fc4305f2c9bd42ed90db8bf4874"
+            }
+        ],
+        "prompt_style":{
+            "style_name":"QWEN",
+            "system_prompt":"You are a helpful assistant",
+            "roles":[
+                "user",
+                "assistant"
+            ],
+            "stop": [
+                "<|im_end|>",
+                "<|endoftext|>"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 4096,
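MiniCPM-V-2.6 lands as a chat+vision model with the QWEN prompt style and a 32k context. Once launched, it should be reachable through the OpenAI-compatible endpoint xinference already exposes; the sketch below assumes a local server and a placeholder image URL, with the `model` field taking the uid returned by `launch_model`.

# Hedged sketch against xinference's OpenAI-compatible /v1 endpoint.
import openai

client = openai.Client(api_key="not-used", base_url="http://127.0.0.1:9997/v1")
response = client.chat.completions.create(
    model="MiniCPM-V-2.6",  # the model_uid returned by launch_model
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this picture."},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
            ],
        }
    ],
)
print(response.choices[0].message.content)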
xinference/model/llm/llm_family_modelscope.json CHANGED
@@ -234,6 +234,17 @@
                 ],
                 "model_id": "LLM-Research/Meta-Llama-3.1-70B",
                 "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "LLM-Research/Meta-Llama-3.1-405B",
+                "model_hub": "modelscope"
             }
         ]
     },
@@ -325,6 +336,35 @@
                 ],
                 "model_id": "LLM-Research/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
                 "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "LLM-Research/Meta-Llama-3.1-405B-Instruct",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "LLM-Research/Meta-Llama-3.1-405B-Instruct-AWQ-INT4",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "LLM-Research/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4",
+                "model_hub": "modelscope"
             }
         ],
         "prompt_style": {
@@ -4509,7 +4549,7 @@
     },
     {
         "version":1,
-        "context_length":
+        "context_length":8192,
         "model_name":"MiniCPM-Llama3-V-2_5",
         "model_lang":[
             "en",
@@ -4551,6 +4591,50 @@
             ]
         }
     },
+    {
+        "version":1,
+        "context_length":32768,
+        "model_name":"MiniCPM-V-2.6",
+        "model_lang":[
+            "en",
+            "zh"
+        ],
+        "model_ability":[
+            "chat",
+            "vision"
+        ],
+        "model_description":"MiniCPM-V 2.6 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters.",
+        "model_specs":[
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":8,
+                "quantizations":[
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id":"OpenBMB/MiniCPM-V-2_6",
+                "model_revision":"master"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":8,
+                "quantizations":[
+                    "4-bit"
+                ],
+                "model_hub": "modelscope",
+                "model_id":"OpenBMB/MiniCPM-V-2_6-int4",
+                "model_revision":"master"
+            }
+        ],
+        "prompt_style":{
+            "style_name":"QWEN",
+            "system_prompt":"You are a helpful assistant",
+            "roles":[
+                "user",
+                "assistant"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 2048,