xinference 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (210)
  1. xinference/_compat.py +2 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +72 -66
  4. xinference/core/model.py +78 -25
  5. xinference/core/supervisor.py +81 -10
  6. xinference/core/utils.py +12 -8
  7. xinference/core/worker.py +32 -0
  8. xinference/model/audio/core.py +5 -0
  9. xinference/model/audio/cosyvoice.py +25 -3
  10. xinference/model/audio/f5tts.py +15 -10
  11. xinference/model/audio/f5tts_mlx.py +260 -0
  12. xinference/model/audio/fish_speech.py +35 -111
  13. xinference/model/audio/model_spec.json +19 -3
  14. xinference/model/audio/model_spec_modelscope.json +9 -0
  15. xinference/model/audio/utils.py +32 -0
  16. xinference/model/image/core.py +69 -1
  17. xinference/model/image/model_spec.json +145 -4
  18. xinference/model/image/model_spec_modelscope.json +150 -4
  19. xinference/model/image/stable_diffusion/core.py +45 -13
  20. xinference/model/llm/__init__.py +2 -0
  21. xinference/model/llm/llm_family.json +143 -0
  22. xinference/model/llm/llm_family.py +15 -36
  23. xinference/model/llm/llm_family_modelscope.json +148 -0
  24. xinference/model/llm/mlx/core.py +37 -32
  25. xinference/model/llm/transformers/cogagent.py +272 -0
  26. xinference/model/llm/transformers/core.py +2 -0
  27. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  28. xinference/model/llm/utils.py +28 -3
  29. xinference/model/llm/vllm/core.py +48 -9
  30. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  31. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  32. xinference/model/llm/vllm/xavier/block.py +112 -0
  33. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  34. xinference/model/llm/vllm/xavier/block_tracker.py +116 -0
  35. xinference/model/llm/vllm/xavier/engine.py +247 -0
  36. xinference/model/llm/vllm/xavier/executor.py +132 -0
  37. xinference/model/llm/vllm/xavier/scheduler.py +422 -0
  38. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  39. xinference/model/llm/vllm/xavier/test/test_xavier.py +122 -0
  40. xinference/model/llm/vllm/xavier/transfer.py +298 -0
  41. xinference/model/video/diffusers.py +14 -0
  42. xinference/model/video/model_spec.json +15 -0
  43. xinference/model/video/model_spec_modelscope.json +16 -0
  44. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  45. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  46. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  47. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  48. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  49. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  50. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  51. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  52. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  53. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  54. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  55. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  56. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  57. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  58. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  59. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  60. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  61. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  62. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  63. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  64. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  65. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  66. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  67. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  68. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  69. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  70. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  71. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  72. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  73. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  74. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  75. xinference/thirdparty/fish_speech/fish_speech/conversation.py +94 -83
  76. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +63 -20
  77. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +1 -26
  78. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  79. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  80. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  81. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
  82. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +7 -13
  83. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  84. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  85. xinference/thirdparty/fish_speech/tools/fish_e2e.py +2 -2
  86. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  87. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  88. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  89. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  90. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  91. xinference/thirdparty/fish_speech/tools/llama/generate.py +117 -89
  92. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  93. xinference/thirdparty/fish_speech/tools/schema.py +11 -28
  94. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  95. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  96. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  97. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  98. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  99. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  100. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  101. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  102. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  103. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  104. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  105. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  106. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  107. xinference/thirdparty/matcha/utils/utils.py +2 -2
  108. xinference/types.py +13 -0
  109. xinference/web/ui/build/asset-manifest.json +6 -6
  110. xinference/web/ui/build/index.html +1 -1
  111. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  112. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  113. xinference/web/ui/build/static/js/main.1eb206d1.js +3 -0
  114. xinference/web/ui/build/static/js/main.1eb206d1.js.map +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  116. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  117. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  118. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  119. xinference/web/ui/node_modules/.cache/babel-loader/2213d49de260e1f67c888081b18f120f5225462b829ae57c9e05a05cec83689d.json +1 -0
  120. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  121. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  122. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  123. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  124. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  125. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  126. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  127. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  128. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  129. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  130. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  131. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  132. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  133. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  134. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  135. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  136. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  137. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  138. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  139. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  140. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  141. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  142. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  143. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  144. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  145. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  146. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  147. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  148. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  149. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  150. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  151. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  152. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  153. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  154. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  155. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  156. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  157. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  158. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  159. xinference/web/ui/node_modules/.package-lock.json +67 -3
  160. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  161. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  162. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  163. xinference/web/ui/node_modules/i18next/package.json +129 -0
  164. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  165. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  166. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  167. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  168. xinference/web/ui/package-lock.json +69 -3
  169. xinference/web/ui/package.json +2 -0
  170. xinference/web/ui/src/locales/en.json +186 -0
  171. xinference/web/ui/src/locales/zh.json +186 -0
  172. {xinference-1.1.0.dist-info → xinference-1.2.0.dist-info}/METADATA +19 -11
  173. {xinference-1.1.0.dist-info → xinference-1.2.0.dist-info}/RECORD +178 -111
  174. xinference/thirdparty/cosyvoice/bin/__init__.py +0 -0
  175. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  176. xinference/thirdparty/cosyvoice/flow/__init__.py +0 -0
  177. xinference/thirdparty/cosyvoice/hifigan/__init__.py +0 -0
  178. xinference/thirdparty/cosyvoice/llm/__init__.py +0 -0
  179. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  180. xinference/thirdparty/fish_speech/tools/api.py +0 -943
  181. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -95
  182. xinference/thirdparty/fish_speech/tools/webui.py +0 -548
  183. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  184. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  185. xinference/web/ui/build/static/js/main.4eb4ee80.js +0 -3
  186. xinference/web/ui/build/static/js/main.4eb4ee80.js.map +0 -1
  187. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  188. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  189. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  190. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  191. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  192. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  193. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  194. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  195. xinference/web/ui/node_modules/.cache/babel-loader/8c5eeb02f772d02cbe8b89c05428d0dd41a97866f75f7dc1c2164a67f5a1cf98.json +0 -1
  196. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  197. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  198. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  199. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  200. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  201. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  202. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  203. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  204. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  205. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  206. /xinference/web/ui/build/static/js/{main.4eb4ee80.js.LICENSE.txt → main.1eb206d1.js.LICENSE.txt} +0 -0
  207. {xinference-1.1.0.dist-info → xinference-1.2.0.dist-info}/LICENSE +0 -0
  208. {xinference-1.1.0.dist-info → xinference-1.2.0.dist-info}/WHEEL +0 -0
  209. {xinference-1.1.0.dist-info → xinference-1.2.0.dist-info}/entry_points.txt +0 -0
  210. {xinference-1.1.0.dist-info → xinference-1.2.0.dist-info}/top_level.txt +0 -0

xinference/model/image/model_spec_modelscope.json

@@ -12,8 +12,24 @@
         ],
         "default_model_config": {
             "quantize": true,
-            "quantize_text_encoder": "text_encoder_2"
-        }
+            "quantize_text_encoder": "text_encoder_2",
+            "torch_dtype": "bfloat16"
+        },
+        "gguf_model_id": "Xorbits/FLUX.1-schnell-gguf",
+        "gguf_quantizations": [
+            "F16",
+            "Q2_K",
+            "Q3_K_S",
+            "Q4_0",
+            "Q4_1",
+            "Q4_K_S",
+            "Q5_0",
+            "Q5_1",
+            "Q5_K_S",
+            "Q6_K",
+            "Q8_0"
+        ],
+        "gguf_model_file_name_template": "flux1-schnell-{quantization}.gguf"
     },
     {
         "model_name": "FLUX.1-dev",
@@ -28,8 +44,24 @@
         ],
         "default_model_config": {
             "quantize": true,
-            "quantize_text_encoder": "text_encoder_2"
-        }
+            "quantize_text_encoder": "text_encoder_2",
+            "torch_dtype": "bfloat16"
+        },
+        "gguf_model_id": "AI-ModelScope/FLUX.1-dev-gguf",
+        "gguf_quantizations": [
+            "F16",
+            "Q2_K",
+            "Q3_K_S",
+            "Q4_0",
+            "Q4_1",
+            "Q4_K_S",
+            "Q5_0",
+            "Q5_1",
+            "Q5_K_S",
+            "Q6_K",
+            "Q8_0"
+        ],
+        "gguf_model_file_name_template": "flux1-dev-{quantization}.gguf"
     },
     {
         "model_name": "sd3-medium",
@@ -47,6 +79,120 @@
             "quantize_text_encoder": "text_encoder_3"
         }
     },
+    {
+        "model_name": "sd3.5-medium",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/stable-diffusion-3.5-medium",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image",
+            "image2image",
+            "inpainting"
+        ],
+        "default_model_config": {
+            "quantize": true,
+            "quantize_text_encoder": "text_encoder_3",
+            "torch_dtype": "bfloat16"
+        },
+        "gguf_model_id": "Xorbits/stable-diffusion-3.5-medium-gguf",
+        "gguf_quantizations": [
+            "F16",
+            "Q3_K_M",
+            "Q3_K_S",
+            "Q4_0",
+            "Q4_1",
+            "Q4_K_M",
+            "Q4_K_S",
+            "Q5_0",
+            "Q5_1",
+            "Q5_K_M",
+            "Q5_K_S",
+            "Q6_K",
+            "Q8_0"
+        ],
+        "gguf_model_file_name_template": "sd3.5_medium-{quantization}.gguf"
+    },
+    {
+        "model_name": "sd3.5-large",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/stable-diffusion-3.5-large",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image",
+            "image2image",
+            "inpainting"
+        ],
+        "default_model_config": {
+            "quantize": true,
+            "quantize_text_encoder": "text_encoder_3",
+            "torch_dtype": "bfloat16",
+            "transformer_nf4": true
+        },
+        "gguf_model_id": "Xorbits/stable-diffusion-3.5-large-gguf",
+        "gguf_quantizations": [
+            "F16",
+            "Q4_0",
+            "Q4_1",
+            "Q5_0",
+            "Q5_1",
+            "Q8_0"
+        ],
+        "gguf_model_file_name_template": "sd3.5_large-{quantization}.gguf"
+    },
+    {
+        "model_name": "sd3.5-large-turbo",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/stable-diffusion-3.5-large-turbo",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image",
+            "image2image",
+            "inpainting"
+        ],
+        "default_model_config": {
+            "quantize": true,
+            "quantize_text_encoder": "text_encoder_3",
+            "torch_dtype": "bfloat16",
+            "transformer_nf4": true
+        },
+        "default_generate_config": {
+            "guidance_scale": 1.0,
+            "num_inference_steps": 4
+        },
+        "gguf_model_id": "Xorbits/stable-diffusion-3.5-large-turbo-gguf",
+        "gguf_quantizations": [
+            "F16",
+            "Q4_0",
+            "Q4_1",
+            "Q5_0",
+            "Q5_1",
+            "Q8_0"
+        ],
+        "gguf_model_file_name_template": "sd3.5_large_turbo-{quantization}.gguf"
+    },
+    {
+        "model_name": "HunyuanDiT-v1.2",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "Xorbits/HunyuanDiT-v1.2-Diffusers",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image"
+        ]
+    },
+    {
+        "model_name": "HunyuanDiT-v1.2-Distilled",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "Xorbits/HunyuanDiT-v1.2-Diffusers-Distilled",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image"
+        ]
+    },
     {
         "model_name": "sd-turbo",
         "model_family": "stable_diffusion",

xinference/model/image/stable_diffusion/core.py

@@ -14,8 +14,10 @@
 
 import contextlib
 import gc
+import importlib
 import inspect
 import itertools
+import json
 import logging
 import os
 import re
@@ -86,6 +88,7 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         lora_load_kwargs: Optional[Dict] = None,
         lora_fuse_kwargs: Optional[Dict] = None,
         model_spec: Optional["ImageModelFamilyV1"] = None,
+        gguf_model_path: Optional[str] = None,
         **kwargs,
     ):
         self._model_uid = model_uid
@@ -109,6 +112,8 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         self._model_spec = model_spec
         self._abilities = model_spec.model_ability or []  # type: ignore
         self._kwargs = kwargs
+        # gguf
+        self._gguf_model_path = gguf_model_path
 
     @property
     def model_ability(self):
@@ -184,7 +189,17 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             self._model.fuse_lora(**self._lora_fuse_kwargs)
             logger.info(f"Successfully loaded the LoRA for model {self._model_uid}.")
 
+    def _get_layer_cls(self, layer: str):
+        with open(os.path.join(self._model_path, "model_index.json")) as f:  # type: ignore
+            model_index = json.load(f)
+            layer_info = model_index[layer]
+            module_name, class_name = layer_info
+            module = importlib.import_module(module_name)
+            return getattr(module, class_name)
+
     def load(self):
+        from transformers import BitsAndBytesConfig, T5EncoderModel
+
         if "text2image" in self._abilities or "image2image" in self._abilities:
             from diffusers import AutoPipelineForText2Image as AutoPipelineModel
         elif "inpainting" in self._abilities:
@@ -200,7 +215,9 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             glob(os.path.join(self._model_path, "*/*.safetensors"))
         )
         if isinstance(torch_dtype, str):
-            self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)
+            self._torch_dtype = torch_dtype = self._kwargs["torch_dtype"] = getattr(
+                torch, torch_dtype
+            )
 
         controlnet = self._kwargs.get("controlnet")
         if controlnet is not None:
@@ -212,18 +229,7 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             ]
 
         quantize_text_encoder = self._kwargs.pop("quantize_text_encoder", None)
-        if quantize_text_encoder:
-            try:
-                from transformers import BitsAndBytesConfig, T5EncoderModel
-            except ImportError:
-                error_message = "Failed to import module 'transformers'"
-                installation_guide = [
-                    "Please make sure 'transformers' is installed. ",
-                    "You can install it by `pip install transformers`\n",
-                ]
-
-                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
-
+        if quantize_text_encoder and not self._gguf_model_path:
             try:
                 import bitsandbytes  # noqa: F401
             except ImportError:
@@ -249,6 +255,32 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
                 self._kwargs[text_encoder_name] = text_encoder
                 self._kwargs["device_map"] = "balanced"
 
+        if self._gguf_model_path:
+            from diffusers import GGUFQuantizationConfig
+
+            # GGUF transformer
+            self._kwargs["transformer"] = self._get_layer_cls(
+                "transformer"
+            ).from_single_file(
+                self._gguf_model_path,
+                quantization_config=GGUFQuantizationConfig(compute_dtype=torch_dtype),
+                torch_dtype=torch_dtype,
+                config=os.path.join(self._model_path, "transformer"),
+            )
+        elif self._kwargs.get("transformer_nf4"):
+            nf4_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_compute_dtype=torch_dtype,
+            )
+            model_nf4 = self._get_layer_cls("transformer").from_pretrained(
+                self._model_path,
+                subfolder="transformer",
+                quantization_config=nf4_config,
+                torch_dtype=torch_dtype,
+            )
+            self._kwargs["transformer"] = model_nf4
+
         logger.debug(
             "Loading model from %s, kwargs: %s", self._model_path, self._kwargs
         )
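
Note: the GGUF branch above is a thin wrapper around diffusers' documented GGUF support (available since diffusers 0.32). A standalone sketch under two simplifications: the transformer class is hardcoded to FluxTransformer2DModel where core.py resolves it dynamically from model_index.json, and the paths are placeholders:

    import torch
    from diffusers import FluxPipeline, FluxTransformer2DModel, GGUFQuantizationConfig

    gguf_path = "flux1-schnell-Q4_K_S.gguf"        # placeholder local file
    base_repo = "black-forest-labs/FLUX.1-schnell" # supplies the remaining components

    # Load only the transformer from the GGUF file; weights stay quantized and
    # are dequantized to bfloat16 on the fly at compute time.
    transformer = FluxTransformer2DModel.from_single_file(
        gguf_path,
        quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
        torch_dtype=torch.bfloat16,
    )

    # Assemble text encoders, VAE, and scheduler from the base repo around it.
    pipe = FluxPipeline.from_pretrained(
        base_repo, transformer=transformer, torch_dtype=torch.bfloat16
    )
    pipe.enable_model_cpu_offload()
    image = pipe("a photo of a cat", guidance_scale=0.0, num_inference_steps=4).images[0]
    image.save("cat.png")

The transformer_nf4 branch is the bitsandbytes analogue of the same idea: instead of reading pre-quantized GGUF weights, the transformer is quantized to 4-bit NF4 at load time via BitsAndBytesConfig, which is why the sd3.5-large and sd3.5-large-turbo specs enable it by default.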

xinference/model/llm/__init__.py

@@ -134,6 +134,7 @@ def _install():
     from .mlx.core import MLXChatModel, MLXModel, MLXVisionModel
     from .sglang.core import SGLANGChatModel, SGLANGModel
     from .transformers.chatglm import ChatglmPytorchChatModel
+    from .transformers.cogagent import CogAgentChatModel
     from .transformers.cogvlm2 import CogVLM2Model
     from .transformers.cogvlm2_video import CogVLM2VideoModel
     from .transformers.core import PytorchChatModel, PytorchModel
@@ -195,6 +196,7 @@ def _install():
             DeepSeekV2PytorchChatModel,
             OptPytorchModel,
             GlmEdgeVModel,
+            CogAgentChatModel,
         ]
     )
     if OmniLMMModel:  # type: ignore

xinference/model/llm/llm_family.json

@@ -8942,5 +8942,148 @@
             "<|user|>",
             "<|observation|>"
         ]
+    },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "QvQ-72B-Preview",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 72,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/QVQ-72B-Preview"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 72,
+                "quantizations": [
+                    "3bit",
+                    "4bit",
+                    "6bit",
+                    "8bit",
+                    "bf16"
+                ],
+                "model_id": "mlx-community/QVQ-72B-Preview-{quantization}"
+            }
+        ],
+        "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+        "stop_token_ids": [
+            151645,
+            151643
+        ],
+        "stop": [
+            "<|im_end|>",
+            "<|endoftext|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "marco-o1",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "tools"
+        ],
+        "model_description": "Marco-o1: Towards Open Reasoning Models for Open-Ended Solutions",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "AIDC-AI/Marco-o1"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "Q2_K",
+                    "Q3_K_L",
+                    "Q3_K_M",
+                    "Q3_K_S",
+                    "Q4_0",
+                    "Q4_1",
+                    "Q4_K_M",
+                    "Q4_K_S",
+                    "Q5_0",
+                    "Q5_1",
+                    "Q5_K_M",
+                    "Q5_K_S",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_id": "QuantFactory/Marco-o1-GGUF",
+                "model_file_name_template": "Marco-o1.{quantization}.gguf"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在<Thought>内完成,<Output>内输出你的结果。\n<Thought>应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,<Output>内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            151643,
+            151644,
+            151645
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|im_start|>",
+            "<|im_end|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "cogagent",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "The CogAgent-9B-20241220 model is based on GLM-4V-9B, a bilingual open-source VLM base model. Through data collection and optimization, multi-stage training, and strategy improvements, CogAgent-9B-20241220 achieves significant advancements in GUI perception, inference prediction accuracy, action space completeness, and task generalizability. ",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "9",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "THUDM/cogagent-9b-20241220"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            151329,
+            151336,
+            151338
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|user|>",
+            "<|observation|>"
+        ]
     }
 ]
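
Note: with these entries registered, the new models can be launched through the standard client API. A sketch for the marco-o1 ggufv2 spec (the endpoint is a placeholder; "llama.cpp" is the usual engine name for ggufv2 weights, and the quantization names come from the spec above):

    from xinference.client import Client

    client = Client("http://localhost:9997")  # placeholder endpoint

    model_uid = client.launch_model(
        model_name="marco-o1",
        model_engine="llama.cpp",
        model_format="ggufv2",
        quantization="Q4_K_M",
    )
    model = client.get_model(model_uid)
    reply = model.chat(
        messages=[{"role": "user", "content": "How many r's are in 'strawberry'?"}]
    )
    print(reply["choices"][0]["message"]["content"])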

xinference/model/llm/llm_family.py

@@ -972,46 +972,25 @@ def match_llm(
         return spec
 
     # priority: download_hub > download_from_modelscope() and download_from_csghub()
-    if download_hub == "modelscope":
-        all_families = (
-            BUILTIN_MODELSCOPE_LLM_FAMILIES
-            + BUILTIN_LLM_FAMILIES
-            + user_defined_llm_families
-        )
-    elif download_hub == "openmind_hub":
-        all_families = (
-            BUILTIN_OPENMIND_HUB_LLM_FAMILIES
-            + BUILTIN_LLM_FAMILIES
-            + user_defined_llm_families
-        )
-    elif download_hub == "csghub":
-        all_families = (
-            BUILTIN_CSGHUB_LLM_FAMILIES
-            + BUILTIN_LLM_FAMILIES
-            + user_defined_llm_families
-        )
-    elif download_hub == "huggingface":
-        all_families = BUILTIN_LLM_FAMILIES + user_defined_llm_families
+    # set base model
+    base_families = BUILTIN_LLM_FAMILIES + user_defined_llm_families
+    hub_families_map = {
+        "modelscope": BUILTIN_MODELSCOPE_LLM_FAMILIES,
+        "openmind_hub": BUILTIN_OPENMIND_HUB_LLM_FAMILIES,
+        "csghub": BUILTIN_CSGHUB_LLM_FAMILIES,
+    }
+    if download_hub == "huggingface":
+        all_families = base_families
+    elif download_hub in hub_families_map:
+        all_families = hub_families_map[download_hub] + base_families
     elif download_from_modelscope():
-        all_families = (
-            BUILTIN_MODELSCOPE_LLM_FAMILIES
-            + BUILTIN_LLM_FAMILIES
-            + user_defined_llm_families
-        )
+        all_families = BUILTIN_MODELSCOPE_LLM_FAMILIES + base_families
     elif download_from_openmind_hub():
-        all_families = (
-            BUILTIN_OPENMIND_HUB_LLM_FAMILIES
-            + BUILTIN_LLM_FAMILIES
-            + user_defined_llm_families
-        )
+        all_families = BUILTIN_OPENMIND_HUB_LLM_FAMILIES + base_families
     elif download_from_csghub():
-        all_families = (
-            BUILTIN_CSGHUB_LLM_FAMILIES
-            + BUILTIN_LLM_FAMILIES
-            + user_defined_llm_families
-        )
+        all_families = BUILTIN_CSGHUB_LLM_FAMILIES + base_families
     else:
-        all_families = BUILTIN_LLM_FAMILIES + user_defined_llm_families
+        all_families = base_families
 
     for family in all_families:
         if model_name != family.model_name:
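
Note: the refactor keeps the stated priority (explicit download_hub first, then the environment probes, then plain Hugging Face) while collapsing the repeated list concatenations. A condensed, self-contained sketch of the same resolution order, with the module-level lists and the download_from_*() probes stubbed out as parameters:

    from typing import Dict, List, Optional

    def resolve_all_families(
        download_hub: Optional[str],
        base_families: List,       # stands in for BUILTIN_LLM_FAMILIES + user_defined_llm_families
        hub_map: Dict[str, List],  # stands in for the modelscope/openmind_hub/csghub lists
        env_hub: Optional[str],    # hub implied by the download_from_*() probes, if any
    ) -> List:
        # 1. An explicit download_hub always wins; "huggingface" means builtins only.
        if download_hub == "huggingface":
            return base_families
        if download_hub in hub_map:
            return hub_map[download_hub] + base_families
        # 2. Otherwise fall back to the hub selected by the environment.
        if env_hub in hub_map:
            return hub_map[env_hub] + base_families
        # 3. Default: Hugging Face builtins plus user-defined families.
        return base_families

Hub-specific families are prepended, so when a model name exists on both hubs the hub-specific spec is matched first by the loop that follows.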

xinference/model/llm/llm_family_modelscope.json

@@ -6673,5 +6673,153 @@
             "<|user|>",
             "<|observation|>"
         ]
+    },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "QvQ-72B-Preview",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 72,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/QVQ-72B-Preview",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 72,
+                "quantizations": [
+                    "3bit",
+                    "4bit",
+                    "6bit",
+                    "8bit",
+                    "bf16"
+                ],
+                "model_id": "mlx-community/QVQ-72B-Preview-{quantization}",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+        "stop_token_ids": [
+            151645,
+            151643
+        ],
+        "stop": [
+            "<|im_end|>",
+            "<|endoftext|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "marco-o1",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "tools"
+        ],
+        "model_description": "Marco-o1: Towards Open Reasoning Models for Open-Ended Solutions",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "AIDC-AI/Marco-o1",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "Q2_K",
+                    "Q3_K_L",
+                    "Q3_K_M",
+                    "Q3_K_S",
+                    "Q4_0",
+                    "Q4_1",
+                    "Q4_K_M",
+                    "Q4_K_S",
+                    "Q5_0",
+                    "Q5_1",
+                    "Q5_K_M",
+                    "Q5_K_S",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_file_name_template": "Marco-o1.{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "QuantFactory/Marco-o1-GGUF"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在<Thought>内完成,<Output>内输出你的结果。\n<Thought>应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,<Output>内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            151643,
+            151644,
+            151645
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|im_start|>",
+            "<|im_end|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "cogagent",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "The CogAgent-9B-20241220 model is based on GLM-4V-9B, a bilingual open-source VLM base model. Through data collection and optimization, multi-stage training, and strategy improvements, CogAgent-9B-20241220 achieves significant advancements in GUI perception, inference prediction accuracy, action space completeness, and task generalizability. ",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "9",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "ZhipuAI/cogagent-9b-20241220",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            151329,
+            151336,
+            151338
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|user|>",
+            "<|observation|>"
+        ]
     }
 ]
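
Note: these ModelScope specs share model names with their Hugging Face counterparts above, so the explicit download_hub option handled in match_llm is the way to pin one side. A sketch for the ModelScope cogagent spec (placeholder endpoint; kwargs follow the 1.x client API):

    from xinference.client import Client

    client = Client("http://localhost:9997")  # placeholder endpoint

    # Pins ZhipuAI/cogagent-9b-20241220 even when the environment would
    # otherwise resolve to the Hugging Face hub.
    model_uid = client.launch_model(
        model_name="cogagent",
        model_engine="Transformers",
        model_format="pytorch",
        model_size_in_billions=9,
        quantization="none",
        download_hub="modelscope",
    )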