xinference 0.11.2.post1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (36)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +83 -8
  3. xinference/client/restful/restful_client.py +70 -0
  4. xinference/constants.py +8 -0
  5. xinference/core/__init__.py +0 -2
  6. xinference/core/cache_tracker.py +22 -1
  7. xinference/core/chat_interface.py +71 -10
  8. xinference/core/model.py +141 -12
  9. xinference/core/scheduler.py +428 -0
  10. xinference/core/supervisor.py +31 -3
  11. xinference/core/worker.py +8 -3
  12. xinference/isolation.py +9 -2
  13. xinference/model/audio/chattts.py +84 -0
  14. xinference/model/audio/core.py +10 -3
  15. xinference/model/audio/model_spec.json +20 -0
  16. xinference/model/llm/__init__.py +6 -0
  17. xinference/model/llm/llm_family.json +1063 -260
  18. xinference/model/llm/llm_family_modelscope.json +686 -13
  19. xinference/model/llm/pytorch/baichuan.py +2 -1
  20. xinference/model/llm/pytorch/chatglm.py +2 -1
  21. xinference/model/llm/pytorch/cogvlm2.py +316 -0
  22. xinference/model/llm/pytorch/core.py +92 -6
  23. xinference/model/llm/pytorch/glm4v.py +258 -0
  24. xinference/model/llm/pytorch/intern_vl.py +5 -10
  25. xinference/model/llm/pytorch/minicpmv25.py +232 -0
  26. xinference/model/llm/pytorch/utils.py +386 -2
  27. xinference/model/llm/vllm/core.py +7 -1
  28. xinference/thirdparty/ChatTTS/__init__.py +1 -0
  29. xinference/thirdparty/ChatTTS/core.py +200 -0
  30. xinference/types.py +3 -0
  31. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/METADATA +28 -11
  32. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/RECORD +36 -29
  33. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/LICENSE +0 -0
  34. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/WHEEL +0 -0
  35. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/entry_points.txt +0 -0
  36. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/top_level.txt +0 -0
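The llm_family JSON changes below register several new built-in models (glm4-chat, glm4-chat-1m, glm-4v, qwen2-instruct, qwen2-moe-instruct, MiniCPM-Llama3-V-2_5, cogvlm2, telechat) for the ModelScope hub. As a rough sketch of how one of these definitions is consumed, the snippet below launches glm4-chat through the RESTful client shipped in xinference/client/restful/restful_client.py; the endpoint URL, quantization choice, and generation settings are illustrative assumptions, not values taken from this diff.

    # Hedged sketch: launching a model newly registered in this release.
    # Endpoint, quantization and generate_config values are assumptions.
    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")  # assumed local xinference endpoint

    model_uid = client.launch_model(
        model_name="glm4-chat",   # added in llm_family_modelscope.json below
        model_format="pytorch",
        quantization="none",
    )

    model = client.get_model(model_uid)
    print(model.chat("Hello, please introduce yourself.",
                     generate_config={"max_tokens": 256}))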
@@ -522,6 +522,142 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "glm4-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/glm-4-9b-chat",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATGLM3",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        151329,
+        151336,
+        151338
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|user|>",
+        "<|observation|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 1048576,
+    "model_name": "glm4-chat-1m",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/glm-4-9b-chat-1m",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATGLM3",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        151329,
+        151336,
+        151338
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|user|>",
+        "<|observation|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "glm-4v",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/glm-4v-9b",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATGLM3",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        151329,
+        151336,
+        151338
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|user|>",
+        "<|observation|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 2048,
@@ -1496,6 +1632,127 @@
         "model_hub": "modelscope",
         "model_id": "01ai/Yi-1.5-34B-Chat",
         "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "AI-ModelScope/Yi-1.5-6B-Chat-GPTQ",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "AI-ModelScope/Yi-1.5-9B-Chat-GPTQ",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "AI-ModelScope/Yi-1.5-34B-Chat-GPTQ",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "AI-ModelScope/Yi-1.5-6B-Chat-AWQ",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "AI-ModelScope/Yi-1.5-9B-Chat-AWQ",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "AI-ModelScope/Yi-1.5-34B-Chat-AWQ",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATML",
+      "system_prompt": "",
+      "roles": [
+        "<|im_start|>user",
+        "<|im_start|>assistant"
+      ],
+      "intra_message_sep": "<|im_end|>",
+      "inter_message_sep": "",
+      "stop_token_ids": [
+        2,
+        6,
+        7,
+        8
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|im_start|>",
+        "<|im_end|>",
+        "<|im_sep|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 16384,
+    "model_name": "Yi-1.5-chat-16k",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "01ai/Yi-1.5-9B-Chat-16K",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "01ai/Yi-1.5-34B-Chat-16K",
+        "model_revision": "master"
       }
     ],
     "prompt_style": {
@@ -2529,39 +2786,266 @@
   },
   {
     "version": 1,
-    "context_length": 4096,
-    "model_name": "deepseek-vl-chat",
+    "context_length": 32768,
+    "model_name": "qwen2-instruct",
     "model_lang": [
       "en",
       "zh"
     ],
     "model_ability": [
       "chat",
-      "vision"
+      "tools"
     ],
-    "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
+    "model_description": "Qwen2 is the new series of Qwen large language models",
     "model_specs": [
       {
         "model_format": "pytorch",
-        "model_size_in_billions": "1_3",
+        "model_size_in_billions": "0_5",
         "quantizations": [
+          "4-bit",
+          "8-bit",
           "none"
         ],
-        "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
+        "model_id": "qwen/Qwen2-0.5B-Instruct",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2-1.5B-Instruct",
         "model_hub": "modelscope"
       },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
+          "4-bit",
+          "8-bit",
           "none"
         ],
-        "model_id": "deepseek-ai/deepseek-vl-7b-chat",
+        "model_id": "qwen/Qwen2-7B-Instruct",
         "model_hub": "modelscope"
-      }
-    ],
-    "prompt_style": {
-      "style_name": "DEEPSEEK_CHAT",
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2-72B-Instruct",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2-0.5B-Instruct-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2-1.5B-Instruct-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2-7B-Instruct-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2-72B-Instruct-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2-0.5B-Instruct-AWQ",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2-1.5B-Instruct-AWQ",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2-7B-Instruct-AWQ",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2-72B-Instruct-AWQ",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "q2_k",
+          "q3_k_m",
+          "q4_0",
+          "q4_k_m",
+          "q5_0",
+          "q5_k_m",
+          "q6_k",
+          "q8_0",
+          "fp16"
+        ],
+        "model_id": "qwen/Qwen2-0.5B-Instruct-GGUF",
+        "model_file_name_template": "qwen2-0_5b-instruct-{quantization}.gguf",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "QWEN",
+      "system_prompt": "You are a helpful assistant.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n",
+      "stop_token_ids": [
+        151643,
+        151644,
+        151645
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|im_start|>",
+        "<|im_end|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "qwen2-moe-instruct",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Qwen2 is the new series of Qwen large language models. ",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2-57B-A14B-Instruct",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "QWEN",
+      "system_prompt": "You are a helpful assistant.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n",
+      "stop_token_ids": [
+        151643,
+        151644,
+        151645
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|im_start|>",
+        "<|im_end|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "deepseek-vl-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "1_3",
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl-7b-chat",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "DEEPSEEK_CHAT",
       "system_prompt": "<|begin▁of▁sentence|>",
       "roles": [
         "User",
@@ -3115,7 +3599,7 @@
       "chat",
       "vision"
     ],
-    "model_description":"mniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
+    "model_description":"OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
     "model_specs":[
       {
         "model_format":"pytorch",
@@ -3347,6 +3831,50 @@
       ]
     }
   },
+  {
+    "version":1,
+    "context_length":2048,
+    "model_name":"MiniCPM-Llama3-V-2_5",
+    "model_lang":[
+      "en",
+      "zh"
+    ],
+    "model_ability":[
+      "chat",
+      "vision"
+    ],
+    "model_description":"MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters.",
+    "model_specs":[
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id":"OpenBMB/MiniCPM-Llama3-V-2_5",
+        "model_revision":"master"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "int4"
+        ],
+        "model_hub": "modelscope",
+        "model_id":"OpenBMB/MiniCPM-Llama3-V-2_5-{quantization}",
+        "model_revision":"master"
+      }
+    ],
+    "prompt_style":{
+      "style_name":"OmniLMM",
+      "system_prompt":"The role of first msg should be user",
+      "roles":[
+        "user",
+        "assistant"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 2048,
@@ -3739,5 +4267,150 @@
         "<|im_end|>"
       ]
     }
-  }
+  },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "cogvlm2",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "CogVLM2 have achieved good results in many lists compared to the previous generation of CogVLM open source models. Its excellent performance can compete with some non-open source models.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 20,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 20,
+        "quantizations": [
+          "int4"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B-{quantization}",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "LLAMA3",
+      "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n\n",
+      "inter_message_sep": "<|eot_id|>",
+      "stop_token_ids": [
+        128001,
+        128009
+      ],
+      "stop": [
+        "<|end_of_text|>",
+        "<|eot_id|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "telechat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "The TeleChat is a large language model developed and trained by China Telecom Artificial Intelligence Technology Co., LTD. The 7B model base is trained with 1.5 trillion Tokens and 3 trillion Tokens and Chinese high-quality corpus.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "TeleAI/telechat-7B",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "int4",
+          "int8"
+        ],
+        "model_id": "TeleAI/telechat-7B-{quantization}",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "TeleAI/TeleChat-12B",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "int4",
+          "int8"
+        ],
+        "model_id": "TeleAI/TeleChat-12B-{quantization}",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 52,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "TeleAI/TeleChat-52B",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "NO_COLON_TWO",
+      "system_prompt": "You are a helpful assistant.",
+      "roles": [
+        "<_user>",
+        "<_bot>"
+      ],
+      "intra_message_sep": "",
+      "inter_message_sep": "",
+      "stop": [
+        "<_end>",
+        "<_start>"
+      ],
+      "stop_token_ids": [
+        160133,
+        160132
+      ]
+    }
+  }
 ]