xinference 0.11.2.post1__py3-none-any.whl → 0.11.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

@@ -3891,6 +3891,201 @@
3891
3891
  ],
3892
3892
  "model_id": "01-ai/Yi-1.5-34B-Chat",
3893
3893
  "model_revision": "fa695ee438bfcd0ec2b378fa1c7e0dea1b40393e"
3894
+ },
3895
+ {
3896
+ "model_format": "ggufv2",
3897
+ "model_size_in_billions": 6,
3898
+ "quantizations": [
3899
+ "Q3_K_L",
3900
+ "Q4_K_M",
3901
+ "Q5_K_M",
3902
+ "Q6_K",
3903
+ "Q8_0",
3904
+ "f32"
3905
+ ],
3906
+ "model_id": "lmstudio-community/Yi-1.5-6B-Chat-GGUF",
3907
+ "model_file_name_template": "Yi-1.5-6B-Chat-{quantization}.gguf"
3908
+ },
3909
+ {
3910
+ "model_format": "ggufv2",
3911
+ "model_size_in_billions": 9,
3912
+ "quantizations": [
3913
+ "Q3_K_L",
3914
+ "Q4_K_M",
3915
+ "Q5_K_M",
3916
+ "Q6_K",
3917
+ "Q8_0",
3918
+ "f32"
3919
+ ],
3920
+ "model_id": "lmstudio-community/Yi-1.5-9B-Chat-GGUF",
3921
+ "model_file_name_template": "Yi-1.5-9B-Chat-{quantization}.gguf"
3922
+ },
3923
+ {
3924
+ "model_format": "ggufv2",
3925
+ "model_size_in_billions": 34,
3926
+ "quantizations": [
3927
+ "Q2_K",
3928
+ "Q3_K_L",
3929
+ "Q4_K_M",
3930
+ "Q5_K_M",
3931
+ "Q6_K",
3932
+ "Q8_0"
3933
+ ],
3934
+ "model_id": "lmstudio-community/Yi-1.5-34B-Chat-GGUF",
3935
+ "model_file_name_template": "Yi-1.5-34B-Chat-{quantization}.gguf"
3936
+ },
3937
+ {
3938
+ "model_format": "gptq",
3939
+ "model_size_in_billions": 6,
3940
+ "quantizations": [
3941
+ "Int4"
3942
+ ],
3943
+ "model_id": "modelscope/Yi-1.5-6B-Chat-GPTQ",
3944
+ "model_revision": "2ad3a602e64d1c79e28e6e92beced2935047367c"
3945
+ },
3946
+ {
3947
+ "model_format": "gptq",
3948
+ "model_size_in_billions": 9,
3949
+ "quantizations": [
3950
+ "Int4"
3951
+ ],
3952
+ "model_id": "modelscope/Yi-1.5-9B-Chat-GPTQ",
3953
+ "model_revision": "76f47d16982923f7b6674c4e23ddac7c3b1d2e03"
3954
+ },
3955
+ {
3956
+ "model_format": "gptq",
3957
+ "model_size_in_billions": 34,
3958
+ "quantizations": [
3959
+ "Int4"
3960
+ ],
3961
+ "model_id": "modelscope/Yi-1.5-34B-Chat-GPTQ",
3962
+ "model_revision": "173fb4036265b2dac1d6296a8e2fd2f652c19968"
3963
+ },
3964
+ {
3965
+ "model_format": "awq",
3966
+ "model_size_in_billions": 6,
3967
+ "quantizations": [
3968
+ "Int4"
3969
+ ],
3970
+ "model_id": "modelscope/Yi-1.5-6B-Chat-AWQ",
3971
+ "model_revision": "23bf37f1666874e15e239422de0d3948d8735fa9"
3972
+ },
3973
+ {
3974
+ "model_format": "awq",
3975
+ "model_size_in_billions": 9,
3976
+ "quantizations": [
3977
+ "Int4"
3978
+ ],
3979
+ "model_id": "modelscope/Yi-1.5-9B-Chat-AWQ",
3980
+ "model_revision": "2605f388332672789eae1f422644add2901b433f"
3981
+ },
3982
+ {
3983
+ "model_format": "awq",
3984
+ "model_size_in_billions": 34,
3985
+ "quantizations": [
3986
+ "Int4"
3987
+ ],
3988
+ "model_id": "modelscope/Yi-1.5-34B-Chat-AWQ",
3989
+ "model_revision": "26234fea6ac49d456f32f8017289021fb1087a04"
3990
+ }
3991
+ ],
3992
+ "prompt_style": {
3993
+ "style_name": "CHATML",
3994
+ "system_prompt": "",
3995
+ "roles": [
3996
+ "<|im_start|>user",
3997
+ "<|im_start|>assistant"
3998
+ ],
3999
+ "intra_message_sep": "<|im_end|>",
4000
+ "inter_message_sep": "",
4001
+ "stop_token_ids": [
4002
+ 2,
4003
+ 6,
4004
+ 7,
4005
+ 8
4006
+ ],
4007
+ "stop": [
4008
+ "<|endoftext|>",
4009
+ "<|im_start|>",
4010
+ "<|im_end|>",
4011
+ "<|im_sep|>"
4012
+ ]
4013
+ }
4014
+ },
4015
+ {
4016
+ "version": 1,
4017
+ "context_length": 16384,
4018
+ "model_name": "Yi-1.5-chat-16k",
4019
+ "model_lang": [
4020
+ "en",
4021
+ "zh"
4022
+ ],
4023
+ "model_ability": [
4024
+ "chat"
4025
+ ],
4026
+ "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
4027
+ "model_specs": [
4028
+ {
4029
+ "model_format": "pytorch",
4030
+ "model_size_in_billions": 9,
4031
+ "quantizations": [
4032
+ "4-bit",
4033
+ "8-bit",
4034
+ "none"
4035
+ ],
4036
+ "model_id": "01-ai/Yi-1.5-9B-Chat-16K",
4037
+ "model_revision": "551220fb24d69b6bfec5defceeb160395ce5da8d"
4038
+ },
4039
+ {
4040
+ "model_format": "pytorch",
4041
+ "model_size_in_billions": 34,
4042
+ "quantizations": [
4043
+ "4-bit",
4044
+ "8-bit",
4045
+ "none"
4046
+ ],
4047
+ "model_id": "01-ai/Yi-1.5-34B-Chat-16K",
4048
+ "model_revision": "dfdbc67be750972bfcc1ac7ffd7fe48689c856fd"
4049
+ },
4050
+ {
4051
+ "model_format": "ggufv2",
4052
+ "model_size_in_billions": 9,
4053
+ "quantizations": [
4054
+ "Q2_K",
4055
+ "Q3_K_L",
4056
+ "Q3_K_M",
4057
+ "Q3_K_S",
4058
+ "Q4_0",
4059
+ "Q4_1",
4060
+ "Q4_K_M",
4061
+ "Q4_K_S",
4062
+ "Q5_0",
4063
+ "Q5_1",
4064
+ "Q5_K_M",
4065
+ "Q5_K_S",
4066
+ "Q6_K",
4067
+ "Q8_0"
4068
+ ],
4069
+ "model_id": "QuantFactory/Yi-1.5-9B-Chat-16K-GGUF",
4070
+ "model_file_name_template": "Yi-1.5-9B-Chat-16K.{quantization}.gguf"
4071
+ },
4072
+ {
4073
+ "model_format": "ggufv2",
4074
+ "model_size_in_billions": 34,
4075
+ "quantizations": [
4076
+ "Q2_K",
4077
+ "Q3_K_L",
4078
+ "Q3_K_M",
4079
+ "Q3_K_S",
4080
+ "Q4_K_M",
4081
+ "Q4_K_S",
4082
+ "Q5_K_M",
4083
+ "Q5_K_S",
4084
+ "Q6_K",
4085
+ "Q8_0"
4086
+ ],
4087
+ "model_id": "bartowski/Yi-1.5-34B-Chat-16K-GGUF",
4088
+ "model_file_name_template": "Yi-1.5-34B-Chat-16K-{quantization}.gguf"
3894
4089
  }
3895
4090
  ],
3896
4091
  "prompt_style": {
@@ -6009,23 +6204,32 @@
6009
6204
  ],
6010
6205
  "model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
6011
6206
  "model_specs": [
6207
+ {
6208
+ "model_format": "pytorch",
6209
+ "model_size_in_billions": 2,
6210
+ "quantizations": [
6211
+ "none"
6212
+ ],
6213
+ "model_id": "OpenGVLab/Mini-InternVL-Chat-2B-V1-5",
6214
+ "model_revision": "ce3f67acff17281bacbf4b156f402a0580fb9605"
6215
+ },
6012
6216
  {
6013
- "model_format": "pytorch",
6014
- "model_size_in_billions": 26,
6015
- "quantizations": [
6016
- "none"
6017
- ],
6018
- "model_id": "OpenGVLab/InternVL-Chat-V1-5",
6019
- "model_revision": "e822119e5806946ce128043023a73d715ecabf8d"
6217
+ "model_format": "pytorch",
6218
+ "model_size_in_billions": 26,
6219
+ "quantizations": [
6220
+ "none"
6221
+ ],
6222
+ "model_id": "OpenGVLab/InternVL-Chat-V1-5",
6223
+ "model_revision": "e822119e5806946ce128043023a73d715ecabf8d"
6020
6224
  },
6021
6225
  {
6022
- "model_format": "pytorch",
6023
- "model_size_in_billions": 26,
6024
- "quantizations": [
6025
- "Int8"
6026
- ],
6027
- "model_id": "OpenGVLab/InternVL-Chat-V1-5-{quantization}",
6028
- "model_revision": "acaaed06937c603ab04f084216ecb0268160f538"
6226
+ "model_format": "pytorch",
6227
+ "model_size_in_billions": 26,
6228
+ "quantizations": [
6229
+ "Int8"
6230
+ ],
6231
+ "model_id": "OpenGVLab/InternVL-Chat-V1-5-{quantization}",
6232
+ "model_revision": "acaaed06937c603ab04f084216ecb0268160f538"
6029
6233
  }
6030
6234
  ],
6031
6235
  "prompt_style": {
@@ -6043,11 +6247,11 @@
6043
6247
  "<|im_end|>"
6044
6248
  ]
6045
6249
  }
6046
- },
6250
+ },
6047
6251
  {
6048
6252
  "version": 1,
6049
- "context_length": 32768,
6050
- "model_name": "mini-internvl-chat",
6253
+ "context_length": 8192,
6254
+ "model_name": "cogvlm2",
6051
6255
  "model_lang": [
6052
6256
  "en",
6053
6257
  "zh"
@@ -6056,32 +6260,125 @@
6056
6260
  "chat",
6057
6261
  "vision"
6058
6262
  ],
6059
- "model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
6263
+ "model_description": "CogVLM2 have achieved good results in many lists compared to the previous generation of CogVLM open source models. Its excellent performance can compete with some non-open source models.",
6060
6264
  "model_specs": [
6061
- {
6062
- "model_format": "pytorch",
6063
- "model_size_in_billions": 2,
6064
- "quantizations": [
6065
- "none"
6066
- ],
6067
- "model_id": "OpenGVLab/Mini-InternVL-Chat-2B-V1-5",
6068
- "model_revision": "ce3f67acff17281bacbf4b156f402a0580fb9605"
6069
- }
6265
+ {
6266
+ "model_format": "pytorch",
6267
+ "model_size_in_billions": 20,
6268
+ "quantizations": [
6269
+ "none"
6270
+ ],
6271
+ "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B",
6272
+ "model_revision": "d88b352bce5ee58a289b1ac8328553eb31efa2ef"
6273
+ },
6274
+ {
6275
+ "model_format": "pytorch",
6276
+ "model_size_in_billions": 20,
6277
+ "quantizations": [
6278
+ "int4"
6279
+ ],
6280
+ "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B-{quantization}",
6281
+ "model_revision": "7863e362174f4718c2fe9cba4befd0b580a3194f"
6282
+ }
6070
6283
  ],
6071
6284
  "prompt_style": {
6072
- "style_name": "INTERNLM2",
6073
- "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
6074
- "roles": [
6075
- "<|im_start|>user",
6076
- "<|im_start|>assistant"
6285
+ "style_name": "LLAMA3",
6286
+ "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
6287
+ "roles": [
6288
+ "user",
6289
+ "assistant"
6290
+ ],
6291
+ "intra_message_sep": "\n\n",
6292
+ "inter_message_sep": "<|eot_id|>",
6293
+ "stop_token_ids": [
6294
+ 128001,
6295
+ 128009
6296
+ ],
6297
+ "stop": [
6298
+ "<|end_of_text|>",
6299
+ "<|eot_id|>"
6300
+ ]
6301
+ }
6302
+ },
6303
+ {
6304
+ "version": 1,
6305
+ "context_length": 8192,
6306
+ "model_name": "telechat",
6307
+ "model_lang": [
6308
+ "en",
6309
+ "zh"
6310
+ ],
6311
+ "model_ability": [
6312
+ "chat"
6313
+ ],
6314
+ "model_description": "The TeleChat is a large language model developed and trained by China Telecom Artificial Intelligence Technology Co., LTD. The 7B model base is trained with 1.5 trillion Tokens and 3 trillion Tokens and Chinese high-quality corpus.",
6315
+ "model_specs": [
6316
+ {
6317
+ "model_format": "pytorch",
6318
+ "model_size_in_billions": 7,
6319
+ "quantizations": [
6320
+ "4-bit",
6321
+ "8-bit",
6322
+ "none"
6077
6323
  ],
6078
- "intra_message_sep": "<|im_end|>",
6079
- "stop_token_ids": [
6080
- 92542
6324
+ "model_id": "Tele-AI/telechat-7B"
6325
+ },
6326
+ {
6327
+ "model_format": "gptq",
6328
+ "model_size_in_billions": 7,
6329
+ "quantizations": [
6330
+ "int4",
6331
+ "int8"
6081
6332
  ],
6082
- "stop": [
6083
- "<|im_end|>"
6084
- ]
6333
+ "model_id": "Tele-AI/telechat-7B-{quantization}"
6334
+ },
6335
+ {
6336
+ "model_format": "pytorch",
6337
+ "model_size_in_billions": 12,
6338
+ "quantizations": [
6339
+ "4-bit",
6340
+ "8-bit",
6341
+ "none"
6342
+ ],
6343
+ "model_id": "Tele-AI/TeleChat-12B"
6344
+ },
6345
+ {
6346
+ "model_format": "gptq",
6347
+ "model_size_in_billions": 12,
6348
+ "quantizations": [
6349
+ "int4",
6350
+ "int8"
6351
+ ],
6352
+ "model_id": "Tele-AI/TeleChat-12B-{quantization}"
6353
+ },
6354
+ {
6355
+ "model_format": "pytorch",
6356
+ "model_size_in_billions": 52,
6357
+ "quantizations": [
6358
+ "4-bit",
6359
+ "8-bit",
6360
+ "none"
6361
+ ],
6362
+ "model_id": "Tele-AI/TeleChat-52B"
6363
+ }
6364
+ ],
6365
+ "prompt_style": {
6366
+ "style_name": "NO_COLON_TWO",
6367
+ "system_prompt": "You are a helpful assistant.",
6368
+ "roles": [
6369
+ "<_user>",
6370
+ "<_bot>"
6371
+ ],
6372
+ "intra_message_sep": "",
6373
+ "inter_message_sep": "",
6374
+ "stop": [
6375
+ "<_end>",
6376
+ "<_start>"
6377
+ ],
6378
+ "stop_token_ids": [
6379
+ 160133,
6380
+ 160132
6381
+ ]
6085
6382
  }
6086
- }
6383
+ }
6087
6384
  ]
@@ -1496,6 +1496,127 @@
1496
1496
  "model_hub": "modelscope",
1497
1497
  "model_id": "01ai/Yi-1.5-34B-Chat",
1498
1498
  "model_revision": "master"
1499
+ },
1500
+ {
1501
+ "model_format": "gptq",
1502
+ "model_size_in_billions": 6,
1503
+ "quantizations": [
1504
+ "Int4"
1505
+ ],
1506
+ "model_id": "AI-ModelScope/Yi-1.5-6B-Chat-GPTQ",
1507
+ "model_hub": "modelscope",
1508
+ "model_revision": "master"
1509
+ },
1510
+ {
1511
+ "model_format": "gptq",
1512
+ "model_size_in_billions": 9,
1513
+ "quantizations": [
1514
+ "Int4"
1515
+ ],
1516
+ "model_id": "AI-ModelScope/Yi-1.5-9B-Chat-GPTQ",
1517
+ "model_hub": "modelscope",
1518
+ "model_revision": "master"
1519
+ },
1520
+ {
1521
+ "model_format": "gptq",
1522
+ "model_size_in_billions": 34,
1523
+ "quantizations": [
1524
+ "Int4"
1525
+ ],
1526
+ "model_id": "AI-ModelScope/Yi-1.5-34B-Chat-GPTQ",
1527
+ "model_hub": "modelscope",
1528
+ "model_revision": "master"
1529
+ },
1530
+ {
1531
+ "model_format": "awq",
1532
+ "model_size_in_billions": 6,
1533
+ "quantizations": [
1534
+ "Int4"
1535
+ ],
1536
+ "model_id": "AI-ModelScope/Yi-1.5-6B-Chat-AWQ",
1537
+ "model_hub": "modelscope",
1538
+ "model_revision": "master"
1539
+ },
1540
+ {
1541
+ "model_format": "awq",
1542
+ "model_size_in_billions": 9,
1543
+ "quantizations": [
1544
+ "Int4"
1545
+ ],
1546
+ "model_id": "AI-ModelScope/Yi-1.5-9B-Chat-AWQ",
1547
+ "model_hub": "modelscope",
1548
+ "model_revision": "master"
1549
+ },
1550
+ {
1551
+ "model_format": "awq",
1552
+ "model_size_in_billions": 34,
1553
+ "quantizations": [
1554
+ "Int4"
1555
+ ],
1556
+ "model_id": "AI-ModelScope/Yi-1.5-34B-Chat-AWQ",
1557
+ "model_hub": "modelscope",
1558
+ "model_revision": "master"
1559
+ }
1560
+ ],
1561
+ "prompt_style": {
1562
+ "style_name": "CHATML",
1563
+ "system_prompt": "",
1564
+ "roles": [
1565
+ "<|im_start|>user",
1566
+ "<|im_start|>assistant"
1567
+ ],
1568
+ "intra_message_sep": "<|im_end|>",
1569
+ "inter_message_sep": "",
1570
+ "stop_token_ids": [
1571
+ 2,
1572
+ 6,
1573
+ 7,
1574
+ 8
1575
+ ],
1576
+ "stop": [
1577
+ "<|endoftext|>",
1578
+ "<|im_start|>",
1579
+ "<|im_end|>",
1580
+ "<|im_sep|>"
1581
+ ]
1582
+ }
1583
+ },
1584
+ {
1585
+ "version": 1,
1586
+ "context_length": 16384,
1587
+ "model_name": "Yi-1.5-chat-16k",
1588
+ "model_lang": [
1589
+ "en",
1590
+ "zh"
1591
+ ],
1592
+ "model_ability": [
1593
+ "chat"
1594
+ ],
1595
+ "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
1596
+ "model_specs": [
1597
+ {
1598
+ "model_format": "pytorch",
1599
+ "model_size_in_billions": 9,
1600
+ "quantizations": [
1601
+ "4-bit",
1602
+ "8-bit",
1603
+ "none"
1604
+ ],
1605
+ "model_hub": "modelscope",
1606
+ "model_id": "01ai/Yi-1.5-9B-Chat-16K",
1607
+ "model_revision": "master"
1608
+ },
1609
+ {
1610
+ "model_format": "pytorch",
1611
+ "model_size_in_billions": 34,
1612
+ "quantizations": [
1613
+ "4-bit",
1614
+ "8-bit",
1615
+ "none"
1616
+ ],
1617
+ "model_hub": "modelscope",
1618
+ "model_id": "01ai/Yi-1.5-34B-Chat-16K",
1619
+ "model_revision": "master"
1499
1620
  }
1500
1621
  ],
1501
1622
  "prompt_style": {
@@ -3739,5 +3860,150 @@
3739
3860
  "<|im_end|>"
3740
3861
  ]
3741
3862
  }
3742
- }
3863
+ },
3864
+ {
3865
+ "version": 1,
3866
+ "context_length": 8192,
3867
+ "model_name": "cogvlm2",
3868
+ "model_lang": [
3869
+ "en",
3870
+ "zh"
3871
+ ],
3872
+ "model_ability": [
3873
+ "chat",
3874
+ "vision"
3875
+ ],
3876
+ "model_description": "CogVLM2 have achieved good results in many lists compared to the previous generation of CogVLM open source models. Its excellent performance can compete with some non-open source models.",
3877
+ "model_specs": [
3878
+ {
3879
+ "model_format": "pytorch",
3880
+ "model_size_in_billions": 20,
3881
+ "quantizations": [
3882
+ "none"
3883
+ ],
3884
+ "model_hub": "modelscope",
3885
+ "model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B",
3886
+ "model_revision": "master"
3887
+ },
3888
+ {
3889
+ "model_format": "pytorch",
3890
+ "model_size_in_billions": 20,
3891
+ "quantizations": [
3892
+ "int4"
3893
+ ],
3894
+ "model_hub": "modelscope",
3895
+ "model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B-{quantization}",
3896
+ "model_revision": "master"
3897
+ }
3898
+ ],
3899
+ "prompt_style": {
3900
+ "style_name": "LLAMA3",
3901
+ "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
3902
+ "roles": [
3903
+ "user",
3904
+ "assistant"
3905
+ ],
3906
+ "intra_message_sep": "\n\n",
3907
+ "inter_message_sep": "<|eot_id|>",
3908
+ "stop_token_ids": [
3909
+ 128001,
3910
+ 128009
3911
+ ],
3912
+ "stop": [
3913
+ "<|end_of_text|>",
3914
+ "<|eot_id|>"
3915
+ ]
3916
+ }
3917
+ },
3918
+ {
3919
+ "version": 1,
3920
+ "context_length": 8192,
3921
+ "model_name": "telechat",
3922
+ "model_lang": [
3923
+ "en",
3924
+ "zh"
3925
+ ],
3926
+ "model_ability": [
3927
+ "chat"
3928
+ ],
3929
+ "model_description": "The TeleChat is a large language model developed and trained by China Telecom Artificial Intelligence Technology Co., LTD. The 7B model base is trained with 1.5 trillion Tokens and 3 trillion Tokens and Chinese high-quality corpus.",
3930
+ "model_specs": [
3931
+ {
3932
+ "model_format": "pytorch",
3933
+ "model_size_in_billions": 7,
3934
+ "quantizations": [
3935
+ "4-bit",
3936
+ "8-bit",
3937
+ "none"
3938
+ ],
3939
+ "model_id": "TeleAI/telechat-7B",
3940
+ "model_hub": "modelscope",
3941
+ "model_revision": "master"
3942
+ },
3943
+ {
3944
+ "model_format": "gptq",
3945
+ "model_size_in_billions": 7,
3946
+ "quantizations": [
3947
+ "int4",
3948
+ "int8"
3949
+ ],
3950
+ "model_id": "TeleAI/telechat-7B-{quantization}",
3951
+ "model_hub": "modelscope",
3952
+ "model_revision": "master"
3953
+ },
3954
+ {
3955
+ "model_format": "pytorch",
3956
+ "model_size_in_billions": 12,
3957
+ "quantizations": [
3958
+ "4-bit",
3959
+ "8-bit",
3960
+ "none"
3961
+ ],
3962
+ "model_id": "TeleAI/TeleChat-12B",
3963
+ "model_hub": "modelscope",
3964
+ "model_revision": "master"
3965
+ },
3966
+ {
3967
+ "model_format": "gptq",
3968
+ "model_size_in_billions": 12,
3969
+ "quantizations": [
3970
+ "int4",
3971
+ "int8"
3972
+ ],
3973
+ "model_id": "TeleAI/TeleChat-12B-{quantization}",
3974
+ "model_hub": "modelscope",
3975
+ "model_revision": "master"
3976
+ },
3977
+ {
3978
+ "model_format": "pytorch",
3979
+ "model_size_in_billions": 52,
3980
+ "quantizations": [
3981
+ "4-bit",
3982
+ "8-bit",
3983
+ "none"
3984
+ ],
3985
+ "model_id": "TeleAI/TeleChat-52B",
3986
+ "model_hub": "modelscope",
3987
+ "model_revision": "master"
3988
+ }
3989
+ ],
3990
+ "prompt_style": {
3991
+ "style_name": "NO_COLON_TWO",
3992
+ "system_prompt": "You are a helpful assistant.",
3993
+ "roles": [
3994
+ "<_user>",
3995
+ "<_bot>"
3996
+ ],
3997
+ "intra_message_sep": "",
3998
+ "inter_message_sep": "",
3999
+ "stop": [
4000
+ "<_end>",
4001
+ "<_start>"
4002
+ ],
4003
+ "stop_token_ids": [
4004
+ 160133,
4005
+ 160132
4006
+ ]
4007
+ }
4008
+ }
3743
4009
  ]