xinference 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of xinference might be problematic.

Files changed (45)
  1. xinference/_version.py +3 -3
  2. xinference/core/chat_interface.py +39 -24
  3. xinference/model/llm/__init__.py +3 -0
  4. xinference/model/llm/core.py +2 -5
  5. xinference/model/llm/llama_cpp/core.py +52 -16
  6. xinference/model/llm/llm_family.json +364 -21
  7. xinference/model/llm/llm_family_modelscope.json +258 -23
  8. xinference/model/llm/mlx/core.py +15 -11
  9. xinference/model/llm/{reasoning_parsers/deepseek_r1_reasoning_parser.py → reasoning_parser.py} +19 -14
  10. xinference/model/llm/sglang/core.py +2 -0
  11. xinference/model/llm/transformers/core.py +3 -2
  12. xinference/model/llm/transformers/gemma3.py +185 -0
  13. xinference/model/llm/transformers/intern_vl.py +0 -2
  14. xinference/model/llm/utils.py +78 -32
  15. xinference/model/llm/vllm/core.py +10 -3
  16. xinference/types.py +2 -2
  17. xinference/web/ui/build/asset-manifest.json +6 -6
  18. xinference/web/ui/build/index.html +1 -1
  19. xinference/web/ui/build/static/css/main.b494ae7e.css +2 -0
  20. xinference/web/ui/build/static/css/main.b494ae7e.css.map +1 -0
  21. xinference/web/ui/build/static/js/main.3cea968e.js +3 -0
  22. xinference/web/ui/build/static/js/main.3cea968e.js.map +1 -0
  23. xinference/web/ui/node_modules/.cache/babel-loader/7f59e45e3f268ab8a4788b6fb024cf8dab088736dff22f5a3a39c122a83ab930.json +1 -0
  24. xinference/web/ui/node_modules/.cache/babel-loader/cc97b49285d7717c63374766c789141a4329a04582ab32756d7e0e614d4c5c7f.json +1 -0
  25. xinference/web/ui/node_modules/.cache/babel-loader/dcd60488509450bfff37bfff56de2c096d51de17dd00ec60d4db49c8b483ada1.json +1 -0
  26. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +1 -0
  27. xinference/web/ui/src/locales/en.json +2 -2
  28. xinference/web/ui/src/locales/zh.json +1 -1
  29. {xinference-1.3.1.dist-info → xinference-1.4.0.dist-info}/METADATA +3 -3
  30. {xinference-1.3.1.dist-info → xinference-1.4.0.dist-info}/RECORD +35 -36
  31. xinference/model/llm/reasoning_parsers/__init__.py +0 -13
  32. xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +0 -98
  33. xinference/web/ui/build/static/css/main.f8177338.css +0 -2
  34. xinference/web/ui/build/static/css/main.f8177338.css.map +0 -1
  35. xinference/web/ui/build/static/js/main.55b70cb7.js +0 -3
  36. xinference/web/ui/build/static/js/main.55b70cb7.js.map +0 -1
  37. xinference/web/ui/node_modules/.cache/babel-loader/2deac8d5636974533e3714f34e94fc754f9153a07c6ee11e72846cb8eae47e4b.json +0 -1
  38. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +0 -1
  39. xinference/web/ui/node_modules/.cache/babel-loader/e23d476fcbf6fd69c8986bf82133d257d28aa8fc9a5cab231d81c1c75c58cd99.json +0 -1
  40. xinference/web/ui/node_modules/.cache/babel-loader/e7a8c37fda8725cab69c7ef8c627060bd7fc806adc67e00fe628ba148cb86d7f.json +0 -1
  41. /xinference/web/ui/build/static/js/{main.55b70cb7.js.LICENSE.txt → main.3cea968e.js.LICENSE.txt} +0 -0
  42. {xinference-1.3.1.dist-info → xinference-1.4.0.dist-info}/LICENSE +0 -0
  43. {xinference-1.3.1.dist-info → xinference-1.4.0.dist-info}/WHEEL +0 -0
  44. {xinference-1.3.1.dist-info → xinference-1.4.0.dist-info}/entry_points.txt +0 -0
  45. {xinference-1.3.1.dist-info → xinference-1.4.0.dist-info}/top_level.txt +0 -0
@@ -5786,6 +5786,265 @@
  "<start_of_turn>"
  ]
  },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "gemma-3-1b-it",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-1b-it"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-1b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-1b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-1b-it-{quantization}"
+ }
+ ],
+ "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+ "stop_token_ids": [
+ 1,
+ 105,
+ 106
+ ],
+ "stop": [
+ "<eos>",
+ "<end_of_turn>",
+ "<start_of_turn>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "gemma-3-it",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-4b-it"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-12b-it"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-27b-it"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-4b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-4b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-12b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-12b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-27b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-27b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-4b-it-{quantization}"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-12b-it-{quantization}"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-27b-it-{quantization}"
+ }
+ ],
+ "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+ "stop_token_ids": [
+ 1,
+ 105,
+ 106
+ ],
+ "stop": [
+ "<eos>",
+ "<end_of_turn>",
+ "<start_of_turn>"
+ ]
+ },
  {
  "version": 1,
  "context_length": 8192,
@@ -6923,7 +7182,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-1B"
+ "model_id": "OpenGVLab/InternVL2_5-1B-MPO"
  },
  {
  "model_format": "pytorch",
@@ -6933,7 +7192,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-2B"
+ "model_id": "OpenGVLab/InternVL2_5-2B-MPO"
  },
  {
  "model_format": "pytorch",
@@ -6943,7 +7202,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-4B"
+ "model_id": "OpenGVLab/InternVL2_5-4B-MPO"
  },
  {
  "model_format": "awq",
@@ -6961,7 +7220,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-8B"
+ "model_id": "OpenGVLab/InternVL2_5-8B-MPO"
  },
  {
  "model_format": "awq",
@@ -6969,7 +7228,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-8B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-8B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -6979,7 +7238,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-26B"
+ "model_id": "OpenGVLab/InternVL2_5-26B-MPO"
  },
  {
  "model_format": "awq",
@@ -6987,7 +7246,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-26B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-26B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -6997,7 +7256,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-38B"
+ "model_id": "OpenGVLab/InternVL2_5-38B-MPO"
  },
  {
  "model_format": "awq",
@@ -7005,7 +7264,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-38B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-38B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -7015,7 +7274,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-78B"
+ "model_id": "OpenGVLab/InternVL2_5-78B-MPO"
  },
  {
  "model_format": "awq",
@@ -7023,7 +7282,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-78B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-78B-MPO-AWQ"
  }
  ],
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
@@ -7892,7 +8151,7 @@
  "model_id": "mlx-community/DeepSeek-V3-{quantization}"
  }
  ],
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "chat_template": "{% if messages %} {% if system or tools %} {% if system %} {{ system }} {% endif %} {% if tools %} {# Handle tools here if needed #} {% endif %} {% endif %} {% for message in messages %} {% set last = loop.index == loop.length %} {% if message.role == \"user\" %} <|User|> {% if tools and last %} Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}. Do not use variables. {{ tools }} {% endif %} {{ message.content }} {% if last %} <|Assistant|> {% endif %} {% elif message.role == \"assistant\" %} <|Assistant|> {% if message.tool_calls %} <|tool▁calls▁begin|> {% for tool in message.tool_calls %} <|tool▁call▁begin|> {\"name\": \"{{ tool.function.name }}\", \"parameters\": {{ tool.function.arguments }}} <|tool▁call▁end|> {% endfor %} <|tool▁calls▁end|> {% else %} {{ message.content }} {% if not last %} <|end▁of▁sentence|> {% endif %} {% endif %} {% elif message.role == \"tool\" %} <|tool▁outputs▁begin|> <|tool▁output▁begin|> {{ message.content }} <|tool▁output▁end|> <|tool▁outputs▁end|> {% if last and message.role != \"assistant\" %} <|Assistant|> {% endif %} {% endif %} {% endfor %} {% else %} {% if system %} {{ system }} {% endif %} {% if prompt %} <|User|> {{ prompt }} {% endif %} <|Assistant|> {{ response }} {% if response %} {{ response }} {% endif %} {% endif %}",
  "stop_token_ids": [
  1
  ],
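The replacement DeepSeek-V3 template instructs the model to answer tool-enabled prompts with a single JSON object of the form {"name": function name, "parameters": dictionary of argument name and its value}. A hedged sketch of what consuming such output could look like (illustrative only; this is not xinference's actual tool-call parser, and the function name is hypothetical):

```python
# Sketch: decoding the JSON tool call the rewritten DeepSeek-V3 template
# asks the model to emit between <|tool▁call▁begin|> and <|tool▁call▁end|>.
import json

raw = '{"name": "get_weather", "parameters": {"city": "Paris"}}'  # hypothetical model output
call = json.loads(raw)
assert set(call) == {"name", "parameters"}  # the shape the template specifies
print(call["name"], call["parameters"])  # get_weather {'city': 'Paris'}
```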
@@ -9449,7 +9708,7 @@
  },
  {
  "version": 1,
- "context_length": 32768,
+ "context_length": 131072,
  "model_name": "QwQ-32B",
  "model_lang": [
  "en",
@@ -9496,15 +9755,99 @@
  "model_size_in_billions": 32,
  "quantizations": [
  "fp16",
- "Q2_k",
- "Q3_K_M",
- "Q4_0",
- "Q4_K_M",
- "Q5_0",
- "Q5_K_M",
- "Q6_K",
- "Q8_0"
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0"
  ],
+ "quantization_parts": {
+ "fp16": [
+ "00001-of-000017",
+ "00002-of-000017",
+ "00003-of-000017",
+ "00004-of-000017",
+ "00005-of-000017",
+ "00006-of-000017",
+ "00007-of-000017",
+ "00008-of-000017",
+ "00009-of-000017",
+ "00010-of-000017",
+ "00011-of-000017",
+ "00012-of-000017",
+ "00013-of-000017",
+ "00014-of-000017",
+ "00015-of-000017",
+ "00016-of-000017",
+ "00017-of-000017"
+ ],
+ "q2_k": [
+ "00001-of-00004",
+ "00002-of-00004",
+ "00003-of-00004",
+ "00004-of-00004"
+ ],
+ "q3_k_m": [
+ "00001-of-00005",
+ "00002-of-00005",
+ "00003-of-00005",
+ "00004-of-00005",
+ "00005-of-00005"
+ ],
+ "q4_0": [
+ "00001-of-00005",
+ "00002-of-00005",
+ "00003-of-00005",
+ "00004-of-00005",
+ "00005-of-00005"
+ ],
+ "q4_k_m": [
+ "00001-of-00005",
+ "00002-of-00005",
+ "00003-of-00005",
+ "00004-of-00005",
+ "00005-of-00005"
+ ],
+ "q5_0": [
+ "00001-of-00006",
+ "00002-of-00006",
+ "00003-of-00006",
+ "00004-of-00006",
+ "00005-of-00006",
+ "00006-of-00006"
+ ],
+ "q5_k_m": [
+ "00001-of-00006",
+ "00002-of-00006",
+ "00003-of-00006",
+ "00004-of-00006",
+ "00005-of-00006",
+ "00006-of-00006"
+ ],
+ "q6_k": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "q8_0": [
+ "00001-of-00009",
+ "00002-of-00009",
+ "00003-of-00009",
+ "00004-of-00009",
+ "00005-of-00009",
+ "00006-of-00009",
+ "00007-of-00009",
+ "00008-of-00009",
+ "00009-of-00009"
+ ]
+ },
  "model_id": "Qwen/QwQ-32B-GGUF",
  "model_file_name_template": "qwq-32b-{quantization}.gguf"
  }
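Besides lowercasing the quantization names to match the repo's file naming, this hunk adds a `quantization_parts` table enumerating the shards of each split GGUF. A sketch of how a part list could expand into concrete shard file names, assuming the `-{part}` suffix is inserted before the `.gguf` extension of the resolved template (an assumption for illustration; the JSON itself only pairs the template with the part lists):

```python
# Sketch: expanding "quantization_parts" into shard file names for one
# quantization of a split GGUF (parts abbreviated to q4_k_m from the hunk above).
template = "qwq-32b-{quantization}.gguf"
parts = {
    "q4_k_m": [
        "00001-of-00005", "00002-of-00005", "00003-of-00005",
        "00004-of-00005", "00005-of-00005",
    ],
}

def shard_names(quantization: str) -> list[str]:
    # Resolve the single-file name, then splice "-{part}" before ".gguf".
    base = template.format(quantization=quantization)
    stem = base[: -len(".gguf")]
    return [f"{stem}-{part}.gguf" for part in parts[quantization]]

print(shard_names("q4_k_m")[0])  # qwq-32b-q4_k_m-00001-of-00005.gguf
```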