xinference 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (124):
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +4 -7
  3. xinference/client/handlers.py +3 -0
  4. xinference/core/chat_interface.py +6 -1
  5. xinference/core/model.py +2 -0
  6. xinference/core/scheduler.py +4 -7
  7. xinference/core/supervisor.py +114 -23
  8. xinference/core/worker.py +70 -4
  9. xinference/deploy/local.py +2 -1
  10. xinference/model/audio/core.py +11 -0
  11. xinference/model/audio/cosyvoice.py +16 -5
  12. xinference/model/audio/kokoro.py +139 -0
  13. xinference/model/audio/melotts.py +110 -0
  14. xinference/model/audio/model_spec.json +80 -0
  15. xinference/model/audio/model_spec_modelscope.json +18 -0
  16. xinference/model/audio/whisper.py +35 -10
  17. xinference/model/llm/llama_cpp/core.py +21 -14
  18. xinference/model/llm/llm_family.json +527 -1
  19. xinference/model/llm/llm_family.py +4 -1
  20. xinference/model/llm/llm_family_modelscope.json +495 -3
  21. xinference/model/llm/memory.py +1 -1
  22. xinference/model/llm/mlx/core.py +24 -6
  23. xinference/model/llm/transformers/core.py +9 -1
  24. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  25. xinference/model/llm/transformers/qwen2_vl.py +20 -3
  26. xinference/model/llm/transformers/utils.py +22 -11
  27. xinference/model/llm/utils.py +115 -1
  28. xinference/model/llm/vllm/core.py +14 -4
  29. xinference/model/llm/vllm/xavier/block.py +3 -4
  30. xinference/model/llm/vllm/xavier/block_tracker.py +71 -58
  31. xinference/model/llm/vllm/xavier/collective.py +74 -0
  32. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  33. xinference/model/llm/vllm/xavier/executor.py +18 -16
  34. xinference/model/llm/vllm/xavier/scheduler.py +79 -63
  35. xinference/model/llm/vllm/xavier/test/test_xavier.py +60 -35
  36. xinference/model/llm/vllm/xavier/transfer.py +53 -32
  37. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  38. xinference/thirdparty/melo/__init__.py +0 -0
  39. xinference/thirdparty/melo/api.py +135 -0
  40. xinference/thirdparty/melo/app.py +61 -0
  41. xinference/thirdparty/melo/attentions.py +459 -0
  42. xinference/thirdparty/melo/commons.py +160 -0
  43. xinference/thirdparty/melo/configs/config.json +94 -0
  44. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  45. xinference/thirdparty/melo/data_utils.py +413 -0
  46. xinference/thirdparty/melo/download_utils.py +67 -0
  47. xinference/thirdparty/melo/infer.py +25 -0
  48. xinference/thirdparty/melo/init_downloads.py +14 -0
  49. xinference/thirdparty/melo/losses.py +58 -0
  50. xinference/thirdparty/melo/main.py +36 -0
  51. xinference/thirdparty/melo/mel_processing.py +174 -0
  52. xinference/thirdparty/melo/models.py +1030 -0
  53. xinference/thirdparty/melo/modules.py +598 -0
  54. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  55. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  56. xinference/thirdparty/melo/preprocess_text.py +135 -0
  57. xinference/thirdparty/melo/split_utils.py +174 -0
  58. xinference/thirdparty/melo/text/__init__.py +35 -0
  59. xinference/thirdparty/melo/text/chinese.py +199 -0
  60. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  61. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  62. xinference/thirdparty/melo/text/cleaner.py +36 -0
  63. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  64. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  65. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  66. xinference/thirdparty/melo/text/english.py +284 -0
  67. xinference/thirdparty/melo/text/english_bert.py +39 -0
  68. xinference/thirdparty/melo/text/english_utils/__init__.py +0 -0
  69. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  70. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  71. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  72. xinference/thirdparty/melo/text/es_phonemizer/__init__.py +0 -0
  73. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  74. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  75. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  76. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  77. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  78. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  79. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  80. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  81. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  82. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  83. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  84. xinference/thirdparty/melo/text/fr_phonemizer/__init__.py +0 -0
  85. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  86. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  87. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  88. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  89. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  90. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  91. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  92. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  93. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  94. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  95. xinference/thirdparty/melo/text/french.py +94 -0
  96. xinference/thirdparty/melo/text/french_bert.py +39 -0
  97. xinference/thirdparty/melo/text/japanese.py +647 -0
  98. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  99. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  100. xinference/thirdparty/melo/text/korean.py +192 -0
  101. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  102. xinference/thirdparty/melo/text/spanish.py +122 -0
  103. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  104. xinference/thirdparty/melo/text/symbols.py +290 -0
  105. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  106. xinference/thirdparty/melo/train.py +635 -0
  107. xinference/thirdparty/melo/train.sh +19 -0
  108. xinference/thirdparty/melo/transforms.py +209 -0
  109. xinference/thirdparty/melo/utils.py +424 -0
  110. xinference/types.py +2 -0
  111. xinference/web/ui/build/asset-manifest.json +3 -3
  112. xinference/web/ui/build/index.html +1 -1
  113. xinference/web/ui/build/static/js/{main.1eb206d1.js → main.b0936c54.js} +3 -3
  114. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  116. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/METADATA +37 -27
  117. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/RECORD +122 -45
  118. xinference/web/ui/build/static/js/main.1eb206d1.js.map +0 -1
  119. xinference/web/ui/node_modules/.cache/babel-loader/2213d49de260e1f67c888081b18f120f5225462b829ae57c9e05a05cec83689d.json +0 -1
  120. /xinference/web/ui/build/static/js/{main.1eb206d1.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  121. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/LICENSE +0 -0
  122. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/WHEEL +0 -0
  123. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/entry_points.txt +0 -0
  124. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.json

@@ -7125,6 +7125,91 @@
       "<|endoftext|>"
     ]
   },
+  {
+    "version":1,
+    "context_length":128000,
+    "model_name":"qwen2.5-vl-instruct",
+    "model_lang":[
+      "en",
+      "zh"
+    ],
+    "model_ability":[
+      "chat",
+      "vision"
+    ],
+    "model_description":"Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model familities.",
+    "model_specs":[
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":3,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-3B-Instruct"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":7,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":72,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-72B-Instruct"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":3,
+        "quantizations":[
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id":"mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":7,
+        "quantizations":[
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id":"mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":72,
+        "quantizations":[
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id":"mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
+      }
+    ],
+    "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+    "stop_token_ids": [
+      151645,
+      151643
+    ],
+    "stop": [
+      "<|im_end|>",
+      "<|endoftext|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 32768,
@@ -7212,7 +7297,7 @@
       "zh"
     ],
     "model_ability":[
-      "chat",
+      "generate",
       "audio"
     ],
     "model_description":"Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
@@ -8716,6 +8801,372 @@
       "<|im_end|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "deepseek-r1-distill-qwen",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "deepseek-r1-distill-qwen is distilled from DeepSeek-R1 based on Qwen",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "casperhansen/deepseek-r1-distill-qwen-1.5b-awq"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-1.5B_GPTQ-int4"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Qwen-1.5B-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-1.5B-{quantization}"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-7B_AWQ"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-7B_GPTQ-int4"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-7B-{quantization}"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "casperhansen/deepseek-r1-distill-qwen-14b-awq"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Qwen-14B-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-14B-{quantization}"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "casperhansen/deepseek-r1-distill-qwen-32b-awq"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Qwen-32B-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-32B-{quantization}"
+      }
+    ],
+    "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+    "stop_token_ids": [
+      151643
+    ],
+    "stop": [
+      "<|end▁of▁sentence|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "deepseek-r1-distill-llama",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "deepseek-r1-distill-llama is distilled from DeepSeek-R1 based on Llama",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_AWQ"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_GPTQ-int4"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Llama-8B-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-8B-{quantization}"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "casperhansen/deepseek-r1-distill-llama-70b-awq"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "empirischtech/DeepSeek-R1-Distill-Llama-70B-gptq-4bit"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "quantization_parts": {
+          "Q6_K": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "Q8_0": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "F16": [
+            "00001-of-00003",
+            "00002-of-00003",
+            "00003-of-00003"
+          ]
+        },
+        "model_id": "unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf",
+        "model_file_name_split_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}/DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-70B-{quantization}"
+      }
+    ],
+    "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+    "stop_token_ids": [
+      151643
+    ],
+    "stop": [
+      "<|end▁of▁sentence|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 8192,
@@ -9085,5 +9536,80 @@
       "<|user|>",
       "<|observation|>"
     ]
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "internlm3-instruct",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct-gptq-int4"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct-awq"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "q2_k",
+          "q3_k_m",
+          "q4_0",
+          "q4_k_m",
+          "q5_0",
+          "q5_k_m",
+          "q6_k",
+          "q8_0"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct-gguf",
+        "model_file_name_template": "internlm3-8b-instruct-{quantization}.gguf"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "4bit"
+        ],
+        "model_id":"mlx-community/internlm3-8b-instruct-{quantization}"
+      }
+    ],
+    "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+    "stop_token_ids": [
+      2,
+      128131
+    ],
+    "stop": [
+      "</s>",
+      "<|im_end|>"
+    ]
   }
 ]
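
Each new family also carries a Jinja2 `chat_template` that turns a message list into the prompt string the model expects; generation then halts on the listed `stop` strings or `stop_token_ids`. Rendering the internlm3-instruct template above with plain Jinja2 shows the resulting prompt shape (the harness and the "<s>" bos_token value are illustrative assumptions):

from jinja2 import Template

# The internlm3-instruct chat_template from the diff above, verbatim.
chat_template = (
    "{{ bos_token }}{% for message in messages %}"
    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

prompt = Template(chat_template).render(
    bos_token="<s>", messages=messages, add_generation_prompt=True
)
print(prompt)
# <s><|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant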
xinference/model/llm/llm_family.py

@@ -538,7 +538,10 @@ def _generate_model_file_names(
     )
     need_merge = False
 
-    if llm_spec.quantization_parts is None:
+    if (
+        llm_spec.quantization_parts is None
+        or quantization not in llm_spec.quantization_parts
+    ):
         file_names.append(final_file_name)
     elif quantization is not None and quantization in llm_spec.quantization_parts:
         parts = llm_spec.quantization_parts[quantization]
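
This is the one functional change in llm_family.py: previously, when a spec defined `quantization_parts` but the requested quantization had no shard list (e.g. Q4_K_M for DeepSeek-R1-Distill-Llama-70B above), neither branch matched and no file name was generated. The widened condition routes such quantizations through the single-file path. A standalone sketch of the patched behavior (a simplified stand-in, not the real function):

def generate_file_names(quantization_parts, quantization, final_file_name):
    # Simplified stand-in for _generate_model_file_names' branching.
    file_names = []
    if quantization_parts is None or quantization not in quantization_parts:
        # Patched: the single-file path is now also taken when this
        # quantization is not sharded.
        file_names.append(final_file_name)
    else:
        # Sharded path: one file per listed part (real code uses
        # model_file_name_split_template; this naming is illustrative).
        for part in quantization_parts[quantization]:
            file_names.append(final_file_name.replace(".gguf", f"-{part}.gguf"))
    return file_names

parts = {"Q8_0": ["00001-of-00002", "00002-of-00002"]}
print(generate_file_names(parts, "Q4_K_M", "model-Q4_K_M.gguf"))
# ['model-Q4_K_M.gguf']  (was [] before the patch)
print(generate_file_names(parts, "Q8_0", "model-Q8_0.gguf"))
# ['model-Q8_0-00001-of-00002.gguf', 'model-Q8_0-00002-of-00002.gguf']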