xinference 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +4 -7
- xinference/client/handlers.py +3 -0
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +2 -0
- xinference/core/scheduler.py +4 -7
- xinference/core/supervisor.py +114 -23
- xinference/core/worker.py +70 -4
- xinference/deploy/local.py +2 -1
- xinference/model/audio/core.py +11 -0
- xinference/model/audio/cosyvoice.py +16 -5
- xinference/model/audio/kokoro.py +139 -0
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +80 -0
- xinference/model/audio/model_spec_modelscope.json +18 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/llm/llama_cpp/core.py +21 -14
- xinference/model/llm/llm_family.json +527 -1
- xinference/model/llm/llm_family.py +4 -1
- xinference/model/llm/llm_family_modelscope.json +495 -3
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +24 -6
- xinference/model/llm/transformers/core.py +9 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -3
- xinference/model/llm/transformers/utils.py +22 -11
- xinference/model/llm/utils.py +115 -1
- xinference/model/llm/vllm/core.py +14 -4
- xinference/model/llm/vllm/xavier/block.py +3 -4
- xinference/model/llm/vllm/xavier/block_tracker.py +71 -58
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/executor.py +18 -16
- xinference/model/llm/vllm/xavier/scheduler.py +79 -63
- xinference/model/llm/vllm/xavier/test/test_xavier.py +60 -35
- xinference/model/llm/vllm/xavier/transfer.py +53 -32
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/melo/__init__.py +0 -0
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/__init__.py +0 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/__init__.py +0 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/__init__.py +0 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.1eb206d1.js → main.b0936c54.js} +3 -3
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/METADATA +37 -27
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/RECORD +122 -45
- xinference/web/ui/build/static/js/main.1eb206d1.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2213d49de260e1f67c888081b18f120f5225462b829ae57c9e05a05cec83689d.json +0 -1
- /xinference/web/ui/build/static/js/{main.1eb206d1.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/LICENSE +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/WHEEL +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/entry_points.txt +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/top_level.txt +0 -0
|
@@ -7125,6 +7125,91 @@
|
|
|
7125
7125
|
"<|endoftext|>"
|
|
7126
7126
|
]
|
|
7127
7127
|
},
|
|
7128
|
+
{
|
|
7129
|
+
"version":1,
|
|
7130
|
+
"context_length":128000,
|
|
7131
|
+
"model_name":"qwen2.5-vl-instruct",
|
|
7132
|
+
"model_lang":[
|
|
7133
|
+
"en",
|
|
7134
|
+
"zh"
|
|
7135
|
+
],
|
|
7136
|
+
"model_ability":[
|
|
7137
|
+
"chat",
|
|
7138
|
+
"vision"
|
|
7139
|
+
],
|
|
7140
|
+
"model_description":"Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model familities.",
|
|
7141
|
+
"model_specs":[
|
|
7142
|
+
{
|
|
7143
|
+
"model_format":"pytorch",
|
|
7144
|
+
"model_size_in_billions":3,
|
|
7145
|
+
"quantizations":[
|
|
7146
|
+
"none"
|
|
7147
|
+
],
|
|
7148
|
+
"model_id":"Qwen/Qwen2.5-VL-3B-Instruct"
|
|
7149
|
+
},
|
|
7150
|
+
{
|
|
7151
|
+
"model_format":"pytorch",
|
|
7152
|
+
"model_size_in_billions":7,
|
|
7153
|
+
"quantizations":[
|
|
7154
|
+
"none"
|
|
7155
|
+
],
|
|
7156
|
+
"model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
|
|
7157
|
+
},
|
|
7158
|
+
{
|
|
7159
|
+
"model_format":"pytorch",
|
|
7160
|
+
"model_size_in_billions":72,
|
|
7161
|
+
"quantizations":[
|
|
7162
|
+
"none"
|
|
7163
|
+
],
|
|
7164
|
+
"model_id":"Qwen/Qwen2.5-VL-72B-Instruct"
|
|
7165
|
+
},
|
|
7166
|
+
{
|
|
7167
|
+
"model_format":"mlx",
|
|
7168
|
+
"model_size_in_billions":3,
|
|
7169
|
+
"quantizations":[
|
|
7170
|
+
"3bit",
|
|
7171
|
+
"4bit",
|
|
7172
|
+
"6bit",
|
|
7173
|
+
"8bit",
|
|
7174
|
+
"bf16"
|
|
7175
|
+
],
|
|
7176
|
+
"model_id":"mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
|
|
7177
|
+
},
|
|
7178
|
+
{
|
|
7179
|
+
"model_format":"mlx",
|
|
7180
|
+
"model_size_in_billions":7,
|
|
7181
|
+
"quantizations":[
|
|
7182
|
+
"3bit",
|
|
7183
|
+
"4bit",
|
|
7184
|
+
"6bit",
|
|
7185
|
+
"8bit",
|
|
7186
|
+
"bf16"
|
|
7187
|
+
],
|
|
7188
|
+
"model_id":"mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
|
|
7189
|
+
},
|
|
7190
|
+
{
|
|
7191
|
+
"model_format":"mlx",
|
|
7192
|
+
"model_size_in_billions":72,
|
|
7193
|
+
"quantizations":[
|
|
7194
|
+
"3bit",
|
|
7195
|
+
"4bit",
|
|
7196
|
+
"6bit",
|
|
7197
|
+
"8bit",
|
|
7198
|
+
"bf16"
|
|
7199
|
+
],
|
|
7200
|
+
"model_id":"mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
|
|
7201
|
+
}
|
|
7202
|
+
],
|
|
7203
|
+
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
|
|
7204
|
+
"stop_token_ids": [
|
|
7205
|
+
151645,
|
|
7206
|
+
151643
|
|
7207
|
+
],
|
|
7208
|
+
"stop": [
|
|
7209
|
+
"<|im_end|>",
|
|
7210
|
+
"<|endoftext|>"
|
|
7211
|
+
]
|
|
7212
|
+
},
|
|
7128
7213
|
{
|
|
7129
7214
|
"version": 1,
|
|
7130
7215
|
"context_length": 32768,
|
|
@@ -7212,7 +7297,7 @@
|
|
|
7212
7297
|
"zh"
|
|
7213
7298
|
],
|
|
7214
7299
|
"model_ability":[
|
|
7215
|
-
"
|
|
7300
|
+
"generate",
|
|
7216
7301
|
"audio"
|
|
7217
7302
|
],
|
|
7218
7303
|
"model_description":"Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
|
|
@@ -8716,6 +8801,372 @@
|
|
|
8716
8801
|
"<|im_end|>"
|
|
8717
8802
|
]
|
|
8718
8803
|
},
|
|
8804
|
+
{
|
|
8805
|
+
"version": 1,
|
|
8806
|
+
"context_length": 131072,
|
|
8807
|
+
"model_name": "deepseek-r1-distill-qwen",
|
|
8808
|
+
"model_lang": [
|
|
8809
|
+
"en",
|
|
8810
|
+
"zh"
|
|
8811
|
+
],
|
|
8812
|
+
"model_ability": [
|
|
8813
|
+
"chat"
|
|
8814
|
+
],
|
|
8815
|
+
"model_description": "deepseek-r1-distill-qwen is distilled from DeepSeek-R1 based on Qwen",
|
|
8816
|
+
"model_specs": [
|
|
8817
|
+
{
|
|
8818
|
+
"model_format": "pytorch",
|
|
8819
|
+
"model_size_in_billions": "1_5",
|
|
8820
|
+
"quantizations": [
|
|
8821
|
+
"4-bit",
|
|
8822
|
+
"8-bit",
|
|
8823
|
+
"none"
|
|
8824
|
+
],
|
|
8825
|
+
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
|
|
8826
|
+
},
|
|
8827
|
+
{
|
|
8828
|
+
"model_format": "awq",
|
|
8829
|
+
"model_size_in_billions": "1_5",
|
|
8830
|
+
"quantizations": [
|
|
8831
|
+
"Int4"
|
|
8832
|
+
],
|
|
8833
|
+
"model_id": "casperhansen/deepseek-r1-distill-qwen-1.5b-awq"
|
|
8834
|
+
},
|
|
8835
|
+
{
|
|
8836
|
+
"model_format": "gptq",
|
|
8837
|
+
"model_size_in_billions": "1_5",
|
|
8838
|
+
"quantizations": [
|
|
8839
|
+
"Int4"
|
|
8840
|
+
],
|
|
8841
|
+
"model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-1.5B_GPTQ-int4"
|
|
8842
|
+
},
|
|
8843
|
+
{
|
|
8844
|
+
"model_format": "ggufv2",
|
|
8845
|
+
"model_size_in_billions": "1_5",
|
|
8846
|
+
"quantizations": [
|
|
8847
|
+
"Q2_K",
|
|
8848
|
+
"Q2_K_L",
|
|
8849
|
+
"Q3_K_M",
|
|
8850
|
+
"Q4_K_M",
|
|
8851
|
+
"Q5_K_M",
|
|
8852
|
+
"Q6_K",
|
|
8853
|
+
"Q8_0"
|
|
8854
|
+
],
|
|
8855
|
+
"model_id": "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
|
|
8856
|
+
"model_file_name_template": "DeepSeek-R1-Distill-Qwen-1.5B-{quantization}.gguf"
|
|
8857
|
+
},
|
|
8858
|
+
{
|
|
8859
|
+
"model_format": "mlx",
|
|
8860
|
+
"model_size_in_billions": "1_5",
|
|
8861
|
+
"quantizations": [
|
|
8862
|
+
"3bit",
|
|
8863
|
+
"4bit",
|
|
8864
|
+
"6bit",
|
|
8865
|
+
"8bit",
|
|
8866
|
+
"bf16"
|
|
8867
|
+
],
|
|
8868
|
+
"model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-1.5B-{quantization}"
|
|
8869
|
+
},
|
|
8870
|
+
{
|
|
8871
|
+
"model_format": "pytorch",
|
|
8872
|
+
"model_size_in_billions": 7,
|
|
8873
|
+
"quantizations": [
|
|
8874
|
+
"4-bit",
|
|
8875
|
+
"8-bit",
|
|
8876
|
+
"none"
|
|
8877
|
+
],
|
|
8878
|
+
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
|
|
8879
|
+
},
|
|
8880
|
+
{
|
|
8881
|
+
"model_format": "awq",
|
|
8882
|
+
"model_size_in_billions": 7,
|
|
8883
|
+
"quantizations": [
|
|
8884
|
+
"Int4"
|
|
8885
|
+
],
|
|
8886
|
+
"model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-7B_AWQ"
|
|
8887
|
+
},
|
|
8888
|
+
{
|
|
8889
|
+
"model_format": "gptq",
|
|
8890
|
+
"model_size_in_billions": 7,
|
|
8891
|
+
"quantizations": [
|
|
8892
|
+
"Int4"
|
|
8893
|
+
],
|
|
8894
|
+
"model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-7B_GPTQ-int4"
|
|
8895
|
+
},
|
|
8896
|
+
{
|
|
8897
|
+
"model_format": "ggufv2",
|
|
8898
|
+
"model_size_in_billions": 7,
|
|
8899
|
+
"quantizations": [
|
|
8900
|
+
"Q2_K",
|
|
8901
|
+
"Q2_K_L",
|
|
8902
|
+
"Q3_K_M",
|
|
8903
|
+
"Q4_K_M",
|
|
8904
|
+
"Q5_K_M",
|
|
8905
|
+
"Q6_K",
|
|
8906
|
+
"Q8_0",
|
|
8907
|
+
"F16"
|
|
8908
|
+
],
|
|
8909
|
+
"model_id": "unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF",
|
|
8910
|
+
"model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf"
|
|
8911
|
+
},
|
|
8912
|
+
{
|
|
8913
|
+
"model_format": "mlx",
|
|
8914
|
+
"model_size_in_billions": 7,
|
|
8915
|
+
"quantizations": [
|
|
8916
|
+
"3bit",
|
|
8917
|
+
"4bit",
|
|
8918
|
+
"6bit",
|
|
8919
|
+
"8bit",
|
|
8920
|
+
"bf16"
|
|
8921
|
+
],
|
|
8922
|
+
"model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-7B-{quantization}"
|
|
8923
|
+
},
|
|
8924
|
+
{
|
|
8925
|
+
"model_format": "pytorch",
|
|
8926
|
+
"model_size_in_billions": 14,
|
|
8927
|
+
"quantizations": [
|
|
8928
|
+
"4-bit",
|
|
8929
|
+
"8-bit",
|
|
8930
|
+
"none"
|
|
8931
|
+
],
|
|
8932
|
+
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
|
|
8933
|
+
},
|
|
8934
|
+
{
|
|
8935
|
+
"model_format": "awq",
|
|
8936
|
+
"model_size_in_billions": 14,
|
|
8937
|
+
"quantizations": [
|
|
8938
|
+
"Int4"
|
|
8939
|
+
],
|
|
8940
|
+
"model_id": "casperhansen/deepseek-r1-distill-qwen-14b-awq"
|
|
8941
|
+
},
|
|
8942
|
+
{
|
|
8943
|
+
"model_format": "ggufv2",
|
|
8944
|
+
"model_size_in_billions": 14,
|
|
8945
|
+
"quantizations": [
|
|
8946
|
+
"Q2_K",
|
|
8947
|
+
"Q2_K_L",
|
|
8948
|
+
"Q3_K_M",
|
|
8949
|
+
"Q4_K_M",
|
|
8950
|
+
"Q5_K_M",
|
|
8951
|
+
"Q6_K",
|
|
8952
|
+
"Q8_0",
|
|
8953
|
+
"F16"
|
|
8954
|
+
],
|
|
8955
|
+
"model_id": "unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF",
|
|
8956
|
+
"model_file_name_template": "DeepSeek-R1-Distill-Qwen-14B-{quantization}.gguf"
|
|
8957
|
+
},
|
|
8958
|
+
{
|
|
8959
|
+
"model_format": "mlx",
|
|
8960
|
+
"model_size_in_billions": 14,
|
|
8961
|
+
"quantizations": [
|
|
8962
|
+
"3bit",
|
|
8963
|
+
"4bit",
|
|
8964
|
+
"6bit",
|
|
8965
|
+
"8bit",
|
|
8966
|
+
"bf16"
|
|
8967
|
+
],
|
|
8968
|
+
"model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-14B-{quantization}"
|
|
8969
|
+
},
|
|
8970
|
+
{
|
|
8971
|
+
"model_format": "pytorch",
|
|
8972
|
+
"model_size_in_billions": 32,
|
|
8973
|
+
"quantizations": [
|
|
8974
|
+
"4-bit",
|
|
8975
|
+
"8-bit",
|
|
8976
|
+
"none"
|
|
8977
|
+
],
|
|
8978
|
+
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
|
|
8979
|
+
},
|
|
8980
|
+
{
|
|
8981
|
+
"model_format": "awq",
|
|
8982
|
+
"model_size_in_billions": 32,
|
|
8983
|
+
"quantizations": [
|
|
8984
|
+
"Int4"
|
|
8985
|
+
],
|
|
8986
|
+
"model_id": "casperhansen/deepseek-r1-distill-qwen-32b-awq"
|
|
8987
|
+
},
|
|
8988
|
+
{
|
|
8989
|
+
"model_format": "ggufv2",
|
|
8990
|
+
"model_size_in_billions": 32,
|
|
8991
|
+
"quantizations": [
|
|
8992
|
+
"Q2_K",
|
|
8993
|
+
"Q2_K_L",
|
|
8994
|
+
"Q3_K_M",
|
|
8995
|
+
"Q4_K_M",
|
|
8996
|
+
"Q5_K_M",
|
|
8997
|
+
"Q6_K",
|
|
8998
|
+
"Q8_0",
|
|
8999
|
+
"F16"
|
|
9000
|
+
],
|
|
9001
|
+
"model_id": "unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF",
|
|
9002
|
+
"model_file_name_template": "DeepSeek-R1-Distill-Qwen-32B-{quantization}.gguf"
|
|
9003
|
+
},
|
|
9004
|
+
{
|
|
9005
|
+
"model_format": "mlx",
|
|
9006
|
+
"model_size_in_billions": 32,
|
|
9007
|
+
"quantizations": [
|
|
9008
|
+
"3bit",
|
|
9009
|
+
"4bit",
|
|
9010
|
+
"6bit",
|
|
9011
|
+
"8bit",
|
|
9012
|
+
"bf16"
|
|
9013
|
+
],
|
|
9014
|
+
"model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-32B-{quantization}"
|
|
9015
|
+
}
|
|
9016
|
+
],
|
|
9017
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
|
|
9018
|
+
"stop_token_ids": [
|
|
9019
|
+
151643
|
|
9020
|
+
],
|
|
9021
|
+
"stop": [
|
|
9022
|
+
"<|end▁of▁sentence|>"
|
|
9023
|
+
]
|
|
9024
|
+
},
|
|
9025
|
+
{
|
|
9026
|
+
"version": 1,
|
|
9027
|
+
"context_length": 131072,
|
|
9028
|
+
"model_name": "deepseek-r1-distill-llama",
|
|
9029
|
+
"model_lang": [
|
|
9030
|
+
"en",
|
|
9031
|
+
"zh"
|
|
9032
|
+
],
|
|
9033
|
+
"model_ability": [
|
|
9034
|
+
"chat"
|
|
9035
|
+
],
|
|
9036
|
+
"model_description": "deepseek-r1-distill-llama is distilled from DeepSeek-R1 based on Llama",
|
|
9037
|
+
"model_specs": [
|
|
9038
|
+
{
|
|
9039
|
+
"model_format": "pytorch",
|
|
9040
|
+
"model_size_in_billions": 8,
|
|
9041
|
+
"quantizations": [
|
|
9042
|
+
"4-bit",
|
|
9043
|
+
"8-bit",
|
|
9044
|
+
"none"
|
|
9045
|
+
],
|
|
9046
|
+
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
|
|
9047
|
+
},
|
|
9048
|
+
{
|
|
9049
|
+
"model_format": "awq",
|
|
9050
|
+
"model_size_in_billions": 8,
|
|
9051
|
+
"quantizations": [
|
|
9052
|
+
"Int4"
|
|
9053
|
+
],
|
|
9054
|
+
"model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_AWQ"
|
|
9055
|
+
},
|
|
9056
|
+
{
|
|
9057
|
+
"model_format": "gptq",
|
|
9058
|
+
"model_size_in_billions": 8,
|
|
9059
|
+
"quantizations": [
|
|
9060
|
+
"Int4"
|
|
9061
|
+
],
|
|
9062
|
+
"model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_GPTQ-int4"
|
|
9063
|
+
},
|
|
9064
|
+
{
|
|
9065
|
+
"model_format": "ggufv2",
|
|
9066
|
+
"model_size_in_billions": "1_5",
|
|
9067
|
+
"quantizations": [
|
|
9068
|
+
"Q2_K",
|
|
9069
|
+
"Q2_K_L",
|
|
9070
|
+
"Q3_K_M",
|
|
9071
|
+
"Q4_K_M",
|
|
9072
|
+
"Q5_K_M",
|
|
9073
|
+
"Q6_K",
|
|
9074
|
+
"Q8_0",
|
|
9075
|
+
"F16"
|
|
9076
|
+
],
|
|
9077
|
+
"model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
|
|
9078
|
+
"model_file_name_template": "DeepSeek-R1-Distill-Llama-8B-{quantization}.gguf"
|
|
9079
|
+
},
|
|
9080
|
+
{
|
|
9081
|
+
"model_format": "mlx",
|
|
9082
|
+
"model_size_in_billions": 8,
|
|
9083
|
+
"quantizations": [
|
|
9084
|
+
"3bit",
|
|
9085
|
+
"4bit",
|
|
9086
|
+
"6bit",
|
|
9087
|
+
"8bit",
|
|
9088
|
+
"bf16"
|
|
9089
|
+
],
|
|
9090
|
+
"model_id": "mlx-community/DeepSeek-R1-Distill-Llama-8B-{quantization}"
|
|
9091
|
+
},
|
|
9092
|
+
{
|
|
9093
|
+
"model_format": "pytorch",
|
|
9094
|
+
"model_size_in_billions": 70,
|
|
9095
|
+
"quantizations": [
|
|
9096
|
+
"4-bit",
|
|
9097
|
+
"8-bit",
|
|
9098
|
+
"none"
|
|
9099
|
+
],
|
|
9100
|
+
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
|
|
9101
|
+
},
|
|
9102
|
+
{
|
|
9103
|
+
"model_format": "awq",
|
|
9104
|
+
"model_size_in_billions": 70,
|
|
9105
|
+
"quantizations": [
|
|
9106
|
+
"Int4"
|
|
9107
|
+
],
|
|
9108
|
+
"model_id": "casperhansen/deepseek-r1-distill-llama-70b-awq"
|
|
9109
|
+
},
|
|
9110
|
+
{
|
|
9111
|
+
"model_format": "gptq",
|
|
9112
|
+
"model_size_in_billions": 70,
|
|
9113
|
+
"quantizations": [
|
|
9114
|
+
"Int4"
|
|
9115
|
+
],
|
|
9116
|
+
"model_id": "empirischtech/DeepSeek-R1-Distill-Llama-70B-gptq-4bit"
|
|
9117
|
+
},
|
|
9118
|
+
{
|
|
9119
|
+
"model_format": "ggufv2",
|
|
9120
|
+
"model_size_in_billions": 70,
|
|
9121
|
+
"quantizations": [
|
|
9122
|
+
"Q2_K",
|
|
9123
|
+
"Q2_K_L",
|
|
9124
|
+
"Q3_K_M",
|
|
9125
|
+
"Q4_K_M",
|
|
9126
|
+
"Q5_K_M",
|
|
9127
|
+
"Q6_K",
|
|
9128
|
+
"Q8_0",
|
|
9129
|
+
"F16"
|
|
9130
|
+
],
|
|
9131
|
+
"quantization_parts": {
|
|
9132
|
+
"Q6_K": [
|
|
9133
|
+
"00001-of-00002",
|
|
9134
|
+
"00002-of-00002"
|
|
9135
|
+
],
|
|
9136
|
+
"Q8_0": [
|
|
9137
|
+
"00001-of-00002",
|
|
9138
|
+
"00002-of-00002"
|
|
9139
|
+
],
|
|
9140
|
+
"F16": [
|
|
9141
|
+
"00001-of-00003",
|
|
9142
|
+
"00002-of-00003",
|
|
9143
|
+
"00003-of-00003"
|
|
9144
|
+
]
|
|
9145
|
+
},
|
|
9146
|
+
"model_id": "unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF",
|
|
9147
|
+
"model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf",
|
|
9148
|
+
"model_file_name_split_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}/DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf"
|
|
9149
|
+
},
|
|
9150
|
+
{
|
|
9151
|
+
"model_format": "mlx",
|
|
9152
|
+
"model_size_in_billions": 70,
|
|
9153
|
+
"quantizations": [
|
|
9154
|
+
"3bit",
|
|
9155
|
+
"4bit",
|
|
9156
|
+
"6bit",
|
|
9157
|
+
"8bit"
|
|
9158
|
+
],
|
|
9159
|
+
"model_id": "mlx-community/DeepSeek-R1-Distill-Llama-70B-{quantization}"
|
|
9160
|
+
}
|
|
9161
|
+
],
|
|
9162
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
|
|
9163
|
+
"stop_token_ids": [
|
|
9164
|
+
151643
|
|
9165
|
+
],
|
|
9166
|
+
"stop": [
|
|
9167
|
+
"<|end▁of▁sentence|>"
|
|
9168
|
+
]
|
|
9169
|
+
},
|
|
8719
9170
|
{
|
|
8720
9171
|
"version": 1,
|
|
8721
9172
|
"context_length": 8192,
|
|
@@ -9085,5 +9536,80 @@
|
|
|
9085
9536
|
"<|user|>",
|
|
9086
9537
|
"<|observation|>"
|
|
9087
9538
|
]
|
|
9539
|
+
},
|
|
9540
|
+
{
|
|
9541
|
+
"version": 1,
|
|
9542
|
+
"context_length": 32768,
|
|
9543
|
+
"model_name": "internlm3-instruct",
|
|
9544
|
+
"model_lang": [
|
|
9545
|
+
"en",
|
|
9546
|
+
"zh"
|
|
9547
|
+
],
|
|
9548
|
+
"model_ability": [
|
|
9549
|
+
"chat",
|
|
9550
|
+
"tools"
|
|
9551
|
+
],
|
|
9552
|
+
"model_description": "InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning.",
|
|
9553
|
+
"model_specs": [
|
|
9554
|
+
{
|
|
9555
|
+
"model_format": "pytorch",
|
|
9556
|
+
"model_size_in_billions": 8,
|
|
9557
|
+
"quantizations": [
|
|
9558
|
+
"4-bit",
|
|
9559
|
+
"8-bit",
|
|
9560
|
+
"none"
|
|
9561
|
+
],
|
|
9562
|
+
"model_id": "internlm/internlm3-8b-instruct"
|
|
9563
|
+
},
|
|
9564
|
+
{
|
|
9565
|
+
"model_format": "gptq",
|
|
9566
|
+
"model_size_in_billions": 8,
|
|
9567
|
+
"quantizations": [
|
|
9568
|
+
"Int4"
|
|
9569
|
+
],
|
|
9570
|
+
"model_id": "internlm/internlm3-8b-instruct-gptq-int4"
|
|
9571
|
+
},
|
|
9572
|
+
{
|
|
9573
|
+
"model_format": "awq",
|
|
9574
|
+
"model_size_in_billions": 8,
|
|
9575
|
+
"quantizations": [
|
|
9576
|
+
"Int4"
|
|
9577
|
+
],
|
|
9578
|
+
"model_id": "internlm/internlm3-8b-instruct-awq"
|
|
9579
|
+
},
|
|
9580
|
+
{
|
|
9581
|
+
"model_format": "ggufv2",
|
|
9582
|
+
"model_size_in_billions": 8,
|
|
9583
|
+
"quantizations": [
|
|
9584
|
+
"q2_k",
|
|
9585
|
+
"q3_k_m",
|
|
9586
|
+
"q4_0",
|
|
9587
|
+
"q4_k_m",
|
|
9588
|
+
"q5_0",
|
|
9589
|
+
"q5_k_m",
|
|
9590
|
+
"q6_k",
|
|
9591
|
+
"q8_0"
|
|
9592
|
+
],
|
|
9593
|
+
"model_id": "internlm/internlm3-8b-instruct-gguf",
|
|
9594
|
+
"model_file_name_template": "internlm3-8b-instruct-{quantization}.gguf"
|
|
9595
|
+
},
|
|
9596
|
+
{
|
|
9597
|
+
"model_format":"mlx",
|
|
9598
|
+
"model_size_in_billions":8,
|
|
9599
|
+
"quantizations":[
|
|
9600
|
+
"4bit"
|
|
9601
|
+
],
|
|
9602
|
+
"model_id":"mlx-community/internlm3-8b-instruct-{quantization}"
|
|
9603
|
+
}
|
|
9604
|
+
],
|
|
9605
|
+
"chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
9606
|
+
"stop_token_ids": [
|
|
9607
|
+
2,
|
|
9608
|
+
128131
|
|
9609
|
+
],
|
|
9610
|
+
"stop": [
|
|
9611
|
+
"</s>",
|
|
9612
|
+
"<|im_end|>"
|
|
9613
|
+
]
|
|
9088
9614
|
}
|
|
9089
9615
|
]
|
|
@@ -538,7 +538,10 @@ def _generate_model_file_names(
|
|
|
538
538
|
)
|
|
539
539
|
need_merge = False
|
|
540
540
|
|
|
541
|
-
if
|
|
541
|
+
if (
|
|
542
|
+
llm_spec.quantization_parts is None
|
|
543
|
+
or quantization not in llm_spec.quantization_parts
|
|
544
|
+
):
|
|
542
545
|
file_names.append(final_file_name)
|
|
543
546
|
elif quantization is not None and quantization in llm_spec.quantization_parts:
|
|
544
547
|
parts = llm_spec.quantization_parts[quantization]
|