xinference 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (124):
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +4 -7
  3. xinference/client/handlers.py +3 -0
  4. xinference/core/chat_interface.py +6 -1
  5. xinference/core/model.py +2 -0
  6. xinference/core/scheduler.py +4 -7
  7. xinference/core/supervisor.py +114 -23
  8. xinference/core/worker.py +70 -4
  9. xinference/deploy/local.py +2 -1
  10. xinference/model/audio/core.py +11 -0
  11. xinference/model/audio/cosyvoice.py +16 -5
  12. xinference/model/audio/kokoro.py +139 -0
  13. xinference/model/audio/melotts.py +110 -0
  14. xinference/model/audio/model_spec.json +80 -0
  15. xinference/model/audio/model_spec_modelscope.json +18 -0
  16. xinference/model/audio/whisper.py +35 -10
  17. xinference/model/llm/llama_cpp/core.py +21 -14
  18. xinference/model/llm/llm_family.json +527 -1
  19. xinference/model/llm/llm_family.py +4 -1
  20. xinference/model/llm/llm_family_modelscope.json +495 -3
  21. xinference/model/llm/memory.py +1 -1
  22. xinference/model/llm/mlx/core.py +24 -6
  23. xinference/model/llm/transformers/core.py +9 -1
  24. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  25. xinference/model/llm/transformers/qwen2_vl.py +20 -3
  26. xinference/model/llm/transformers/utils.py +22 -11
  27. xinference/model/llm/utils.py +115 -1
  28. xinference/model/llm/vllm/core.py +14 -4
  29. xinference/model/llm/vllm/xavier/block.py +3 -4
  30. xinference/model/llm/vllm/xavier/block_tracker.py +71 -58
  31. xinference/model/llm/vllm/xavier/collective.py +74 -0
  32. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  33. xinference/model/llm/vllm/xavier/executor.py +18 -16
  34. xinference/model/llm/vllm/xavier/scheduler.py +79 -63
  35. xinference/model/llm/vllm/xavier/test/test_xavier.py +60 -35
  36. xinference/model/llm/vllm/xavier/transfer.py +53 -32
  37. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  38. xinference/thirdparty/melo/__init__.py +0 -0
  39. xinference/thirdparty/melo/api.py +135 -0
  40. xinference/thirdparty/melo/app.py +61 -0
  41. xinference/thirdparty/melo/attentions.py +459 -0
  42. xinference/thirdparty/melo/commons.py +160 -0
  43. xinference/thirdparty/melo/configs/config.json +94 -0
  44. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  45. xinference/thirdparty/melo/data_utils.py +413 -0
  46. xinference/thirdparty/melo/download_utils.py +67 -0
  47. xinference/thirdparty/melo/infer.py +25 -0
  48. xinference/thirdparty/melo/init_downloads.py +14 -0
  49. xinference/thirdparty/melo/losses.py +58 -0
  50. xinference/thirdparty/melo/main.py +36 -0
  51. xinference/thirdparty/melo/mel_processing.py +174 -0
  52. xinference/thirdparty/melo/models.py +1030 -0
  53. xinference/thirdparty/melo/modules.py +598 -0
  54. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  55. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  56. xinference/thirdparty/melo/preprocess_text.py +135 -0
  57. xinference/thirdparty/melo/split_utils.py +174 -0
  58. xinference/thirdparty/melo/text/__init__.py +35 -0
  59. xinference/thirdparty/melo/text/chinese.py +199 -0
  60. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  61. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  62. xinference/thirdparty/melo/text/cleaner.py +36 -0
  63. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  64. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  65. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  66. xinference/thirdparty/melo/text/english.py +284 -0
  67. xinference/thirdparty/melo/text/english_bert.py +39 -0
  68. xinference/thirdparty/melo/text/english_utils/__init__.py +0 -0
  69. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  70. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  71. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  72. xinference/thirdparty/melo/text/es_phonemizer/__init__.py +0 -0
  73. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  74. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  75. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  76. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  77. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  78. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  79. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  80. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  81. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  82. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  83. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  84. xinference/thirdparty/melo/text/fr_phonemizer/__init__.py +0 -0
  85. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  86. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  87. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  88. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  89. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  90. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  91. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  92. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  93. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  94. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  95. xinference/thirdparty/melo/text/french.py +94 -0
  96. xinference/thirdparty/melo/text/french_bert.py +39 -0
  97. xinference/thirdparty/melo/text/japanese.py +647 -0
  98. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  99. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  100. xinference/thirdparty/melo/text/korean.py +192 -0
  101. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  102. xinference/thirdparty/melo/text/spanish.py +122 -0
  103. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  104. xinference/thirdparty/melo/text/symbols.py +290 -0
  105. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  106. xinference/thirdparty/melo/train.py +635 -0
  107. xinference/thirdparty/melo/train.sh +19 -0
  108. xinference/thirdparty/melo/transforms.py +209 -0
  109. xinference/thirdparty/melo/utils.py +424 -0
  110. xinference/types.py +2 -0
  111. xinference/web/ui/build/asset-manifest.json +3 -3
  112. xinference/web/ui/build/index.html +1 -1
  113. xinference/web/ui/build/static/js/{main.1eb206d1.js → main.b0936c54.js} +3 -3
  114. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  116. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/METADATA +37 -27
  117. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/RECORD +122 -45
  118. xinference/web/ui/build/static/js/main.1eb206d1.js.map +0 -1
  119. xinference/web/ui/node_modules/.cache/babel-loader/2213d49de260e1f67c888081b18f120f5225462b829ae57c9e05a05cec83689d.json +0 -1
  120. /xinference/web/ui/build/static/js/{main.1eb206d1.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  121. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/LICENSE +0 -0
  122. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/WHEEL +0 -0
  123. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/entry_points.txt +0 -0
  124. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.json

@@ -7125,6 +7125,91 @@
       "<|endoftext|>"
     ]
   },
+  {
+    "version":1,
+    "context_length":128000,
+    "model_name":"qwen2.5-vl-instruct",
+    "model_lang":[
+      "en",
+      "zh"
+    ],
+    "model_ability":[
+      "chat",
+      "vision"
+    ],
+    "model_description":"Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model familities.",
+    "model_specs":[
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":3,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-3B-Instruct"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":7,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":72,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-72B-Instruct"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":3,
+        "quantizations":[
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id":"mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":7,
+        "quantizations":[
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id":"mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":72,
+        "quantizations":[
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id":"mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
+      }
+    ],
+    "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+    "stop_token_ids": [
+      151645,
+      151643
+    ],
+    "stop": [
+      "<|im_end|>",
+      "<|endoftext|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 32768,
@@ -7212,7 +7297,7 @@
       "zh"
     ],
     "model_ability":[
-      "chat",
+      "generate",
       "audio"
     ],
     "model_description":"Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
@@ -8716,6 +8801,372 @@
       "<|im_end|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "deepseek-r1-distill-qwen",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "deepseek-r1-distill-qwen is distilled from DeepSeek-R1 based on Qwen",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "casperhansen/deepseek-r1-distill-qwen-1.5b-awq"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-1.5B_GPTQ-int4"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Qwen-1.5B-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-1.5B-{quantization}"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-7B_AWQ"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-7B_GPTQ-int4"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-7B-{quantization}"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "casperhansen/deepseek-r1-distill-qwen-14b-awq"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Qwen-14B-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-14B-{quantization}"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "casperhansen/deepseek-r1-distill-qwen-32b-awq"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Qwen-32B-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-32B-{quantization}"
+      }
+    ],
+    "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+    "stop_token_ids": [
+      151643
+    ],
+    "stop": [
+      "<|end▁of▁sentence|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "deepseek-r1-distill-llama",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "deepseek-r1-distill-llama is distilled from DeepSeek-R1 based on Llama",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_AWQ"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_GPTQ-int4"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Llama-8B-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-8B-{quantization}"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "casperhansen/deepseek-r1-distill-llama-70b-awq"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "empirischtech/DeepSeek-R1-Distill-Llama-70B-gptq-4bit"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "quantization_parts": {
+          "Q6_K": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "Q8_0": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "F16": [
+            "00001-of-00003",
+            "00002-of-00003",
+            "00003-of-00003"
+          ]
+        },
+        "model_id": "unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf",
+        "model_file_name_split_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}/DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-70B-{quantization}"
+      }
+    ],
+    "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+    "stop_token_ids": [
+      151643
+    ],
+    "stop": [
+      "<|end▁of▁sentence|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 8192,
@@ -9085,5 +9536,80 @@
       "<|user|>",
       "<|observation|>"
     ]
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "internlm3-instruct",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct-gptq-int4"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct-awq"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "q2_k",
+          "q3_k_m",
+          "q4_0",
+          "q4_k_m",
+          "q5_0",
+          "q5_k_m",
+          "q6_k",
+          "q8_0"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct-gguf",
+        "model_file_name_template": "internlm3-8b-instruct-{quantization}.gguf"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "4bit"
+        ],
+        "model_id":"mlx-community/internlm3-8b-instruct-{quantization}"
+      }
+    ],
+    "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+    "stop_token_ids": [
+      2,
+      128131
+    ],
+    "stop": [
+      "</s>",
+      "<|im_end|>"
+    ]
   }
 ]
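
Each new family also carries a Jinja2 `chat_template` that turns a message list into the prompt string the model expects; generation then halts on the listed `stop` strings or `stop_token_ids`. Rendering the internlm3-instruct template above with plain Jinja2 shows the resulting prompt shape (the harness and the "<s>" bos_token value are illustrative assumptions):

from jinja2 import Template

# The internlm3-instruct chat_template from the diff above, verbatim.
chat_template = (
    "{{ bos_token }}{% for message in messages %}"
    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

prompt = Template(chat_template).render(
    bos_token="<s>", messages=messages, add_generation_prompt=True
)
print(prompt)
# <s><|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant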
xinference/model/llm/llm_family.py

@@ -538,7 +538,10 @@ def _generate_model_file_names(
     )
     need_merge = False
 
-    if llm_spec.quantization_parts is None:
+    if (
+        llm_spec.quantization_parts is None
+        or quantization not in llm_spec.quantization_parts
+    ):
         file_names.append(final_file_name)
     elif quantization is not None and quantization in llm_spec.quantization_parts:
         parts = llm_spec.quantization_parts[quantization]
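
This is the one functional change in llm_family.py: previously, when a spec defined `quantization_parts` but the requested quantization had no shard list (e.g. Q4_K_M for DeepSeek-R1-Distill-Llama-70B above), neither branch matched and no file name was generated. The widened condition routes such quantizations through the single-file path. A standalone sketch of the patched behavior (a simplified stand-in, not the real function):

def generate_file_names(quantization_parts, quantization, final_file_name):
    # Simplified stand-in for _generate_model_file_names' branching.
    file_names = []
    if quantization_parts is None or quantization not in quantization_parts:
        # Patched: the single-file path is now also taken when this
        # quantization is not sharded.
        file_names.append(final_file_name)
    else:
        # Sharded path: one file per listed part (real code uses
        # model_file_name_split_template; this naming is illustrative).
        for part in quantization_parts[quantization]:
            file_names.append(final_file_name.replace(".gguf", f"-{part}.gguf"))
    return file_names

parts = {"Q8_0": ["00001-of-00002", "00002-of-00002"]}
print(generate_file_names(parts, "Q4_K_M", "model-Q4_K_M.gguf"))
# ['model-Q4_K_M.gguf']  (was [] before the patch)
print(generate_file_names(parts, "Q8_0", "model-Q8_0.gguf"))
# ['model-Q8_0-00001-of-00002.gguf', 'model-Q8_0-00002-of-00002.gguf']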