xinference 0.15.0__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (84) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +204 -1
  3. xinference/client/restful/restful_client.py +4 -2
  4. xinference/core/image_interface.py +28 -0
  5. xinference/core/model.py +30 -2
  6. xinference/core/supervisor.py +6 -0
  7. xinference/model/audio/cosyvoice.py +3 -3
  8. xinference/model/audio/fish_speech.py +9 -9
  9. xinference/model/audio/model_spec.json +9 -9
  10. xinference/model/audio/whisper.py +4 -1
  11. xinference/model/image/core.py +2 -1
  12. xinference/model/image/model_spec.json +16 -4
  13. xinference/model/image/model_spec_modelscope.json +16 -4
  14. xinference/model/image/sdapi.py +136 -0
  15. xinference/model/image/stable_diffusion/core.py +163 -24
  16. xinference/model/llm/__init__.py +9 -1
  17. xinference/model/llm/llm_family.json +1241 -0
  18. xinference/model/llm/llm_family.py +3 -1
  19. xinference/model/llm/llm_family_modelscope.json +1301 -3
  20. xinference/model/llm/sglang/core.py +7 -0
  21. xinference/model/llm/transformers/chatglm.py +1 -1
  22. xinference/model/llm/transformers/core.py +6 -0
  23. xinference/model/llm/transformers/deepseek_v2.py +340 -0
  24. xinference/model/llm/transformers/qwen2_audio.py +168 -0
  25. xinference/model/llm/transformers/qwen2_vl.py +31 -5
  26. xinference/model/llm/utils.py +104 -84
  27. xinference/model/llm/vllm/core.py +13 -0
  28. xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +2 -3
  29. xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +1 -1
  30. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
  31. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
  32. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
  33. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
  34. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
  35. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
  36. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
  37. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
  38. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
  39. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
  40. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
  41. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
  42. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
  43. xinference/thirdparty/fish_speech/tools/api.py +79 -134
  44. xinference/thirdparty/fish_speech/tools/commons.py +35 -0
  45. xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
  46. xinference/thirdparty/fish_speech/tools/file.py +17 -0
  47. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
  48. xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
  49. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
  50. xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
  51. xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
  52. xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
  53. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
  54. xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
  55. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
  56. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
  57. xinference/thirdparty/fish_speech/tools/webui.py +12 -146
  58. xinference/types.py +7 -4
  59. xinference/web/ui/build/asset-manifest.json +6 -6
  60. xinference/web/ui/build/index.html +1 -1
  61. xinference/web/ui/build/static/css/{main.632e9148.css → main.5061c4c3.css} +2 -2
  62. xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
  63. xinference/web/ui/build/static/js/{main.9cfafbd6.js → main.29578905.js} +3 -3
  64. xinference/web/ui/build/static/js/main.29578905.js.map +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
  67. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/METADATA +13 -7
  68. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/RECORD +73 -75
  69. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
  70. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
  71. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
  72. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
  73. xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
  74. xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
  75. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
  76. xinference/web/ui/build/static/css/main.632e9148.css.map +0 -1
  77. xinference/web/ui/build/static/js/main.9cfafbd6.js.map +0 -1
  78. xinference/web/ui/node_modules/.cache/babel-loader/01d6d198156bacbd436c51435edbd4b2cacd47a79db929105eba30f74b67d48d.json +0 -1
  79. xinference/web/ui/node_modules/.cache/babel-loader/59eb25f514afcc4fefd1b309d192b2455f1e0aec68a9de598ca4b2333fe2c774.json +0 -1
  80. /xinference/web/ui/build/static/js/{main.9cfafbd6.js.LICENSE.txt → main.29578905.js.LICENSE.txt} +0 -0
  81. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/LICENSE +0 -0
  82. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/WHEEL +0 -0
  83. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/entry_points.txt +0 -0
  84. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/top_level.txt +0 -0
@@ -6828,6 +6828,33 @@
6828
6828
  ],
6829
6829
  "model_id":"Qwen/Qwen2-VL-2B-Instruct",
6830
6830
  "model_revision":"096da3b96240e3d66d35be0e5ccbe282eea8d6b1"
6831
+ },
6832
+ {
6833
+ "model_format":"gptq",
6834
+ "model_size_in_billions":2,
6835
+ "quantizations":[
6836
+ "Int8"
6837
+ ],
6838
+ "model_id":"Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8",
6839
+ "model_revision":"d15fb11857ccc566903e2e71341f9db7babb567b"
6840
+ },
6841
+ {
6842
+ "model_format":"gptq",
6843
+ "model_size_in_billions":2,
6844
+ "quantizations":[
6845
+ "Int4"
6846
+ ],
6847
+ "model_id":"Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
6848
+ "model_revision":"800d396518c82960ce6d231adecd07bbc474f0a9"
6849
+ },
6850
+ {
6851
+ "model_format":"awq",
6852
+ "model_size_in_billions":2,
6853
+ "quantizations":[
6854
+ "Int4"
6855
+ ],
6856
+ "model_id":"Qwen/Qwen2-VL-2B-Instruct-AWQ",
6857
+ "model_revision":"ea8c5854c0044e28626719292de0d9b1a671f6fc"
6831
6858
  },
6832
6859
  {
6833
6860
  "model_format":"pytorch",
@@ -6837,6 +6864,173 @@
6837
6864
  ],
6838
6865
  "model_id":"Qwen/Qwen2-VL-7B-Instruct",
6839
6866
  "model_revision":"6010982c1010c3b222fa98afc81575f124aa9bd6"
6867
+ },
6868
+ {
6869
+ "model_format":"gptq",
6870
+ "model_size_in_billions":7,
6871
+ "quantizations":[
6872
+ "Int8"
6873
+ ],
6874
+ "model_id":"Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8",
6875
+ "model_revision":"3d152a77eaccfd72d59baedb0b183a1b8fd56e48"
6876
+ },
6877
+ {
6878
+ "model_format":"gptq",
6879
+ "model_size_in_billions":7,
6880
+ "quantizations":[
6881
+ "Int4"
6882
+ ],
6883
+ "model_id":"Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4",
6884
+ "model_revision":"5ab897112fa83b9699826be8753ef9184585c77d"
6885
+ },
6886
+ {
6887
+ "model_format":"awq",
6888
+ "model_size_in_billions":7,
6889
+ "quantizations":[
6890
+ "Int4"
6891
+ ],
6892
+ "model_id":"Qwen/Qwen2-VL-7B-Instruct-AWQ",
6893
+ "model_revision":"f94216e8b513933bccd567bcd9b7350199f32538"
6894
+ },
6895
+ {
6896
+ "model_format":"pytorch",
6897
+ "model_size_in_billions":72,
6898
+ "quantizations":[
6899
+ "none"
6900
+ ],
6901
+ "model_id":"Qwen/Qwen2-VL-72B-Instruct"
6902
+ },
6903
+ {
6904
+ "model_format":"awq",
6905
+ "model_size_in_billions":72,
6906
+ "quantizations":[
6907
+ "Int4"
6908
+ ],
6909
+ "model_id":"Qwen/Qwen2-VL-72B-Instruct-AWQ"
6910
+ },
6911
+ {
6912
+ "model_format":"gptq",
6913
+ "model_size_in_billions":72,
6914
+ "quantizations":[
6915
+ "Int4",
6916
+ "Int8"
6917
+ ],
6918
+ "model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
6919
+ }
6920
+ ],
6921
+ "prompt_style":{
6922
+ "style_name":"QWEN",
6923
+ "system_prompt":"You are a helpful assistant",
6924
+ "roles":[
6925
+ "user",
6926
+ "assistant"
6927
+ ],
6928
+ "stop": [
6929
+ "<|im_end|>",
6930
+ "<|endoftext|>"
6931
+ ]
6932
+ }
6933
+ },
6934
+ {
6935
+ "version": 1,
6936
+ "context_length": 32768,
6937
+ "model_name": "minicpm3-4b",
6938
+ "model_lang": [
6939
+ "zh"
6940
+ ],
6941
+ "model_ability": [
6942
+ "chat"
6943
+ ],
6944
+ "model_description": "MiniCPM3-4B is the 3rd generation of MiniCPM series. The overall performance of MiniCPM3-4B surpasses Phi-3.5-mini-Instruct and GPT-3.5-Turbo-0125, being comparable with many recent 7B~9B models.",
6945
+ "model_specs": [
6946
+ {
6947
+ "model_format": "pytorch",
6948
+ "model_size_in_billions": 4,
6949
+ "quantizations": [
6950
+ "none"
6951
+ ],
6952
+ "model_id": "openbmb/MiniCPM3-4B",
6953
+ "model_revision": "75f9f1097d9d66d11f37fff49210bf940455f8ac"
6954
+ },
6955
+ {
6956
+ "model_format": "gptq",
6957
+ "model_size_in_billions": 4,
6958
+ "quantizations": [
6959
+ "none"
6960
+ ],
6961
+ "model_id": "openbmb/MiniCPM3-4B-GPTQ-Int4",
6962
+ "model_revision": "97a66a62f7d09c1ee35b087b42694716a8113dce"
6963
+ }
6964
+ ],
6965
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
6966
+ "stop_token_ids": [
6967
+ 1,
6968
+ 2
6969
+ ],
6970
+ "stop": [
6971
+ "<s>",
6972
+ "</s>"
6973
+ ]
6974
+ },
6975
+ {
6976
+ "version":1,
6977
+ "context_length":32768,
6978
+ "model_name":"qwen2-audio-instruct",
6979
+ "model_lang":[
6980
+ "en",
6981
+ "zh"
6982
+ ],
6983
+ "model_ability":[
6984
+ "chat",
6985
+ "audio"
6986
+ ],
6987
+ "model_description":"Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
6988
+ "model_specs":[
6989
+ {
6990
+ "model_format":"pytorch",
6991
+ "model_size_in_billions":7,
6992
+ "quantizations":[
6993
+ "none"
6994
+ ],
6995
+ "model_id":"Qwen/Qwen2-Audio-7B-Instruct",
6996
+ "model_revision":"bac62d2c6808845904c709c17a0402d817558c64"
6997
+ }
6998
+ ],
6999
+ "prompt_style":{
7000
+ "style_name":"QWEN",
7001
+ "system_prompt":"You are a helpful assistant",
7002
+ "roles":[
7003
+ "user",
7004
+ "assistant"
7005
+ ],
7006
+ "stop": [
7007
+ "<|im_end|>",
7008
+ "<|endoftext|>"
7009
+ ]
7010
+ }
7011
+ },
7012
+ {
7013
+ "version":1,
7014
+ "context_length":32768,
7015
+ "model_name":"qwen2-audio",
7016
+ "model_lang":[
7017
+ "en",
7018
+ "zh"
7019
+ ],
7020
+ "model_ability":[
7021
+ "chat",
7022
+ "audio"
7023
+ ],
7024
+ "model_description":"Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
7025
+ "model_specs":[
7026
+ {
7027
+ "model_format":"pytorch",
7028
+ "model_size_in_billions":7,
7029
+ "quantizations":[
7030
+ "none"
7031
+ ],
7032
+ "model_id":"Qwen/Qwen2-Audio-7B",
7033
+ "model_revision":"8577bc71d330c8fa32ffe9f8a1374100759f2466"
6840
7034
  }
6841
7035
  ],
6842
7036
  "prompt_style":{
@@ -6851,5 +7045,1052 @@
6851
7045
  "<|endoftext|>"
6852
7046
  ]
6853
7047
  }
7048
+ },
7049
+ {
7050
+ "version": 1,
7051
+ "context_length": 128000,
7052
+ "model_name": "deepseek-v2",
7053
+ "model_lang": [
7054
+ "en",
7055
+ "zh"
7056
+ ],
7057
+ "model_ability": [
7058
+ "generate"
7059
+ ],
7060
+ "model_description": "DeepSeek-V2, a strong Mixture-of-Experts (MoE) language model characterized by economical training and efficient inference. ",
7061
+ "model_specs": [
7062
+ {
7063
+ "model_format": "pytorch",
7064
+ "model_size_in_billions": 16,
7065
+ "quantizations": [
7066
+ "4-bit",
7067
+ "8-bit",
7068
+ "none"
7069
+ ],
7070
+ "model_id": "deepseek-ai/DeepSeek-V2-Lite",
7071
+ "model_revision": "604d5664dddd88a0433dbae533b7fe9472482de0"
7072
+ },
7073
+ {
7074
+ "model_format": "pytorch",
7075
+ "model_size_in_billions": 236,
7076
+ "quantizations": [
7077
+ "4-bit",
7078
+ "8-bit",
7079
+ "none"
7080
+ ],
7081
+ "model_id": "deepseek-ai/DeepSeek-V2",
7082
+ "model_revision": "4461458f186c35188585855f28f77af5661ad489"
7083
+ }
7084
+ ]
7085
+ },
7086
+ {
7087
+ "version": 1,
7088
+ "context_length": 128000,
7089
+ "model_name": "deepseek-v2-chat",
7090
+ "model_lang": [
7091
+ "en",
7092
+ "zh"
7093
+ ],
7094
+ "model_ability": [
7095
+ "chat"
7096
+ ],
7097
+ "model_description": "DeepSeek-V2, a strong Mixture-of-Experts (MoE) language model characterized by economical training and efficient inference. ",
7098
+ "model_specs": [
7099
+ {
7100
+ "model_format": "pytorch",
7101
+ "model_size_in_billions": 16,
7102
+ "quantizations": [
7103
+ "4-bit",
7104
+ "8-bit",
7105
+ "none"
7106
+ ],
7107
+ "model_id": "deepseek-ai/DeepSeek-V2-Lite-Chat",
7108
+ "model_revision": "85864749cd611b4353ce1decdb286193298f64c7"
7109
+ },
7110
+ {
7111
+ "model_format": "pytorch",
7112
+ "model_size_in_billions": 236,
7113
+ "quantizations": [
7114
+ "4-bit",
7115
+ "8-bit",
7116
+ "none"
7117
+ ],
7118
+ "model_id": "deepseek-ai/DeepSeek-V2-Chat",
7119
+ "model_revision": "8e3f5f6c2226787e41ba3e9283a06389d178c926"
7120
+ }
7121
+ ],
7122
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + '<|end▁of▁sentence|>' }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
7123
+ "stop_token_ids": [
7124
+ 100001
7125
+ ],
7126
+ "stop": [
7127
+ "<|end▁of▁sentence|>"
7128
+ ]
7129
+ },
7130
+ {
7131
+ "version": 1,
7132
+ "context_length": 128000,
7133
+ "model_name": "deepseek-v2-chat-0628",
7134
+ "model_lang": [
7135
+ "en",
7136
+ "zh"
7137
+ ],
7138
+ "model_ability": [
7139
+ "chat"
7140
+ ],
7141
+ "model_description": "DeepSeek-V2-Chat-0628 is an improved version of DeepSeek-V2-Chat. ",
7142
+ "model_specs": [
7143
+ {
7144
+ "model_format": "pytorch",
7145
+ "model_size_in_billions": 236,
7146
+ "quantizations": [
7147
+ "4-bit",
7148
+ "8-bit",
7149
+ "none"
7150
+ ],
7151
+ "model_id": "deepseek-ai/DeepSeek-V2-Chat-0628",
7152
+ "model_revision": "5d09e272c2b223830f4e84359cd9dd047a5d7c78"
7153
+ }
7154
+ ],
7155
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|User|>' + message['content'] }}{% elif message['role'] == 'assistant' %}{{ '<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>' }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|Assistant|>' }}{% endif %}",
7156
+ "stop_token_ids": [
7157
+ 100001
7158
+ ],
7159
+ "stop": [
7160
+ "<|end▁of▁sentence|>"
7161
+ ]
7162
+ },
7163
+ {
7164
+ "version": 1,
7165
+ "context_length": 128000,
7166
+ "model_name": "deepseek-v2.5",
7167
+ "model_lang": [
7168
+ "en",
7169
+ "zh"
7170
+ ],
7171
+ "model_ability": [
7172
+ "chat"
7173
+ ],
7174
+ "model_description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.",
7175
+ "model_specs": [
7176
+ {
7177
+ "model_format": "pytorch",
7178
+ "model_size_in_billions": 236,
7179
+ "quantizations": [
7180
+ "4-bit",
7181
+ "8-bit",
7182
+ "none"
7183
+ ],
7184
+ "model_id": "deepseek-ai/DeepSeek-V2.5",
7185
+ "model_revision": "24b08cb750e0c2757de112d2e16327cb21ed4833"
7186
+ }
7187
+ ],
7188
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{'<|begin▁of▁sentence|>'}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not 
ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
7189
+ "stop_token_ids": [
7190
+ 100001
7191
+ ],
7192
+ "stop": [
7193
+ "<|end▁of▁sentence|>"
7194
+ ]
7195
+ },
7196
+ {
7197
+ "version": 1,
7198
+ "context_length": 131072,
7199
+ "model_name": "yi-coder-chat",
7200
+ "model_lang": [
7201
+ "en"
7202
+ ],
7203
+ "model_ability": [
7204
+ "chat"
7205
+ ],
7206
+ "model_description": "Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.Excelling in long-context understanding with a maximum context length of 128K tokens.Supporting 52 major programming languages, including popular ones such as Java, Python, JavaScript, and C++.",
7207
+ "model_specs": [
7208
+ {
7209
+ "model_format": "pytorch",
7210
+ "model_size_in_billions": 9,
7211
+ "quantizations": [
7212
+ "none"
7213
+ ],
7214
+ "model_id": "01ai/Yi-Coder-9B-Chat",
7215
+ "model_revision": "356a1f8d4e4a606d0b879e54191ca809918576b8"
7216
+ },
7217
+ {
7218
+ "model_format": "pytorch",
7219
+ "model_size_in_billions": "1_5",
7220
+ "quantizations": [
7221
+ "none"
7222
+ ],
7223
+ "model_id": "01ai/Yi-Coder-1.5B-Chat",
7224
+ "model_revision": "92fdd1b2f1539ac990e7f4a921db5601da2f0299"
7225
+ }
7226
+ ],
7227
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
7228
+ "stop_token_ids": [
7229
+ 1,
7230
+ 2,
7231
+ 6,
7232
+ 7
7233
+ ],
7234
+ "stop": [
7235
+ "<|startoftext|>",
7236
+ "<|endoftext|>",
7237
+ "<|im_start|>",
7238
+ "<|im_end|>"
7239
+ ]
7240
+ },
7241
+ {
7242
+ "version": 1,
7243
+ "context_length": 131072,
7244
+ "model_name": "yi-coder",
7245
+ "model_lang": [
7246
+ "en"
7247
+ ],
7248
+ "model_ability": [
7249
+ "generate"
7250
+ ],
7251
+ "model_description": "Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.Excelling in long-context understanding with a maximum context length of 128K tokens.Supporting 52 major programming languages, including popular ones such as Java, Python, JavaScript, and C++.",
7252
+ "model_specs": [
7253
+ {
7254
+ "model_format": "pytorch",
7255
+ "model_size_in_billions": 9,
7256
+ "quantizations": [
7257
+ "none"
7258
+ ],
7259
+ "model_id": "01-ai/Yi-Coder-9B",
7260
+ "model_revision": "e20f8087a9507ac8bce409dc5db5d0c608124238"
7261
+ },
7262
+ {
7263
+ "model_format": "pytorch",
7264
+ "model_size_in_billions": "1_5",
7265
+ "quantizations": [
7266
+ "none"
7267
+ ],
7268
+ "model_id": "01-ai/Yi-Coder-1.5B",
7269
+ "model_revision": "00e59e64f47d3c78e4cfbdd345888479797e8109"
7270
+ }
7271
+ ]
7272
+ },
7273
+ {
7274
+ "version": 1,
7275
+ "context_length": 32768,
7276
+ "model_name": "qwen2.5",
7277
+ "model_lang": [
7278
+ "en",
7279
+ "zh"
7280
+ ],
7281
+ "model_ability": [
7282
+ "generate"
7283
+ ],
7284
+ "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.",
7285
+ "model_specs": [
7286
+ {
7287
+ "model_format": "pytorch",
7288
+ "model_size_in_billions": "0_5",
7289
+ "quantizations": [
7290
+ "4-bit",
7291
+ "8-bit",
7292
+ "none"
7293
+ ],
7294
+ "model_id": "Qwen/Qwen2.5-0.5B",
7295
+ "model_revision": "2630d3d2321bc1f1878f702166d1b2af019a7310"
7296
+ },
7297
+ {
7298
+ "model_format": "pytorch",
7299
+ "model_size_in_billions": "1_5",
7300
+ "quantizations": [
7301
+ "4-bit",
7302
+ "8-bit",
7303
+ "none"
7304
+ ],
7305
+ "model_id": "Qwen/Qwen2.5-1.5B",
7306
+ "model_revision": "e5dfabbcffd9b0c7b31d89b82c5a6b72e663f32c"
7307
+ },
7308
+ {
7309
+ "model_format": "pytorch",
7310
+ "model_size_in_billions": 3,
7311
+ "quantizations": [
7312
+ "4-bit",
7313
+ "8-bit",
7314
+ "none"
7315
+ ],
7316
+ "model_id": "Qwen/Qwen2.5-3B",
7317
+ "model_revision": "e4aa5ac50aa507415cda96cc99eb77ad0a3d2d34"
7318
+ },
7319
+ {
7320
+ "model_format": "pytorch",
7321
+ "model_size_in_billions": 7,
7322
+ "quantizations": [
7323
+ "4-bit",
7324
+ "8-bit",
7325
+ "none"
7326
+ ],
7327
+ "model_id": "Qwen/Qwen2.5-7B",
7328
+ "model_revision": "09a0bac5707b43ec44508eab308b0846320c1ed4"
7329
+ },
7330
+ {
7331
+ "model_format": "pytorch",
7332
+ "model_size_in_billions": 14,
7333
+ "quantizations": [
7334
+ "4-bit",
7335
+ "8-bit",
7336
+ "none"
7337
+ ],
7338
+ "model_id": "Qwen/Qwen2.5-14B",
7339
+ "model_revision": "d02b64ba1ce86bf9948668a13f82709600431ccc"
7340
+ },
7341
+ {
7342
+ "model_format": "pytorch",
7343
+ "model_size_in_billions": 32,
7344
+ "quantizations": [
7345
+ "4-bit",
7346
+ "8-bit",
7347
+ "none"
7348
+ ],
7349
+ "model_id": "Qwen/Qwen2.5-32B",
7350
+ "model_revision": "ff23665d01c3665be5fdb271d18a62090b65c06d"
7351
+ },
7352
+ {
7353
+ "model_format": "pytorch",
7354
+ "model_size_in_billions": 72,
7355
+ "quantizations": [
7356
+ "4-bit",
7357
+ "8-bit",
7358
+ "none"
7359
+ ],
7360
+ "model_id": "Qwen/Qwen2.5-72B",
7361
+ "model_revision": "587cc4061cf6a7cc0d429d05c109447e5cf063af"
7362
+ }
7363
+ ]
7364
+ },
7365
+ {
7366
+ "version": 1,
7367
+ "context_length": 32768,
7368
+ "model_name": "qwen2.5-instruct",
7369
+ "model_lang": [
7370
+ "en",
7371
+ "zh"
7372
+ ],
7373
+ "model_ability": [
7374
+ "chat",
7375
+ "tools"
7376
+ ],
7377
+ "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.",
7378
+ "model_specs": [
7379
+ {
7380
+ "model_format": "pytorch",
7381
+ "model_size_in_billions": "0_5",
7382
+ "quantizations": [
7383
+ "4-bit",
7384
+ "8-bit",
7385
+ "none"
7386
+ ],
7387
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct"
7388
+ },
7389
+ {
7390
+ "model_format": "pytorch",
7391
+ "model_size_in_billions": "1_5",
7392
+ "quantizations": [
7393
+ "4-bit",
7394
+ "8-bit",
7395
+ "none"
7396
+ ],
7397
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct"
7398
+ },
7399
+ {
7400
+ "model_format": "pytorch",
7401
+ "model_size_in_billions": 3,
7402
+ "quantizations": [
7403
+ "4-bit",
7404
+ "8-bit",
7405
+ "none"
7406
+ ],
7407
+ "model_id": "Qwen/Qwen2.5-3B-Instruct"
7408
+ },
7409
+ {
7410
+ "model_format": "pytorch",
7411
+ "model_size_in_billions": 7,
7412
+ "quantizations": [
7413
+ "4-bit",
7414
+ "8-bit",
7415
+ "none"
7416
+ ],
7417
+ "model_id": "Qwen/Qwen2.5-7B-Instruct"
7418
+ },
7419
+ {
7420
+ "model_format": "pytorch",
7421
+ "model_size_in_billions": 14,
7422
+ "quantizations": [
7423
+ "4-bit",
7424
+ "8-bit",
7425
+ "none"
7426
+ ],
7427
+ "model_id": "Qwen/Qwen2.5-14B-Instruct"
7428
+ },
7429
+ {
7430
+ "model_format": "pytorch",
7431
+ "model_size_in_billions": 32,
7432
+ "quantizations": [
7433
+ "4-bit",
7434
+ "8-bit",
7435
+ "none"
7436
+ ],
7437
+ "model_id": "Qwen/Qwen2.5-32B-Instruct"
7438
+ },
7439
+ {
7440
+ "model_format": "pytorch",
7441
+ "model_size_in_billions": 72,
7442
+ "quantizations": [
7443
+ "4-bit",
7444
+ "8-bit",
7445
+ "none"
7446
+ ],
7447
+ "model_id": "Qwen/Qwen2.5-72B-Instruct"
7448
+ },
7449
+ {
7450
+ "model_format": "gptq",
7451
+ "model_size_in_billions": "0_5",
7452
+ "quantizations": [
7453
+ "Int4",
7454
+ "Int8"
7455
+ ],
7456
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct-GPTQ-{quantization}"
7457
+ },
7458
+ {
7459
+ "model_format": "gptq",
7460
+ "model_size_in_billions": "1_5",
7461
+ "quantizations": [
7462
+ "Int4",
7463
+ "Int8"
7464
+ ],
7465
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct-GPTQ-{quantization}"
7466
+ },
7467
+ {
7468
+ "model_format": "gptq",
7469
+ "model_size_in_billions": 3,
7470
+ "quantizations": [
7471
+ "Int4",
7472
+ "Int8"
7473
+ ],
7474
+ "model_id": "Qwen/Qwen2.5-3B-Instruct-GPTQ-{quantization}"
7475
+ },
7476
+ {
7477
+ "model_format": "gptq",
7478
+ "model_size_in_billions": 7,
7479
+ "quantizations": [
7480
+ "Int4",
7481
+ "Int8"
7482
+ ],
7483
+ "model_id": "Qwen/Qwen2.5-7B-Instruct-GPTQ-{quantization}"
7484
+ },
7485
+ {
7486
+ "model_format": "gptq",
7487
+ "model_size_in_billions": 14,
7488
+ "quantizations": [
7489
+ "Int4",
7490
+ "Int8"
7491
+ ],
7492
+ "model_id": "Qwen/Qwen2.5-14B-Instruct-GPTQ-{quantization}"
7493
+ },
7494
+ {
7495
+ "model_format": "gptq",
7496
+ "model_size_in_billions": 32,
7497
+ "quantizations": [
7498
+ "Int4",
7499
+ "Int8"
7500
+ ],
7501
+ "model_id": "Qwen/Qwen2.5-32B-Instruct-GPTQ-{quantization}"
7502
+ },
7503
+ {
7504
+ "model_format": "gptq",
7505
+ "model_size_in_billions": 72,
7506
+ "quantizations": [
7507
+ "Int4",
7508
+ "Int8"
7509
+ ],
7510
+ "model_id": "Qwen/Qwen2.5-72B-Instruct-GPTQ-{quantization}"
7511
+ },
7512
+ {
7513
+ "model_format": "awq",
7514
+ "model_size_in_billions": "0_5",
7515
+ "quantizations": [
7516
+ "Int4"
7517
+ ],
7518
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct-AWQ"
7519
+ },
7520
+ {
7521
+ "model_format": "awq",
7522
+ "model_size_in_billions": "1_5",
7523
+ "quantizations": [
7524
+ "Int4"
7525
+ ],
7526
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct-AWQ"
7527
+ },
7528
+ {
7529
+ "model_format": "awq",
7530
+ "model_size_in_billions": 3,
7531
+ "quantizations": [
7532
+ "Int4"
7533
+ ],
7534
+ "model_id": "Qwen/Qwen2.5-3B-Instruct-AWQ"
7535
+ },
7536
+ {
7537
+ "model_format": "awq",
7538
+ "model_size_in_billions": 7,
7539
+ "quantizations": [
7540
+ "Int4"
7541
+ ],
7542
+ "model_id": "Qwen/Qwen2.5-7B-Instruct-AWQ"
7543
+ },
7544
+ {
7545
+ "model_format": "awq",
7546
+ "model_size_in_billions": 14,
7547
+ "quantizations": [
7548
+ "Int4"
7549
+ ],
7550
+ "model_id": "Qwen/Qwen2.5-14B-Instruct-AWQ"
7551
+ },
7552
+ {
7553
+ "model_format": "awq",
7554
+ "model_size_in_billions": 32,
7555
+ "quantizations": [
7556
+ "Int4"
7557
+ ],
7558
+ "model_id": "Qwen/Qwen2.5-32B-Instruct-AWQ"
7559
+ },
7560
+ {
7561
+ "model_format": "awq",
7562
+ "model_size_in_billions": 72,
7563
+ "quantizations": [
7564
+ "Int4"
7565
+ ],
7566
+ "model_id": "Qwen/Qwen2.5-72B-Instruct-AWQ"
7567
+ },
7568
+ {
7569
+ "model_format": "ggufv2",
7570
+ "model_size_in_billions": "0_5",
7571
+ "quantizations": [
7572
+ "q2_k",
7573
+ "q3_k_m",
7574
+ "q4_0",
7575
+ "q4_k_m",
7576
+ "q5_0",
7577
+ "q5_k_m",
7578
+ "q6_k",
7579
+ "q8_0"
7580
+ ],
7581
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
7582
+ "model_file_name_template": "qwen2.5-0.5b-instruct-{quantization}.gguf"
7583
+ },
7584
+ {
7585
+ "model_format": "ggufv2",
7586
+ "model_size_in_billions": "1_5",
7587
+ "quantizations": [
7588
+ "q2_k",
7589
+ "q3_k_m",
7590
+ "q4_0",
7591
+ "q4_k_m",
7592
+ "q5_0",
7593
+ "q5_k_m",
7594
+ "q6_k",
7595
+ "q8_0"
7596
+ ],
7597
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct-GGUF",
7598
+ "model_file_name_template": "qwen2.5-1.5b-instruct-{quantization}.gguf"
7599
+ },
7600
+ {
7601
+ "model_format": "ggufv2",
7602
+ "model_size_in_billions": 3,
7603
+ "quantizations": [
7604
+ "q2_k",
7605
+ "q3_k_m",
7606
+ "q4_0",
7607
+ "q4_k_m",
7608
+ "q5_0",
7609
+ "q5_k_m",
7610
+ "q6_k",
7611
+ "q8_0"
7612
+ ],
7613
+ "model_id": "Qwen/Qwen2.5-3B-Instruct-GGUF",
7614
+ "model_file_name_template": "qwen2.5-3b-instruct-{quantization}.gguf"
7615
+ },
7616
+ {
7617
+ "model_format": "ggufv2",
7618
+ "model_size_in_billions": 7,
7619
+ "quantizations": [
7620
+ "q2_k",
7621
+ "q3_k_m",
7622
+ "q4_0",
7623
+ "q4_k_m",
7624
+ "q5_0",
7625
+ "q5_k_m",
7626
+ "q6_k",
7627
+ "q8_0"
7628
+ ],
7629
+ "model_id": "Qwen/Qwen2.5-7B-Instruct-GGUF",
7630
+ "model_file_name_template": "qwen2.5-7b-instruct-{quantization}.gguf",
7631
+ "model_file_name_split_template": "qwen2.5-7b-instruct-{quantization}-{part}.gguf",
7632
+ "quantization_parts": {
7633
+ "q4_0": [
7634
+ "00001-of-00002",
7635
+ "00002-of-00002"
7636
+ ],
7637
+ "q4_k_m": [
7638
+ "00001-of-00002",
7639
+ "00002-of-00002"
7640
+ ],
7641
+ "q5_0": [
7642
+ "00001-of-00002",
7643
+ "00002-of-00002"
7644
+ ],
7645
+ "q5_k_m": [
7646
+ "00001-of-00002",
7647
+ "00002-of-00002"
7648
+ ],
7649
+ "q6_k": [
7650
+ "00001-of-00002",
7651
+ "00002-of-00002"
7652
+ ],
7653
+ "q8_0": [
7654
+ "00001-of-00002",
7655
+ "00002-of-00002"
7656
+ ]
7657
+ }
7658
+ },
7659
+ {
7660
+ "model_format": "ggufv2",
7661
+ "model_size_in_billions": 14,
7662
+ "quantizations": [
7663
+ "q2_k",
7664
+ "q3_k_m",
7665
+ "q4_0",
7666
+ "q4_k_m",
7667
+ "q5_0",
7668
+ "q5_k_m",
7669
+ "q6_k",
7670
+ "q8_0"
7671
+ ],
7672
+ "model_id": "Qwen/Qwen2.5-14B-Instruct-GGUF",
7673
+ "model_file_name_template": "qwen2.5-14b-instruct-{quantization}.gguf",
7674
+ "model_file_name_split_template": "qwen2.5-14b-instruct-{quantization}-{part}.gguf",
7675
+ "quantization_parts": {
7676
+ "q2_k": [
7677
+ "00001-of-00002",
7678
+ "00002-of-00002"
7679
+ ],
7680
+ "q3_k_m": [
7681
+ "00001-of-00002",
7682
+ "00002-of-00002"
7683
+ ],
7684
+ "q4_0": [
7685
+ "00001-of-00003",
7686
+ "00002-of-00003",
7687
+ "00003-of-00003"
7688
+ ],
7689
+ "q4_k_m": [
7690
+ "00001-of-00003",
7691
+ "00002-of-00003",
7692
+ "00003-of-00003"
7693
+ ],
7694
+ "q5_0": [
7695
+ "00001-of-00003",
7696
+ "00002-of-00003",
7697
+ "00003-of-00003"
7698
+ ],
7699
+ "q5_k_m": [
7700
+ "00001-of-00003",
7701
+ "00002-of-00003",
7702
+ "00003-of-00003"
7703
+ ],
7704
+ "q6_k": [
7705
+ "00001-of-00004",
7706
+ "00002-of-00004",
7707
+ "00003-of-00004",
7708
+ "00004-of-00004"
7709
+ ],
7710
+ "q8_0": [
7711
+ "00001-of-00004",
7712
+ "00002-of-00004",
7713
+ "00003-of-00004",
7714
+ "00004-of-00004"
7715
+ ]
7716
+ }
7717
+ },
7718
+ {
7719
+ "model_format": "ggufv2",
7720
+ "model_size_in_billions": 32,
7721
+ "quantizations": [
7722
+ "q2_k",
7723
+ "q3_k_m",
7724
+ "q4_0",
7725
+ "q4_k_m",
7726
+ "q5_0",
7727
+ "q5_k_m",
7728
+ "q6_k",
7729
+ "q8_0"
7730
+ ],
7731
+ "model_id": "Qwen/Qwen2.5-32B-Instruct-GGUF",
7732
+ "model_file_name_template": "qwen2.5-32b-instruct-{quantization}.gguf",
7733
+ "model_file_name_split_template": "qwen2.5-32b-instruct-{quantization}-{part}.gguf",
7734
+ "quantization_parts": {
7735
+ "q2_k": [
7736
+ "00001-of-00004",
7737
+ "00002-of-00004",
7738
+ "00003-of-00004",
7739
+ "00004-of-00004"
7740
+ ],
7741
+ "q3_k_m": [
7742
+ "00001-of-00005",
7743
+ "00002-of-00005",
7744
+ "00003-of-00005",
7745
+ "00004-of-00005",
7746
+ "00005-of-00005"
7747
+ ],
7748
+ "q4_0": [
7749
+ "00001-of-00005",
7750
+ "00002-of-00005",
7751
+ "00003-of-00005",
7752
+ "00004-of-00005",
7753
+ "00005-of-00005"
7754
+ ],
7755
+ "q4_k_m": [
7756
+ "00001-of-00005",
7757
+ "00002-of-00005",
7758
+ "00003-of-00005",
7759
+ "00004-of-00005",
7760
+ "00005-of-00005"
7761
+ ],
7762
+ "q5_0": [
7763
+ "00001-of-00006",
7764
+ "00002-of-00006",
7765
+ "00003-of-00006",
7766
+ "00004-of-00006",
7767
+ "00005-of-00006",
7768
+ "00006-of-00006"
7769
+ ],
7770
+ "q5_k_m": [
7771
+ "00001-of-00006",
7772
+ "00002-of-00006",
7773
+ "00003-of-00006",
7774
+ "00004-of-00006",
7775
+ "00005-of-00006",
7776
+ "00006-of-00006"
7777
+ ],
7778
+ "q6_k": [
7779
+ "00001-of-00007",
7780
+ "00002-of-00007",
7781
+ "00003-of-00007",
7782
+ "00004-of-00007",
7783
+ "00005-of-00007",
7784
+ "00006-of-00007",
7785
+ "00007-of-00007"
7786
+ ],
7787
+ "q8_0": [
7788
+ "00001-of-00009",
7789
+ "00002-of-00009",
7790
+ "00003-of-00009",
7791
+ "00004-of-00009",
7792
+ "00005-of-00009",
7793
+ "00006-of-00009",
7794
+ "00007-of-00009",
7795
+ "00008-of-00009",
7796
+ "00009-of-00009"
7797
+ ]
7798
+ }
7799
+ },
7800
+ {
7801
+ "model_format": "ggufv2",
7802
+ "model_size_in_billions": 72,
7803
+ "quantizations": [
7804
+ "q2_k",
7805
+ "q3_k_m",
7806
+ "q4_0",
7807
+ "q4_k_m",
7808
+ "q5_0",
7809
+ "q5_k_m",
7810
+ "q6_k",
7811
+ "q8_0",
7812
+ "fp16"
7813
+ ],
7814
+ "model_id": "Qwen/Qwen2.5-72B-Instruct-GGUF",
7815
+ "model_file_name_template": "qwen2.5-72b-instruct-{quantization}.gguf",
7816
+ "model_file_name_split_template": "qwen2.5-72b-instruct-{quantization}-{part}.gguf",
7817
+ "quantization_parts": {
7818
+ "q2_k": [
7819
+ "00001-of-00007",
7820
+ "00002-of-00007",
7821
+ "00003-of-00007",
7822
+ "00004-of-00007",
7823
+ "00005-of-00007",
7824
+ "00006-of-00007",
7825
+ "00007-of-00007"
7826
+ ],
7827
+ "q3_k_m": [
7828
+ "00001-of-00009",
7829
+ "00002-of-00009",
7830
+ "00003-of-00009",
7831
+ "00004-of-00009",
7832
+ "00005-of-00009",
7833
+ "00006-of-00009",
7834
+ "00007-of-00009",
7835
+ "00008-of-00009",
7836
+ "00009-of-00009"
7837
+ ],
7838
+ "q4_0": [
7839
+ "00001-of-00011",
7840
+ "00002-of-00011",
7841
+ "00003-of-00011",
7842
+ "00004-of-00011",
7843
+ "00005-of-00011",
7844
+ "00006-of-00011",
7845
+ "00007-of-00011",
7846
+ "00008-of-00011",
7847
+ "00009-of-00011",
7848
+ "00010-of-00011",
7849
+ "00011-of-00011"
7850
+ ],
7851
+ "q4_k_m": [
7852
+ "00001-of-00012",
7853
+ "00002-of-00012",
7854
+ "00003-of-00012",
7855
+ "00004-of-00012",
7856
+ "00005-of-00012",
7857
+ "00006-of-00012",
7858
+ "00007-of-00012",
7859
+ "00008-of-00012",
7860
+ "00009-of-00012",
7861
+ "00010-of-00012",
7862
+ "00011-of-00012",
7863
+ "00012-of-00012"
7864
+ ],
7865
+ "q5_0": [
7866
+ "00001-of-00013",
7867
+ "00002-of-00013",
7868
+ "00003-of-00013",
7869
+ "00004-of-00013",
7870
+ "00005-of-00013",
7871
+ "00006-of-00013",
7872
+ "00007-of-00013",
7873
+ "00008-of-00013",
7874
+ "00009-of-00013",
7875
+ "00010-of-00013",
7876
+ "00011-of-00013",
7877
+ "00012-of-00013",
7878
+ "00013-of-00013"
7879
+ ],
7880
+ "q5_k_m": [
7881
+ "00001-of-00014",
7882
+ "00002-of-00014",
7883
+ "00003-of-00014",
7884
+ "00004-of-00014",
7885
+ "00005-of-00014",
7886
+ "00006-of-00014",
7887
+ "00007-of-00014",
7888
+ "00008-of-00014",
7889
+ "00009-of-00014",
7890
+ "00010-of-00014",
7891
+ "00011-of-00014",
7892
+ "00012-of-00014",
7893
+ "00013-of-00014",
7894
+ "00014-of-00014"
7895
+ ],
7896
+ "q6_k": [
7897
+ "00001-of-00016",
7898
+ "00002-of-00016",
7899
+ "00003-of-00016",
7900
+ "00004-of-00016",
7901
+ "00005-of-00016",
7902
+ "00006-of-00016",
7903
+ "00007-of-00016",
7904
+ "00008-of-00016",
7905
+ "00009-of-00016",
7906
+ "00010-of-00016",
7907
+ "00011-of-00016",
7908
+ "00012-of-00016",
7909
+ "00013-of-00016",
7910
+ "00014-of-00016",
7911
+ "00015-of-00016",
7912
+ "00016-of-00016"
7913
+ ],
7914
+ "q8_0": [
7915
+ "00001-of-00021",
7916
+ "00002-of-00021",
7917
+ "00003-of-00021",
7918
+ "00004-of-00021",
7919
+ "00005-of-00021",
7920
+ "00006-of-00021",
7921
+ "00007-of-00021",
7922
+ "00008-of-00021",
7923
+ "00009-of-00021",
7924
+ "00010-of-00021",
7925
+ "00011-of-00021",
7926
+ "00012-of-00021",
7927
+ "00013-of-00021",
7928
+ "00014-of-00021",
7929
+ "00015-of-00021",
7930
+ "00016-of-00021",
7931
+ "00017-of-00021",
7932
+ "00018-of-00021",
7933
+ "00019-of-00021",
7934
+ "00020-of-00021",
7935
+ "00021-of-00021"
7936
+ ]
7937
+ }
7938
+ }
7939
+ ],
7940
+ "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
7941
+ "stop_token_ids": [
7942
+ 151643,
7943
+ 151644,
7944
+ 151645
7945
+ ],
7946
+ "stop": [
7947
+ "<|endoftext|>",
7948
+ "<|im_start|>",
7949
+ "<|im_end|>"
7950
+ ]
7951
+ },
7952
+ {
7953
+ "version": 1,
7954
+ "context_length": 32768,
7955
+ "model_name": "qwen2.5-coder",
7956
+ "model_lang": [
7957
+ "en",
7958
+ "zh"
7959
+ ],
7960
+ "model_ability": [
7961
+ "generate"
7962
+ ],
7963
+ "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
7964
+ "model_specs": [
7965
+ {
7966
+ "model_format": "pytorch",
7967
+ "model_size_in_billions": "1_5",
7968
+ "quantizations": [
7969
+ "4-bit",
7970
+ "8-bit",
7971
+ "none"
7972
+ ],
7973
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B",
7974
+ "model_revision": "d3586cfe793730945f8e4d7ef31032a3ee50247d"
7975
+ },
7976
+ {
7977
+ "model_format": "pytorch",
7978
+ "model_size_in_billions": 7,
7979
+ "quantizations": [
7980
+ "4-bit",
7981
+ "8-bit",
7982
+ "none"
7983
+ ],
7984
+ "model_id": "Qwen/Qwen2.5-Coder-7B",
7985
+ "model_revision": "30b6a7e874a78d46b80fa1db3194ea427dd41b08"
7986
+ }
7987
+ ]
7988
+ },
7989
+ {
7990
+ "version": 1,
7991
+ "context_length": 32768,
7992
+ "model_name": "qwen2.5-coder-instruct",
7993
+ "model_lang": [
7994
+ "en",
7995
+ "zh"
7996
+ ],
7997
+ "model_ability": [
7998
+ "chat",
7999
+ "tools"
8000
+ ],
8001
+ "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
8002
+ "model_specs": [
8003
+ {
8004
+ "model_format": "pytorch",
8005
+ "model_size_in_billions": "1_5",
8006
+ "quantizations": [
8007
+ "4-bit",
8008
+ "8-bit",
8009
+ "none"
8010
+ ],
8011
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
8012
+ },
8013
+ {
8014
+ "model_format": "pytorch",
8015
+ "model_size_in_billions": 7,
8016
+ "quantizations": [
8017
+ "4-bit",
8018
+ "8-bit",
8019
+ "none"
8020
+ ],
8021
+ "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
8022
+ },
8023
+ {
8024
+ "model_format": "ggufv2",
8025
+ "model_size_in_billions": "1_5",
8026
+ "quantizations": [
8027
+ "q2_k",
8028
+ "q3_k_m",
8029
+ "q4_0",
8030
+ "q4_k_m",
8031
+ "q5_0",
8032
+ "q5_k_m",
8033
+ "q6_k",
8034
+ "q8_0"
8035
+ ],
8036
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
8037
+ "model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf"
8038
+ },
8039
+ {
8040
+ "model_format": "ggufv2",
8041
+ "model_size_in_billions": 7,
8042
+ "quantizations": [
8043
+ "q2_k",
8044
+ "q3_k_m",
8045
+ "q4_0",
8046
+ "q4_k_m",
8047
+ "q5_0",
8048
+ "q5_k_m",
8049
+ "q6_k",
8050
+ "q8_0"
8051
+ ],
8052
+ "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
8053
+ "model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf",
8054
+ "model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf",
8055
+ "quantization_parts": {
8056
+ "q4_0": [
8057
+ "00001-of-00002",
8058
+ "00002-of-00002"
8059
+ ],
8060
+ "q4_k_m": [
8061
+ "00001-of-00002",
8062
+ "00002-of-00002"
8063
+ ],
8064
+ "q5_0": [
8065
+ "00001-of-00002",
8066
+ "00002-of-00002"
8067
+ ],
8068
+ "q5_k_m": [
8069
+ "00001-of-00002",
8070
+ "00002-of-00002"
8071
+ ],
8072
+ "q6_k": [
8073
+ "00001-of-00002",
8074
+ "00002-of-00002"
8075
+ ],
8076
+ "q8_0": [
8077
+ "00001-of-00003",
8078
+ "00002-of-00003",
8079
+ "00003-of-00003"
8080
+ ]
8081
+ }
8082
+ }
8083
+ ],
8084
+ "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
8085
+ "stop_token_ids": [
8086
+ 151643,
8087
+ 151644,
8088
+ 151645
8089
+ ],
8090
+ "stop": [
8091
+ "<|endoftext|>",
8092
+ "<|im_start|>",
8093
+ "<|im_end|>"
8094
+ ]
6854
8095
  }
6855
8096
  ]