xinference 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of xinference might be problematic.

Files changed (45)
  1. xinference/_version.py +3 -3
  2. xinference/core/chat_interface.py +39 -24
  3. xinference/model/llm/__init__.py +3 -0
  4. xinference/model/llm/core.py +2 -5
  5. xinference/model/llm/llama_cpp/core.py +52 -16
  6. xinference/model/llm/llm_family.json +364 -21
  7. xinference/model/llm/llm_family_modelscope.json +258 -23
  8. xinference/model/llm/mlx/core.py +15 -11
  9. xinference/model/llm/{reasoning_parsers/deepseek_r1_reasoning_parser.py → reasoning_parser.py} +19 -14
  10. xinference/model/llm/sglang/core.py +2 -0
  11. xinference/model/llm/transformers/core.py +3 -2
  12. xinference/model/llm/transformers/gemma3.py +185 -0
  13. xinference/model/llm/transformers/intern_vl.py +0 -2
  14. xinference/model/llm/utils.py +78 -32
  15. xinference/model/llm/vllm/core.py +10 -3
  16. xinference/types.py +2 -2
  17. xinference/web/ui/build/asset-manifest.json +6 -6
  18. xinference/web/ui/build/index.html +1 -1
  19. xinference/web/ui/build/static/css/main.b494ae7e.css +2 -0
  20. xinference/web/ui/build/static/css/main.b494ae7e.css.map +1 -0
  21. xinference/web/ui/build/static/js/main.3cea968e.js +3 -0
  22. xinference/web/ui/build/static/js/main.3cea968e.js.map +1 -0
  23. xinference/web/ui/node_modules/.cache/babel-loader/7f59e45e3f268ab8a4788b6fb024cf8dab088736dff22f5a3a39c122a83ab930.json +1 -0
  24. xinference/web/ui/node_modules/.cache/babel-loader/cc97b49285d7717c63374766c789141a4329a04582ab32756d7e0e614d4c5c7f.json +1 -0
  25. xinference/web/ui/node_modules/.cache/babel-loader/dcd60488509450bfff37bfff56de2c096d51de17dd00ec60d4db49c8b483ada1.json +1 -0
  26. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +1 -0
  27. xinference/web/ui/src/locales/en.json +2 -2
  28. xinference/web/ui/src/locales/zh.json +1 -1
  29. {xinference-1.3.1.dist-info → xinference-1.4.0.dist-info}/METADATA +3 -3
  30. {xinference-1.3.1.dist-info → xinference-1.4.0.dist-info}/RECORD +35 -36
  31. xinference/model/llm/reasoning_parsers/__init__.py +0 -13
  32. xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +0 -98
  33. xinference/web/ui/build/static/css/main.f8177338.css +0 -2
  34. xinference/web/ui/build/static/css/main.f8177338.css.map +0 -1
  35. xinference/web/ui/build/static/js/main.55b70cb7.js +0 -3
  36. xinference/web/ui/build/static/js/main.55b70cb7.js.map +0 -1
  37. xinference/web/ui/node_modules/.cache/babel-loader/2deac8d5636974533e3714f34e94fc754f9153a07c6ee11e72846cb8eae47e4b.json +0 -1
  38. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +0 -1
  39. xinference/web/ui/node_modules/.cache/babel-loader/e23d476fcbf6fd69c8986bf82133d257d28aa8fc9a5cab231d81c1c75c58cd99.json +0 -1
  40. xinference/web/ui/node_modules/.cache/babel-loader/e7a8c37fda8725cab69c7ef8c627060bd7fc806adc67e00fe628ba148cb86d7f.json +0 -1
  41. /xinference/web/ui/build/static/js/{main.55b70cb7.js.LICENSE.txt → main.3cea968e.js.LICENSE.txt} +0 -0
  42. {xinference-1.3.1.dist-info → xinference-1.4.0.dist-info}/LICENSE +0 -0
  43. {xinference-1.3.1.dist-info → xinference-1.4.0.dist-info}/WHEEL +0 -0
  44. {xinference-1.3.1.dist-info → xinference-1.4.0.dist-info}/entry_points.txt +0 -0
  45. {xinference-1.3.1.dist-info → xinference-1.4.0.dist-info}/top_level.txt +0 -0
@@ -5786,6 +5786,265 @@
  "<start_of_turn>"
  ]
  },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "gemma-3-1b-it",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-1b-it"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-1b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-1b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-1b-it-{quantization}"
+ }
+ ],
+ "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+ "stop_token_ids": [
+ 1,
+ 105,
+ 106
+ ],
+ "stop": [
+ "<eos>",
+ "<end_of_turn>",
+ "<start_of_turn>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "gemma-3-it",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-4b-it"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-12b-it"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-27b-it"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-4b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-4b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-12b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-12b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-27b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-27b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-4b-it-{quantization}"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-12b-it-{quantization}"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-27b-it-{quantization}"
+ }
+ ],
+ "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+ "stop_token_ids": [
+ 1,
+ 105,
+ 106
+ ],
+ "stop": [
+ "<eos>",
+ "<end_of_turn>",
+ "<start_of_turn>"
+ ]
+ },
  {
  "version": 1,
  "context_length": 8192,
@@ -6923,7 +7182,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-1B"
+ "model_id": "OpenGVLab/InternVL2_5-1B-MPO"
  },
  {
  "model_format": "pytorch",
@@ -6933,7 +7192,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-2B"
+ "model_id": "OpenGVLab/InternVL2_5-2B-MPO"
  },
  {
  "model_format": "pytorch",
@@ -6943,7 +7202,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-4B"
+ "model_id": "OpenGVLab/InternVL2_5-4B-MPO"
  },
  {
  "model_format": "awq",
@@ -6961,7 +7220,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-8B"
+ "model_id": "OpenGVLab/InternVL2_5-8B-MPO"
  },
  {
  "model_format": "awq",
@@ -6969,7 +7228,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-8B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-8B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -6979,7 +7238,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-26B"
+ "model_id": "OpenGVLab/InternVL2_5-26B-MPO"
  },
  {
  "model_format": "awq",
@@ -6987,7 +7246,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-26B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-26B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -6997,7 +7256,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-38B"
+ "model_id": "OpenGVLab/InternVL2_5-38B-MPO"
  },
  {
  "model_format": "awq",
@@ -7005,7 +7264,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-38B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-38B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -7015,7 +7274,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-78B"
+ "model_id": "OpenGVLab/InternVL2_5-78B-MPO"
  },
  {
  "model_format": "awq",
@@ -7023,7 +7282,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-78B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-78B-MPO-AWQ"
  }
  ],
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
@@ -7892,7 +8151,7 @@
  "model_id": "mlx-community/DeepSeek-V3-{quantization}"
  }
  ],
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "chat_template": "{% if messages %} {% if system or tools %} {% if system %} {{ system }} {% endif %} {% if tools %} {# Handle tools here if needed #} {% endif %} {% endif %} {% for message in messages %} {% set last = loop.index == loop.length %} {% if message.role == \"user\" %} <|User|> {% if tools and last %} Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}. Do not use variables. {{ tools }} {% endif %} {{ message.content }} {% if last %} <|Assistant|> {% endif %} {% elif message.role == \"assistant\" %} <|Assistant|> {% if message.tool_calls %} <|tool▁calls▁begin|> {% for tool in message.tool_calls %} <|tool▁call▁begin|> {\"name\": \"{{ tool.function.name }}\", \"parameters\": {{ tool.function.arguments }}} <|tool▁call▁end|> {% endfor %} <|tool▁calls▁end|> {% else %} {{ message.content }} {% if not last %} <|end▁of▁sentence|> {% endif %} {% endif %} {% elif message.role == \"tool\" %} <|tool▁outputs▁begin|> <|tool▁output▁begin|> {{ message.content }} <|tool▁output▁end|> <|tool▁outputs▁end|> {% if last and message.role != \"assistant\" %} <|Assistant|> {% endif %} {% endif %} {% endfor %} {% else %} {% if system %} {{ system }} {% endif %} {% if prompt %} <|User|> {{ prompt }} {% endif %} <|Assistant|> {{ response }} {% if response %} {{ response }} {% endif %} {% endif %}",
  "stop_token_ids": [
  1
  ],
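The replacement DeepSeek-V3 template instructs the model to answer tool-enabled prompts with a single JSON object of the form {"name": function name, "parameters": dictionary of argument name and its value}. A hedged sketch of what consuming such output could look like (illustrative only; this is not xinference's actual tool-call parser, and the function name is hypothetical):

```python
# Sketch: decoding the JSON tool call the rewritten DeepSeek-V3 template
# asks the model to emit between <|tool▁call▁begin|> and <|tool▁call▁end|>.
import json

raw = '{"name": "get_weather", "parameters": {"city": "Paris"}}'  # hypothetical model output
call = json.loads(raw)
assert set(call) == {"name", "parameters"}  # the shape the template specifies
print(call["name"], call["parameters"])  # get_weather {'city': 'Paris'}
```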
@@ -9449,7 +9708,7 @@
  },
  {
  "version": 1,
- "context_length": 32768,
+ "context_length": 131072,
  "model_name": "QwQ-32B",
  "model_lang": [
  "en",
@@ -9496,15 +9755,99 @@
  "model_size_in_billions": 32,
  "quantizations": [
  "fp16",
- "Q2_k",
- "Q3_K_M",
- "Q4_0",
- "Q4_K_M",
- "Q5_0",
- "Q5_K_M",
- "Q6_K",
- "Q8_0"
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0"
  ],
+ "quantization_parts": {
+ "fp16": [
+ "00001-of-000017",
+ "00002-of-000017",
+ "00003-of-000017",
+ "00004-of-000017",
+ "00005-of-000017",
+ "00006-of-000017",
+ "00007-of-000017",
+ "00008-of-000017",
+ "00009-of-000017",
+ "00010-of-000017",
+ "00011-of-000017",
+ "00012-of-000017",
+ "00013-of-000017",
+ "00014-of-000017",
+ "00015-of-000017",
+ "00016-of-000017",
+ "00017-of-000017"
+ ],
+ "q2_k": [
+ "00001-of-00004",
+ "00002-of-00004",
+ "00003-of-00004",
+ "00004-of-00004"
+ ],
+ "q3_k_m": [
+ "00001-of-00005",
+ "00002-of-00005",
+ "00003-of-00005",
+ "00004-of-00005",
+ "00005-of-00005"
+ ],
+ "q4_0": [
+ "00001-of-00005",
+ "00002-of-00005",
+ "00003-of-00005",
+ "00004-of-00005",
+ "00005-of-00005"
+ ],
+ "q4_k_m": [
+ "00001-of-00005",
+ "00002-of-00005",
+ "00003-of-00005",
+ "00004-of-00005",
+ "00005-of-00005"
+ ],
+ "q5_0": [
+ "00001-of-00006",
+ "00002-of-00006",
+ "00003-of-00006",
+ "00004-of-00006",
+ "00005-of-00006",
+ "00006-of-00006"
+ ],
+ "q5_k_m": [
+ "00001-of-00006",
+ "00002-of-00006",
+ "00003-of-00006",
+ "00004-of-00006",
+ "00005-of-00006",
+ "00006-of-00006"
+ ],
+ "q6_k": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "q8_0": [
+ "00001-of-00009",
+ "00002-of-00009",
+ "00003-of-00009",
+ "00004-of-00009",
+ "00005-of-00009",
+ "00006-of-00009",
+ "00007-of-00009",
+ "00008-of-00009",
+ "00009-of-00009"
+ ]
+ },
  "model_id": "Qwen/QwQ-32B-GGUF",
  "model_file_name_template": "qwq-32b-{quantization}.gguf"
  }
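Besides lowercasing the quantization names to match the repo's file naming, this hunk adds a `quantization_parts` table enumerating the shards of each split GGUF. A sketch of how a part list could expand into concrete shard file names, assuming the `-{part}` suffix is inserted before the `.gguf` extension of the resolved template (an assumption for illustration; the JSON itself only pairs the template with the part lists):

```python
# Sketch: expanding "quantization_parts" into shard file names for one
# quantization of a split GGUF (parts abbreviated to q4_k_m from the hunk above).
template = "qwq-32b-{quantization}.gguf"
parts = {
    "q4_k_m": [
        "00001-of-00005", "00002-of-00005", "00003-of-00005",
        "00004-of-00005", "00005-of-00005",
    ],
}

def shard_names(quantization: str) -> list[str]:
    # Resolve the single-file name, then splice "-{part}" before ".gguf".
    base = template.format(quantization=quantization)
    stem = base[: -len(".gguf")]
    return [f"{stem}-{part}.gguf" for part in parts[quantization]]

print(shard_names("q4_k_m")[0])  # qwq-32b-q4_k_m-00001-of-00005.gguf
```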