xinference 1.3.1.post1__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (75)
  1. xinference/_compat.py +1 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +4 -0
  4. xinference/core/chat_interface.py +1 -1
  5. xinference/core/model.py +23 -3
  6. xinference/core/supervisor.py +6 -0
  7. xinference/core/worker.py +54 -11
  8. xinference/model/llm/__init__.py +7 -2
  9. xinference/model/llm/core.py +1 -0
  10. xinference/model/llm/llama_cpp/core.py +50 -15
  11. xinference/model/llm/llm_family.json +388 -13
  12. xinference/model/llm/llm_family_modelscope.json +373 -14
  13. xinference/model/llm/mlx/core.py +15 -11
  14. xinference/model/llm/reasoning_parser.py +17 -9
  15. xinference/model/llm/sglang/core.py +112 -12
  16. xinference/model/llm/transformers/core.py +4 -2
  17. xinference/model/llm/transformers/deepseek_vl.py +1 -1
  18. xinference/model/llm/transformers/deepseek_vl2.py +287 -0
  19. xinference/model/llm/transformers/gemma3.py +185 -0
  20. xinference/model/llm/transformers/intern_vl.py +0 -2
  21. xinference/model/llm/utils.py +62 -42
  22. xinference/model/llm/vllm/core.py +157 -11
  23. xinference/model/llm/vllm/distributed_executor.py +314 -0
  24. xinference/model/rerank/core.py +16 -11
  25. xinference/thirdparty/deepseek_vl2/__init__.py +31 -0
  26. xinference/thirdparty/deepseek_vl2/models/__init__.py +26 -0
  27. xinference/thirdparty/deepseek_vl2/models/configuration_deepseek.py +210 -0
  28. xinference/thirdparty/deepseek_vl2/models/conversation.py +310 -0
  29. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek.py +1975 -0
  30. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek_vl_v2.py +697 -0
  31. xinference/thirdparty/deepseek_vl2/models/processing_deepseek_vl_v2.py +675 -0
  32. xinference/thirdparty/deepseek_vl2/models/siglip_vit.py +661 -0
  33. xinference/thirdparty/deepseek_vl2/serve/__init__.py +0 -0
  34. xinference/thirdparty/deepseek_vl2/serve/app_modules/__init__.py +0 -0
  35. xinference/thirdparty/deepseek_vl2/serve/app_modules/gradio_utils.py +83 -0
  36. xinference/thirdparty/deepseek_vl2/serve/app_modules/overwrites.py +81 -0
  37. xinference/thirdparty/deepseek_vl2/serve/app_modules/presets.py +115 -0
  38. xinference/thirdparty/deepseek_vl2/serve/app_modules/utils.py +333 -0
  39. xinference/thirdparty/deepseek_vl2/serve/assets/Kelpy-Codos.js +100 -0
  40. xinference/thirdparty/deepseek_vl2/serve/assets/avatar.png +0 -0
  41. xinference/thirdparty/deepseek_vl2/serve/assets/custom.css +355 -0
  42. xinference/thirdparty/deepseek_vl2/serve/assets/custom.js +22 -0
  43. xinference/thirdparty/deepseek_vl2/serve/assets/favicon.ico +0 -0
  44. xinference/thirdparty/deepseek_vl2/serve/assets/simsun.ttc +0 -0
  45. xinference/thirdparty/deepseek_vl2/serve/inference.py +197 -0
  46. xinference/thirdparty/deepseek_vl2/utils/__init__.py +18 -0
  47. xinference/thirdparty/deepseek_vl2/utils/io.py +80 -0
  48. xinference/types.py +2 -2
  49. xinference/web/ui/build/asset-manifest.json +6 -6
  50. xinference/web/ui/build/index.html +1 -1
  51. xinference/web/ui/build/static/css/main.b494ae7e.css +2 -0
  52. xinference/web/ui/build/static/css/main.b494ae7e.css.map +1 -0
  53. xinference/web/ui/build/static/js/main.5ca4eea1.js +3 -0
  54. xinference/web/ui/build/static/js/main.5ca4eea1.js.map +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/0f0967acaec5df1d45b80010949c258d64297ebbb0f44b8bb3afcbd45c6f0ec4.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/68249645124f37d01eef83b1d897e751f895bea919b6fb466f907c1f87cebc84.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/cc97b49285d7717c63374766c789141a4329a04582ab32756d7e0e614d4c5c7f.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +1 -0
  59. xinference/web/ui/src/locales/en.json +2 -2
  60. xinference/web/ui/src/locales/zh.json +1 -1
  61. {xinference-1.3.1.post1.dist-info → xinference-1.4.1.dist-info}/METADATA +4 -4
  62. {xinference-1.3.1.post1.dist-info → xinference-1.4.1.dist-info}/RECORD +67 -41
  63. xinference/web/ui/build/static/css/main.f8177338.css +0 -2
  64. xinference/web/ui/build/static/css/main.f8177338.css.map +0 -1
  65. xinference/web/ui/build/static/js/main.55b70cb7.js +0 -3
  66. xinference/web/ui/build/static/js/main.55b70cb7.js.map +0 -1
  67. xinference/web/ui/node_modules/.cache/babel-loader/2deac8d5636974533e3714f34e94fc754f9153a07c6ee11e72846cb8eae47e4b.json +0 -1
  68. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +0 -1
  69. xinference/web/ui/node_modules/.cache/babel-loader/e23d476fcbf6fd69c8986bf82133d257d28aa8fc9a5cab231d81c1c75c58cd99.json +0 -1
  70. xinference/web/ui/node_modules/.cache/babel-loader/e7a8c37fda8725cab69c7ef8c627060bd7fc806adc67e00fe628ba148cb86d7f.json +0 -1
  71. /xinference/web/ui/build/static/js/{main.55b70cb7.js.LICENSE.txt → main.5ca4eea1.js.LICENSE.txt} +0 -0
  72. {xinference-1.3.1.post1.dist-info → xinference-1.4.1.dist-info}/LICENSE +0 -0
  73. {xinference-1.3.1.post1.dist-info → xinference-1.4.1.dist-info}/WHEEL +0 -0
  74. {xinference-1.3.1.post1.dist-info → xinference-1.4.1.dist-info}/entry_points.txt +0 -0
  75. {xinference-1.3.1.post1.dist-info → xinference-1.4.1.dist-info}/top_level.txt +0 -0
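
The largest single change is the model registry, xinference/model/llm/llm_family.json (+388 -13), excerpted in the hunks below: it registers the new gemma-3-1b-it, gemma-3-it, fin-r1, and deepseek-vl2 families, adds Qwen2.5-VL-32B-Instruct specs, and corrects the InternVL2_5 MPO repo ids. As a minimal sketch of how a newly registered family would be used (the endpoint and engine choice here are illustrative assumptions, not part of the diff):

# A minimal sketch, assuming a local xinference supervisor on port 9997
# and an installed transformers engine; adjust to your deployment.
from xinference.client import Client

client = Client("http://localhost:9997")  # placeholder endpoint

# "gemma-3-1b-it" is one of the model names this release adds.
model_uid = client.launch_model(
    model_name="gemma-3-1b-it",
    model_engine="transformers",  # engine choice is an assumption
    model_format="pytorch",
    model_size_in_billions=1,
    quantization="none",
)
model = client.get_model(model_uid)
print(model.chat(messages=[{"role": "user", "content": "Hello"}]))
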
@@ -5786,6 +5786,265 @@
  "<start_of_turn>"
  ]
  },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "gemma-3-1b-it",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-1b-it"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-1b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-1b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-1b-it-{quantization}"
+ }
+ ],
+ "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+ "stop_token_ids": [
+ 1,
+ 105,
+ 106
+ ],
+ "stop": [
+ "<eos>",
+ "<end_of_turn>",
+ "<start_of_turn>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "gemma-3-it",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-4b-it"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-12b-it"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-27b-it"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-4b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-4b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-12b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-12b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-27b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-27b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-4b-it-{quantization}"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-12b-it-{quantization}"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-27b-it-{quantization}"
+ }
+ ],
+ "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+ "stop_token_ids": [
+ 1,
+ 105,
+ 106
+ ],
+ "stop": [
+ "<eos>",
+ "<end_of_turn>",
+ "<start_of_turn>"
+ ]
+ },
  {
  "version": 1,
  "context_length": 8192,
@@ -6923,7 +7182,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-1B"
+ "model_id": "OpenGVLab/InternVL2_5-1B-MPO"
  },
  {
  "model_format": "pytorch",
@@ -6933,7 +7192,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-2B"
+ "model_id": "OpenGVLab/InternVL2_5-2B-MPO"
  },
  {
  "model_format": "pytorch",
@@ -6943,7 +7202,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-4B"
+ "model_id": "OpenGVLab/InternVL2_5-4B-MPO"
  },
  {
  "model_format": "awq",
@@ -6961,7 +7220,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-8B"
+ "model_id": "OpenGVLab/InternVL2_5-8B-MPO"
  },
  {
  "model_format": "awq",
@@ -6969,7 +7228,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-8B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-8B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -6979,7 +7238,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-26B"
+ "model_id": "OpenGVLab/InternVL2_5-26B-MPO"
  },
  {
  "model_format": "awq",
@@ -6987,7 +7246,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-26B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-26B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -6997,7 +7256,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-38B"
+ "model_id": "OpenGVLab/InternVL2_5-38B-MPO"
  },
  {
  "model_format": "awq",
@@ -7005,7 +7264,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-38B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-38B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -7015,7 +7274,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-78B"
+ "model_id": "OpenGVLab/InternVL2_5-78B-MPO"
  },
  {
  "model_format": "awq",
@@ -7023,7 +7282,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-78B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-78B-MPO-AWQ"
  }
  ],
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
@@ -7302,7 +7561,7 @@
  "model_id":"Qwen/Qwen2-VL-7B-Instruct",
  "model_revision":"6010982c1010c3b222fa98afc81575f124aa9bd6"
  },
- {
+ {
  "model_format":"gptq",
  "model_size_in_billions":7,
  "quantizations":[
@@ -7413,6 +7672,14 @@
  ],
  "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
  },
+ {
+ "model_format":"pytorch",
+ "model_size_in_billions":32,
+ "quantizations":[
+ "none"
+ ],
+ "model_id":"Qwen/Qwen2.5-VL-32B-Instruct"
+ },
  {
  "model_format":"pytorch",
  "model_size_in_billions":72,
@@ -7437,6 +7704,14 @@
  ],
  "model_id":"Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
  },
+ {
+ "model_format":"awq",
+ "model_size_in_billions":32,
+ "quantizations":[
+ "Int4"
+ ],
+ "model_id":"Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
+ },
  {
  "model_format":"awq",
  "model_size_in_billions":72,
@@ -7892,7 +8167,7 @@
  "model_id": "mlx-community/DeepSeek-V3-{quantization}"
  }
  ],
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "chat_template": "{% if messages %} {% if system or tools %} {% if system %} {{ system }} {% endif %} {% if tools %} {# Handle tools here if needed #} {% endif %} {% endif %} {% for message in messages %} {% set last = loop.index == loop.length %} {% if message.role == \"user\" %} <|User|> {% if tools and last %} Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}. Do not use variables. {{ tools }} {% endif %} {{ message.content }} {% if last %} <|Assistant|> {% endif %} {% elif message.role == \"assistant\" %} <|Assistant|> {% if message.tool_calls %} <|tool▁calls▁begin|> {% for tool in message.tool_calls %} <|tool▁call▁begin|> {\"name\": \"{{ tool.function.name }}\", \"parameters\": {{ tool.function.arguments }}} <|tool▁call▁end|> {% endfor %} <|tool▁calls▁end|> {% else %} {{ message.content }} {% if not last %} <|end▁of▁sentence|> {% endif %} {% endif %} {% elif message.role == \"tool\" %} <|tool▁outputs▁begin|> <|tool▁output▁begin|> {{ message.content }} <|tool▁output▁end|> <|tool▁outputs▁end|> {% if last and message.role != \"assistant\" %} <|Assistant|> {% endif %} {% endif %} {% endfor %} {% else %} {% if system %} {{ system }} {% endif %} {% if prompt %} <|User|> {{ prompt }} {% endif %} <|Assistant|> {{ response }} {% if response %} {{ response }} {% endif %} {% endif %}",
  "stop_token_ids": [
  1
  ],
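
The chat_template strings in this registry are Jinja2 templates that xinference renders into the final prompt at request time. A simplified illustration of that rendering step (the toy template below is illustrative, not the DeepSeek-V3 one from the hunk above):

# Render a registry-style chat template with Jinja2.
from jinja2 import Template

toy_template = Template(
    "{% for m in messages %}<|{{ m.role }}|>{{ m.content }}{% endfor %}<|Assistant|>"
)
print(toy_template.render(messages=[{"role": "User", "content": "Hi"}]))
# <|User|>Hi<|Assistant|>
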
@@ -10499,5 +10774,105 @@
  "stop": [
  "<|im_end|>"
  ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "fin-r1",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Fin-R1 is a large language model specifically designed for the field of financial reasoning",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "SUFE-AIFLM-Lab/Fin-R1"
+ },
+ {
+ "model_format":"gptq",
+ "model_size_in_billions":7,
+ "quantizations":[
+ "Int4",
+ "Int8"
+ ],
+ "model_id":"JunHowie/Fin-R1-GPTQ-{quantization}"
+ },
+ {
+ "model_format":"fp8",
+ "model_size_in_billions":7,
+ "quantizations":[
+ "FP8"
+ ],
+ "model_id":"JunHowie/Fin-R1-FP8-Dynamic"
+ }
+ ],
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+ "stop_token_ids": [
+ 151643,
+ 151644,
+ 151645
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|im_start|>",
+ "<|im_end|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 4096,
+ "model_name": "deepseek-vl2",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "DeepSeek-VL2, an advanced series of large Mixture-of-Experts (MoE) Vision-Language Models that significantly improves upon its predecessor, DeepSeek-VL. DeepSeek-VL2 demonstrates superior capabilities across various tasks, including but not limited to visual question answering, optical character recognition, document/table/chart understanding, and visual grounding.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/deepseek-vl2"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 16,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/deepseek-vl2-small"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 3,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/deepseek-vl2-tiny"
+ }
+ ],
+ "chat_template": "",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ]
  }
  ]
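
Since deepseek-vl2 is registered with both chat and vision abilities, it accepts OpenAI-style multimodal messages through xinference's OpenAI-compatible endpoint. A minimal request sketch; the endpoint, model uid, and image URL are placeholders, not values from this diff:

# POST an OpenAI-style multimodal chat request to a running xinference server.
import requests

payload = {
    "model": "deepseek-vl2",  # the uid returned by launch_model
    "messages": [{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ],
    }],
}
resp = requests.post("http://localhost:9997/v1/chat/completions", json=payload)
print(resp.json()["choices"][0]["message"]["content"])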