xinference 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (132) hide show
  1. xinference/_compat.py +1 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +54 -1
  4. xinference/client/restful/restful_client.py +82 -2
  5. xinference/constants.py +3 -0
  6. xinference/core/chat_interface.py +297 -83
  7. xinference/core/model.py +24 -3
  8. xinference/core/progress_tracker.py +16 -8
  9. xinference/core/supervisor.py +51 -1
  10. xinference/core/worker.py +315 -47
  11. xinference/deploy/cmdline.py +33 -1
  12. xinference/model/audio/core.py +11 -1
  13. xinference/model/audio/megatts.py +105 -0
  14. xinference/model/audio/model_spec.json +24 -1
  15. xinference/model/audio/model_spec_modelscope.json +26 -1
  16. xinference/model/core.py +14 -0
  17. xinference/model/embedding/core.py +6 -1
  18. xinference/model/flexible/core.py +6 -1
  19. xinference/model/image/core.py +6 -1
  20. xinference/model/image/model_spec.json +17 -1
  21. xinference/model/image/model_spec_modelscope.json +17 -1
  22. xinference/model/llm/__init__.py +4 -6
  23. xinference/model/llm/core.py +5 -0
  24. xinference/model/llm/llama_cpp/core.py +46 -17
  25. xinference/model/llm/llm_family.json +530 -85
  26. xinference/model/llm/llm_family.py +24 -1
  27. xinference/model/llm/llm_family_modelscope.json +572 -1
  28. xinference/model/llm/mlx/core.py +16 -2
  29. xinference/model/llm/reasoning_parser.py +3 -3
  30. xinference/model/llm/sglang/core.py +111 -13
  31. xinference/model/llm/transformers/__init__.py +14 -0
  32. xinference/model/llm/transformers/core.py +31 -6
  33. xinference/model/llm/transformers/deepseek_vl.py +1 -1
  34. xinference/model/llm/transformers/deepseek_vl2.py +287 -0
  35. xinference/model/llm/transformers/gemma3.py +17 -2
  36. xinference/model/llm/transformers/intern_vl.py +28 -18
  37. xinference/model/llm/transformers/minicpmv26.py +21 -2
  38. xinference/model/llm/transformers/qwen-omni.py +308 -0
  39. xinference/model/llm/transformers/qwen2_audio.py +1 -1
  40. xinference/model/llm/transformers/qwen2_vl.py +20 -4
  41. xinference/model/llm/utils.py +37 -15
  42. xinference/model/llm/vllm/core.py +184 -8
  43. xinference/model/llm/vllm/distributed_executor.py +320 -0
  44. xinference/model/rerank/core.py +22 -12
  45. xinference/model/utils.py +118 -1
  46. xinference/model/video/core.py +6 -1
  47. xinference/thirdparty/deepseek_vl2/__init__.py +31 -0
  48. xinference/thirdparty/deepseek_vl2/models/__init__.py +26 -0
  49. xinference/thirdparty/deepseek_vl2/models/configuration_deepseek.py +210 -0
  50. xinference/thirdparty/deepseek_vl2/models/conversation.py +310 -0
  51. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek.py +1975 -0
  52. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek_vl_v2.py +697 -0
  53. xinference/thirdparty/deepseek_vl2/models/processing_deepseek_vl_v2.py +675 -0
  54. xinference/thirdparty/deepseek_vl2/models/siglip_vit.py +661 -0
  55. xinference/thirdparty/deepseek_vl2/serve/__init__.py +0 -0
  56. xinference/thirdparty/deepseek_vl2/serve/app_modules/__init__.py +0 -0
  57. xinference/thirdparty/deepseek_vl2/serve/app_modules/gradio_utils.py +83 -0
  58. xinference/thirdparty/deepseek_vl2/serve/app_modules/overwrites.py +81 -0
  59. xinference/thirdparty/deepseek_vl2/serve/app_modules/presets.py +115 -0
  60. xinference/thirdparty/deepseek_vl2/serve/app_modules/utils.py +333 -0
  61. xinference/thirdparty/deepseek_vl2/serve/assets/Kelpy-Codos.js +100 -0
  62. xinference/thirdparty/deepseek_vl2/serve/assets/avatar.png +0 -0
  63. xinference/thirdparty/deepseek_vl2/serve/assets/custom.css +355 -0
  64. xinference/thirdparty/deepseek_vl2/serve/assets/custom.js +22 -0
  65. xinference/thirdparty/deepseek_vl2/serve/assets/favicon.ico +0 -0
  66. xinference/thirdparty/deepseek_vl2/serve/assets/simsun.ttc +0 -0
  67. xinference/thirdparty/deepseek_vl2/serve/inference.py +197 -0
  68. xinference/thirdparty/deepseek_vl2/utils/__init__.py +18 -0
  69. xinference/thirdparty/deepseek_vl2/utils/io.py +80 -0
  70. xinference/thirdparty/megatts3/__init__.py +0 -0
  71. xinference/thirdparty/megatts3/tts/frontend_function.py +175 -0
  72. xinference/thirdparty/megatts3/tts/gradio_api.py +93 -0
  73. xinference/thirdparty/megatts3/tts/infer_cli.py +277 -0
  74. xinference/thirdparty/megatts3/tts/modules/aligner/whisper_small.py +318 -0
  75. xinference/thirdparty/megatts3/tts/modules/ar_dur/ar_dur_predictor.py +362 -0
  76. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/layers.py +64 -0
  77. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/nar_tts_modules.py +73 -0
  78. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rel_transformer.py +403 -0
  79. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rot_transformer.py +649 -0
  80. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/seq_utils.py +342 -0
  81. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/transformer.py +767 -0
  82. xinference/thirdparty/megatts3/tts/modules/llm_dit/cfm.py +309 -0
  83. xinference/thirdparty/megatts3/tts/modules/llm_dit/dit.py +180 -0
  84. xinference/thirdparty/megatts3/tts/modules/llm_dit/time_embedding.py +44 -0
  85. xinference/thirdparty/megatts3/tts/modules/llm_dit/transformer.py +230 -0
  86. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/diag_gaussian.py +67 -0
  87. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/hifigan_modules.py +283 -0
  88. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/seanet_encoder.py +38 -0
  89. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/wavvae_v3.py +60 -0
  90. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/conv.py +154 -0
  91. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/lstm.py +51 -0
  92. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/seanet.py +126 -0
  93. xinference/thirdparty/megatts3/tts/utils/audio_utils/align.py +36 -0
  94. xinference/thirdparty/megatts3/tts/utils/audio_utils/io.py +95 -0
  95. xinference/thirdparty/megatts3/tts/utils/audio_utils/plot.py +90 -0
  96. xinference/thirdparty/megatts3/tts/utils/commons/ckpt_utils.py +171 -0
  97. xinference/thirdparty/megatts3/tts/utils/commons/hparams.py +215 -0
  98. xinference/thirdparty/megatts3/tts/utils/text_utils/dict.json +1 -0
  99. xinference/thirdparty/megatts3/tts/utils/text_utils/ph_tone_convert.py +94 -0
  100. xinference/thirdparty/megatts3/tts/utils/text_utils/split_text.py +90 -0
  101. xinference/thirdparty/megatts3/tts/utils/text_utils/text_encoder.py +280 -0
  102. xinference/types.py +10 -0
  103. xinference/utils.py +54 -0
  104. xinference/web/ui/build/asset-manifest.json +6 -6
  105. xinference/web/ui/build/index.html +1 -1
  106. xinference/web/ui/build/static/css/main.0f6523be.css +2 -0
  107. xinference/web/ui/build/static/css/main.0f6523be.css.map +1 -0
  108. xinference/web/ui/build/static/js/main.58bd483c.js +3 -0
  109. xinference/web/ui/build/static/js/main.58bd483c.js.map +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/3bff8cbe9141f937f4d98879a9771b0f48e0e4e0dbee8e647adbfe23859e7048.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/4500b1a622a031011f0a291701e306b87e08cbc749c50e285103536b85b6a914.json +1 -0
  112. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +1 -0
  113. xinference/web/ui/node_modules/.cache/babel-loader/69081049f0c7447544b7cfd73dd13d8846c02fe5febe4d81587e95c89a412d5b.json +1 -0
  114. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/bf2b211b0d1b6465eff512d64c869d748f803c5651a7c24e48de6ea3484a7bfe.json +1 -0
  116. xinference/web/ui/src/locales/en.json +2 -1
  117. xinference/web/ui/src/locales/zh.json +2 -1
  118. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/METADATA +128 -115
  119. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/RECORD +124 -63
  120. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/WHEEL +1 -1
  121. xinference/web/ui/build/static/css/main.b494ae7e.css +0 -2
  122. xinference/web/ui/build/static/css/main.b494ae7e.css.map +0 -1
  123. xinference/web/ui/build/static/js/main.3cea968e.js +0 -3
  124. xinference/web/ui/build/static/js/main.3cea968e.js.map +0 -1
  125. xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +0 -1
  126. xinference/web/ui/node_modules/.cache/babel-loader/7f59e45e3f268ab8a4788b6fb024cf8dab088736dff22f5a3a39c122a83ab930.json +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/dcd60488509450bfff37bfff56de2c096d51de17dd00ec60d4db49c8b483ada1.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/e547bbb18abb4a474b675a8d5782d25617566bea0af8caa9b836ce5649e2250a.json +0 -1
  129. /xinference/web/ui/build/static/js/{main.3cea968e.js.LICENSE.txt → main.58bd483c.js.LICENSE.txt} +0 -0
  130. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/entry_points.txt +0 -0
  131. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info/licenses}/LICENSE +0 -0
  132. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/top_level.txt +0 -0
@@ -7289,6 +7289,148 @@
7289
7289
  "stop_token_ids": [],
7290
7290
  "stop": []
7291
7291
  },
7292
+ {
7293
+ "version": 1,
7294
+ "context_length": 8192,
7295
+ "model_name": "InternVL3",
7296
+ "model_lang": [
7297
+ "en",
7298
+ "zh"
7299
+ ],
7300
+ "model_ability": [
7301
+ "chat",
7302
+ "vision"
7303
+ ],
7304
+ "model_description": "InternVL3, an advanced multimodal large language model (MLLM) series that demonstrates superior overall performance.",
7305
+ "model_specs": [
7306
+ {
7307
+ "model_format": "pytorch",
7308
+ "model_size_in_billions": 1,
7309
+ "quantizations": [
7310
+ "8-bit",
7311
+ "none"
7312
+ ],
7313
+ "model_id": "OpenGVLab/InternVL3-1B"
7314
+ },
7315
+ {
7316
+ "model_format": "awq",
7317
+ "model_size_in_billions": 1,
7318
+ "quantizations": [
7319
+ "Int4"
7320
+ ],
7321
+ "model_id": "OpenGVLab/InternVL3-1B-AWQ"
7322
+ },
7323
+ {
7324
+ "model_format": "pytorch",
7325
+ "model_size_in_billions": 2,
7326
+ "quantizations": [
7327
+ "8-bit",
7328
+ "none"
7329
+ ],
7330
+ "model_id": "OpenGVLab/InternVL3-2B"
7331
+ },
7332
+ {
7333
+ "model_format": "awq",
7334
+ "model_size_in_billions": 2,
7335
+ "quantizations": [
7336
+ "Int4"
7337
+ ],
7338
+ "model_id": "OpenGVLab/InternVL3-2B-AWQ"
7339
+ },
7340
+ {
7341
+ "model_format": "pytorch",
7342
+ "model_size_in_billions": 8,
7343
+ "quantizations": [
7344
+ "8-bit",
7345
+ "none"
7346
+ ],
7347
+ "model_id": "OpenGVLab/InternVL3-8B"
7348
+ },
7349
+ {
7350
+ "model_format": "awq",
7351
+ "model_size_in_billions": 8,
7352
+ "quantizations": [
7353
+ "Int4"
7354
+ ],
7355
+ "model_id": "OpenGVLab/InternVL3-8B-AWQ"
7356
+ },
7357
+ {
7358
+ "model_format": "pytorch",
7359
+ "model_size_in_billions": 9,
7360
+ "quantizations": [
7361
+ "8-bit",
7362
+ "none"
7363
+ ],
7364
+ "model_id": "OpenGVLab/InternVL3-9B"
7365
+ },
7366
+ {
7367
+ "model_format": "awq",
7368
+ "model_size_in_billions": 9,
7369
+ "quantizations": [
7370
+ "Int4"
7371
+ ],
7372
+ "model_id": "OpenGVLab/InternVL3-9B-AWQ"
7373
+ },
7374
+ {
7375
+ "model_format": "pytorch",
7376
+ "model_size_in_billions": 14,
7377
+ "quantizations": [
7378
+ "8-bit",
7379
+ "none"
7380
+ ],
7381
+ "model_id": "OpenGVLab/InternVL3-14B"
7382
+ },
7383
+ {
7384
+ "model_format": "awq",
7385
+ "model_size_in_billions": 14,
7386
+ "quantizations": [
7387
+ "Int4"
7388
+ ],
7389
+ "model_id": "OpenGVLab/InternVL3-14B-AWQ"
7390
+ },
7391
+ {
7392
+ "model_format": "pytorch",
7393
+ "model_size_in_billions": 38,
7394
+ "quantizations": [
7395
+ "8-bit",
7396
+ "none"
7397
+ ],
7398
+ "model_id": "OpenGVLab/InternVL3-38B"
7399
+ },
7400
+ {
7401
+ "model_format": "awq",
7402
+ "model_size_in_billions": 38,
7403
+ "quantizations": [
7404
+ "Int4"
7405
+ ],
7406
+ "model_id": "OpenGVLab/InternVL3-38B-AWQ"
7407
+ },
7408
+ {
7409
+ "model_format": "pytorch",
7410
+ "model_size_in_billions": 78,
7411
+ "quantizations": [
7412
+ "8-bit",
7413
+ "none"
7414
+ ],
7415
+ "model_id": "OpenGVLab/InternVL3-78B"
7416
+ },
7417
+ {
7418
+ "model_format": "awq",
7419
+ "model_size_in_billions": 78,
7420
+ "quantizations": [
7421
+ "Int4"
7422
+ ],
7423
+ "model_id": "OpenGVLab/InternVL3-78B-AWQ"
7424
+ }
7425
+ ],
7426
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
7427
+ "stop_token_ids": [
7428
+ 151645
7429
+ ],
7430
+ "stop": [
7431
+ "<|im_end|>"
7432
+ ]
7433
+ },
7292
7434
  {
7293
7435
  "version": 1,
7294
7436
  "context_length": 8192,
@@ -7561,7 +7703,7 @@
7561
7703
  "model_id":"Qwen/Qwen2-VL-7B-Instruct",
7562
7704
  "model_revision":"6010982c1010c3b222fa98afc81575f124aa9bd6"
7563
7705
  },
7564
- {
7706
+ {
7565
7707
  "model_format":"gptq",
7566
7708
  "model_size_in_billions":7,
7567
7709
  "quantizations":[
@@ -7672,6 +7814,14 @@
7672
7814
  ],
7673
7815
  "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
7674
7816
  },
7817
+ {
7818
+ "model_format":"pytorch",
7819
+ "model_size_in_billions":32,
7820
+ "quantizations":[
7821
+ "none"
7822
+ ],
7823
+ "model_id":"Qwen/Qwen2.5-VL-32B-Instruct"
7824
+ },
7675
7825
  {
7676
7826
  "model_format":"pytorch",
7677
7827
  "model_size_in_billions":72,
@@ -7696,6 +7846,14 @@
7696
7846
  ],
7697
7847
  "model_id":"Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
7698
7848
  },
7849
+ {
7850
+ "model_format":"awq",
7851
+ "model_size_in_billions":32,
7852
+ "quantizations":[
7853
+ "Int4"
7854
+ ],
7855
+ "model_id":"Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
7856
+ },
7699
7857
  {
7700
7858
  "model_format":"awq",
7701
7859
  "model_size_in_billions":72,
@@ -7751,6 +7909,47 @@
7751
7909
  "<|endoftext|>"
7752
7910
  ]
7753
7911
  },
7912
+ {
7913
+ "version":1,
7914
+ "context_length":32768,
7915
+ "model_name":"qwen2.5-omni",
7916
+ "model_lang":[
7917
+ "en",
7918
+ "zh"
7919
+ ],
7920
+ "model_ability":[
7921
+ "chat",
7922
+ "vision",
7923
+ "audio",
7924
+ "omni"
7925
+ ],
7926
+ "model_description":"Qwen2.5-Omni: the new flagship end-to-end multimodal model in the Qwen series.",
7927
+ "model_specs":[
7928
+ {
7929
+ "model_format":"pytorch",
7930
+ "model_size_in_billions":7,
7931
+ "quantizations":[
7932
+ "none"
7933
+ ],
7934
+ "model_id":"Qwen/Qwen2.5-Omni-7B"
7935
+ }
7936
+ ],
7937
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
7938
+ "stop_token_ids": [
7939
+ 151645,
7940
+ 151643
7941
+ ],
7942
+ "stop": [
7943
+ "<|im_end|>",
7944
+ "<|endoftext|>"
7945
+ ],
7946
+ "virtualenv": {
7947
+ "packages": [
7948
+ "git+https://github.com/huggingface/transformers@v4.51.3-Qwen2.5-Omni-preview",
7949
+ "numpy==1.26.4"
7950
+ ]
7951
+ }
7952
+ },
7754
7953
  {
7755
7954
  "version": 1,
7756
7955
  "context_length": 32768,
@@ -9755,9 +9954,6 @@
9755
9954
  "model_size_in_billions": 32,
9756
9955
  "quantizations": [
9757
9956
  "fp16",
9758
- "q2_k",
9759
- "q3_k_m",
9760
- "q4_0",
9761
9957
  "q4_k_m",
9762
9958
  "q5_0",
9763
9959
  "q5_k_m",
@@ -9766,90 +9962,28 @@
9766
9962
  ],
9767
9963
  "quantization_parts": {
9768
9964
  "fp16": [
9769
- "00001-of-000017",
9770
- "00002-of-000017",
9771
- "00003-of-000017",
9772
- "00004-of-000017",
9773
- "00005-of-000017",
9774
- "00006-of-000017",
9775
- "00007-of-000017",
9776
- "00008-of-000017",
9777
- "00009-of-000017",
9778
- "00010-of-000017",
9779
- "00011-of-000017",
9780
- "00012-of-000017",
9781
- "00013-of-000017",
9782
- "00014-of-000017",
9783
- "00015-of-000017",
9784
- "00016-of-000017",
9785
- "00017-of-000017"
9786
- ],
9787
- "q2_k": [
9788
- "00001-of-00004",
9789
- "00002-of-00004",
9790
- "00003-of-00004",
9791
- "00004-of-00004"
9792
- ],
9793
- "q3_k_m": [
9794
- "00001-of-00005",
9795
- "00002-of-00005",
9796
- "00003-of-00005",
9797
- "00004-of-00005",
9798
- "00005-of-00005"
9799
- ],
9800
- "q4_0": [
9801
- "00001-of-00005",
9802
- "00002-of-00005",
9803
- "00003-of-00005",
9804
- "00004-of-00005",
9805
- "00005-of-00005"
9806
- ],
9807
- "q4_k_m": [
9808
- "00001-of-00005",
9809
- "00002-of-00005",
9810
- "00003-of-00005",
9811
- "00004-of-00005",
9812
- "00005-of-00005"
9813
- ],
9814
- "q5_0": [
9815
- "00001-of-00006",
9816
- "00002-of-00006",
9817
- "00003-of-00006",
9818
- "00004-of-00006",
9819
- "00005-of-00006",
9820
- "00006-of-00006"
9821
- ],
9822
- "q5_k_m": [
9823
- "00001-of-00006",
9824
- "00002-of-00006",
9825
- "00003-of-00006",
9826
- "00004-of-00006",
9827
- "00005-of-00006",
9828
- "00006-of-00006"
9829
- ],
9830
- "q6_k": [
9831
- "00001-of-00007",
9832
- "00002-of-00007",
9833
- "00003-of-00007",
9834
- "00004-of-00007",
9835
- "00005-of-00007",
9836
- "00006-of-00007",
9837
- "00007-of-00007"
9838
- ],
9839
- "q8_0": [
9840
- "00001-of-00009",
9841
- "00002-of-00009",
9842
- "00003-of-00009",
9843
- "00004-of-00009",
9844
- "00005-of-00009",
9845
- "00006-of-00009",
9846
- "00007-of-00009",
9847
- "00008-of-00009",
9848
- "00009-of-00009"
9965
+ "00001-of-00017",
9966
+ "00002-of-00017",
9967
+ "00003-of-00017",
9968
+ "00004-of-00017",
9969
+ "00005-of-00017",
9970
+ "00006-of-00017",
9971
+ "00007-of-00017",
9972
+ "00008-of-00017",
9973
+ "00009-of-00017",
9974
+ "00010-of-00017",
9975
+ "00011-of-00017",
9976
+ "00012-of-00017",
9977
+ "00013-of-00017",
9978
+ "00014-of-00017",
9979
+ "00015-of-00017",
9980
+ "00016-of-00017",
9981
+ "00017-of-00017"
9849
9982
  ]
9850
9983
  },
9851
9984
  "model_id": "Qwen/QwQ-32B-GGUF",
9852
- "model_file_name_template": "qwq-32b-{quantization}.gguf"
9985
+ "model_file_name_template": "qwq-32b-{quantization}.gguf",
9986
+ "model_file_name_split_template": "fp16/qwq-32b-{quantization}-{part}.gguf"
9853
9987
  }
9854
9988
  ],
9855
9989
  "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- '' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" and not message.tool_calls %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != 
\"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n<think>\\n' }}\n{%- endif %}\n",
@@ -10758,5 +10892,316 @@
10758
10892
  "stop": [
10759
10893
  "<|im_end|>"
10760
10894
  ]
10895
+ },
10896
+ {
10897
+ "version": 1,
10898
+ "context_length": 131072,
10899
+ "model_name": "fin-r1",
10900
+ "model_lang": [
10901
+ "en",
10902
+ "zh"
10903
+ ],
10904
+ "model_ability": [
10905
+ "chat"
10906
+ ],
10907
+ "model_description": "Fin-R1 is a large language model specifically designed for the field of financial reasoning",
10908
+ "model_specs": [
10909
+ {
10910
+ "model_format": "pytorch",
10911
+ "model_size_in_billions": 7,
10912
+ "quantizations": [
10913
+ "4-bit",
10914
+ "8-bit",
10915
+ "none"
10916
+ ],
10917
+ "model_id": "SUFE-AIFLM-Lab/Fin-R1"
10918
+ },
10919
+ {
10920
+ "model_format":"gptq",
10921
+ "model_size_in_billions":7,
10922
+ "quantizations":[
10923
+ "Int4",
10924
+ "Int8"
10925
+ ],
10926
+ "model_id":"JunHowie/Fin-R1-GPTQ-{quantization}"
10927
+ },
10928
+ {
10929
+ "model_format":"fp8",
10930
+ "model_size_in_billions":7,
10931
+ "quantizations":[
10932
+ "FP8"
10933
+ ],
10934
+ "model_id":"JunHowie/Fin-R1-FP8-Dynamic"
10935
+ }
10936
+ ],
10937
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
10938
+ "stop_token_ids": [
10939
+ 151643,
10940
+ 151644,
10941
+ 151645
10942
+ ],
10943
+ "stop": [
10944
+ "<|endoftext|>",
10945
+ "<|im_start|>",
10946
+ "<|im_end|>"
10947
+ ]
10948
+ },
10949
+ {
10950
+ "version": 1,
10951
+ "context_length": 4096,
10952
+ "model_name": "deepseek-vl2",
10953
+ "model_lang": [
10954
+ "en",
10955
+ "zh"
10956
+ ],
10957
+ "model_ability": [
10958
+ "chat",
10959
+ "vision"
10960
+ ],
10961
+ "model_description": "DeepSeek-VL2, an advanced series of large Mixture-of-Experts (MoE) Vision-Language Models that significantly improves upon its predecessor, DeepSeek-VL. DeepSeek-VL2 demonstrates superior capabilities across various tasks, including but not limited to visual question answering, optical character recognition, document/table/chart understanding, and visual grounding.",
10962
+ "model_specs": [
10963
+ {
10964
+ "model_format": "pytorch",
10965
+ "model_size_in_billions": 27,
10966
+ "quantizations": [
10967
+ "none"
10968
+ ],
10969
+ "model_id": "deepseek-ai/deepseek-vl2"
10970
+ },
10971
+ {
10972
+ "model_format": "pytorch",
10973
+ "model_size_in_billions": 16,
10974
+ "quantizations": [
10975
+ "none"
10976
+ ],
10977
+ "model_id": "deepseek-ai/deepseek-vl2-small"
10978
+ },
10979
+ {
10980
+ "model_format": "pytorch",
10981
+ "model_size_in_billions": 3,
10982
+ "quantizations": [
10983
+ "none"
10984
+ ],
10985
+ "model_id": "deepseek-ai/deepseek-vl2-tiny"
10986
+ }
10987
+ ],
10988
+ "chat_template": "",
10989
+ "stop_token_ids": [
10990
+ 1
10991
+ ],
10992
+ "stop": [
10993
+ "<|end▁of▁sentence|>"
10994
+ ]
10995
+ },
10996
+ {
10997
+ "version": 1,
10998
+ "context_length": 32768,
10999
+ "model_name": "seallms-v3",
11000
+ "model_lang": [
11001
+ "en",
11002
+ "zh",
11003
+ "id",
11004
+ "vi",
11005
+ "th",
11006
+ "ph",
11007
+ "ms",
11008
+ "mm",
11009
+ "kh",
11010
+ "la",
11011
+ "in"
11012
+ ],
11013
+ "model_ability": [
11014
+ "chat"
11015
+ ],
11016
+ "model_description": "SeaLLMs - Large Language Models for Southeast Asia",
11017
+ "model_specs": [
11018
+ {
11019
+ "model_format": "pytorch",
11020
+ "model_size_in_billions": "1_5",
11021
+ "quantizations": [
11022
+ "none"
11023
+ ],
11024
+ "model_id": "SeaLLMs/SeaLLMs-v3-1.5B-Chat"
11025
+ },
11026
+ {
11027
+ "model_format": "pytorch",
11028
+ "model_size_in_billions": 7,
11029
+ "quantizations": [
11030
+ "none"
11031
+ ],
11032
+ "model_id": "SeaLLMs/SeaLLMs-v3-7B-Chat"
11033
+ }
11034
+ ],
11035
+ "chat_template": "{% set system_message = 'You are a helpful assistant.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
11036
+ "stop_token_ids": [
11037
+ 151643,
11038
+ 151644,
11039
+ 151645
11040
+ ],
11041
+ "stop": [
11042
+ "<|endoftext|>",
11043
+ "<|im_start|>",
11044
+ "<|im_end|>"
11045
+ ]
11046
+ },
11047
+ {
11048
+ "version": 1,
11049
+ "context_length": 32768,
11050
+ "model_name": "glm4-0414",
11051
+ "model_lang": [
11052
+ "en",
11053
+ "zh"
11054
+ ],
11055
+ "model_ability": [
11056
+ "chat",
11057
+ "tools"
11058
+ ],
11059
+ "model_description": "The GLM family welcomes new members, the GLM-4-32B-0414 series models, featuring 32 billion parameters. Its performance is comparable to OpenAI’s GPT series and DeepSeek’s V3/R1 series",
11060
+ "model_specs": [
11061
+ {
11062
+ "model_format": "pytorch",
11063
+ "model_size_in_billions": 9,
11064
+ "quantizations": [
11065
+ "none"
11066
+ ],
11067
+ "model_id": "THUDM/GLM-4-9B-0414"
11068
+ },
11069
+ {
11070
+ "model_format": "pytorch",
11071
+ "model_size_in_billions": 32,
11072
+ "quantizations": [
11073
+ "none"
11074
+ ],
11075
+ "model_id": "THUDM/GLM-4-32B-0414"
11076
+ }
11077
+ ],
11078
+ "chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>\n# 可用工具\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ content }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ content }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ content }}{%- elif role == 'observation' %}<|observation|>\n{{ content }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
11079
+ "stop_token_ids": [
11080
+ 151329,
11081
+ 151336,
11082
+ 151338
11083
+ ],
11084
+ "stop": [
11085
+ "<|endoftext|>",
11086
+ "<|user|>",
11087
+ "<|observation|>"
11088
+ ]
11089
+ },
11090
+ {
11091
+ "version": 1,
11092
+ "context_length": 32768,
11093
+ "model_name": "skywork-or1-preview",
11094
+ "model_lang": [
11095
+ "en",
11096
+ "zh"
11097
+ ],
11098
+ "model_ability": [
11099
+ "chat"
11100
+ ],
11101
+ "model_description": "The Skywork-OR1 (Open Reasoner 1) model series consists of powerful math and code reasoning models trained using large-scale rule-based reinforcement learning with carefully designed datasets and training recipes.",
11102
+ "model_specs": [
11103
+ {
11104
+ "model_format": "pytorch",
11105
+ "model_size_in_billions": 32,
11106
+ "quantizations": [
11107
+ "none"
11108
+ ],
11109
+ "model_id": "Skywork/Skywork-OR1-32B-Preview"
11110
+ },
11111
+ {
11112
+ "model_format": "gptq",
11113
+ "model_size_in_billions": 32,
11114
+ "quantizations": [
11115
+ "Int4",
11116
+ "Int8"
11117
+ ],
11118
+ "model_id": "JunHowie/Skywork-OR1-32B-Preview-GPTQ-{quantization}"
11119
+ },
11120
+ {
11121
+ "model_format": "pytorch",
11122
+ "model_size_in_billions": 7,
11123
+ "quantizations": [
11124
+ "none"
11125
+ ],
11126
+ "model_id": "Skywork/Skywork-OR1-7B-Preview"
11127
+ },
11128
+ {
11129
+ "model_format": "ggufv2",
11130
+ "model_size_in_billions": 32,
11131
+ "quantizations": [
11132
+ "IQ2_M",
11133
+ "IQ2_S",
11134
+ "IQ2_XS",
11135
+ "IQ3_M",
11136
+ "IQ3_XS",
11137
+ "IQ3_XXS",
11138
+ "IQ4_NL",
11139
+ "IQ4_XS",
11140
+ "Q2_K",
11141
+ "Q2_K_L",
11142
+ "Q3_K_L",
11143
+ "Q3_K_M",
11144
+ "Q3_K_S",
11145
+ "Q3_K_XL",
11146
+ "Q4_0",
11147
+ "Q4_1",
11148
+ "Q4_K_L",
11149
+ "Q4_K_M",
11150
+ "Q4_K_S",
11151
+ "Q5_K_L",
11152
+ "Q5_K_M",
11153
+ "Q5_K_S",
11154
+ "Q6_K",
11155
+ "Q6_K_L",
11156
+ "Q8_0"
11157
+ ],
11158
+ "model_id": "bartowski/Skywork_Skywork-OR1-32B-Preview-GGUF",
11159
+ "model_file_name_template": "Skywork_Skywork-OR1-32B-Preview-{quantization}.gguf"
11160
+ },
11161
+ {
11162
+ "model_format": "ggufv2",
11163
+ "model_size_in_billions": 7,
11164
+ "quantizations": [
11165
+ "IQ2_M",
11166
+ "IQ2_S",
11167
+ "IQ2_XS",
11168
+ "IQ3_M",
11169
+ "IQ3_XS",
11170
+ "IQ3_XXS",
11171
+ "IQ4_NL",
11172
+ "IQ4_XS",
11173
+ "Q2_K",
11174
+ "Q2_K_L",
11175
+ "Q3_K_L",
11176
+ "Q3_K_M",
11177
+ "Q3_K_S",
11178
+ "Q3_K_XL",
11179
+ "Q4_0",
11180
+ "Q4_1",
11181
+ "Q4_K_L",
11182
+ "Q4_K_M",
11183
+ "Q4_K_S",
11184
+ "Q5_K_L",
11185
+ "Q5_K_M",
11186
+ "Q5_K_S",
11187
+ "Q6_K",
11188
+ "Q6_K_L",
11189
+ "Q8_0"
11190
+ ],
11191
+ "model_id": "bartowski/Skywork_Skywork-OR1-7B-Preview-GGUF",
11192
+ "model_file_name_template": "Skywork_Skywork-OR1-7B-Preview-{quantization}.gguf"
11193
+ }
11194
+ ],
11195
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
11196
+ "stop_token_ids": [
11197
+ 151643,
11198
+ 151644,
11199
+ 151645
11200
+ ],
11201
+ "stop": [
11202
+ "<|endoftext|>",
11203
+ "<|im_start|>",
11204
+ "<|im_end|>"
11205
+ ]
10761
11206
  }
10762
11207
  ]