xinference 1.3.0.post2__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff compares the publicly released contents of the two package versions as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of xinference has been flagged as possibly problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +1 -0
- xinference/conftest.py +7 -0
- xinference/core/model.py +3 -1
- xinference/core/scheduler.py +3 -0
- xinference/core/worker.py +1 -1
- xinference/model/embedding/core.py +12 -5
- xinference/model/llm/__init__.py +2 -1
- xinference/model/llm/core.py +13 -0
- xinference/model/llm/llama_cpp/core.py +260 -3
- xinference/model/llm/llm_family.json +306 -17
- xinference/model/llm/llm_family_modelscope.json +347 -28
- xinference/model/llm/mlx/core.py +15 -4
- xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +1 -1
- xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py +4 -5
- xinference/model/llm/sglang/core.py +7 -2
- xinference/model/llm/transformers/chatglm.py +4 -4
- xinference/model/llm/transformers/core.py +22 -5
- xinference/model/llm/transformers/intern_vl.py +2 -1
- xinference/model/llm/transformers/utils.py +1 -1
- xinference/model/llm/utils.py +103 -67
- xinference/model/llm/vllm/core.py +29 -42
- xinference/types.py +4 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.55b70cb7.js +3 -0
- xinference/web/ui/build/static/js/main.55b70cb7.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2deac8d5636974533e3714f34e94fc754f9153a07c6ee11e72846cb8eae47e4b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/87a9b13f2466f375ae5c6e7c08b279cc38351d29710d7f7626bbb07a85262b79.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e23d476fcbf6fd69c8986bf82133d257d28aa8fc9a5cab231d81c1c75c58cd99.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e547bbb18abb4a474b675a8d5782d25617566bea0af8caa9b836ce5649e2250a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e7a8c37fda8725cab69c7ef8c627060bd7fc806adc67e00fe628ba148cb86d7f.json +1 -0
- xinference/web/ui/src/locales/en.json +9 -1
- xinference/web/ui/src/locales/zh.json +9 -1
- {xinference-1.3.0.post2.dist-info → xinference-1.3.1.dist-info}/METADATA +7 -3
- {xinference-1.3.0.post2.dist-info → xinference-1.3.1.dist-info}/RECORD +43 -42
- xinference/web/ui/build/static/js/main.ad42919c.js +0 -3
- xinference/web/ui/build/static/js/main.ad42919c.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/074a42304bbbaa79e1bfc3b28502457a390df55708de9006f4cc8e35c60aea87.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/279ace390216236a82b3d8995c78eca4d637ac9a523e9f521a2d9c76607a43d7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/630a7bd592596cc6e291fc32238ce7c08238038a64ed8ccee0eb0c13c9902910.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/914c33e91c1012e3bcd3e96f3a25884cbef148290632d0266dab972b8cc1e95f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b7939cd3a48adf12fccfdd0803019b5cc235ff7de3a297dae70ce635e0eea13e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fecf076bcd198a458c2a6ab0e85e40dc1c99994c353164e79c469be162cb74c9.json +0 -1
- /xinference/web/ui/build/static/js/{main.ad42919c.js.LICENSE.txt → main.55b70cb7.js.LICENSE.txt} +0 -0
- {xinference-1.3.0.post2.dist-info → xinference-1.3.1.dist-info}/LICENSE +0 -0
- {xinference-1.3.0.post2.dist-info → xinference-1.3.1.dist-info}/WHEEL +0 -0
- {xinference-1.3.0.post2.dist-info → xinference-1.3.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.3.0.post2.dist-info → xinference-1.3.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.json

@@ -6796,14 +6796,6 @@
         ],
         "model_id": "OpenGVLab/InternVL2_5-1B"
       },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 1,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_id": "OpenGVLab/InternVL2_5-1B-AWQ"
-      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 2,
@@ -6814,14 +6806,6 @@
         ],
         "model_id": "OpenGVLab/InternVL2_5-2B"
       },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 2,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_id": "OpenGVLab/InternVL2_5-2B-AWQ"
-      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 4,
@@ -6917,6 +6901,135 @@
     "stop_token_ids": [],
     "stop": []
   },
+  {
+    "version": 1,
+    "context_length": 16384,
+    "model_name": "InternVL2.5-MPO",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "InternVL 2.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 1,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "OpenGVLab/InternVL2_5-MPO-1B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "OpenGVLab/InternVL2_5-MPO-2B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "OpenGVLab/InternVL2_5-MPO-4B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "OpenGVLab/InternVL2_5-4B-MPO-AWQ"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "OpenGVLab/InternVL2_5-MPO-8B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "OpenGVLab/InternVL2_5-MPO-8B-AWQ"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 26,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "OpenGVLab/InternVL2_5-MPO-26B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 26,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "OpenGVLab/InternVL2_5-MPO-26B-AWQ"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 38,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "OpenGVLab/InternVL2_5-MPO-38B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 38,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "OpenGVLab/InternVL2_5-MPO-38B-AWQ"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 78,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "OpenGVLab/InternVL2_5-MPO-78B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 78,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "OpenGVLab/InternVL2_5-MPO-78B-AWQ"
+      }
+    ],
+    "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+    "stop_token_ids": [],
+    "stop": []
+  },
   {
     "version": 1,
     "context_length": 8192,
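Each new registry entry maps directly onto launch parameters: `model_name` selects the family, while `model_format`, `model_size_in_billions`, and `quantization` pick one of its `model_specs`. A minimal sketch of launching one of the new InternVL2.5-MPO specs through the Python client follows; the endpoint and the `transformers` engine choice are assumptions, not part of this diff, and a server must already be running.

```python
# Minimal sketch: launch a spec from the new InternVL2.5-MPO entry.
# Assumes a server started with `xinference-local`; the endpoint and the
# "transformers" engine are illustrative assumptions.
from xinference.client import Client

client = Client("http://localhost:9997")

model_uid = client.launch_model(
    model_name="InternVL2.5-MPO",   # "model_name" in the new entry
    model_engine="transformers",    # an engine must be chosen for LLMs
    model_format="pytorch",         # selects one of the "model_specs"
    model_size_in_billions=8,       # must match a spec of that format
    quantization="none",            # one of that spec's "quantizations"
)

model = client.get_model(model_uid)
print(model.chat(messages=[{"role": "user", "content": "Hello!"}]))
```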
@@ -7308,6 +7421,30 @@
         ],
         "model_id":"Qwen/Qwen2.5-VL-72B-Instruct"
       },
+      {
+        "model_format":"awq",
+        "model_size_in_billions":3,
+        "quantizations":[
+          "Int4"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-3B-Instruct-AWQ"
+      },
+      {
+        "model_format":"awq",
+        "model_size_in_billions":7,
+        "quantizations":[
+          "Int4"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
+      },
+      {
+        "model_format":"awq",
+        "model_size_in_billions":72,
+        "quantizations":[
+          "Int4"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-72B-Instruct-AWQ"
+      },
       {
         "model_format":"mlx",
         "model_size_in_billions":3,
@@ -7380,7 +7517,7 @@
         "model_format": "gptq",
         "model_size_in_billions": 4,
         "quantizations": [
-          "
+          "Int4"
         ],
         "model_id": "openbmb/MiniCPM3-4B-GPTQ-Int4",
         "model_revision": "97a66a62f7d09c1ee35b087b42694716a8113dce"
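The one-line MiniCPM3 hunk above repairs a truncated quantization string. A truncated token like that breaks the entire family file at parse time, so a simple round-trip load catches it; the sketch below also flags the subtler case of empty quantization names. The repo-relative path is an assumption.

```python
# Sketch: sanity-check a model family file. A truncated string such as the
# one fixed above makes json.load raise outright; the loop additionally
# flags empty quantization names. The path is an assumption.
import json

with open("xinference/model/llm/llm_family.json", encoding="utf-8") as f:
    families = json.load(f)  # raises json.JSONDecodeError on truncated strings

for family in families:
    for spec in family.get("model_specs", []):
        if any(not q.strip() for q in spec.get("quantizations", [])):
            print(f"empty quantization in {family['model_name']} "
                  f"({spec['model_format']})")
```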
@@ -9310,6 +9447,82 @@
       "<|im_end|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "QwQ-32B",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "reasoning"
+    ],
+    "model_description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/QwQ-32B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/QwQ-32B-AWQ"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/QwQ-32B-{quantization}"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "fp16",
+          "Q2_k",
+          "Q3_K_M",
+          "Q4_0",
+          "Q4_K_M",
+          "Q5_0",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "Qwen/QwQ-32B-GGUF",
+        "model_file_name_template": "qwq-32b-{quantization}.gguf"
+      }
+    ],
+    "chat_template": "{%- if tools %}\n  {{- '<|im_start|>system\\n' }}\n  {%- if messages[0]['role'] == 'system' %}\n  {{- messages[0]['content'] }}\n  {%- else %}\n  {{- '' }}\n  {%- endif %}\n  {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n  {%- for tool in tools %}\n  {{- \"\\n\" }}\n  {{- tool | tojson }}\n  {%- endfor %}\n  {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n  {%- if messages[0]['role'] == 'system' %}\n  {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n  {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n  {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n  {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n  {%- elif message.role == \"assistant\" and not message.tool_calls %}\n  {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n  {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n  {%- elif message.role == \"assistant\" %}\n  {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n  {{- '<|im_start|>' + message.role }}\n  {%- if message.content %}\n  {{- '\\n' + content }}\n  {%- endif %}\n  {%- for tool_call in message.tool_calls %}\n  {%- if tool_call.function is defined %}\n  {%- set tool_call = tool_call.function %}\n  {%- endif %}\n  {{- '\\n<tool_call>\\n{\"name\": \"' }}\n  {{- tool_call.name }}\n  {{- '\", \"arguments\": ' }}\n  {{- tool_call.arguments | tojson }}\n  {{- '}\\n</tool_call>' }}\n  {%- endfor %}\n  {{- '<|im_end|>\\n' }}\n  {%- elif message.role == \"tool\" %}\n  {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n  {{- '<|im_start|>user' }}\n  {%- endif %}\n  {{- '\\n<tool_response>\\n' }}\n  {{- message.content }}\n  {{- '\\n</tool_response>' }}\n  {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n  {{- '<|im_end|>\\n' }}\n  {%- endif %}\n  {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n  {{- '<|im_start|>assistant\\n<think>\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      151643,
+      151644,
+      151645
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|im_start|>",
+      "<|im_end|>"
+    ],
+    "reasoning_start_tag": "<think>",
+    "reasoning_end_tag": "</think>"
+  },
   {
     "version": 1,
     "context_length": 131072,
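The QwQ-32B entry is registered with the `reasoning` ability plus `reasoning_start_tag`/`reasoning_end_tag`, which the reasoning parsers touched in this release (see `deepseek_r1_reasoning_parser.py` in the file list) use to separate thought from answer. A conceptual sketch of that tag-based split, not the actual xinference parser:

```python
# Conceptual sketch of tag-based reasoning extraction; not the actual
# xinference parser. The tags come from the new registry entry.
REASONING_START = "<think>"   # "reasoning_start_tag"
REASONING_END = "</think>"    # "reasoning_end_tag"

def split_reasoning(text: str) -> tuple[str, str]:
    """Split a completion into (reasoning, answer).

    The chat template already ends the generation prompt with "<think>\n",
    so a completion may contain only the end tag; partitioning on the end
    tag handles both shapes. Requires Python 3.9+ for str.removeprefix.
    """
    head, sep, tail = text.partition(REASONING_END)
    if not sep:  # no end tag: treat everything as the answer
        return "", text
    return head.removeprefix(REASONING_START).strip(), tail.strip()

reasoning, answer = split_reasoning(
    "17 has no divisors besides 1 and itself.\n</think>\nYes, 17 is prime."
)
print(reasoning)  # -> 17 has no divisors besides 1 and itself.
print(answer)     # -> Yes, 17 is prime.
```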
@@ -10126,5 +10339,81 @@
       "</s>",
       "<|im_end|>"
     ]
+  },
+  {
+    "version": 1,
+    "context_length": 1010000,
+    "model_name": "qwen2.5-instruct-1m",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Qwen2.5-1M is the long-context version of the Qwen2.5 series models, supporting a context length of up to 1M tokens.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-7B-Instruct-1M"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-14B-Instruct-1M"
+      }
+    ],
+    "chat_template": "{%- if tools %}\n  {{- '<|im_start|>system\\n' }}\n  {%- if messages[0]['role'] == 'system' %}\n  {{- messages[0]['content'] }}\n  {%- else %}\n  {{- 'You are a helpful assistant.' }}\n  {%- endif %}\n  {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n  {%- for tool in tools %}\n  {{- \"\\n\" }}\n  {{- tool | tojson }}\n  {%- endfor %}\n  {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n  {%- if messages[0]['role'] == 'system' %}\n  {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n  {%- else %}\n  {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n  {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n  {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n  {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n  {%- elif message.role == \"assistant\" %}\n  {{- '<|im_start|>' + message.role }}\n  {%- if message.content %}\n  {{- '\\n' + message.content }}\n  {%- endif %}\n  {%- for tool_call in message.tool_calls %}\n  {%- if tool_call.function is defined %}\n  {%- set tool_call = tool_call.function %}\n  {%- endif %}\n  {{- '\\n<tool_call>\\n{\"name\": \"' }}\n  {{- tool_call.name }}\n  {{- '\", \"arguments\": ' }}\n  {{- tool_call.arguments | tojson }}\n  {{- '}\\n</tool_call>' }}\n  {%- endfor %}\n  {{- '<|im_end|>\\n' }}\n  {%- elif message.role == \"tool\" %}\n  {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n  {{- '<|im_start|>user' }}\n  {%- endif %}\n  {{- '\\n<tool_response>\\n' }}\n  {{- message.content }}\n  {{- '\\n</tool_response>' }}\n  {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n  {{- '<|im_end|>\\n' }}\n  {%- endif %}\n  {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n  {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      151645,
+      151643
+    ],
+    "stop": [
+      "<|im_end|>",
+      "<|endoftext|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "moonlight-16b-a3b-instruct",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Kimi Muon is Scalable for LLM Training",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "moonshotai/Moonlight-16B-A3B-Instruct"
+      }
+    ],
+    "chat_template":"{%- for message in messages -%}{%- if loop.first and messages[0]['role'] != 'system' -%}<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>{%- endif -%}{%- if message['role'] == 'system' -%}<|im_system|>{%- endif -%}{%- if message['role'] == 'user' -%}<|im_user|>{%- endif -%}{%- if message['role'] == 'assistant' -%}<|im_assistant|>{%- endif -%}{{ message['role'] }}<|im_middle|>{{message['content']}}<|im_end|>{%- endfor -%}{%- if add_generation_prompt -%}<|im_assistant|>assistant<|im_middle|>{%- endif -%}",
+    "stop_token_ids": [
+      163586
+    ],
+    "stop": [
+      "<|im_end|>"
+    ]
   }
 ]
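The `chat_template` values above are Jinja2 templates that the server renders into the final prompt string. A template can be rendered locally to inspect the exact prompt format; a sketch using the new moonlight-16b-a3b-instruct template (the template string is copied from the diff above, the example messages are made up):

```python
# Sketch: render the new moonlight chat_template locally to inspect the
# prompt format the server would build.
from jinja2 import Template

chat_template = (
    "{%- for message in messages -%}"
    "{%- if loop.first and messages[0]['role'] != 'system' -%}"
    "<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>"
    "{%- endif -%}"
    "{%- if message['role'] == 'system' -%}<|im_system|>{%- endif -%}"
    "{%- if message['role'] == 'user' -%}<|im_user|>{%- endif -%}"
    "{%- if message['role'] == 'assistant' -%}<|im_assistant|>{%- endif -%}"
    "{{ message['role'] }}<|im_middle|>{{message['content']}}<|im_end|>"
    "{%- endfor -%}"
    "{%- if add_generation_prompt -%}"
    "<|im_assistant|>assistant<|im_middle|>"
    "{%- endif -%}"
)

prompt = Template(chat_template).render(
    messages=[{"role": "user", "content": "Hello!"}],
    add_generation_prompt=True,
)
print(prompt)
# Output (a single line, wrapped here for readability):
# <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
#   <|im_user|>user<|im_middle|>Hello!<|im_end|><|im_assistant|>assistant<|im_middle|>
```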