xinference 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +4 -7
- xinference/client/handlers.py +3 -0
- xinference/client/restful/restful_client.py +9 -1
- xinference/core/model.py +19 -0
- xinference/core/resource.py +7 -1
- xinference/core/scheduler.py +4 -7
- xinference/core/status_guard.py +1 -0
- xinference/core/supervisor.py +228 -19
- xinference/core/utils.py +1 -29
- xinference/core/worker.py +28 -2
- xinference/deploy/cmdline.py +33 -3
- xinference/deploy/local.py +2 -1
- xinference/deploy/test/test_cmdline.py +32 -0
- xinference/device_utils.py +43 -1
- xinference/model/audio/core.py +5 -0
- xinference/model/audio/kokoro.py +122 -0
- xinference/model/audio/model_spec.json +8 -0
- xinference/model/audio/model_spec_modelscope.json +9 -0
- xinference/model/image/stable_diffusion/core.py +15 -6
- xinference/model/llm/llama_cpp/core.py +21 -14
- xinference/model/llm/llm_family.json +866 -46
- xinference/model/llm/llm_family.py +7 -2
- xinference/model/llm/llm_family_modelscope.json +873 -16
- xinference/model/llm/mlx/core.py +11 -3
- xinference/model/llm/reasoning_parsers/__init__.py +13 -0
- xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +98 -0
- xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py +140 -0
- xinference/model/llm/sglang/core.py +99 -11
- xinference/model/llm/transformers/core.py +9 -1
- xinference/model/llm/transformers/intern_vl.py +23 -14
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -3
- xinference/model/llm/transformers/utils.py +22 -11
- xinference/model/llm/utils.py +164 -20
- xinference/model/llm/vllm/core.py +36 -4
- xinference/model/llm/vllm/xavier/executor.py +2 -2
- xinference/model/llm/vllm/xavier/scheduler.py +3 -3
- xinference/thirdparty/internvl/conversation.py +26 -17
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.f8177338.css +2 -0
- xinference/web/ui/build/static/css/main.f8177338.css.map +1 -0
- xinference/web/ui/build/static/js/main.ad42919c.js +3 -0
- xinference/web/ui/build/static/js/main.ad42919c.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/074a42304bbbaa79e1bfc3b28502457a390df55708de9006f4cc8e35c60aea87.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0acb065326560592b10888234242f94f67efe28458b90f273d4d4fba9daa0cd2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/279ace390216236a82b3d8995c78eca4d637ac9a523e9f521a2d9c76607a43d7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/630a7bd592596cc6e291fc32238ce7c08238038a64ed8ccee0eb0c13c9902910.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6cb9f6c62ab4042f0b11c5d75e51187188e9d6f5f08b1d63e796e051bafdb457.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/914c33e91c1012e3bcd3e96f3a25884cbef148290632d0266dab972b8cc1e95f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b7939cd3a48adf12fccfdd0803019b5cc235ff7de3a297dae70ce635e0eea13e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fecf076bcd198a458c2a6ab0e85e40dc1c99994c353164e79c469be162cb74c9.json +1 -0
- xinference/web/ui/src/locales/en.json +14 -1
- xinference/web/ui/src/locales/zh.json +14 -1
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/METADATA +18 -17
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/RECORD +67 -60
- xinference/web/ui/build/static/css/main.51a587ff.css +0 -2
- xinference/web/ui/build/static/css/main.51a587ff.css.map +0 -1
- xinference/web/ui/build/static/js/main.b0936c54.js +0 -3
- xinference/web/ui/build/static/js/main.b0936c54.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0c2fb5375667931c4a331c99e0d87dc145e8f327cea3f44d6e56f54c7c1d4020.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a7f1a71f6580dfe810c685a9c1d68e318f71e1fa258fbe50b87a6ac37cc0a598.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +0 -1
- /xinference/web/ui/build/static/js/{main.b0936c54.js.LICENSE.txt → main.ad42919c.js.LICENSE.txt} +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/LICENSE +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/WHEEL +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/top_level.txt +0 -0
|
@@ -4497,6 +4497,179 @@
|
|
|
4497
4497
|
"stop_token_ids": [],
|
|
4498
4498
|
"stop": []
|
|
4499
4499
|
},
|
|
4500
|
+
{
|
|
4501
|
+
"version": 1,
|
|
4502
|
+
"context_length": 16384,
|
|
4503
|
+
"model_name": "InternVL2.5",
|
|
4504
|
+
"model_lang": [
|
|
4505
|
+
"en",
|
|
4506
|
+
"zh"
|
|
4507
|
+
],
|
|
4508
|
+
"model_ability": [
|
|
4509
|
+
"chat",
|
|
4510
|
+
"vision"
|
|
4511
|
+
],
|
|
4512
|
+
"model_description": "InternVL 2.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
|
|
4513
|
+
"model_specs": [
|
|
4514
|
+
{
|
|
4515
|
+
"model_format": "pytorch",
|
|
4516
|
+
"model_size_in_billions": 1,
|
|
4517
|
+
"quantizations": [
|
|
4518
|
+
"4-bit",
|
|
4519
|
+
"8-bit",
|
|
4520
|
+
"none"
|
|
4521
|
+
],
|
|
4522
|
+
"model_hub": "modelscope",
|
|
4523
|
+
"model_id": "OpenGVLab/InternVL2_5-1B",
|
|
4524
|
+
"model_revision": "master"
|
|
4525
|
+
},
|
|
4526
|
+
{
|
|
4527
|
+
"model_format": "awq",
|
|
4528
|
+
"model_size_in_billions": 1,
|
|
4529
|
+
"quantizations": [
|
|
4530
|
+
"Int4"
|
|
4531
|
+
],
|
|
4532
|
+
"model_hub": "modelscope",
|
|
4533
|
+
"model_id": "OpenGVLab/InternVL2_5-1B-AWQ",
|
|
4534
|
+
"model_revision": "master"
|
|
4535
|
+
},
|
|
4536
|
+
{
|
|
4537
|
+
"model_format": "pytorch",
|
|
4538
|
+
"model_size_in_billions": 2,
|
|
4539
|
+
"quantizations": [
|
|
4540
|
+
"4-bit",
|
|
4541
|
+
"8-bit",
|
|
4542
|
+
"none"
|
|
4543
|
+
],
|
|
4544
|
+
"model_hub": "modelscope",
|
|
4545
|
+
"model_id": "OpenGVLab/InternVL2_5-2B",
|
|
4546
|
+
"model_revision": "master"
|
|
4547
|
+
},
|
|
4548
|
+
{
|
|
4549
|
+
"model_format": "awq",
|
|
4550
|
+
"model_size_in_billions": 2,
|
|
4551
|
+
"quantizations": [
|
|
4552
|
+
"Int4"
|
|
4553
|
+
],
|
|
4554
|
+
"model_hub": "modelscope",
|
|
4555
|
+
"model_id": "OpenGVLab/InternVL2_5-2B-AWQ",
|
|
4556
|
+
"model_revision": "master"
|
|
4557
|
+
},
|
|
4558
|
+
{
|
|
4559
|
+
"model_format": "pytorch",
|
|
4560
|
+
"model_size_in_billions": 4,
|
|
4561
|
+
"quantizations": [
|
|
4562
|
+
"4-bit",
|
|
4563
|
+
"8-bit",
|
|
4564
|
+
"none"
|
|
4565
|
+
],
|
|
4566
|
+
"model_hub": "modelscope",
|
|
4567
|
+
"model_id": "OpenGVLab/InternVL2_5-4B",
|
|
4568
|
+
"model_revision": "master"
|
|
4569
|
+
},
|
|
4570
|
+
{
|
|
4571
|
+
"model_format": "awq",
|
|
4572
|
+
"model_size_in_billions": 4,
|
|
4573
|
+
"quantizations": [
|
|
4574
|
+
"Int4"
|
|
4575
|
+
],
|
|
4576
|
+
"model_hub": "modelscope",
|
|
4577
|
+
"model_id": "OpenGVLab/InternVL2_5-4B-AWQ",
|
|
4578
|
+
"model_revision": "master"
|
|
4579
|
+
},
|
|
4580
|
+
{
|
|
4581
|
+
"model_format": "pytorch",
|
|
4582
|
+
"model_size_in_billions": 8,
|
|
4583
|
+
"quantizations": [
|
|
4584
|
+
"4-bit",
|
|
4585
|
+
"8-bit",
|
|
4586
|
+
"none"
|
|
4587
|
+
],
|
|
4588
|
+
"model_hub": "modelscope",
|
|
4589
|
+
"model_id": "OpenGVLab/InternVL2_5-8B",
|
|
4590
|
+
"model_revision": "master"
|
|
4591
|
+
},
|
|
4592
|
+
{
|
|
4593
|
+
"model_format": "awq",
|
|
4594
|
+
"model_size_in_billions": 8,
|
|
4595
|
+
"quantizations": [
|
|
4596
|
+
"Int4"
|
|
4597
|
+
],
|
|
4598
|
+
"model_hub": "modelscope",
|
|
4599
|
+
"model_id": "OpenGVLab/InternVL2_5-8B-AWQ",
|
|
4600
|
+
"model_revision": "master"
|
|
4601
|
+
},
|
|
4602
|
+
{
|
|
4603
|
+
"model_format": "pytorch",
|
|
4604
|
+
"model_size_in_billions": 26,
|
|
4605
|
+
"quantizations": [
|
|
4606
|
+
"4-bit",
|
|
4607
|
+
"8-bit",
|
|
4608
|
+
"none"
|
|
4609
|
+
],
|
|
4610
|
+
"model_hub": "modelscope",
|
|
4611
|
+
"model_id": "OpenGVLab/InternVL2_5-26B",
|
|
4612
|
+
"model_revision": "master"
|
|
4613
|
+
},
|
|
4614
|
+
{
|
|
4615
|
+
"model_format": "awq",
|
|
4616
|
+
"model_size_in_billions": 26,
|
|
4617
|
+
"quantizations": [
|
|
4618
|
+
"Int4"
|
|
4619
|
+
],
|
|
4620
|
+
"model_hub": "modelscope",
|
|
4621
|
+
"model_id": "OpenGVLab/InternVL2_5-26B-AWQ",
|
|
4622
|
+
"model_revision": "master"
|
|
4623
|
+
},
|
|
4624
|
+
{
|
|
4625
|
+
"model_format": "pytorch",
|
|
4626
|
+
"model_size_in_billions": 38,
|
|
4627
|
+
"quantizations": [
|
|
4628
|
+
"4-bit",
|
|
4629
|
+
"8-bit",
|
|
4630
|
+
"none"
|
|
4631
|
+
],
|
|
4632
|
+
"model_hub": "modelscope",
|
|
4633
|
+
"model_id": "OpenGVLab/InternVL2_5-38B",
|
|
4634
|
+
"model_revision": "master"
|
|
4635
|
+
},
|
|
4636
|
+
{
|
|
4637
|
+
"model_format": "awq",
|
|
4638
|
+
"model_size_in_billions": 38,
|
|
4639
|
+
"quantizations": [
|
|
4640
|
+
"Int4"
|
|
4641
|
+
],
|
|
4642
|
+
"model_hub": "modelscope",
|
|
4643
|
+
"model_id": "OpenGVLab/InternVL2_5-38B-AWQ",
|
|
4644
|
+
"model_revision": "master"
|
|
4645
|
+
},
|
|
4646
|
+
{
|
|
4647
|
+
"model_format": "pytorch",
|
|
4648
|
+
"model_size_in_billions": 78,
|
|
4649
|
+
"quantizations": [
|
|
4650
|
+
"4-bit",
|
|
4651
|
+
"8-bit",
|
|
4652
|
+
"none"
|
|
4653
|
+
],
|
|
4654
|
+
"model_hub": "modelscope",
|
|
4655
|
+
"model_id": "OpenGVLab/InternVL2_5-78B",
|
|
4656
|
+
"model_revision": "master"
|
|
4657
|
+
},
|
|
4658
|
+
{
|
|
4659
|
+
"model_format": "awq",
|
|
4660
|
+
"model_size_in_billions": 78,
|
|
4661
|
+
"quantizations": [
|
|
4662
|
+
"Int4"
|
|
4663
|
+
],
|
|
4664
|
+
"model_hub": "modelscope",
|
|
4665
|
+
"model_id": "OpenGVLab/InternVL2_5-78B-AWQ",
|
|
4666
|
+
"model_revision": "master"
|
|
4667
|
+
}
|
|
4668
|
+
],
|
|
4669
|
+
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
4670
|
+
"stop_token_ids": [],
|
|
4671
|
+
"stop": []
|
|
4672
|
+
},
|
|
4500
4673
|
{
|
|
4501
4674
|
"version": 1,
|
|
4502
4675
|
"context_length": 8192,
|
|
@@ -4769,10 +4942,11 @@
|
|
|
4769
4942
|
"model_format":"mlx",
|
|
4770
4943
|
"model_size_in_billions":2,
|
|
4771
4944
|
"quantizations":[
|
|
4945
|
+
"4bit",
|
|
4772
4946
|
"8bit"
|
|
4773
4947
|
],
|
|
4774
4948
|
"model_hub": "modelscope",
|
|
4775
|
-
"model_id":"
|
|
4949
|
+
"model_id":"mlx-community/Qwen2-VL-2B-Instruct-{quantization}",
|
|
4776
4950
|
"model_revision":"master"
|
|
4777
4951
|
},
|
|
4778
4952
|
{
|
|
@@ -4825,6 +4999,97 @@
|
|
|
4825
4999
|
"<|endoftext|>"
|
|
4826
5000
|
]
|
|
4827
5001
|
},
|
|
5002
|
+
{
|
|
5003
|
+
"version":1,
|
|
5004
|
+
"context_length":128000,
|
|
5005
|
+
"model_name":"qwen2.5-vl-instruct",
|
|
5006
|
+
"model_lang":[
|
|
5007
|
+
"en",
|
|
5008
|
+
"zh"
|
|
5009
|
+
],
|
|
5010
|
+
"model_ability":[
|
|
5011
|
+
"chat",
|
|
5012
|
+
"vision"
|
|
5013
|
+
],
|
|
5014
|
+
"model_description":"Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model familities.",
|
|
5015
|
+
"model_specs":[
|
|
5016
|
+
{
|
|
5017
|
+
"model_format":"pytorch",
|
|
5018
|
+
"model_size_in_billions":3,
|
|
5019
|
+
"quantizations":[
|
|
5020
|
+
"none"
|
|
5021
|
+
],
|
|
5022
|
+
"model_hub": "modelscope",
|
|
5023
|
+
"model_id":"qwen/Qwen2.5-VL-3B-Instruct"
|
|
5024
|
+
},
|
|
5025
|
+
{
|
|
5026
|
+
"model_format":"pytorch",
|
|
5027
|
+
"model_size_in_billions":7,
|
|
5028
|
+
"quantizations":[
|
|
5029
|
+
"none"
|
|
5030
|
+
],
|
|
5031
|
+
"model_hub": "modelscope",
|
|
5032
|
+
"model_id":"qwen/Qwen2.5-VL-7B-Instruct"
|
|
5033
|
+
},
|
|
5034
|
+
{
|
|
5035
|
+
"model_format":"pytorch",
|
|
5036
|
+
"model_size_in_billions":72,
|
|
5037
|
+
"quantizations":[
|
|
5038
|
+
"none"
|
|
5039
|
+
],
|
|
5040
|
+
"model_hub": "modelscope",
|
|
5041
|
+
"model_id":"qwen/Qwen2.5-VL-72B-Instruct"
|
|
5042
|
+
},
|
|
5043
|
+
{
|
|
5044
|
+
"model_format":"mlx",
|
|
5045
|
+
"model_size_in_billions":3,
|
|
5046
|
+
"quantizations":[
|
|
5047
|
+
"3bit",
|
|
5048
|
+
"4bit",
|
|
5049
|
+
"6bit",
|
|
5050
|
+
"8bit",
|
|
5051
|
+
"bf16"
|
|
5052
|
+
],
|
|
5053
|
+
"model_hub": "modelscope",
|
|
5054
|
+
"model_id":"mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
|
|
5055
|
+
},
|
|
5056
|
+
{
|
|
5057
|
+
"model_format":"mlx",
|
|
5058
|
+
"model_size_in_billions":7,
|
|
5059
|
+
"quantizations":[
|
|
5060
|
+
"3bit",
|
|
5061
|
+
"4bit",
|
|
5062
|
+
"6bit",
|
|
5063
|
+
"8bit",
|
|
5064
|
+
"bf16"
|
|
5065
|
+
],
|
|
5066
|
+
"model_hub": "modelscope",
|
|
5067
|
+
"model_id":"mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
|
|
5068
|
+
},
|
|
5069
|
+
{
|
|
5070
|
+
"model_format":"mlx",
|
|
5071
|
+
"model_size_in_billions":72,
|
|
5072
|
+
"quantizations":[
|
|
5073
|
+
"3bit",
|
|
5074
|
+
"4bit",
|
|
5075
|
+
"6bit",
|
|
5076
|
+
"8bit",
|
|
5077
|
+
"bf16"
|
|
5078
|
+
],
|
|
5079
|
+
"model_hub": "modelscope",
|
|
5080
|
+
"model_id":"mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
|
|
5081
|
+
}
|
|
5082
|
+
],
|
|
5083
|
+
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
|
|
5084
|
+
"stop_token_ids": [
|
|
5085
|
+
151645,
|
|
5086
|
+
151643
|
|
5087
|
+
],
|
|
5088
|
+
"stop": [
|
|
5089
|
+
"<|im_end|>",
|
|
5090
|
+
"<|endoftext|>"
|
|
5091
|
+
]
|
|
5092
|
+
},
|
|
4828
5093
|
{
|
|
4829
5094
|
"version": 1,
|
|
4830
5095
|
"context_length": 32768,
|
|
@@ -5069,21 +5334,395 @@
|
|
|
5069
5334
|
"model_description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.",
|
|
5070
5335
|
"model_specs": [
|
|
5071
5336
|
{
|
|
5072
|
-
"model_format": "pytorch",
|
|
5073
|
-
"model_size_in_billions": 236,
|
|
5337
|
+
"model_format": "pytorch",
|
|
5338
|
+
"model_size_in_billions": 236,
|
|
5339
|
+
"quantizations": [
|
|
5340
|
+
"4-bit",
|
|
5341
|
+
"8-bit",
|
|
5342
|
+
"none"
|
|
5343
|
+
],
|
|
5344
|
+
"model_id": "deepseek-ai/DeepSeek-V2.5",
|
|
5345
|
+
"model_hub": "modelscope",
|
|
5346
|
+
"model_revision": "master"
|
|
5347
|
+
}
|
|
5348
|
+
],
|
|
5349
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{'<|begin▁of▁sentence|>'}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool 
%}{{'<|Assistant|>'}}{% endif %}",
|
|
5350
|
+
"stop_token_ids": [
|
|
5351
|
+
100001
|
|
5352
|
+
],
|
|
5353
|
+
"stop": [
|
|
5354
|
+
"<|end▁of▁sentence|>"
|
|
5355
|
+
]
|
|
5356
|
+
},
|
|
5357
|
+
{
|
|
5358
|
+
"version": 1,
|
|
5359
|
+
"context_length": 163840,
|
|
5360
|
+
"model_name": "deepseek-v3",
|
|
5361
|
+
"model_lang": [
|
|
5362
|
+
"en",
|
|
5363
|
+
"zh"
|
|
5364
|
+
],
|
|
5365
|
+
"model_ability": [
|
|
5366
|
+
"chat",
|
|
5367
|
+
"reasoning"
|
|
5368
|
+
],
|
|
5369
|
+
"model_description": "DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. ",
|
|
5370
|
+
"model_specs": [
|
|
5371
|
+
{
|
|
5372
|
+
"model_format": "pytorch",
|
|
5373
|
+
"model_size_in_billions": 671,
|
|
5374
|
+
"quantizations": [
|
|
5375
|
+
"4-bit",
|
|
5376
|
+
"8-bit",
|
|
5377
|
+
"none"
|
|
5378
|
+
],
|
|
5379
|
+
"model_id": "deepseek-ai/DeepSeek-V3",
|
|
5380
|
+
"model_hub": "modelscope",
|
|
5381
|
+
"model_revision": "master"
|
|
5382
|
+
},
|
|
5383
|
+
{
|
|
5384
|
+
"model_format": "awq",
|
|
5385
|
+
"model_size_in_billions": 671,
|
|
5386
|
+
"quantizations": [
|
|
5387
|
+
"Int4"
|
|
5388
|
+
],
|
|
5389
|
+
"model_id": "cognitivecomputations/DeepSeek-V3-awq",
|
|
5390
|
+
"model_hub": "modelscope",
|
|
5391
|
+
"model_revision": "master"
|
|
5392
|
+
},
|
|
5393
|
+
{
|
|
5394
|
+
"model_format": "ggufv2",
|
|
5395
|
+
"model_size_in_billions": 671,
|
|
5396
|
+
"quantizations": [
|
|
5397
|
+
"Q2_K_L",
|
|
5398
|
+
"Q2_K_XS",
|
|
5399
|
+
"Q3_K_M",
|
|
5400
|
+
"Q4_K_M",
|
|
5401
|
+
"Q5_K_M",
|
|
5402
|
+
"Q6_K",
|
|
5403
|
+
"Q8_0"
|
|
5404
|
+
],
|
|
5405
|
+
"model_id": "unsloth/DeepSeek-V3-GGUF",
|
|
5406
|
+
"model_hub": "modelscope",
|
|
5407
|
+
"model_file_name_template": "DeepSeek-V3-{quantization}/DeepSeek-V3-{quantization}.gguf",
|
|
5408
|
+
"model_file_name_split_template": "DeepSeek-V3-{quantization}/DeepSeek-V3-{quantization}-{part}.gguf",
|
|
5409
|
+
"quantization_parts": {
|
|
5410
|
+
"Q2_K_L": [
|
|
5411
|
+
"00001-of-00005",
|
|
5412
|
+
"00002-of-00005",
|
|
5413
|
+
"00003-of-00005",
|
|
5414
|
+
"00004-of-00005",
|
|
5415
|
+
"00005-of-00005"
|
|
5416
|
+
],
|
|
5417
|
+
"Q2_K_XS": [
|
|
5418
|
+
"00001-of-00005",
|
|
5419
|
+
"00002-of-00005",
|
|
5420
|
+
"00003-of-00005",
|
|
5421
|
+
"00004-of-00005",
|
|
5422
|
+
"00005-of-00005"
|
|
5423
|
+
],
|
|
5424
|
+
"Q3_K_M": [
|
|
5425
|
+
"00001-of-00007",
|
|
5426
|
+
"00002-of-00007",
|
|
5427
|
+
"00003-of-00007",
|
|
5428
|
+
"00004-of-00007",
|
|
5429
|
+
"00005-of-00007",
|
|
5430
|
+
"00006-of-00007",
|
|
5431
|
+
"00007-of-00007"
|
|
5432
|
+
],
|
|
5433
|
+
"Q4_K_M": [
|
|
5434
|
+
"00001-of-00009",
|
|
5435
|
+
"00002-of-00009",
|
|
5436
|
+
"00003-of-00009",
|
|
5437
|
+
"00004-of-00009",
|
|
5438
|
+
"00005-of-00009",
|
|
5439
|
+
"00006-of-00009",
|
|
5440
|
+
"00007-of-00009",
|
|
5441
|
+
"00008-of-00009",
|
|
5442
|
+
"00009-of-00009"
|
|
5443
|
+
],
|
|
5444
|
+
"Q5_K_M": [
|
|
5445
|
+
"00001-of-00010",
|
|
5446
|
+
"00002-of-00010",
|
|
5447
|
+
"00003-of-00010",
|
|
5448
|
+
"00004-of-00010",
|
|
5449
|
+
"00005-of-00010",
|
|
5450
|
+
"00006-of-00010",
|
|
5451
|
+
"00007-of-00010",
|
|
5452
|
+
"00008-of-00010",
|
|
5453
|
+
"00009-of-00010",
|
|
5454
|
+
"00010-of-00010"
|
|
5455
|
+
],
|
|
5456
|
+
"Q6_K": [
|
|
5457
|
+
"00001-of-00012",
|
|
5458
|
+
"00002-of-00012",
|
|
5459
|
+
"00003-of-00012",
|
|
5460
|
+
"00004-of-00012",
|
|
5461
|
+
"00005-of-00012",
|
|
5462
|
+
"00006-of-00012",
|
|
5463
|
+
"00007-of-00012",
|
|
5464
|
+
"00008-of-00012",
|
|
5465
|
+
"00009-of-00012",
|
|
5466
|
+
"00010-of-00012",
|
|
5467
|
+
"00011-of-00012",
|
|
5468
|
+
"00012-of-00012"
|
|
5469
|
+
],
|
|
5470
|
+
"Q8_0": [
|
|
5471
|
+
"00001-of-00016",
|
|
5472
|
+
"00002-of-00016",
|
|
5473
|
+
"00003-of-00016",
|
|
5474
|
+
"00004-of-00016",
|
|
5475
|
+
"00005-of-00016",
|
|
5476
|
+
"00006-of-00016",
|
|
5477
|
+
"00007-of-00016",
|
|
5478
|
+
"00008-of-00016",
|
|
5479
|
+
"00009-of-00016",
|
|
5480
|
+
"00010-of-00016",
|
|
5481
|
+
"00011-of-00016",
|
|
5482
|
+
"00012-of-00016",
|
|
5483
|
+
"00013-of-00016",
|
|
5484
|
+
"00014-of-00016",
|
|
5485
|
+
"00015-of-00016",
|
|
5486
|
+
"00016-of-00016"
|
|
5487
|
+
]
|
|
5488
|
+
}
|
|
5489
|
+
},
|
|
5490
|
+
{
|
|
5491
|
+
"model_format": "mlx",
|
|
5492
|
+
"model_size_in_billions": 671,
|
|
5493
|
+
"quantizations": [
|
|
5494
|
+
"3bit",
|
|
5495
|
+
"4bit"
|
|
5496
|
+
],
|
|
5497
|
+
"model_id": "mlx-community/DeepSeek-V3-{quantization}",
|
|
5498
|
+
"model_hub": "modelscope"
|
|
5499
|
+
}
|
|
5500
|
+
],
|
|
5501
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
|
|
5502
|
+
"stop_token_ids": [
|
|
5503
|
+
1
|
|
5504
|
+
],
|
|
5505
|
+
"stop": [
|
|
5506
|
+
"<|end▁of▁sentence|>"
|
|
5507
|
+
],
|
|
5508
|
+
"reasoning_start_tag": "<think>",
|
|
5509
|
+
"reasoning_end_tag": "</think>"
|
|
5510
|
+
},
|
|
5511
|
+
{
|
|
5512
|
+
"version": 1,
|
|
5513
|
+
"context_length": 163840,
|
|
5514
|
+
"model_name": "deepseek-r1",
|
|
5515
|
+
"model_lang": [
|
|
5516
|
+
"en",
|
|
5517
|
+
"zh"
|
|
5518
|
+
],
|
|
5519
|
+
"model_ability": [
|
|
5520
|
+
"chat"
|
|
5521
|
+
],
|
|
5522
|
+
"model_description": "DeepSeek-R1, which incorporates cold-start data before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.",
|
|
5523
|
+
"model_specs": [
|
|
5524
|
+
{
|
|
5525
|
+
"model_format": "pytorch",
|
|
5526
|
+
"model_size_in_billions": 671,
|
|
5527
|
+
"quantizations": [
|
|
5528
|
+
"4-bit",
|
|
5529
|
+
"8-bit",
|
|
5530
|
+
"none"
|
|
5531
|
+
],
|
|
5532
|
+
"model_id": "deepseek-ai/DeepSeek-R1",
|
|
5533
|
+
"model_hub": "modelscope",
|
|
5534
|
+
"model_revision": "master"
|
|
5535
|
+
},
|
|
5536
|
+
{
|
|
5537
|
+
"model_format": "awq",
|
|
5538
|
+
"model_size_in_billions": 671,
|
|
5539
|
+
"quantizations": [
|
|
5540
|
+
"Int4"
|
|
5541
|
+
],
|
|
5542
|
+
"model_id": "cognitivecomputations/DeepSeek-R1-awq",
|
|
5543
|
+
"model_hub": "modelscope",
|
|
5544
|
+
"model_revision": "master"
|
|
5545
|
+
},
|
|
5546
|
+
{
|
|
5547
|
+
"model_format": "ggufv2",
|
|
5548
|
+
"model_size_in_billions": 671,
|
|
5549
|
+
"quantizations": [
|
|
5550
|
+
"UD-IQ1_S",
|
|
5551
|
+
"UD-IQ1_M",
|
|
5552
|
+
"UD-IQ2_XXS",
|
|
5553
|
+
"UD-Q2_K_XL",
|
|
5554
|
+
"Q2_K",
|
|
5555
|
+
"Q2_K_L",
|
|
5556
|
+
"Q2_K_XS",
|
|
5557
|
+
"Q3_K_M",
|
|
5558
|
+
"Q4_K_M",
|
|
5559
|
+
"Q5_K_M",
|
|
5560
|
+
"Q6_K",
|
|
5561
|
+
"Q8_0",
|
|
5562
|
+
"BF16"
|
|
5563
|
+
],
|
|
5564
|
+
"model_id": "unsloth/DeepSeek-R1-GGUF",
|
|
5565
|
+
"model_hub": "modelscope",
|
|
5566
|
+
"model_file_name_template": "DeepSeek-R1-{quantization}/DeepSeek-R1-{quantization}.gguf",
|
|
5567
|
+
"model_file_name_split_template": "DeepSeek-R1-{quantization}/DeepSeek-R1-{quantization}-{part}.gguf",
|
|
5568
|
+
"quantization_parts": {
|
|
5569
|
+
"UD-IQ1_S": [
|
|
5570
|
+
"00001-of-00003",
|
|
5571
|
+
"00002-of-00003",
|
|
5572
|
+
"00003-of-00003"
|
|
5573
|
+
],
|
|
5574
|
+
"UD-IQ1_M": [
|
|
5575
|
+
"00001-of-00004",
|
|
5576
|
+
"00002-of-00004",
|
|
5577
|
+
"00003-of-00004",
|
|
5578
|
+
"00004-of-00004"
|
|
5579
|
+
],
|
|
5580
|
+
"UD-IQ2_XXS": [
|
|
5581
|
+
"00001-of-00004",
|
|
5582
|
+
"00002-of-00004",
|
|
5583
|
+
"00003-of-00004",
|
|
5584
|
+
"00004-of-00004"
|
|
5585
|
+
],
|
|
5586
|
+
"UD-Q2_K_XL": [
|
|
5587
|
+
"00001-of-00005",
|
|
5588
|
+
"00002-of-00005",
|
|
5589
|
+
"00003-of-00005",
|
|
5590
|
+
"00004-of-00005",
|
|
5591
|
+
"00005-of-00005"
|
|
5592
|
+
],
|
|
5593
|
+
"Q2_K": [
|
|
5594
|
+
"00001-of-00005",
|
|
5595
|
+
"00002-of-00005",
|
|
5596
|
+
"00003-of-00005",
|
|
5597
|
+
"00004-of-00005",
|
|
5598
|
+
"00005-of-00005"
|
|
5599
|
+
],
|
|
5600
|
+
"Q2_K_L": [
|
|
5601
|
+
"00001-of-00005",
|
|
5602
|
+
"00002-of-00005",
|
|
5603
|
+
"00003-of-00005",
|
|
5604
|
+
"00004-of-00005",
|
|
5605
|
+
"00005-of-00005"
|
|
5606
|
+
],
|
|
5607
|
+
"Q2_K_XS": [
|
|
5608
|
+
"00001-of-00005",
|
|
5609
|
+
"00002-of-00005",
|
|
5610
|
+
"00003-of-00005",
|
|
5611
|
+
"00004-of-00005",
|
|
5612
|
+
"00005-of-00005"
|
|
5613
|
+
],
|
|
5614
|
+
"Q3_K_M": [
|
|
5615
|
+
"00001-of-00007",
|
|
5616
|
+
"00002-of-00007",
|
|
5617
|
+
"00003-of-00007",
|
|
5618
|
+
"00004-of-00007",
|
|
5619
|
+
"00005-of-00007",
|
|
5620
|
+
"00006-of-00007",
|
|
5621
|
+
"00007-of-00007"
|
|
5622
|
+
],
|
|
5623
|
+
"Q4_K_M": [
|
|
5624
|
+
"00001-of-00009",
|
|
5625
|
+
"00002-of-00009",
|
|
5626
|
+
"00003-of-00009",
|
|
5627
|
+
"00004-of-00009",
|
|
5628
|
+
"00005-of-00009",
|
|
5629
|
+
"00006-of-00009",
|
|
5630
|
+
"00007-of-00009",
|
|
5631
|
+
"00008-of-00009",
|
|
5632
|
+
"00009-of-00009"
|
|
5633
|
+
],
|
|
5634
|
+
"Q5_K_M": [
|
|
5635
|
+
"00001-of-00010",
|
|
5636
|
+
"00002-of-00010",
|
|
5637
|
+
"00003-of-00010",
|
|
5638
|
+
"00004-of-00010",
|
|
5639
|
+
"00005-of-00010",
|
|
5640
|
+
"00006-of-00010",
|
|
5641
|
+
"00007-of-00010",
|
|
5642
|
+
"00008-of-00010",
|
|
5643
|
+
"00009-of-00010",
|
|
5644
|
+
"00010-of-00010"
|
|
5645
|
+
],
|
|
5646
|
+
"Q6_K": [
|
|
5647
|
+
"00001-of-00012",
|
|
5648
|
+
"00002-of-00012",
|
|
5649
|
+
"00003-of-00012",
|
|
5650
|
+
"00004-of-00012",
|
|
5651
|
+
"00005-of-00012",
|
|
5652
|
+
"00006-of-00012",
|
|
5653
|
+
"00007-of-00012",
|
|
5654
|
+
"00008-of-00012",
|
|
5655
|
+
"00009-of-00012",
|
|
5656
|
+
"00010-of-00012",
|
|
5657
|
+
"00011-of-00012",
|
|
5658
|
+
"00012-of-00012"
|
|
5659
|
+
],
|
|
5660
|
+
"Q8_0": [
|
|
5661
|
+
"00001-of-00015",
|
|
5662
|
+
"00002-of-00015",
|
|
5663
|
+
"00003-of-00015",
|
|
5664
|
+
"00004-of-00015",
|
|
5665
|
+
"00005-of-00015",
|
|
5666
|
+
"00006-of-00015",
|
|
5667
|
+
"00007-of-00015",
|
|
5668
|
+
"00008-of-00015",
|
|
5669
|
+
"00009-of-00015",
|
|
5670
|
+
"00010-of-00015",
|
|
5671
|
+
"00011-of-00015",
|
|
5672
|
+
"00012-of-00015",
|
|
5673
|
+
"00013-of-00015",
|
|
5674
|
+
"00014-of-00015",
|
|
5675
|
+
"00015-of-00015"
|
|
5676
|
+
],
|
|
5677
|
+
"BF16": [
|
|
5678
|
+
"00001-of-00030",
|
|
5679
|
+
"00002-of-00030",
|
|
5680
|
+
"00003-of-00030",
|
|
5681
|
+
"00004-of-00030",
|
|
5682
|
+
"00005-of-00030",
|
|
5683
|
+
"00006-of-00030",
|
|
5684
|
+
"00007-of-00030",
|
|
5685
|
+
"00008-of-00030",
|
|
5686
|
+
"00009-of-00030",
|
|
5687
|
+
"00010-of-00030",
|
|
5688
|
+
"00011-of-00030",
|
|
5689
|
+
"00012-of-00030",
|
|
5690
|
+
"00013-of-00030",
|
|
5691
|
+
"00014-of-00030",
|
|
5692
|
+
"00015-of-00030",
|
|
5693
|
+
"00016-of-00030",
|
|
5694
|
+
"00017-of-00030",
|
|
5695
|
+
"00018-of-00030",
|
|
5696
|
+
"00019-of-00030",
|
|
5697
|
+
"00020-of-00030",
|
|
5698
|
+
"00021-of-00030",
|
|
5699
|
+
"00022-of-00030",
|
|
5700
|
+
"00023-of-00030",
|
|
5701
|
+
"00024-of-00030",
|
|
5702
|
+
"00025-of-00030",
|
|
5703
|
+
"00026-of-00030",
|
|
5704
|
+
"00027-of-00030",
|
|
5705
|
+
"00028-of-00030",
|
|
5706
|
+
"00029-of-00030",
|
|
5707
|
+
"00030-of-00030"
|
|
5708
|
+
]
|
|
5709
|
+
}
|
|
5710
|
+
},
|
|
5711
|
+
{
|
|
5712
|
+
"model_format": "mlx",
|
|
5713
|
+
"model_size_in_billions": 671,
|
|
5074
5714
|
"quantizations": [
|
|
5075
|
-
"
|
|
5076
|
-
"
|
|
5077
|
-
"
|
|
5715
|
+
"2bit",
|
|
5716
|
+
"3bit",
|
|
5717
|
+
"4bit"
|
|
5078
5718
|
],
|
|
5079
|
-
"model_id": "
|
|
5080
|
-
"model_hub": "modelscope"
|
|
5081
|
-
"model_revision": "master"
|
|
5719
|
+
"model_id": "mlx-community/DeepSeek-R1-{quantization}",
|
|
5720
|
+
"model_hub": "modelscope"
|
|
5082
5721
|
}
|
|
5083
5722
|
],
|
|
5084
|
-
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}
|
|
5723
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + 
'<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
|
|
5085
5724
|
"stop_token_ids": [
|
|
5086
|
-
|
|
5725
|
+
1
|
|
5087
5726
|
],
|
|
5088
5727
|
"stop": [
|
|
5089
5728
|
"<|end▁of▁sentence|>"
|
|
@@ -5558,7 +6197,7 @@
|
|
|
5558
6197
|
"q8_0"
|
|
5559
6198
|
],
|
|
5560
6199
|
"model_id": "qwen/Qwen2.5-7B-Instruct-GGUF",
|
|
5561
|
-
"model_file_name_template": "
|
|
6200
|
+
"model_file_name_template": "qwen2.5-7b-instruct-{quantization}.gguf",
|
|
5562
6201
|
"model_hub": "modelscope",
|
|
5563
6202
|
"model_file_name_split_template": "qwen2.5-7b-instruct-{quantization}-{part}.gguf",
|
|
5564
6203
|
"quantization_parts": {
|
|
@@ -6442,7 +7081,8 @@
|
|
|
6442
7081
|
"zh"
|
|
6443
7082
|
],
|
|
6444
7083
|
"model_ability": [
|
|
6445
|
-
"chat"
|
|
7084
|
+
"chat",
|
|
7085
|
+
"reasoning"
|
|
6446
7086
|
],
|
|
6447
7087
|
"model_description": "deepseek-r1-distill-qwen is distilled from DeepSeek-R1 based on Qwen",
|
|
6448
7088
|
"model_specs": [
|
|
@@ -6473,6 +7113,19 @@
|
|
|
6473
7113
|
"model_file_name_template": "DeepSeek-R1-Distill-Qwen-1.5B-{quantization}.gguf",
|
|
6474
7114
|
"model_hub": "modelscope"
|
|
6475
7115
|
},
|
|
7116
|
+
{
|
|
7117
|
+
"model_format": "mlx",
|
|
7118
|
+
"model_size_in_billions": "1_5",
|
|
7119
|
+
"quantizations": [
|
|
7120
|
+
"3bit",
|
|
7121
|
+
"4bit",
|
|
7122
|
+
"6bit",
|
|
7123
|
+
"8bit",
|
|
7124
|
+
"bf16"
|
|
7125
|
+
],
|
|
7126
|
+
"model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-1.5B-{quantization}",
|
|
7127
|
+
"model_hub": "modelscope"
|
|
7128
|
+
},
|
|
6476
7129
|
{
|
|
6477
7130
|
"model_format": "pytorch",
|
|
6478
7131
|
"model_size_in_billions": 7,
|
|
@@ -6613,13 +7266,137 @@
|
|
|
6613
7266
|
"model_hub": "modelscope"
|
|
6614
7267
|
}
|
|
6615
7268
|
],
|
|
6616
|
-
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and
|
|
7269
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = 
content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
|
|
6617
7270
|
"stop_token_ids": [
|
|
6618
7271
|
151643
|
|
6619
7272
|
],
|
|
6620
7273
|
"stop": [
|
|
6621
7274
|
"<|end▁of▁sentence|>"
|
|
6622
|
-
]
|
|
7275
|
+
],
|
|
7276
|
+
"reasoning_start_tag": "<think>",
|
|
7277
|
+
"reasoning_end_tag": "</think>"
|
|
7278
|
+
},
|
|
7279
|
+
{
|
|
7280
|
+
"version": 1,
|
|
7281
|
+
"context_length": 131072,
|
|
7282
|
+
"model_name": "deepseek-r1-distill-llama",
|
|
7283
|
+
"model_lang": [
|
|
7284
|
+
"en",
|
|
7285
|
+
"zh"
|
|
7286
|
+
],
|
|
7287
|
+
"model_ability": [
|
|
7288
|
+
"chat",
|
|
7289
|
+
"reasoning"
|
|
7290
|
+
],
|
|
7291
|
+
"model_description": "deepseek-r1-distill-llama is distilled from DeepSeek-R1 based on Llama",
|
|
7292
|
+
"model_specs": [
|
|
7293
|
+
{
|
|
7294
|
+
"model_format": "pytorch",
|
|
7295
|
+
"model_size_in_billions": 8,
|
|
7296
|
+
"quantizations": [
|
|
7297
|
+
"4-bit",
|
|
7298
|
+
"8-bit",
|
|
7299
|
+
"none"
|
|
7300
|
+
],
|
|
7301
|
+
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
|
7302
|
+
"model_hub": "modelscope"
|
|
7303
|
+
},
|
|
7304
|
+
{
|
|
7305
|
+
"model_format": "ggufv2",
|
|
7306
|
+
"model_size_in_billions": 8,
|
|
7307
|
+
"quantizations": [
|
|
7308
|
+
"Q2_K",
|
|
7309
|
+
"Q2_K_L",
|
|
7310
|
+
"Q3_K_M",
|
|
7311
|
+
"Q4_K_M",
|
|
7312
|
+
"Q5_K_M",
|
|
7313
|
+
"Q6_K",
|
|
7314
|
+
"Q8_0",
|
|
7315
|
+
"F16"
|
|
7316
|
+
],
|
|
7317
|
+
"model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
|
|
7318
|
+
"model_file_name_template": "DeepSeek-R1-Distill-Llama-8B-{quantization}.gguf",
|
|
7319
|
+
"model_hub": "modelscope"
|
|
7320
|
+
},
|
|
7321
|
+
{
|
|
7322
|
+
"model_format": "mlx",
|
|
7323
|
+
"model_size_in_billions": 8,
|
|
7324
|
+
"quantizations": [
|
|
7325
|
+
"3bit",
|
|
7326
|
+
"4bit",
|
|
7327
|
+
"6bit",
|
|
7328
|
+
"8bit",
|
|
7329
|
+
"bf16"
|
|
7330
|
+
],
|
|
7331
|
+
"model_id": "okwinds/DeepSeek-R1-Distill-Llama-8B-MLX-{quantization}",
|
|
7332
|
+
"model_hub": "modelscope"
|
|
7333
|
+
},
|
|
7334
|
+
{
|
|
7335
|
+
"model_format": "pytorch",
|
|
7336
|
+
"model_size_in_billions": 70,
|
|
7337
|
+
"quantizations": [
|
|
7338
|
+
"4-bit",
|
|
7339
|
+
"8-bit",
|
|
7340
|
+
"none"
|
|
7341
|
+
],
|
|
7342
|
+
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
|
|
7343
|
+
"model_hub": "modelscope"
|
|
7344
|
+
},
|
|
7345
|
+
{
|
|
7346
|
+
"model_format": "ggufv2",
|
|
7347
|
+
"model_size_in_billions": 70,
|
|
7348
|
+
"quantizations": [
|
|
7349
|
+
"Q2_K",
|
|
7350
|
+
"Q2_K_L",
|
|
7351
|
+
"Q3_K_M",
|
|
7352
|
+
"Q4_K_M",
|
|
7353
|
+
"Q5_K_M",
|
|
7354
|
+
"Q6_K",
|
|
7355
|
+
"Q8_0",
|
|
7356
|
+
"F16"
|
|
7357
|
+
],
|
|
7358
|
+
"quantization_parts": {
|
|
7359
|
+
"Q6_K": [
|
|
7360
|
+
"00001-of-00002",
|
|
7361
|
+
"00002-of-00002"
|
|
7362
|
+
],
|
|
7363
|
+
"Q8_0": [
|
|
7364
|
+
"00001-of-00002",
|
|
7365
|
+
"00002-of-00002"
|
|
7366
|
+
],
|
|
7367
|
+
"F16": [
|
|
7368
|
+
"00001-of-00003",
|
|
7369
|
+
"00002-of-00003",
|
|
7370
|
+
"00003-of-00003"
|
|
7371
|
+
]
|
|
7372
|
+
},
|
|
7373
|
+
"model_id": "unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF",
|
|
7374
|
+
"model_file_name_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}.gguf",
|
|
7375
|
+
"model_file_name_split_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}/DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf",
|
|
7376
|
+
"model_hub": "modelscope"
|
|
7377
|
+
},
|
|
7378
|
+
{
|
|
7379
|
+
"model_format": "mlx",
|
|
7380
|
+
"model_size_in_billions": 70,
|
|
7381
|
+
"quantizations": [
|
|
7382
|
+
"3bit",
|
|
7383
|
+
"4bit",
|
|
7384
|
+
"6bit",
|
|
7385
|
+
"8bit"
|
|
7386
|
+
],
|
|
7387
|
+
"model_id": "okwinds/DeepSeek-R1-Distill-Llama-70B-MLX-{quantization}",
|
|
7388
|
+
"model_hub": "modelscope"
|
|
7389
|
+
}
|
|
7390
|
+
],
|
|
7391
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
|
|
7392
|
+
"stop_token_ids": [
|
|
7393
|
+
151643
|
|
7394
|
+
],
|
|
7395
|
+
"stop": [
|
|
7396
|
+
"<|end▁of▁sentence|>"
|
|
7397
|
+
],
|
|
7398
|
+
"reasoning_start_tag": "<think>",
|
|
7399
|
+
"reasoning_end_tag": "</think>"
|
|
6623
7400
|
},
|
|
6624
7401
|
{
|
|
6625
7402
|
"version": 1,
|
|
@@ -6911,7 +7688,7 @@
|
|
|
6911
7688
|
"<|endoftext|>"
|
|
6912
7689
|
]
|
|
6913
7690
|
},
|
|
6914
|
-
|
|
7691
|
+
{
|
|
6915
7692
|
"version": 1,
|
|
6916
7693
|
"context_length": 32768,
|
|
6917
7694
|
"model_name": "marco-o1",
|
|
@@ -7009,5 +7786,85 @@
|
|
|
7009
7786
|
"<|user|>",
|
|
7010
7787
|
"<|observation|>"
|
|
7011
7788
|
]
|
|
7789
|
+
},
|
|
7790
|
+
{
|
|
7791
|
+
"version": 1,
|
|
7792
|
+
"context_length": 32768,
|
|
7793
|
+
"model_name": "internlm3-instruct",
|
|
7794
|
+
"model_lang": [
|
|
7795
|
+
"en",
|
|
7796
|
+
"zh"
|
|
7797
|
+
],
|
|
7798
|
+
"model_ability": [
|
|
7799
|
+
"chat",
|
|
7800
|
+
"tools"
|
|
7801
|
+
],
|
|
7802
|
+
"model_description": "InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning.",
|
|
7803
|
+
"model_specs": [
|
|
7804
|
+
{
|
|
7805
|
+
"model_format": "pytorch",
|
|
7806
|
+
"model_size_in_billions": 8,
|
|
7807
|
+
"quantizations": [
|
|
7808
|
+
"4-bit",
|
|
7809
|
+
"8-bit",
|
|
7810
|
+
"none"
|
|
7811
|
+
],
|
|
7812
|
+
"model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct",
|
|
7813
|
+
"model_hub": "modelscope"
|
|
7814
|
+
},
|
|
7815
|
+
{
|
|
7816
|
+
"model_format": "gptq",
|
|
7817
|
+
"model_size_in_billions": 8,
|
|
7818
|
+
"quantizations": [
|
|
7819
|
+
"Int4"
|
|
7820
|
+
],
|
|
7821
|
+
"model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct-gptq-int4",
|
|
7822
|
+
"model_hub": "modelscope"
|
|
7823
|
+
},
|
|
7824
|
+
{
|
|
7825
|
+
"model_format": "awq",
|
|
7826
|
+
"model_size_in_billions": 8,
|
|
7827
|
+
"quantizations": [
|
|
7828
|
+
"Int4"
|
|
7829
|
+
],
|
|
7830
|
+
"model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct-awq",
|
|
7831
|
+
"model_hub": "modelscope"
|
|
7832
|
+
},
|
|
7833
|
+
{
|
|
7834
|
+
"model_format": "ggufv2",
|
|
7835
|
+
"model_size_in_billions": 8,
|
|
7836
|
+
"quantizations": [
|
|
7837
|
+
"q2_k",
|
|
7838
|
+
"q3_k_m",
|
|
7839
|
+
"q4_0",
|
|
7840
|
+
"q4_k_m",
|
|
7841
|
+
"q5_0",
|
|
7842
|
+
"q5_k_m",
|
|
7843
|
+
"q6_k",
|
|
7844
|
+
"q8_0"
|
|
7845
|
+
],
|
|
7846
|
+
"model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct-gguf",
|
|
7847
|
+
"model_file_name_template": "internlm3-8b-instruct-{quantization}.gguf",
|
|
7848
|
+
"model_hub": "modelscope"
|
|
7849
|
+
},
|
|
7850
|
+
{
|
|
7851
|
+
"model_format":"mlx",
|
|
7852
|
+
"model_size_in_billions":8,
|
|
7853
|
+
"quantizations":[
|
|
7854
|
+
"4bit"
|
|
7855
|
+
],
|
|
7856
|
+
"model_hub": "modelscope",
|
|
7857
|
+
"model_id":"mlx-community/internlm3-8b-instruct-{quantization}"
|
|
7858
|
+
}
|
|
7859
|
+
],
|
|
7860
|
+
"chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
7861
|
+
"stop_token_ids": [
|
|
7862
|
+
2,
|
|
7863
|
+
128131
|
|
7864
|
+
],
|
|
7865
|
+
"stop": [
|
|
7866
|
+
"</s>",
|
|
7867
|
+
"<|im_end|>"
|
|
7868
|
+
]
|
|
7012
7869
|
}
|
|
7013
7870
|
]
|