xinference 0.11.0__py3-none-any.whl → 0.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +30 -0
- xinference/client/restful/restful_client.py +29 -0
- xinference/core/cache_tracker.py +12 -1
- xinference/core/chat_interface.py +10 -4
- xinference/core/model.py +2 -2
- xinference/core/supervisor.py +30 -2
- xinference/core/utils.py +12 -0
- xinference/core/worker.py +4 -1
- xinference/deploy/cmdline.py +126 -0
- xinference/deploy/test/test_cmdline.py +24 -0
- xinference/fields.py +3 -1
- xinference/model/llm/__init__.py +2 -0
- xinference/model/llm/ggml/chatglm.py +98 -13
- xinference/model/llm/ggml/llamacpp.py +49 -2
- xinference/model/llm/llm_family.json +633 -9
- xinference/model/llm/llm_family.py +84 -10
- xinference/model/llm/llm_family_modelscope.json +337 -10
- xinference/model/llm/memory.py +332 -0
- xinference/model/llm/pytorch/chatglm.py +48 -0
- xinference/model/llm/pytorch/core.py +25 -6
- xinference/model/llm/pytorch/deepseek_vl.py +35 -9
- xinference/model/llm/pytorch/intern_vl.py +387 -0
- xinference/model/llm/pytorch/internlm2.py +32 -1
- xinference/model/llm/pytorch/qwen_vl.py +38 -11
- xinference/model/llm/pytorch/utils.py +38 -1
- xinference/model/llm/pytorch/yi_vl.py +42 -14
- xinference/model/llm/sglang/core.py +31 -9
- xinference/model/llm/utils.py +38 -5
- xinference/model/llm/vllm/core.py +87 -5
- xinference/model/rerank/core.py +23 -1
- xinference/model/utils.py +17 -7
- xinference/thirdparty/deepseek_vl/models/processing_vlm.py +1 -1
- xinference/thirdparty/deepseek_vl/models/siglip_vit.py +2 -2
- xinference/thirdparty/llava/mm_utils.py +3 -2
- xinference/thirdparty/llava/model/llava_arch.py +1 -1
- xinference/thirdparty/omnilmm/chat.py +6 -5
- xinference/types.py +10 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.8e44da4b.js → main.551aa479.js} +3 -3
- xinference/web/ui/build/static/js/main.551aa479.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1fa824d82b2af519de7700c594e50bde4bbca60d13bd3fabff576802e4070304.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/23caf6f1e52c43e983ca3bfd4189f41dbd645fa78f2dfdcd7f6b69bc41678665.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a6da6bc3d0d2191adebee87fb58ecebe82d071087bd2f7f3a9c7fdd2ada130f2.json +1 -0
- {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/METADATA +10 -8
- {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/RECORD +52 -50
- xinference/web/ui/build/static/js/main.8e44da4b.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1870cd6f7054d04e049e363c0a85526584fe25519378609d2838e28d7492bbf1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ddaec68b88e5eff792df1e39a4b4b8b737bfc832293c015660c3c69334e3cf5c.json +0 -1
- /xinference/web/ui/build/static/js/{main.8e44da4b.js.LICENSE.txt → main.551aa479.js.LICENSE.txt} +0 -0
- {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/LICENSE +0 -0
- {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/WHEEL +0 -0
- {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/entry_points.txt +0 -0
- {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/top_level.txt +0 -0
|
@@ -2198,6 +2198,31 @@
|
|
|
2198
2198
|
]
|
|
2199
2199
|
}
|
|
2200
2200
|
},
|
|
2201
|
+
{
|
|
2202
|
+
"version": 1,
|
|
2203
|
+
"context_length": 65536,
|
|
2204
|
+
"model_name": "codeqwen1.5",
|
|
2205
|
+
"model_lang": [
|
|
2206
|
+
"en",
|
|
2207
|
+
"zh"
|
|
2208
|
+
],
|
|
2209
|
+
"model_ability": [
|
|
2210
|
+
"generate"
|
|
2211
|
+
],
|
|
2212
|
+
"model_description": "CodeQwen1.5 is the Code-Specific version of Qwen1.5. It is a transformer-based decoder-only language model pretrained on a large amount of data of codes.",
|
|
2213
|
+
"model_specs": [
|
|
2214
|
+
{
|
|
2215
|
+
"model_format": "pytorch",
|
|
2216
|
+
"model_size_in_billions": 7,
|
|
2217
|
+
"quantizations": [
|
|
2218
|
+
"4-bit",
|
|
2219
|
+
"8-bit",
|
|
2220
|
+
"none"
|
|
2221
|
+
],
|
|
2222
|
+
"model_id": "Qwen/CodeQwen1.5-7B"
|
|
2223
|
+
}
|
|
2224
|
+
]
|
|
2225
|
+
},
|
|
2201
2226
|
{
|
|
2202
2227
|
"version": 1,
|
|
2203
2228
|
"context_length": 65536,
|
|
@@ -3651,7 +3676,7 @@
|
|
|
3651
3676
|
},
|
|
3652
3677
|
{
|
|
3653
3678
|
"version": 1,
|
|
3654
|
-
"context_length":
|
|
3679
|
+
"context_length": 262144,
|
|
3655
3680
|
"model_name": "Yi-200k",
|
|
3656
3681
|
"model_lang": [
|
|
3657
3682
|
"en",
|
|
@@ -3688,7 +3713,7 @@
|
|
|
3688
3713
|
},
|
|
3689
3714
|
{
|
|
3690
3715
|
"version": 1,
|
|
3691
|
-
"context_length":
|
|
3716
|
+
"context_length": 4096,
|
|
3692
3717
|
"model_name": "Yi-chat",
|
|
3693
3718
|
"model_lang": [
|
|
3694
3719
|
"en",
|
|
@@ -3707,6 +3732,17 @@
|
|
|
3707
3732
|
],
|
|
3708
3733
|
"model_id": "01-ai/Yi-34B-Chat-{quantization}"
|
|
3709
3734
|
},
|
|
3735
|
+
{
|
|
3736
|
+
"model_format": "pytorch",
|
|
3737
|
+
"model_size_in_billions": 6,
|
|
3738
|
+
"quantizations": [
|
|
3739
|
+
"4-bit",
|
|
3740
|
+
"8-bit",
|
|
3741
|
+
"none"
|
|
3742
|
+
],
|
|
3743
|
+
"model_id": "01-ai/Yi-6B-Chat",
|
|
3744
|
+
"model_revision": "1c20c960895e4c3877cf478bc2df074221b81d7b"
|
|
3745
|
+
},
|
|
3710
3746
|
{
|
|
3711
3747
|
"model_format": "pytorch",
|
|
3712
3748
|
"model_size_in_billions": 34,
|
|
@@ -3762,6 +3798,124 @@
|
|
|
3762
3798
|
]
|
|
3763
3799
|
}
|
|
3764
3800
|
},
|
|
3801
|
+
{
|
|
3802
|
+
"version": 1,
|
|
3803
|
+
"context_length": 4096,
|
|
3804
|
+
"model_name": "Yi-1.5",
|
|
3805
|
+
"model_lang": [
|
|
3806
|
+
"en",
|
|
3807
|
+
"zh"
|
|
3808
|
+
],
|
|
3809
|
+
"model_ability": [
|
|
3810
|
+
"generate"
|
|
3811
|
+
],
|
|
3812
|
+
"model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
|
|
3813
|
+
"model_specs": [
|
|
3814
|
+
{
|
|
3815
|
+
"model_format": "pytorch",
|
|
3816
|
+
"model_size_in_billions": 6,
|
|
3817
|
+
"quantizations": [
|
|
3818
|
+
"4-bit",
|
|
3819
|
+
"8-bit",
|
|
3820
|
+
"none"
|
|
3821
|
+
],
|
|
3822
|
+
"model_id": "01-ai/Yi-1.5-6B",
|
|
3823
|
+
"model_revision": "741a657c42d2081f777ce4c6c5572090f8b8c886"
|
|
3824
|
+
},
|
|
3825
|
+
{
|
|
3826
|
+
"model_format": "pytorch",
|
|
3827
|
+
"model_size_in_billions": 9,
|
|
3828
|
+
"quantizations": [
|
|
3829
|
+
"4-bit",
|
|
3830
|
+
"8-bit",
|
|
3831
|
+
"none"
|
|
3832
|
+
],
|
|
3833
|
+
"model_id": "01-ai/Yi-1.5-9B",
|
|
3834
|
+
"model_revision": "9a6839c5b9db3dbb245fb98a072bfabc242621f2"
|
|
3835
|
+
},
|
|
3836
|
+
{
|
|
3837
|
+
"model_format": "pytorch",
|
|
3838
|
+
"model_size_in_billions": 34,
|
|
3839
|
+
"quantizations": [
|
|
3840
|
+
"4-bit",
|
|
3841
|
+
"8-bit",
|
|
3842
|
+
"none"
|
|
3843
|
+
],
|
|
3844
|
+
"model_id": "01-ai/Yi-1.5-34B",
|
|
3845
|
+
"model_revision": "4f83007957ec3eec76d87df19ad061eb0f57b5c5"
|
|
3846
|
+
}
|
|
3847
|
+
]
|
|
3848
|
+
},
|
|
3849
|
+
{
|
|
3850
|
+
"version": 1,
|
|
3851
|
+
"context_length": 4096,
|
|
3852
|
+
"model_name": "Yi-1.5-chat",
|
|
3853
|
+
"model_lang": [
|
|
3854
|
+
"en",
|
|
3855
|
+
"zh"
|
|
3856
|
+
],
|
|
3857
|
+
"model_ability": [
|
|
3858
|
+
"chat"
|
|
3859
|
+
],
|
|
3860
|
+
"model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
|
|
3861
|
+
"model_specs": [
|
|
3862
|
+
{
|
|
3863
|
+
"model_format": "pytorch",
|
|
3864
|
+
"model_size_in_billions": 6,
|
|
3865
|
+
"quantizations": [
|
|
3866
|
+
"4-bit",
|
|
3867
|
+
"8-bit",
|
|
3868
|
+
"none"
|
|
3869
|
+
],
|
|
3870
|
+
"model_id": "01-ai/Yi-1.5-6B-Chat",
|
|
3871
|
+
"model_revision": "d68dab90947a3c869e28c9cb2806996af99a6080"
|
|
3872
|
+
},
|
|
3873
|
+
{
|
|
3874
|
+
"model_format": "pytorch",
|
|
3875
|
+
"model_size_in_billions": 9,
|
|
3876
|
+
"quantizations": [
|
|
3877
|
+
"4-bit",
|
|
3878
|
+
"8-bit",
|
|
3879
|
+
"none"
|
|
3880
|
+
],
|
|
3881
|
+
"model_id": "01-ai/Yi-1.5-9B-Chat",
|
|
3882
|
+
"model_revision": "1dc6e2b8dcfc12b95bede8dec67e6b6332ac64c6"
|
|
3883
|
+
},
|
|
3884
|
+
{
|
|
3885
|
+
"model_format": "pytorch",
|
|
3886
|
+
"model_size_in_billions": 34,
|
|
3887
|
+
"quantizations": [
|
|
3888
|
+
"4-bit",
|
|
3889
|
+
"8-bit",
|
|
3890
|
+
"none"
|
|
3891
|
+
],
|
|
3892
|
+
"model_id": "01-ai/Yi-1.5-34B-Chat",
|
|
3893
|
+
"model_revision": "fa695ee438bfcd0ec2b378fa1c7e0dea1b40393e"
|
|
3894
|
+
}
|
|
3895
|
+
],
|
|
3896
|
+
"prompt_style": {
|
|
3897
|
+
"style_name": "CHATML",
|
|
3898
|
+
"system_prompt": "",
|
|
3899
|
+
"roles": [
|
|
3900
|
+
"<|im_start|>user",
|
|
3901
|
+
"<|im_start|>assistant"
|
|
3902
|
+
],
|
|
3903
|
+
"intra_message_sep": "<|im_end|>",
|
|
3904
|
+
"inter_message_sep": "",
|
|
3905
|
+
"stop_token_ids": [
|
|
3906
|
+
2,
|
|
3907
|
+
6,
|
|
3908
|
+
7,
|
|
3909
|
+
8
|
|
3910
|
+
],
|
|
3911
|
+
"stop": [
|
|
3912
|
+
"<|endoftext|>",
|
|
3913
|
+
"<|im_start|>",
|
|
3914
|
+
"<|im_end|>",
|
|
3915
|
+
"<|im_sep|>"
|
|
3916
|
+
]
|
|
3917
|
+
}
|
|
3918
|
+
},
|
|
3765
3919
|
{
|
|
3766
3920
|
"version": 1,
|
|
3767
3921
|
"context_length": 2048,
|
|
@@ -4206,6 +4360,83 @@
|
|
|
4206
4360
|
]
|
|
4207
4361
|
}
|
|
4208
4362
|
},
|
|
4363
|
+
{
|
|
4364
|
+
"version": 1,
|
|
4365
|
+
"context_length": 4096,
|
|
4366
|
+
"model_name": "deepseek",
|
|
4367
|
+
"model_lang": [
|
|
4368
|
+
"en",
|
|
4369
|
+
"zh"
|
|
4370
|
+
],
|
|
4371
|
+
"model_ability": [
|
|
4372
|
+
"generate"
|
|
4373
|
+
],
|
|
4374
|
+
"model_description": "DeepSeek LLM, trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese. ",
|
|
4375
|
+
"model_specs": [
|
|
4376
|
+
{
|
|
4377
|
+
"model_format": "pytorch",
|
|
4378
|
+
"model_size_in_billions": 7,
|
|
4379
|
+
"quantizations": [
|
|
4380
|
+
"4-bit",
|
|
4381
|
+
"8-bit",
|
|
4382
|
+
"none"
|
|
4383
|
+
],
|
|
4384
|
+
"model_id": "deepseek-ai/deepseek-llm-7b-base",
|
|
4385
|
+
"model_revision": "7683fea62db869066ddaff6a41d032262c490d4f"
|
|
4386
|
+
},
|
|
4387
|
+
{
|
|
4388
|
+
"model_format": "pytorch",
|
|
4389
|
+
"model_size_in_billions": 67,
|
|
4390
|
+
"quantizations": [
|
|
4391
|
+
"4-bit",
|
|
4392
|
+
"8-bit",
|
|
4393
|
+
"none"
|
|
4394
|
+
],
|
|
4395
|
+
"model_id": "deepseek-ai/deepseek-llm-67b-base",
|
|
4396
|
+
"model_revision": "c3f813a1121c95488a20132d3a4da89f4a46452f"
|
|
4397
|
+
},
|
|
4398
|
+
{
|
|
4399
|
+
"model_format": "ggufv2",
|
|
4400
|
+
"model_size_in_billions": 7,
|
|
4401
|
+
"quantizations": [
|
|
4402
|
+
"Q2_K",
|
|
4403
|
+
"Q3_K_L",
|
|
4404
|
+
"Q3_K_M",
|
|
4405
|
+
"Q3_K_S",
|
|
4406
|
+
"Q4_0",
|
|
4407
|
+
"Q4_K_M",
|
|
4408
|
+
"Q4_K_S",
|
|
4409
|
+
"Q5_0",
|
|
4410
|
+
"Q5_K_M",
|
|
4411
|
+
"Q5_K_S",
|
|
4412
|
+
"Q6_K",
|
|
4413
|
+
"Q8_0"
|
|
4414
|
+
],
|
|
4415
|
+
"model_id": "TheBloke/deepseek-llm-7B-chat-GGUF",
|
|
4416
|
+
"model_file_name_template": "deepseek-llm-7b-chat.{quantization}.gguf"
|
|
4417
|
+
},
|
|
4418
|
+
{
|
|
4419
|
+
"model_format": "ggufv2",
|
|
4420
|
+
"model_size_in_billions": 67,
|
|
4421
|
+
"quantizations": [
|
|
4422
|
+
"Q2_K",
|
|
4423
|
+
"Q3_K_L",
|
|
4424
|
+
"Q3_K_M",
|
|
4425
|
+
"Q3_K_S",
|
|
4426
|
+
"Q4_0",
|
|
4427
|
+
"Q4_K_M",
|
|
4428
|
+
"Q4_K_S",
|
|
4429
|
+
"Q5_0",
|
|
4430
|
+
"Q5_K_M",
|
|
4431
|
+
"Q5_K_S",
|
|
4432
|
+
"Q6_K",
|
|
4433
|
+
"Q8_0"
|
|
4434
|
+
],
|
|
4435
|
+
"model_id": "TheBloke/deepseek-llm-67b-chat-GGUF",
|
|
4436
|
+
"model_file_name_template": "deepseek-llm-67b-chat.{quantization}.gguf"
|
|
4437
|
+
}
|
|
4438
|
+
]
|
|
4439
|
+
},
|
|
4209
4440
|
{
|
|
4210
4441
|
"version": 1,
|
|
4211
4442
|
"context_length": 4096,
|
|
@@ -4298,7 +4529,199 @@
|
|
|
4298
4529
|
},
|
|
4299
4530
|
{
|
|
4300
4531
|
"version": 1,
|
|
4301
|
-
"context_length":
|
|
4532
|
+
"context_length": 16384,
|
|
4533
|
+
"model_name": "deepseek-coder",
|
|
4534
|
+
"model_lang": [
|
|
4535
|
+
"en",
|
|
4536
|
+
"zh"
|
|
4537
|
+
],
|
|
4538
|
+
"model_ability": [
|
|
4539
|
+
"generate"
|
|
4540
|
+
],
|
|
4541
|
+
"model_description": "Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese. ",
|
|
4542
|
+
"model_specs": [
|
|
4543
|
+
{
|
|
4544
|
+
"model_format": "pytorch",
|
|
4545
|
+
"model_size_in_billions": "1_3",
|
|
4546
|
+
"quantizations": [
|
|
4547
|
+
"4-bit",
|
|
4548
|
+
"8-bit",
|
|
4549
|
+
"none"
|
|
4550
|
+
],
|
|
4551
|
+
"model_id": "deepseek-ai/deepseek-coder-1.3b-base",
|
|
4552
|
+
"model_revision": "c919139c3a9b4070729c8b2cca4847ab29ca8d94"
|
|
4553
|
+
},
|
|
4554
|
+
{
|
|
4555
|
+
"model_format": "pytorch",
|
|
4556
|
+
"model_size_in_billions": "6_7",
|
|
4557
|
+
"quantizations": [
|
|
4558
|
+
"4-bit",
|
|
4559
|
+
"8-bit",
|
|
4560
|
+
"none"
|
|
4561
|
+
],
|
|
4562
|
+
"model_id": "deepseek-ai/deepseek-coder-6.7b-base",
|
|
4563
|
+
"model_revision": "ce2207a8bfef3ee92bd7dd4cc31c52cfa0046912"
|
|
4564
|
+
},
|
|
4565
|
+
{
|
|
4566
|
+
"model_format": "pytorch",
|
|
4567
|
+
"model_size_in_billions": 7,
|
|
4568
|
+
"quantizations": [
|
|
4569
|
+
"4-bit",
|
|
4570
|
+
"8-bit",
|
|
4571
|
+
"none"
|
|
4572
|
+
],
|
|
4573
|
+
"model_id": "deepseek-ai/deepseek-coder-7b-base-v1.5",
|
|
4574
|
+
"model_revision": "98f0904cee2237e235f10408ae12292037b21dac"
|
|
4575
|
+
},
|
|
4576
|
+
{
|
|
4577
|
+
"model_format": "pytorch",
|
|
4578
|
+
"model_size_in_billions": 33,
|
|
4579
|
+
"quantizations": [
|
|
4580
|
+
"4-bit",
|
|
4581
|
+
"8-bit",
|
|
4582
|
+
"none"
|
|
4583
|
+
],
|
|
4584
|
+
"model_id": "deepseek-ai/deepseek-coder-33b-base",
|
|
4585
|
+
"model_revision": "45c85cadf3720ef3e85a492e24fd4b8c5d21d8ac"
|
|
4586
|
+
},
|
|
4587
|
+
{
|
|
4588
|
+
"model_format": "ggufv2",
|
|
4589
|
+
"model_size_in_billions": "1_3",
|
|
4590
|
+
"quantizations": [
|
|
4591
|
+
"Q2_K",
|
|
4592
|
+
"Q3_K_L",
|
|
4593
|
+
"Q3_K_M",
|
|
4594
|
+
"Q3_K_S",
|
|
4595
|
+
"Q4_0",
|
|
4596
|
+
"Q4_K_M",
|
|
4597
|
+
"Q4_K_S",
|
|
4598
|
+
"Q5_0",
|
|
4599
|
+
"Q5_K_M",
|
|
4600
|
+
"Q5_K_S",
|
|
4601
|
+
"Q6_K",
|
|
4602
|
+
"Q8_0"
|
|
4603
|
+
],
|
|
4604
|
+
"model_id": "TheBloke/deepseek-coder-1.3b-base-GGUF",
|
|
4605
|
+
"model_file_name_template": "deepseek-coder-1.3b-base.{quantization}.gguf"
|
|
4606
|
+
},
|
|
4607
|
+
{
|
|
4608
|
+
"model_format": "ggufv2",
|
|
4609
|
+
"model_size_in_billions": "6_7",
|
|
4610
|
+
"quantizations": [
|
|
4611
|
+
"Q2_K",
|
|
4612
|
+
"Q3_K_L",
|
|
4613
|
+
"Q3_K_M",
|
|
4614
|
+
"Q3_K_S",
|
|
4615
|
+
"Q4_0",
|
|
4616
|
+
"Q4_K_M",
|
|
4617
|
+
"Q4_K_S",
|
|
4618
|
+
"Q5_0",
|
|
4619
|
+
"Q5_K_M",
|
|
4620
|
+
"Q5_K_S",
|
|
4621
|
+
"Q6_K",
|
|
4622
|
+
"Q8_0"
|
|
4623
|
+
],
|
|
4624
|
+
"model_id": "TheBloke/deepseek-coder-6.7B-base-GGUF",
|
|
4625
|
+
"model_file_name_template": "deepseek-coder-6.7b-base.{quantization}.gguf"
|
|
4626
|
+
},
|
|
4627
|
+
{
|
|
4628
|
+
"model_format": "ggufv2",
|
|
4629
|
+
"model_size_in_billions": 7,
|
|
4630
|
+
"quantizations": [
|
|
4631
|
+
"Q2_K",
|
|
4632
|
+
"Q3_K_L",
|
|
4633
|
+
"Q3_K_M",
|
|
4634
|
+
"Q3_K_S",
|
|
4635
|
+
"Q4_K_M",
|
|
4636
|
+
"Q4_K_S",
|
|
4637
|
+
"Q5_0",
|
|
4638
|
+
"Q5_K_M",
|
|
4639
|
+
"Q5_K_S",
|
|
4640
|
+
"Q6_K",
|
|
4641
|
+
"Q8_0"
|
|
4642
|
+
],
|
|
4643
|
+
"model_id": "dagbs/deepseek-coder-7b-base-v1.5-GGUF",
|
|
4644
|
+
"model_file_name_template": "deepseek-coder-7b-base-v1.5.{quantization}.gguf"
|
|
4645
|
+
},
|
|
4646
|
+
{
|
|
4647
|
+
"model_format": "ggufv2",
|
|
4648
|
+
"model_size_in_billions": 33,
|
|
4649
|
+
"quantizations": [
|
|
4650
|
+
"Q2_K",
|
|
4651
|
+
"Q3_K_L",
|
|
4652
|
+
"Q3_K_M",
|
|
4653
|
+
"Q3_K_S",
|
|
4654
|
+
"Q4_0",
|
|
4655
|
+
"Q4_K_M",
|
|
4656
|
+
"Q4_K_S",
|
|
4657
|
+
"Q5_0",
|
|
4658
|
+
"Q5_K_M",
|
|
4659
|
+
"Q5_K_S",
|
|
4660
|
+
"Q6_K",
|
|
4661
|
+
"Q8_0"
|
|
4662
|
+
],
|
|
4663
|
+
"model_id": "TheBloke/deepseek-coder-33B-base-GGUF",
|
|
4664
|
+
"model_file_name_template": "deepseek-coder-33b-base.{quantization}.gguf"
|
|
4665
|
+
},
|
|
4666
|
+
{
|
|
4667
|
+
"model_format": "gptq",
|
|
4668
|
+
"model_size_in_billions": "1_3",
|
|
4669
|
+
"quantizations": [
|
|
4670
|
+
"Int4"
|
|
4671
|
+
],
|
|
4672
|
+
"model_id": "TheBloke/deepseek-coder-1.3b-base-GPTQ",
|
|
4673
|
+
"model_revision": "a5bf3b76d70cda53327311a631b1003024d5de29"
|
|
4674
|
+
},
|
|
4675
|
+
{
|
|
4676
|
+
"model_format": "gptq",
|
|
4677
|
+
"model_size_in_billions": "6_7",
|
|
4678
|
+
"quantizations": [
|
|
4679
|
+
"Int4"
|
|
4680
|
+
],
|
|
4681
|
+
"model_id": "TheBloke/deepseek-coder-6.7B-base-GPTQ",
|
|
4682
|
+
"model_revision": "6476ea3d6e623a1313d363dbc6e172773e031bb1"
|
|
4683
|
+
},
|
|
4684
|
+
{
|
|
4685
|
+
"model_format": "gptq",
|
|
4686
|
+
"model_size_in_billions": 33,
|
|
4687
|
+
"quantizations": [
|
|
4688
|
+
"Int4"
|
|
4689
|
+
],
|
|
4690
|
+
"model_id": "TheBloke/deepseek-coder-33B-base-GPTQ",
|
|
4691
|
+
"model_revision": "f527d7325e463a5cb091d044e4f2b15902674a70"
|
|
4692
|
+
},
|
|
4693
|
+
{
|
|
4694
|
+
"model_format": "awq",
|
|
4695
|
+
"model_size_in_billions": "1_3",
|
|
4696
|
+
"quantizations": [
|
|
4697
|
+
"Int4"
|
|
4698
|
+
],
|
|
4699
|
+
"model_id": "TheBloke/deepseek-coder-1.3b-base-AWQ",
|
|
4700
|
+
"model_revision": "ffb66f1a2a194401b4f29025edcd261d7f0a08a7"
|
|
4701
|
+
},
|
|
4702
|
+
{
|
|
4703
|
+
"model_format": "awq",
|
|
4704
|
+
"model_size_in_billions": "6_7",
|
|
4705
|
+
"quantizations": [
|
|
4706
|
+
"Int4"
|
|
4707
|
+
],
|
|
4708
|
+
"model_id": "TheBloke/deepseek-coder-6.7B-base-AWQ",
|
|
4709
|
+
"model_revision": "e3d4bdf39712665f5e9d5c05c9df6f20fe1e2d5a"
|
|
4710
|
+
},
|
|
4711
|
+
{
|
|
4712
|
+
"model_format": "awq",
|
|
4713
|
+
"model_size_in_billions": 33,
|
|
4714
|
+
"quantizations": [
|
|
4715
|
+
"Int4"
|
|
4716
|
+
],
|
|
4717
|
+
"model_id": "TheBloke/deepseek-coder-33B-base-AWQ",
|
|
4718
|
+
"model_revision": "c7edb2d5868d61a5dcf2591933a8992c8cbe3ef4"
|
|
4719
|
+
}
|
|
4720
|
+
]
|
|
4721
|
+
},
|
|
4722
|
+
{
|
|
4723
|
+
"version": 1,
|
|
4724
|
+
"context_length": 16384,
|
|
4302
4725
|
"model_name": "deepseek-coder-instruct",
|
|
4303
4726
|
"model_lang": [
|
|
4304
4727
|
"en",
|
|
@@ -4331,6 +4754,17 @@
|
|
|
4331
4754
|
"model_id": "deepseek-ai/deepseek-coder-6.7b-instruct",
|
|
4332
4755
|
"model_revision": "cbb77d7448ea3168d884758817e7f895e3828d1c"
|
|
4333
4756
|
},
|
|
4757
|
+
{
|
|
4758
|
+
"model_format": "pytorch",
|
|
4759
|
+
"model_size_in_billions": 7,
|
|
4760
|
+
"quantizations": [
|
|
4761
|
+
"4-bit",
|
|
4762
|
+
"8-bit",
|
|
4763
|
+
"none"
|
|
4764
|
+
],
|
|
4765
|
+
"model_id": "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
|
|
4766
|
+
"model_revision": "2a050a4c59d687a85324d32e147517992117ed30"
|
|
4767
|
+
},
|
|
4334
4768
|
{
|
|
4335
4769
|
"model_format": "pytorch",
|
|
4336
4770
|
"model_size_in_billions": 33,
|
|
@@ -4382,6 +4816,25 @@
|
|
|
4382
4816
|
"model_id": "TheBloke/deepseek-coder-6.7B-instruct-GGUF",
|
|
4383
4817
|
"model_file_name_template": "deepseek-coder-6.7b-instruct.{quantization}.gguf"
|
|
4384
4818
|
},
|
|
4819
|
+
{
|
|
4820
|
+
"model_format": "ggufv2",
|
|
4821
|
+
"model_size_in_billions": 7,
|
|
4822
|
+
"quantizations": [
|
|
4823
|
+
"Q3_K_L",
|
|
4824
|
+
"Q3_K_M",
|
|
4825
|
+
"Q3_K_S",
|
|
4826
|
+
"Q4_0",
|
|
4827
|
+
"Q4_K_M",
|
|
4828
|
+
"Q4_K_S",
|
|
4829
|
+
"Q5_0",
|
|
4830
|
+
"Q5_K_M",
|
|
4831
|
+
"Q5_K_S",
|
|
4832
|
+
"Q6_K",
|
|
4833
|
+
"Q8_0"
|
|
4834
|
+
],
|
|
4835
|
+
"model_id": "LoneStriker/deepseek-coder-7b-instruct-v1.5-GGUF",
|
|
4836
|
+
"model_file_name_template": "deepseek-coder-7b-instruct-v1.5-{quantization}.gguf"
|
|
4837
|
+
},
|
|
4385
4838
|
{
|
|
4386
4839
|
"model_format": "ggufv2",
|
|
4387
4840
|
"model_size_in_billions": 33,
|
|
@@ -4401,6 +4854,60 @@
|
|
|
4401
4854
|
],
|
|
4402
4855
|
"model_id": "TheBloke/deepseek-coder-33B-instruct-GGUF",
|
|
4403
4856
|
"model_file_name_template": "deepseek-coder-33b-instruct.{quantization}.gguf"
|
|
4857
|
+
},
|
|
4858
|
+
{
|
|
4859
|
+
"model_format": "gptq",
|
|
4860
|
+
"model_size_in_billions": "1_3",
|
|
4861
|
+
"quantizations": [
|
|
4862
|
+
"Int4"
|
|
4863
|
+
],
|
|
4864
|
+
"model_id": "TheBloke/deepseek-coder-1.3b-instruct-GPTQ",
|
|
4865
|
+
"model_revision": "9c002e9af6cbdf3bd9244e2d7264b6a35d1dcacf"
|
|
4866
|
+
},
|
|
4867
|
+
{
|
|
4868
|
+
"model_format": "gptq",
|
|
4869
|
+
"model_size_in_billions": "6_7",
|
|
4870
|
+
"quantizations": [
|
|
4871
|
+
"Int4"
|
|
4872
|
+
],
|
|
4873
|
+
"model_id": "TheBloke/deepseek-coder-6.7B-instruct-GPTQ",
|
|
4874
|
+
"model_revision": "13ccea6e3a43dcfdcb655d92097610018b431a17"
|
|
4875
|
+
},
|
|
4876
|
+
{
|
|
4877
|
+
"model_format": "gptq",
|
|
4878
|
+
"model_size_in_billions": 33,
|
|
4879
|
+
"quantizations": [
|
|
4880
|
+
"Int4"
|
|
4881
|
+
],
|
|
4882
|
+
"model_id": "TheBloke/deepseek-coder-33B-instruct-GPTQ",
|
|
4883
|
+
"model_revision": "08372729d98dfc248f9531a412fe69e14e607027"
|
|
4884
|
+
},
|
|
4885
|
+
{
|
|
4886
|
+
"model_format": "awq",
|
|
4887
|
+
"model_size_in_billions": "1_3",
|
|
4888
|
+
"quantizations": [
|
|
4889
|
+
"Int4"
|
|
4890
|
+
],
|
|
4891
|
+
"model_id": "TheBloke/deepseek-coder-1.3b-instruct-AWQ",
|
|
4892
|
+
"model_revision": "a2a484da6e4146d055316a9a63cf5b13955715a4"
|
|
4893
|
+
},
|
|
4894
|
+
{
|
|
4895
|
+
"model_format": "awq",
|
|
4896
|
+
"model_size_in_billions": "6_7",
|
|
4897
|
+
"quantizations": [
|
|
4898
|
+
"Int4"
|
|
4899
|
+
],
|
|
4900
|
+
"model_id": "TheBloke/deepseek-coder-6.7B-instruct-AWQ",
|
|
4901
|
+
"model_revision": "502ae3e19e57ae78dc30a791ba33c565da72dc62"
|
|
4902
|
+
},
|
|
4903
|
+
{
|
|
4904
|
+
"model_format": "awq",
|
|
4905
|
+
"model_size_in_billions": 33,
|
|
4906
|
+
"quantizations": [
|
|
4907
|
+
"Int4"
|
|
4908
|
+
],
|
|
4909
|
+
"model_id": "TheBloke/deepseek-coder-33B-instruct-AWQ",
|
|
4910
|
+
"model_revision": "c40b499bac2712cd3c445cf1b05d2c6558ab0d29"
|
|
4404
4911
|
}
|
|
4405
4912
|
],
|
|
4406
4913
|
"prompt_style": {
|
|
@@ -4684,7 +5191,7 @@
|
|
|
4684
5191
|
},
|
|
4685
5192
|
{
|
|
4686
5193
|
"version": 1,
|
|
4687
|
-
"context_length":
|
|
5194
|
+
"context_length": 4096,
|
|
4688
5195
|
"model_name": "yi-vl-chat",
|
|
4689
5196
|
"model_lang": [
|
|
4690
5197
|
"en",
|
|
@@ -5326,9 +5833,9 @@
|
|
|
5326
5833
|
"ar"
|
|
5327
5834
|
],
|
|
5328
5835
|
"model_ability": [
|
|
5329
|
-
"
|
|
5836
|
+
"chat"
|
|
5330
5837
|
],
|
|
5331
|
-
"model_description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.",
|
|
5838
|
+
"model_description": "C4AI Command-R(+) is a research release of a 35 and 104 billion parameter highly performant generative model.",
|
|
5332
5839
|
"model_specs": [
|
|
5333
5840
|
{
|
|
5334
5841
|
"model_format": "pytorch",
|
|
@@ -5377,7 +5884,21 @@
|
|
|
5377
5884
|
"model_id": "alpindale/c4ai-command-r-plus-GPTQ",
|
|
5378
5885
|
"model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
|
|
5379
5886
|
}
|
|
5380
|
-
]
|
|
5887
|
+
],
|
|
5888
|
+
"prompt_style": {
|
|
5889
|
+
"style_name": "c4ai-command-r",
|
|
5890
|
+
"system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
|
|
5891
|
+
"roles": [
|
|
5892
|
+
"<|USER_TOKEN|>",
|
|
5893
|
+
"<|CHATBOT_TOKEN|>"
|
|
5894
|
+
],
|
|
5895
|
+
"intra_message_sep": "",
|
|
5896
|
+
"inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
|
|
5897
|
+
"stop_token_ids": [
|
|
5898
|
+
6,
|
|
5899
|
+
255001
|
|
5900
|
+
]
|
|
5901
|
+
}
|
|
5381
5902
|
},
|
|
5382
5903
|
{
|
|
5383
5904
|
"version": 1,
|
|
@@ -5418,7 +5939,21 @@
|
|
|
5418
5939
|
"model_id": "CohereForAI/c4ai-command-r-plus-4bit",
|
|
5419
5940
|
"model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
|
|
5420
5941
|
}
|
|
5421
|
-
]
|
|
5942
|
+
],
|
|
5943
|
+
"prompt_style": {
|
|
5944
|
+
"style_name": "c4ai-command-r",
|
|
5945
|
+
"system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
|
|
5946
|
+
"roles": [
|
|
5947
|
+
"<|USER_TOKEN|>",
|
|
5948
|
+
"<|CHATBOT_TOKEN|>"
|
|
5949
|
+
],
|
|
5950
|
+
"intra_message_sep": "",
|
|
5951
|
+
"inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
|
|
5952
|
+
"stop_token_ids": [
|
|
5953
|
+
6,
|
|
5954
|
+
255001
|
|
5955
|
+
]
|
|
5956
|
+
}
|
|
5422
5957
|
},
|
|
5423
5958
|
{
|
|
5424
5959
|
"version": 1,
|
|
@@ -5459,5 +5994,94 @@
|
|
|
5459
5994
|
32000
|
|
5460
5995
|
]
|
|
5461
5996
|
}
|
|
5462
|
-
}
|
|
5997
|
+
},
|
|
5998
|
+
{
|
|
5999
|
+
"version": 1,
|
|
6000
|
+
"context_length": 32768,
|
|
6001
|
+
"model_name": "internvl-chat",
|
|
6002
|
+
"model_lang": [
|
|
6003
|
+
"en",
|
|
6004
|
+
"zh"
|
|
6005
|
+
],
|
|
6006
|
+
"model_ability": [
|
|
6007
|
+
"chat",
|
|
6008
|
+
"vision"
|
|
6009
|
+
],
|
|
6010
|
+
"model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
|
|
6011
|
+
"model_specs": [
|
|
6012
|
+
{
|
|
6013
|
+
"model_format": "pytorch",
|
|
6014
|
+
"model_size_in_billions": 26,
|
|
6015
|
+
"quantizations": [
|
|
6016
|
+
"none"
|
|
6017
|
+
],
|
|
6018
|
+
"model_id": "OpenGVLab/InternVL-Chat-V1-5",
|
|
6019
|
+
"model_revision": "e822119e5806946ce128043023a73d715ecabf8d"
|
|
6020
|
+
},
|
|
6021
|
+
{
|
|
6022
|
+
"model_format": "pytorch",
|
|
6023
|
+
"model_size_in_billions": 26,
|
|
6024
|
+
"quantizations": [
|
|
6025
|
+
"Int8"
|
|
6026
|
+
],
|
|
6027
|
+
"model_id": "OpenGVLab/InternVL-Chat-V1-5-{quantization}",
|
|
6028
|
+
"model_revision": "acaaed06937c603ab04f084216ecb0268160f538"
|
|
6029
|
+
}
|
|
6030
|
+
],
|
|
6031
|
+
"prompt_style": {
|
|
6032
|
+
"style_name": "INTERNLM2",
|
|
6033
|
+
"system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
|
|
6034
|
+
"roles": [
|
|
6035
|
+
"<|im_start|>user",
|
|
6036
|
+
"<|im_start|>assistant"
|
|
6037
|
+
],
|
|
6038
|
+
"intra_message_sep": "<|im_end|>",
|
|
6039
|
+
"stop_token_ids": [
|
|
6040
|
+
92542
|
|
6041
|
+
],
|
|
6042
|
+
"stop": [
|
|
6043
|
+
"<|im_end|>"
|
|
6044
|
+
]
|
|
6045
|
+
}
|
|
6046
|
+
},
|
|
6047
|
+
{
|
|
6048
|
+
"version": 1,
|
|
6049
|
+
"context_length": 32768,
|
|
6050
|
+
"model_name": "mini-internvl-chat",
|
|
6051
|
+
"model_lang": [
|
|
6052
|
+
"en",
|
|
6053
|
+
"zh"
|
|
6054
|
+
],
|
|
6055
|
+
"model_ability": [
|
|
6056
|
+
"chat",
|
|
6057
|
+
"vision"
|
|
6058
|
+
],
|
|
6059
|
+
"model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
|
|
6060
|
+
"model_specs": [
|
|
6061
|
+
{
|
|
6062
|
+
"model_format": "pytorch",
|
|
6063
|
+
"model_size_in_billions": 2,
|
|
6064
|
+
"quantizations": [
|
|
6065
|
+
"none"
|
|
6066
|
+
],
|
|
6067
|
+
"model_id": "OpenGVLab/Mini-InternVL-Chat-2B-V1-5",
|
|
6068
|
+
"model_revision": "ce3f67acff17281bacbf4b156f402a0580fb9605"
|
|
6069
|
+
}
|
|
6070
|
+
],
|
|
6071
|
+
"prompt_style": {
|
|
6072
|
+
"style_name": "INTERNLM2",
|
|
6073
|
+
"system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
|
|
6074
|
+
"roles": [
|
|
6075
|
+
"<|im_start|>user",
|
|
6076
|
+
"<|im_start|>assistant"
|
|
6077
|
+
],
|
|
6078
|
+
"intra_message_sep": "<|im_end|>",
|
|
6079
|
+
"stop_token_ids": [
|
|
6080
|
+
92542
|
|
6081
|
+
],
|
|
6082
|
+
"stop": [
|
|
6083
|
+
"<|im_end|>"
|
|
6084
|
+
]
|
|
6085
|
+
}
|
|
6086
|
+
}
|
|
5463
6087
|
]
|