xinference 0.9.3__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +47 -18
- xinference/api/oauth2/types.py +1 -0
- xinference/api/restful_api.py +16 -11
- xinference/client/restful/restful_client.py +12 -2
- xinference/conftest.py +13 -2
- xinference/constants.py +2 -0
- xinference/core/supervisor.py +32 -1
- xinference/core/worker.py +139 -20
- xinference/deploy/cmdline.py +119 -20
- xinference/model/llm/__init__.py +6 -0
- xinference/model/llm/llm_family.json +711 -10
- xinference/model/llm/llm_family_modelscope.json +557 -7
- xinference/model/llm/pytorch/chatglm.py +2 -1
- xinference/model/llm/pytorch/core.py +2 -0
- xinference/model/llm/pytorch/deepseek_vl.py +232 -0
- xinference/model/llm/pytorch/internlm2.py +2 -1
- xinference/model/llm/pytorch/omnilmm.py +153 -0
- xinference/model/llm/sglang/__init__.py +13 -0
- xinference/model/llm/sglang/core.py +365 -0
- xinference/model/llm/utils.py +46 -13
- xinference/model/llm/vllm/core.py +10 -0
- xinference/thirdparty/deepseek_vl/__init__.py +31 -0
- xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
- xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
- xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
- xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
- xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
- xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
- xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
- xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
- xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
- xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
- xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
- xinference/thirdparty/omnilmm/__init__.py +0 -0
- xinference/thirdparty/omnilmm/chat.py +216 -0
- xinference/thirdparty/omnilmm/constants.py +4 -0
- xinference/thirdparty/omnilmm/conversation.py +332 -0
- xinference/thirdparty/omnilmm/model/__init__.py +1 -0
- xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
- xinference/thirdparty/omnilmm/model/resampler.py +166 -0
- xinference/thirdparty/omnilmm/model/utils.py +563 -0
- xinference/thirdparty/omnilmm/train/__init__.py +13 -0
- xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
- xinference/thirdparty/omnilmm/utils.py +134 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.98516614.js +3 -0
- xinference/web/ui/build/static/js/main.98516614.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +1 -0
- {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/METADATA +21 -5
- {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/RECORD +60 -31
- xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
- xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
- /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.98516614.js.LICENSE.txt} +0 -0
- {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/LICENSE +0 -0
- {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/WHEEL +0 -0
- {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/top_level.txt +0 -0
|
@@ -98,6 +98,72 @@
|
|
|
98
98
|
]
|
|
99
99
|
}
|
|
100
100
|
},
|
|
101
|
+
{
|
|
102
|
+
"version": 1,
|
|
103
|
+
"context_length": 8194,
|
|
104
|
+
"model_name": "codeshell",
|
|
105
|
+
"model_lang": [
|
|
106
|
+
"en",
|
|
107
|
+
"zh"
|
|
108
|
+
],
|
|
109
|
+
"model_ability": [
|
|
110
|
+
"generate"
|
|
111
|
+
],
|
|
112
|
+
"model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University. ",
|
|
113
|
+
"model_specs": [
|
|
114
|
+
{
|
|
115
|
+
"model_format": "pytorch",
|
|
116
|
+
"model_size_in_billions": 7,
|
|
117
|
+
"quantizations": [
|
|
118
|
+
"none"
|
|
119
|
+
],
|
|
120
|
+
"model_id": "WisdomShell/CodeShell-7B",
|
|
121
|
+
"model_revision": "1c79ab7fd316a62ab41d764facd3548a23fa5dee"
|
|
122
|
+
}
|
|
123
|
+
]
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
"version": 1,
|
|
127
|
+
"context_length": 8194,
|
|
128
|
+
"model_name": "codeshell-chat",
|
|
129
|
+
"model_lang": [
|
|
130
|
+
"en",
|
|
131
|
+
"zh"
|
|
132
|
+
],
|
|
133
|
+
"model_ability": [
|
|
134
|
+
"chat"
|
|
135
|
+
],
|
|
136
|
+
"model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University.",
|
|
137
|
+
"model_specs": [
|
|
138
|
+
{
|
|
139
|
+
"model_format": "pytorch",
|
|
140
|
+
"model_size_in_billions": 7,
|
|
141
|
+
"quantizations": [
|
|
142
|
+
"none"
|
|
143
|
+
],
|
|
144
|
+
"model_id": "WisdomShell/CodeShell-7B-Chat",
|
|
145
|
+
"model_revision": "3cb06f589b7b1e2f8e728c77280b1114191d24de"
|
|
146
|
+
}
|
|
147
|
+
],
|
|
148
|
+
"prompt_style": {
|
|
149
|
+
"style_name": "CodeShell",
|
|
150
|
+
"system_prompt": "",
|
|
151
|
+
"roles": [
|
|
152
|
+
"## human:",
|
|
153
|
+
"## assistant: "
|
|
154
|
+
],
|
|
155
|
+
"intra_message_sep": "",
|
|
156
|
+
"inter_message_sep": "",
|
|
157
|
+
"stop_token_ids": [
|
|
158
|
+
70000
|
|
159
|
+
],
|
|
160
|
+
"stop": [
|
|
161
|
+
"<|endoftext|>",
|
|
162
|
+
"|||",
|
|
163
|
+
"|<end>|"
|
|
164
|
+
]
|
|
165
|
+
}
|
|
166
|
+
},
|
|
101
167
|
{
|
|
102
168
|
"version": 1,
|
|
103
169
|
"context_length": 2048,
|
|
@@ -573,7 +639,7 @@
|
|
|
573
639
|
64797,
|
|
574
640
|
2
|
|
575
641
|
],
|
|
576
|
-
"stop":[
|
|
642
|
+
"stop": [
|
|
577
643
|
"<|user|>",
|
|
578
644
|
"<|observation|>"
|
|
579
645
|
]
|
|
@@ -616,7 +682,50 @@
|
|
|
616
682
|
64797,
|
|
617
683
|
2
|
|
618
684
|
],
|
|
619
|
-
"stop":[
|
|
685
|
+
"stop": [
|
|
686
|
+
"<|user|>",
|
|
687
|
+
"<|observation|>"
|
|
688
|
+
]
|
|
689
|
+
}
|
|
690
|
+
},
|
|
691
|
+
{
|
|
692
|
+
"version": 1,
|
|
693
|
+
"context_length": 131072,
|
|
694
|
+
"model_name": "chatglm3-128k",
|
|
695
|
+
"model_lang": [
|
|
696
|
+
"en",
|
|
697
|
+
"zh"
|
|
698
|
+
],
|
|
699
|
+
"model_ability": [
|
|
700
|
+
"chat"
|
|
701
|
+
],
|
|
702
|
+
"model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
|
|
703
|
+
"model_specs": [
|
|
704
|
+
{
|
|
705
|
+
"model_format": "pytorch",
|
|
706
|
+
"model_size_in_billions": 6,
|
|
707
|
+
"quantizations": [
|
|
708
|
+
"4-bit",
|
|
709
|
+
"8-bit",
|
|
710
|
+
"none"
|
|
711
|
+
],
|
|
712
|
+
"model_id": "THUDM/chatglm3-6b-128k",
|
|
713
|
+
"model_revision": "f0afbe671009abc9e31182170cf60636d5546cda"
|
|
714
|
+
}
|
|
715
|
+
],
|
|
716
|
+
"prompt_style": {
|
|
717
|
+
"style_name": "CHATGLM3",
|
|
718
|
+
"system_prompt": "",
|
|
719
|
+
"roles": [
|
|
720
|
+
"user",
|
|
721
|
+
"assistant"
|
|
722
|
+
],
|
|
723
|
+
"stop_token_ids": [
|
|
724
|
+
64795,
|
|
725
|
+
64797,
|
|
726
|
+
2
|
|
727
|
+
],
|
|
728
|
+
"stop": [
|
|
620
729
|
"<|user|>",
|
|
621
730
|
"<|observation|>"
|
|
622
731
|
]
|
|
@@ -667,7 +776,6 @@
|
|
|
667
776
|
]
|
|
668
777
|
}
|
|
669
778
|
},
|
|
670
|
-
|
|
671
779
|
{
|
|
672
780
|
"version": 1,
|
|
673
781
|
"context_length": 2048,
|
|
@@ -715,8 +823,7 @@
|
|
|
715
823
|
"model_revision": "7f1b7394f74c630f50612a19ba90bd021c373989"
|
|
716
824
|
}
|
|
717
825
|
]
|
|
718
|
-
}
|
|
719
|
-
,
|
|
826
|
+
},
|
|
720
827
|
{
|
|
721
828
|
"version": 1,
|
|
722
829
|
"context_length": 4096,
|
|
@@ -1606,7 +1713,10 @@
|
|
|
1606
1713
|
"model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf",
|
|
1607
1714
|
"model_file_name_split_template": "qwen1_5-72b-chat-{quantization}.gguf.{part}",
|
|
1608
1715
|
"quantization_parts": {
|
|
1609
|
-
"q4_k_m": [
|
|
1716
|
+
"q4_k_m": [
|
|
1717
|
+
"a",
|
|
1718
|
+
"b"
|
|
1719
|
+
]
|
|
1610
1720
|
}
|
|
1611
1721
|
}
|
|
1612
1722
|
],
|
|
@@ -2658,7 +2768,11 @@
|
|
|
2658
2768
|
"context_length": 32768,
|
|
2659
2769
|
"model_name": "mixtral-v0.1",
|
|
2660
2770
|
"model_lang": [
|
|
2661
|
-
"en",
|
|
2771
|
+
"en",
|
|
2772
|
+
"fr",
|
|
2773
|
+
"it",
|
|
2774
|
+
"de",
|
|
2775
|
+
"es"
|
|
2662
2776
|
],
|
|
2663
2777
|
"model_ability": [
|
|
2664
2778
|
"generate"
|
|
@@ -2699,7 +2813,11 @@
|
|
|
2699
2813
|
"context_length": 32768,
|
|
2700
2814
|
"model_name": "mixtral-instruct-v0.1",
|
|
2701
2815
|
"model_lang": [
|
|
2702
|
-
"en",
|
|
2816
|
+
"en",
|
|
2817
|
+
"fr",
|
|
2818
|
+
"it",
|
|
2819
|
+
"de",
|
|
2820
|
+
"es"
|
|
2703
2821
|
],
|
|
2704
2822
|
"model_ability": [
|
|
2705
2823
|
"chat"
|
|
@@ -3275,9 +3393,107 @@
|
|
|
3275
3393
|
],
|
|
3276
3394
|
"intra_message_sep": "\n",
|
|
3277
3395
|
"inter_message_sep": "\n",
|
|
3278
|
-
"stop_token_ids": [
|
|
3396
|
+
"stop_token_ids": [],
|
|
3397
|
+
"stop": []
|
|
3398
|
+
}
|
|
3399
|
+
},
|
|
3400
|
+
{
|
|
3401
|
+
"version": 1,
|
|
3402
|
+
"context_length": 4096,
|
|
3403
|
+
"model_name": "gorilla-openfunctions-v2",
|
|
3404
|
+
"model_lang": [
|
|
3405
|
+
"en"
|
|
3406
|
+
],
|
|
3407
|
+
"model_ability": [
|
|
3408
|
+
"chat"
|
|
3409
|
+
],
|
|
3410
|
+
"model_description": "OpenFunctions is designed to extend Large Language Model (LLM) Chat Completion feature to formulate executable APIs call given natural language instructions and API context.",
|
|
3411
|
+
"model_specs": [
|
|
3412
|
+
{
|
|
3413
|
+
"model_format": "pytorch",
|
|
3414
|
+
"model_size_in_billions": 7,
|
|
3415
|
+
"quantizations": [
|
|
3416
|
+
"none"
|
|
3417
|
+
],
|
|
3418
|
+
"model_id": "gorilla-llm/gorilla-openfunctions-v2",
|
|
3419
|
+
"model_revision": "0f91d705e64b77fb55e35a7eab5d03bf965c9b5c"
|
|
3420
|
+
},
|
|
3421
|
+
{
|
|
3422
|
+
"model_format": "ggufv2",
|
|
3423
|
+
"model_size_in_billions": 7,
|
|
3424
|
+
"quantizations": [
|
|
3425
|
+
"Q2_K",
|
|
3426
|
+
"Q3_K_L",
|
|
3427
|
+
"Q3_K_M",
|
|
3428
|
+
"Q3_K_S",
|
|
3429
|
+
"Q4_0",
|
|
3430
|
+
"Q4_K_M",
|
|
3431
|
+
"Q4_K_S",
|
|
3432
|
+
"Q5_K_M",
|
|
3433
|
+
"Q5_K_S",
|
|
3434
|
+
"Q6_K"
|
|
3435
|
+
],
|
|
3436
|
+
"model_id": "gorilla-llm//gorilla-openfunctions-v2-GGUF",
|
|
3437
|
+
"model_file_name_template": "gorilla-openfunctions-v2.{quantization}.gguf"
|
|
3438
|
+
}
|
|
3439
|
+
],
|
|
3440
|
+
"prompt_style": {
|
|
3441
|
+
"style_name": "GORILLA_OPENFUNCTIONS",
|
|
3442
|
+
"system_prompt": "",
|
|
3443
|
+
"roles": [
|
|
3444
|
+
"",
|
|
3445
|
+
""
|
|
3446
|
+
],
|
|
3447
|
+
"intra_message_sep": "\n",
|
|
3448
|
+
"inter_message_sep": "\n",
|
|
3449
|
+
"stop_token_ids": [],
|
|
3450
|
+
"stop": []
|
|
3451
|
+
}
|
|
3452
|
+
},
|
|
3453
|
+
{
|
|
3454
|
+
"version": 1,
|
|
3455
|
+
"context_length": 4096,
|
|
3456
|
+
"model_name": "deepseek-vl-chat",
|
|
3457
|
+
"model_lang": [
|
|
3458
|
+
"en",
|
|
3459
|
+
"zh"
|
|
3460
|
+
],
|
|
3461
|
+
"model_ability": [
|
|
3462
|
+
"chat",
|
|
3463
|
+
"vision"
|
|
3464
|
+
],
|
|
3465
|
+
"model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
|
|
3466
|
+
"model_specs": [
|
|
3467
|
+
{
|
|
3468
|
+
"model_format": "pytorch",
|
|
3469
|
+
"model_size_in_billions": "1_3",
|
|
3470
|
+
"quantizations": [
|
|
3471
|
+
"none"
|
|
3472
|
+
],
|
|
3473
|
+
"model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
|
|
3474
|
+
"model_revision": "8f13a8e00dbdc381d614a9d29d61b07e8fe91b3f"
|
|
3475
|
+
},
|
|
3476
|
+
{
|
|
3477
|
+
"model_format": "pytorch",
|
|
3478
|
+
"model_size_in_billions": 7,
|
|
3479
|
+
"quantizations": [
|
|
3480
|
+
"none"
|
|
3481
|
+
],
|
|
3482
|
+
"model_id": "deepseek-ai/deepseek-vl-7b-chat",
|
|
3483
|
+
"model_revision": "6f16f00805f45b5249f709ce21820122eeb43556"
|
|
3484
|
+
}
|
|
3485
|
+
],
|
|
3486
|
+
"prompt_style": {
|
|
3487
|
+
"style_name": "DEEPSEEK_CHAT",
|
|
3488
|
+
"system_prompt": "<|begin▁of▁sentence|>",
|
|
3489
|
+
"roles": [
|
|
3490
|
+
"User",
|
|
3491
|
+
"Assistant"
|
|
3279
3492
|
],
|
|
3493
|
+
"intra_message_sep": "\n\n",
|
|
3494
|
+
"inter_message_sep": "<|end▁of▁sentence|>",
|
|
3280
3495
|
"stop": [
|
|
3496
|
+
"<|end▁of▁sentence|>"
|
|
3281
3497
|
]
|
|
3282
3498
|
}
|
|
3283
3499
|
},
|
|
@@ -3376,7 +3592,8 @@
|
|
|
3376
3592
|
"context_length": 4096,
|
|
3377
3593
|
"model_name": "deepseek-coder-instruct",
|
|
3378
3594
|
"model_lang": [
|
|
3379
|
-
"en",
|
|
3595
|
+
"en",
|
|
3596
|
+
"zh"
|
|
3380
3597
|
],
|
|
3381
3598
|
"model_ability": [
|
|
3382
3599
|
"chat"
|
|
@@ -3588,6 +3805,48 @@
|
|
|
3588
3805
|
]
|
|
3589
3806
|
}
|
|
3590
3807
|
},
|
|
3808
|
+
{
|
|
3809
|
+
"version":1,
|
|
3810
|
+
"context_length":2048,
|
|
3811
|
+
"model_name":"OmniLMM",
|
|
3812
|
+
"model_lang":[
|
|
3813
|
+
"en",
|
|
3814
|
+
"zh"
|
|
3815
|
+
],
|
|
3816
|
+
"model_ability":[
|
|
3817
|
+
"chat",
|
|
3818
|
+
"vision"
|
|
3819
|
+
],
|
|
3820
|
+
"model_description":"OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
|
|
3821
|
+
"model_specs":[
|
|
3822
|
+
{
|
|
3823
|
+
"model_format":"pytorch",
|
|
3824
|
+
"model_size_in_billions":3,
|
|
3825
|
+
"quantizations":[
|
|
3826
|
+
"none"
|
|
3827
|
+
],
|
|
3828
|
+
"model_id":"openbmb/MiniCPM-V",
|
|
3829
|
+
"model_revision":"bec7d1cd1c9e804c064ec291163e40624825eaaa"
|
|
3830
|
+
},
|
|
3831
|
+
{
|
|
3832
|
+
"model_format":"pytorch",
|
|
3833
|
+
"model_size_in_billions":12,
|
|
3834
|
+
"quantizations":[
|
|
3835
|
+
"none"
|
|
3836
|
+
],
|
|
3837
|
+
"model_id":"openbmb/OmniLMM-12B",
|
|
3838
|
+
"model_revision":"ef62bae5af34be653b9801037cd613e05ab24fdc"
|
|
3839
|
+
}
|
|
3840
|
+
],
|
|
3841
|
+
"prompt_style":{
|
|
3842
|
+
"style_name":"OmniLMM",
|
|
3843
|
+
"system_prompt":"The role of first msg should be user",
|
|
3844
|
+
"roles":[
|
|
3845
|
+
"user",
|
|
3846
|
+
"assistant"
|
|
3847
|
+
]
|
|
3848
|
+
}
|
|
3849
|
+
},
|
|
3591
3850
|
{
|
|
3592
3851
|
"version": 1,
|
|
3593
3852
|
"context_length": 4096,
|
|
@@ -3814,5 +4073,447 @@
|
|
|
3814
4073
|
"<start_of_turn>"
|
|
3815
4074
|
]
|
|
3816
4075
|
}
|
|
4076
|
+
},
|
|
4077
|
+
{
|
|
4078
|
+
"version": 1,
|
|
4079
|
+
"context_length": 4096,
|
|
4080
|
+
"model_name": "platypus2-70b-instruct",
|
|
4081
|
+
"model_lang": [
|
|
4082
|
+
"en"
|
|
4083
|
+
],
|
|
4084
|
+
"model_ability": [
|
|
4085
|
+
"generate"
|
|
4086
|
+
],
|
|
4087
|
+
"model_description": "Platypus-70B-instruct is a merge of garage-bAInd/Platypus2-70B and upstage/Llama-2-70b-instruct-v2.",
|
|
4088
|
+
"model_specs": [
|
|
4089
|
+
{
|
|
4090
|
+
"model_format": "pytorch",
|
|
4091
|
+
"model_size_in_billions": 70,
|
|
4092
|
+
"quantizations": [
|
|
4093
|
+
"none"
|
|
4094
|
+
],
|
|
4095
|
+
"model_id": "garage-bAInd/Platypus2-70B-instruct",
|
|
4096
|
+
"model_revision": "31389b50953688e4e542be53e6d2ab04d5c34e87"
|
|
4097
|
+
}
|
|
4098
|
+
]
|
|
4099
|
+
},
|
|
4100
|
+
{
|
|
4101
|
+
"version": 1,
|
|
4102
|
+
"context_length": 2048,
|
|
4103
|
+
"model_name": "aquila2",
|
|
4104
|
+
"model_lang": [
|
|
4105
|
+
"zh"
|
|
4106
|
+
],
|
|
4107
|
+
"model_ability": [
|
|
4108
|
+
"generate"
|
|
4109
|
+
],
|
|
4110
|
+
"model_description": "Aquila2 series models are the base language models",
|
|
4111
|
+
"model_specs": [
|
|
4112
|
+
{
|
|
4113
|
+
"model_format": "pytorch",
|
|
4114
|
+
"model_size_in_billions": 7,
|
|
4115
|
+
"quantizations": [
|
|
4116
|
+
"none"
|
|
4117
|
+
],
|
|
4118
|
+
"model_id": "BAAI/Aquila2-7B",
|
|
4119
|
+
"model_revision": "9c76e143c6e9621689ca76e078c465b0dee75eb8"
|
|
4120
|
+
},
|
|
4121
|
+
{
|
|
4122
|
+
"model_format": "pytorch",
|
|
4123
|
+
"model_size_in_billions": 34,
|
|
4124
|
+
"quantizations": [
|
|
4125
|
+
"none"
|
|
4126
|
+
],
|
|
4127
|
+
"model_id": "BAAI/Aquila2-34B",
|
|
4128
|
+
"model_revision": "356733caf6221e9dd898cde8ff189a98175526ec"
|
|
4129
|
+
},
|
|
4130
|
+
{
|
|
4131
|
+
"model_format": "pytorch",
|
|
4132
|
+
"model_size_in_billions": 70,
|
|
4133
|
+
"quantizations": [
|
|
4134
|
+
"none"
|
|
4135
|
+
],
|
|
4136
|
+
"model_id": "BAAI/Aquila2-70B-Expr",
|
|
4137
|
+
"model_revision": "32a2897235541b9f5238bbe88f8d76a19993c0ba"
|
|
4138
|
+
}
|
|
4139
|
+
]
|
|
4140
|
+
},
|
|
4141
|
+
{
|
|
4142
|
+
"version": 1,
|
|
4143
|
+
"context_length": 2048,
|
|
4144
|
+
"model_name": "aquila2-chat",
|
|
4145
|
+
"model_lang": [
|
|
4146
|
+
"zh"
|
|
4147
|
+
],
|
|
4148
|
+
"model_ability": [
|
|
4149
|
+
"chat"
|
|
4150
|
+
],
|
|
4151
|
+
"model_description": "Aquila2-chat series models are the chat models",
|
|
4152
|
+
"model_specs": [
|
|
4153
|
+
{
|
|
4154
|
+
"model_format": "pytorch",
|
|
4155
|
+
"model_size_in_billions": 7,
|
|
4156
|
+
"quantizations": [
|
|
4157
|
+
"none"
|
|
4158
|
+
],
|
|
4159
|
+
"model_id": "BAAI/AquilaChat2-7B",
|
|
4160
|
+
"model_revision": "0d060c4edeb4e0febd81130c17f6868653184fb3"
|
|
4161
|
+
},
|
|
4162
|
+
{
|
|
4163
|
+
"model_format": "ggufv2",
|
|
4164
|
+
"model_size_in_billions": 34,
|
|
4165
|
+
"quantizations": [
|
|
4166
|
+
"Q2_K",
|
|
4167
|
+
"Q3_K_L",
|
|
4168
|
+
"Q3_K_M",
|
|
4169
|
+
"Q3_K_S",
|
|
4170
|
+
"Q4_0",
|
|
4171
|
+
"Q4_K_M",
|
|
4172
|
+
"Q4_K_S",
|
|
4173
|
+
"Q5_0",
|
|
4174
|
+
"Q5_K_M",
|
|
4175
|
+
"Q5_K_S",
|
|
4176
|
+
"Q6_K",
|
|
4177
|
+
"Q8_0"
|
|
4178
|
+
],
|
|
4179
|
+
"model_id": "TheBloke/AquilaChat2-34B-GGUF",
|
|
4180
|
+
"model_file_name_template": "aquilachat2-34b.{quantization}.gguf"
|
|
4181
|
+
},
|
|
4182
|
+
{
|
|
4183
|
+
"model_format": "gptq",
|
|
4184
|
+
"model_size_in_billions": 34,
|
|
4185
|
+
"quantizations": [
|
|
4186
|
+
"Int4"
|
|
4187
|
+
],
|
|
4188
|
+
"model_id": "TheBloke/AquilaChat2-34B-GPTQ",
|
|
4189
|
+
"model_revision": "9a9d21424f7db608be51df769885514ab6e052db"
|
|
4190
|
+
},
|
|
4191
|
+
{
|
|
4192
|
+
"model_format": "awq",
|
|
4193
|
+
"model_size_in_billions": "34",
|
|
4194
|
+
"quantizations": [
|
|
4195
|
+
"Int4"
|
|
4196
|
+
],
|
|
4197
|
+
"model_id": "TheBloke/AquilaChat2-34B-AWQ",
|
|
4198
|
+
"model_revision": "ad1dec1c8adb7fa6cb07b7e261aaa04fccf1c4c0"
|
|
4199
|
+
},
|
|
4200
|
+
{
|
|
4201
|
+
"model_format": "pytorch",
|
|
4202
|
+
"model_size_in_billions": 34,
|
|
4203
|
+
"quantizations": [
|
|
4204
|
+
"none"
|
|
4205
|
+
],
|
|
4206
|
+
"model_id": "BAAI/AquilaChat2-34B",
|
|
4207
|
+
"model_revision": "b9cd9c7436435ab9cfa5e4f009be2b0354979ca8"
|
|
4208
|
+
},
|
|
4209
|
+
{
|
|
4210
|
+
"model_format": "pytorch",
|
|
4211
|
+
"model_size_in_billions": 70,
|
|
4212
|
+
"quantizations": [
|
|
4213
|
+
"none"
|
|
4214
|
+
],
|
|
4215
|
+
"model_id": "BAAI/AquilaChat2-70B-Expr",
|
|
4216
|
+
"model_revision": "0df19b6e10f1a19ca663f7cc1141aae10f1825f4"
|
|
4217
|
+
}
|
|
4218
|
+
],
|
|
4219
|
+
"prompt_style": {
|
|
4220
|
+
"style_name": "ADD_COLON_SINGLE",
|
|
4221
|
+
"intra_message_sep": "\n",
|
|
4222
|
+
"system_prompt": "",
|
|
4223
|
+
"roles": [
|
|
4224
|
+
"USER",
|
|
4225
|
+
"ASSISTANT"
|
|
4226
|
+
],
|
|
4227
|
+
"stop_token_ids": [
|
|
4228
|
+
100006,
|
|
4229
|
+
100007
|
|
4230
|
+
],
|
|
4231
|
+
"stop": [
|
|
4232
|
+
"[CLS]",
|
|
4233
|
+
"</s>"
|
|
4234
|
+
]
|
|
4235
|
+
}
|
|
4236
|
+
},
|
|
4237
|
+
{
|
|
4238
|
+
"version": 1,
|
|
4239
|
+
"context_length": 16384,
|
|
4240
|
+
"model_name": "aquila2-chat-16k",
|
|
4241
|
+
"model_lang": [
|
|
4242
|
+
"zh"
|
|
4243
|
+
],
|
|
4244
|
+
"model_ability": [
|
|
4245
|
+
"chat"
|
|
4246
|
+
],
|
|
4247
|
+
"model_description": "AquilaChat2-16k series models are the long-text chat models",
|
|
4248
|
+
"model_specs": [
|
|
4249
|
+
{
|
|
4250
|
+
"model_format": "pytorch",
|
|
4251
|
+
"model_size_in_billions": 7,
|
|
4252
|
+
"quantizations": [
|
|
4253
|
+
"none"
|
|
4254
|
+
],
|
|
4255
|
+
"model_id": "BAAI/AquilaChat2-7B-16K",
|
|
4256
|
+
"model_revision": "fb46d48479d05086ccf6952f19018322fcbb54cd"
|
|
4257
|
+
},
|
|
4258
|
+
{
|
|
4259
|
+
"model_format": "ggufv2",
|
|
4260
|
+
"model_size_in_billions": 34,
|
|
4261
|
+
"quantizations": [
|
|
4262
|
+
"Q2_K",
|
|
4263
|
+
"Q3_K_L",
|
|
4264
|
+
"Q3_K_M",
|
|
4265
|
+
"Q3_K_S",
|
|
4266
|
+
"Q4_0",
|
|
4267
|
+
"Q4_K_M",
|
|
4268
|
+
"Q4_K_S",
|
|
4269
|
+
"Q5_0",
|
|
4270
|
+
"Q5_K_M",
|
|
4271
|
+
"Q5_K_S",
|
|
4272
|
+
"Q6_K",
|
|
4273
|
+
"Q8_0"
|
|
4274
|
+
],
|
|
4275
|
+
"model_id": "TheBloke/AquilaChat2-34B-16K-GGUF",
|
|
4276
|
+
"model_file_name_template": "aquilachat2-34b-16k.{quantization}.gguf"
|
|
4277
|
+
},
|
|
4278
|
+
{
|
|
4279
|
+
"model_format": "gptq",
|
|
4280
|
+
"model_size_in_billions": 34,
|
|
4281
|
+
"quantizations": [
|
|
4282
|
+
"Int4"
|
|
4283
|
+
],
|
|
4284
|
+
"model_id": "TheBloke/AquilaChat2-34B-16K-GPTQ",
|
|
4285
|
+
"model_revision": "0afa1c2a55a4ee1a6f0dba81d9ec296dc7936b91"
|
|
4286
|
+
},
|
|
4287
|
+
{
|
|
4288
|
+
"model_format": "awq",
|
|
4289
|
+
"model_size_in_billions": 34,
|
|
4290
|
+
"quantizations": [
|
|
4291
|
+
"Int4"
|
|
4292
|
+
],
|
|
4293
|
+
"model_id": "TheBloke/AquilaChat2-34B-16K-AWQ",
|
|
4294
|
+
"model_revision": "db7403ca492416903c84a7a38b11cb5506de48b1"
|
|
4295
|
+
},
|
|
4296
|
+
{
|
|
4297
|
+
"model_format": "pytorch",
|
|
4298
|
+
"model_size_in_billions": 34,
|
|
4299
|
+
"quantizations": [
|
|
4300
|
+
"none"
|
|
4301
|
+
],
|
|
4302
|
+
"model_id": "BAAI/AquilaChat2-34B-16K",
|
|
4303
|
+
"model_revision": "a06fd164c7170714924d2881c61c8348425ebc94"
|
|
4304
|
+
}
|
|
4305
|
+
],
|
|
4306
|
+
"prompt_style": {
|
|
4307
|
+
"style_name": "ADD_COLON_SINGLE",
|
|
4308
|
+
"intra_message_sep": "\n",
|
|
4309
|
+
"system_prompt": "",
|
|
4310
|
+
"roles": [
|
|
4311
|
+
"USER",
|
|
4312
|
+
"ASSISTANT"
|
|
4313
|
+
],
|
|
4314
|
+
"stop_token_ids": [
|
|
4315
|
+
100006,
|
|
4316
|
+
100007
|
|
4317
|
+
],
|
|
4318
|
+
"stop": [
|
|
4319
|
+
"[CLS]",
|
|
4320
|
+
"</s>"
|
|
4321
|
+
]
|
|
4322
|
+
}
|
|
4323
|
+
},
|
|
4324
|
+
{
|
|
4325
|
+
"version": 1,
|
|
4326
|
+
"context_length": 4096,
|
|
4327
|
+
"model_name": "minicpm-2b-sft-bf16",
|
|
4328
|
+
"model_lang": [
|
|
4329
|
+
"zh"
|
|
4330
|
+
],
|
|
4331
|
+
"model_ability": [
|
|
4332
|
+
"chat"
|
|
4333
|
+
],
|
|
4334
|
+
"model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
|
|
4335
|
+
"model_specs": [
|
|
4336
|
+
{
|
|
4337
|
+
"model_format": "pytorch",
|
|
4338
|
+
"model_size_in_billions": 2,
|
|
4339
|
+
"quantizations": [
|
|
4340
|
+
"none"
|
|
4341
|
+
],
|
|
4342
|
+
"model_id": "openbmb/MiniCPM-2B-sft-bf16",
|
|
4343
|
+
"model_revision": "fe1d74027ebdd81cef5f815fa3a2d432a6b5de2a"
|
|
4344
|
+
}
|
|
4345
|
+
],
|
|
4346
|
+
"prompt_style": {
|
|
4347
|
+
"style_name": "MINICPM-2B",
|
|
4348
|
+
"system_prompt": "",
|
|
4349
|
+
"roles": [
|
|
4350
|
+
"user",
|
|
4351
|
+
"assistant"
|
|
4352
|
+
],
|
|
4353
|
+
"stop_token_ids": [
|
|
4354
|
+
1,
|
|
4355
|
+
2
|
|
4356
|
+
],
|
|
4357
|
+
"stop": [
|
|
4358
|
+
"<s>",
|
|
4359
|
+
"</s>"
|
|
4360
|
+
]
|
|
4361
|
+
}
|
|
4362
|
+
},
|
|
4363
|
+
{
|
|
4364
|
+
"version": 1,
|
|
4365
|
+
"context_length": 4096,
|
|
4366
|
+
"model_name": "minicpm-2b-sft-fp32",
|
|
4367
|
+
"model_lang": [
|
|
4368
|
+
"zh"
|
|
4369
|
+
],
|
|
4370
|
+
"model_ability": [
|
|
4371
|
+
"chat"
|
|
4372
|
+
],
|
|
4373
|
+
"model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
|
|
4374
|
+
"model_specs": [
|
|
4375
|
+
{
|
|
4376
|
+
"model_format": "pytorch",
|
|
4377
|
+
"model_size_in_billions": 2,
|
|
4378
|
+
"quantizations": [
|
|
4379
|
+
"none"
|
|
4380
|
+
],
|
|
4381
|
+
"model_id": "openbmb/MiniCPM-2B-sft-fp32",
|
|
4382
|
+
"model_revision": "35b90dd57d977b6e5bc4907986fa5b77aa15a82e"
|
|
4383
|
+
}
|
|
4384
|
+
],
|
|
4385
|
+
"prompt_style": {
|
|
4386
|
+
"style_name": "MINICPM-2B",
|
|
4387
|
+
"system_prompt": "",
|
|
4388
|
+
"roles": [
|
|
4389
|
+
"user",
|
|
4390
|
+
"assistant"
|
|
4391
|
+
],
|
|
4392
|
+
"stop_token_ids": [
|
|
4393
|
+
1,
|
|
4394
|
+
2
|
|
4395
|
+
],
|
|
4396
|
+
"stop": [
|
|
4397
|
+
"<s>",
|
|
4398
|
+
"</s>"
|
|
4399
|
+
]
|
|
4400
|
+
}
|
|
4401
|
+
},
|
|
4402
|
+
{
|
|
4403
|
+
"version": 1,
|
|
4404
|
+
"context_length": 4096,
|
|
4405
|
+
"model_name": "minicpm-2b-dpo-bf16",
|
|
4406
|
+
"model_lang": [
|
|
4407
|
+
"zh"
|
|
4408
|
+
],
|
|
4409
|
+
"model_ability": [
|
|
4410
|
+
"chat"
|
|
4411
|
+
],
|
|
4412
|
+
"model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
|
|
4413
|
+
"model_specs": [
|
|
4414
|
+
{
|
|
4415
|
+
"model_format": "pytorch",
|
|
4416
|
+
"model_size_in_billions": 2,
|
|
4417
|
+
"quantizations": [
|
|
4418
|
+
"none"
|
|
4419
|
+
],
|
|
4420
|
+
"model_id": "openbmb/MiniCPM-2B-dpo-bf16",
|
|
4421
|
+
"model_revision": "f4a3ba49f3f18695945c2a7c12400d4da99da498"
|
|
4422
|
+
}
|
|
4423
|
+
],
|
|
4424
|
+
"prompt_style": {
|
|
4425
|
+
"style_name": "MINICPM-2B",
|
|
4426
|
+
"system_prompt": "",
|
|
4427
|
+
"roles": [
|
|
4428
|
+
"user",
|
|
4429
|
+
"assistant"
|
|
4430
|
+
],
|
|
4431
|
+
"stop_token_ids": [
|
|
4432
|
+
1,
|
|
4433
|
+
2
|
|
4434
|
+
],
|
|
4435
|
+
"stop": [
|
|
4436
|
+
"<s>",
|
|
4437
|
+
"</s>"
|
|
4438
|
+
]
|
|
4439
|
+
}
|
|
4440
|
+
},
|
|
4441
|
+
{
|
|
4442
|
+
"version": 1,
|
|
4443
|
+
"context_length": 4096,
|
|
4444
|
+
"model_name": "minicpm-2b-dpo-fp16",
|
|
4445
|
+
"model_lang": [
|
|
4446
|
+
"zh"
|
|
4447
|
+
],
|
|
4448
|
+
"model_ability": [
|
|
4449
|
+
"chat"
|
|
4450
|
+
],
|
|
4451
|
+
"model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
|
|
4452
|
+
"model_specs": [
|
|
4453
|
+
{
|
|
4454
|
+
"model_format": "pytorch",
|
|
4455
|
+
"model_size_in_billions": 2,
|
|
4456
|
+
"quantizations": [
|
|
4457
|
+
"none"
|
|
4458
|
+
],
|
|
4459
|
+
"model_id": "openbmb/MiniCPM-2B-dpo-fp16",
|
|
4460
|
+
"model_revision": "e7a50289e4f839674cf8d4a5a2ce032ccacf64ac"
|
|
4461
|
+
}
|
|
4462
|
+
],
|
|
4463
|
+
"prompt_style": {
|
|
4464
|
+
"style_name": "MINICPM-2B",
|
|
4465
|
+
"system_prompt": "",
|
|
4466
|
+
"roles": [
|
|
4467
|
+
"user",
|
|
4468
|
+
"assistant"
|
|
4469
|
+
],
|
|
4470
|
+
"stop_token_ids": [
|
|
4471
|
+
1,
|
|
4472
|
+
2
|
|
4473
|
+
],
|
|
4474
|
+
"stop": [
|
|
4475
|
+
"<s>",
|
|
4476
|
+
"</s>"
|
|
4477
|
+
]
|
|
4478
|
+
}
|
|
4479
|
+
},
|
|
4480
|
+
{
|
|
4481
|
+
"version": 1,
|
|
4482
|
+
"context_length": 4096,
|
|
4483
|
+
"model_name": "minicpm-2b-dpo-fp32",
|
|
4484
|
+
"model_lang": [
|
|
4485
|
+
"zh"
|
|
4486
|
+
],
|
|
4487
|
+
"model_ability": [
|
|
4488
|
+
"chat"
|
|
4489
|
+
],
|
|
4490
|
+
"model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
|
|
4491
|
+
"model_specs": [
|
|
4492
|
+
{
|
|
4493
|
+
"model_format": "pytorch",
|
|
4494
|
+
"model_size_in_billions": 2,
|
|
4495
|
+
"quantizations": [
|
|
4496
|
+
"none"
|
|
4497
|
+
],
|
|
4498
|
+
"model_id": "openbmb/MiniCPM-2B-dpo-fp32",
|
|
4499
|
+
"model_revision": "b560a1593779b735a84a6daf72fba96ae38da288"
|
|
4500
|
+
}
|
|
4501
|
+
],
|
|
4502
|
+
"prompt_style": {
|
|
4503
|
+
"style_name": "MINICPM-2B",
|
|
4504
|
+
"system_prompt": "",
|
|
4505
|
+
"roles": [
|
|
4506
|
+
"user",
|
|
4507
|
+
"assistant"
|
|
4508
|
+
],
|
|
4509
|
+
"stop_token_ids": [
|
|
4510
|
+
1,
|
|
4511
|
+
2
|
|
4512
|
+
],
|
|
4513
|
+
"stop": [
|
|
4514
|
+
"<s>",
|
|
4515
|
+
"</s>"
|
|
4516
|
+
]
|
|
4517
|
+
}
|
|
3817
4518
|
}
|
|
3818
4519
|
]
|