xinference 0.9.3__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (64)
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +47 -18
  3. xinference/api/oauth2/types.py +1 -0
  4. xinference/api/restful_api.py +16 -11
  5. xinference/client/restful/restful_client.py +12 -2
  6. xinference/conftest.py +13 -2
  7. xinference/constants.py +2 -0
  8. xinference/core/supervisor.py +32 -1
  9. xinference/core/worker.py +139 -20
  10. xinference/deploy/cmdline.py +119 -20
  11. xinference/model/llm/__init__.py +6 -0
  12. xinference/model/llm/llm_family.json +711 -10
  13. xinference/model/llm/llm_family_modelscope.json +557 -7
  14. xinference/model/llm/pytorch/chatglm.py +2 -1
  15. xinference/model/llm/pytorch/core.py +2 -0
  16. xinference/model/llm/pytorch/deepseek_vl.py +232 -0
  17. xinference/model/llm/pytorch/internlm2.py +2 -1
  18. xinference/model/llm/pytorch/omnilmm.py +153 -0
  19. xinference/model/llm/sglang/__init__.py +13 -0
  20. xinference/model/llm/sglang/core.py +365 -0
  21. xinference/model/llm/utils.py +46 -13
  22. xinference/model/llm/vllm/core.py +10 -0
  23. xinference/thirdparty/deepseek_vl/__init__.py +31 -0
  24. xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
  25. xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
  26. xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
  27. xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
  28. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
  29. xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
  30. xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
  31. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
  32. xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
  33. xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
  34. xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
  35. xinference/thirdparty/omnilmm/__init__.py +0 -0
  36. xinference/thirdparty/omnilmm/chat.py +216 -0
  37. xinference/thirdparty/omnilmm/constants.py +4 -0
  38. xinference/thirdparty/omnilmm/conversation.py +332 -0
  39. xinference/thirdparty/omnilmm/model/__init__.py +1 -0
  40. xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
  41. xinference/thirdparty/omnilmm/model/resampler.py +166 -0
  42. xinference/thirdparty/omnilmm/model/utils.py +563 -0
  43. xinference/thirdparty/omnilmm/train/__init__.py +13 -0
  44. xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
  45. xinference/thirdparty/omnilmm/utils.py +134 -0
  46. xinference/web/ui/build/asset-manifest.json +3 -3
  47. xinference/web/ui/build/index.html +1 -1
  48. xinference/web/ui/build/static/js/main.98516614.js +3 -0
  49. xinference/web/ui/build/static/js/main.98516614.js.map +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +1 -0
  54. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/METADATA +21 -5
  55. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/RECORD +60 -31
  56. xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
  57. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
  58. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
  59. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
  60. /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.98516614.js.LICENSE.txt} +0 -0
  61. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/LICENSE +0 -0
  62. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/WHEEL +0 -0
  63. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/entry_points.txt +0 -0
  64. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/top_level.txt +0 -0
@@ -98,6 +98,72 @@
98
98
  ]
99
99
  }
100
100
  },
101
+ {
102
+ "version": 1,
103
+ "context_length": 8194,
104
+ "model_name": "codeshell",
105
+ "model_lang": [
106
+ "en",
107
+ "zh"
108
+ ],
109
+ "model_ability": [
110
+ "generate"
111
+ ],
112
+ "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University.",
113
+ "model_specs": [
114
+ {
115
+ "model_format": "pytorch",
116
+ "model_size_in_billions": 7,
117
+ "quantizations": [
118
+ "none"
119
+ ],
120
+ "model_id": "WisdomShell/CodeShell-7B",
121
+ "model_revision": "1c79ab7fd316a62ab41d764facd3548a23fa5dee"
122
+ }
123
+ ]
124
+ },
125
+ {
126
+ "version": 1,
127
+ "context_length": 8194,
128
+ "model_name": "codeshell-chat",
129
+ "model_lang": [
130
+ "en",
131
+ "zh"
132
+ ],
133
+ "model_ability": [
134
+ "chat"
135
+ ],
136
+ "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University.",
137
+ "model_specs": [
138
+ {
139
+ "model_format": "pytorch",
140
+ "model_size_in_billions": 7,
141
+ "quantizations": [
142
+ "none"
143
+ ],
144
+ "model_id": "WisdomShell/CodeShell-7B-Chat",
145
+ "model_revision": "3cb06f589b7b1e2f8e728c77280b1114191d24de"
146
+ }
147
+ ],
148
+ "prompt_style": {
149
+ "style_name": "CodeShell",
150
+ "system_prompt": "",
151
+ "roles": [
152
+ "## human:",
153
+ "## assistant: "
154
+ ],
155
+ "intra_message_sep": "",
156
+ "inter_message_sep": "",
157
+ "stop_token_ids": [
158
+ 70000
159
+ ],
160
+ "stop": [
161
+ "<|endoftext|>",
162
+ "|||",
163
+ "|<end>|"
164
+ ]
165
+ }
166
+ },
101
167
  {
102
168
  "version": 1,
103
169
  "context_length": 2048,
@@ -573,7 +639,7 @@
573
639
  64797,
574
640
  2
575
641
  ],
576
- "stop":[
642
+ "stop": [
577
643
  "<|user|>",
578
644
  "<|observation|>"
579
645
  ]
@@ -616,7 +682,50 @@
616
682
  64797,
617
683
  2
618
684
  ],
619
- "stop":[
685
+ "stop": [
686
+ "<|user|>",
687
+ "<|observation|>"
688
+ ]
689
+ }
690
+ },
691
+ {
692
+ "version": 1,
693
+ "context_length": 131072,
694
+ "model_name": "chatglm3-128k",
695
+ "model_lang": [
696
+ "en",
697
+ "zh"
698
+ ],
699
+ "model_ability": [
700
+ "chat"
701
+ ],
702
+ "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
703
+ "model_specs": [
704
+ {
705
+ "model_format": "pytorch",
706
+ "model_size_in_billions": 6,
707
+ "quantizations": [
708
+ "4-bit",
709
+ "8-bit",
710
+ "none"
711
+ ],
712
+ "model_id": "THUDM/chatglm3-6b-128k",
713
+ "model_revision": "f0afbe671009abc9e31182170cf60636d5546cda"
714
+ }
715
+ ],
716
+ "prompt_style": {
717
+ "style_name": "CHATGLM3",
718
+ "system_prompt": "",
719
+ "roles": [
720
+ "user",
721
+ "assistant"
722
+ ],
723
+ "stop_token_ids": [
724
+ 64795,
725
+ 64797,
726
+ 2
727
+ ],
728
+ "stop": [
620
729
  "<|user|>",
621
730
  "<|observation|>"
622
731
  ]
@@ -667,7 +776,6 @@
667
776
  ]
668
777
  }
669
778
  },
670
-
671
779
  {
672
780
  "version": 1,
673
781
  "context_length": 2048,
@@ -715,8 +823,7 @@
715
823
  "model_revision": "7f1b7394f74c630f50612a19ba90bd021c373989"
716
824
  }
717
825
  ]
718
- }
719
- ,
826
+ },
720
827
  {
721
828
  "version": 1,
722
829
  "context_length": 4096,
@@ -1606,7 +1713,10 @@
1606
1713
  "model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf",
1607
1714
  "model_file_name_split_template": "qwen1_5-72b-chat-{quantization}.gguf.{part}",
1608
1715
  "quantization_parts": {
1609
- "q4_k_m": ["a", "b"]
1716
+ "q4_k_m": [
1717
+ "a",
1718
+ "b"
1719
+ ]
1610
1720
  }
1611
1721
  }
1612
1722
  ],
@@ -2658,7 +2768,11 @@
2658
2768
  "context_length": 32768,
2659
2769
  "model_name": "mixtral-v0.1",
2660
2770
  "model_lang": [
2661
- "en", "fr", "it", "de", "es"
2771
+ "en",
2772
+ "fr",
2773
+ "it",
2774
+ "de",
2775
+ "es"
2662
2776
  ],
2663
2777
  "model_ability": [
2664
2778
  "generate"
@@ -2699,7 +2813,11 @@
2699
2813
  "context_length": 32768,
2700
2814
  "model_name": "mixtral-instruct-v0.1",
2701
2815
  "model_lang": [
2702
- "en", "fr", "it", "de", "es"
2816
+ "en",
2817
+ "fr",
2818
+ "it",
2819
+ "de",
2820
+ "es"
2703
2821
  ],
2704
2822
  "model_ability": [
2705
2823
  "chat"
@@ -3275,9 +3393,107 @@
3275
3393
  ],
3276
3394
  "intra_message_sep": "\n",
3277
3395
  "inter_message_sep": "\n",
3278
- "stop_token_ids": [
3396
+ "stop_token_ids": [],
3397
+ "stop": []
3398
+ }
3399
+ },
3400
+ {
3401
+ "version": 1,
3402
+ "context_length": 4096,
3403
+ "model_name": "gorilla-openfunctions-v2",
3404
+ "model_lang": [
3405
+ "en"
3406
+ ],
3407
+ "model_ability": [
3408
+ "chat"
3409
+ ],
3410
+ "model_description": "OpenFunctions is designed to extend the Large Language Model (LLM) Chat Completion feature to formulate executable API calls given natural language instructions and API context.",
3411
+ "model_specs": [
3412
+ {
3413
+ "model_format": "pytorch",
3414
+ "model_size_in_billions": 7,
3415
+ "quantizations": [
3416
+ "none"
3417
+ ],
3418
+ "model_id": "gorilla-llm/gorilla-openfunctions-v2",
3419
+ "model_revision": "0f91d705e64b77fb55e35a7eab5d03bf965c9b5c"
3420
+ },
3421
+ {
3422
+ "model_format": "ggufv2",
3423
+ "model_size_in_billions": 7,
3424
+ "quantizations": [
3425
+ "Q2_K",
3426
+ "Q3_K_L",
3427
+ "Q3_K_M",
3428
+ "Q3_K_S",
3429
+ "Q4_0",
3430
+ "Q4_K_M",
3431
+ "Q4_K_S",
3432
+ "Q5_K_M",
3433
+ "Q5_K_S",
3434
+ "Q6_K"
3435
+ ],
3436
+ "model_id": "gorilla-llm/gorilla-openfunctions-v2-GGUF",
3437
+ "model_file_name_template": "gorilla-openfunctions-v2.{quantization}.gguf"
3438
+ }
3439
+ ],
3440
+ "prompt_style": {
3441
+ "style_name": "GORILLA_OPENFUNCTIONS",
3442
+ "system_prompt": "",
3443
+ "roles": [
3444
+ "",
3445
+ ""
3446
+ ],
3447
+ "intra_message_sep": "\n",
3448
+ "inter_message_sep": "\n",
3449
+ "stop_token_ids": [],
3450
+ "stop": []
3451
+ }
3452
+ },
3453
+ {
3454
+ "version": 1,
3455
+ "context_length": 4096,
3456
+ "model_name": "deepseek-vl-chat",
3457
+ "model_lang": [
3458
+ "en",
3459
+ "zh"
3460
+ ],
3461
+ "model_ability": [
3462
+ "chat",
3463
+ "vision"
3464
+ ],
3465
+ "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
3466
+ "model_specs": [
3467
+ {
3468
+ "model_format": "pytorch",
3469
+ "model_size_in_billions": "1_3",
3470
+ "quantizations": [
3471
+ "none"
3472
+ ],
3473
+ "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
3474
+ "model_revision": "8f13a8e00dbdc381d614a9d29d61b07e8fe91b3f"
3475
+ },
3476
+ {
3477
+ "model_format": "pytorch",
3478
+ "model_size_in_billions": 7,
3479
+ "quantizations": [
3480
+ "none"
3481
+ ],
3482
+ "model_id": "deepseek-ai/deepseek-vl-7b-chat",
3483
+ "model_revision": "6f16f00805f45b5249f709ce21820122eeb43556"
3484
+ }
3485
+ ],
3486
+ "prompt_style": {
3487
+ "style_name": "DEEPSEEK_CHAT",
3488
+ "system_prompt": "<|begin▁of▁sentence|>",
3489
+ "roles": [
3490
+ "User",
3491
+ "Assistant"
3279
3492
  ],
3493
+ "intra_message_sep": "\n\n",
3494
+ "inter_message_sep": "<|end▁of▁sentence|>",
3280
3495
  "stop": [
3496
+ "<|end▁of▁sentence|>"
3281
3497
  ]
3282
3498
  }
3283
3499
  },
@@ -3376,7 +3592,8 @@
3376
3592
  "context_length": 4096,
3377
3593
  "model_name": "deepseek-coder-instruct",
3378
3594
  "model_lang": [
3379
- "en", "zh"
3595
+ "en",
3596
+ "zh"
3380
3597
  ],
3381
3598
  "model_ability": [
3382
3599
  "chat"
@@ -3588,6 +3805,48 @@
3588
3805
  ]
3589
3806
  }
3590
3807
  },
3808
+ {
3809
+ "version":1,
3810
+ "context_length":2048,
3811
+ "model_name":"OmniLMM",
3812
+ "model_lang":[
3813
+ "en",
3814
+ "zh"
3815
+ ],
3816
+ "model_ability":[
3817
+ "chat",
3818
+ "vision"
3819
+ ],
3820
+ "model_description":"OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
3821
+ "model_specs":[
3822
+ {
3823
+ "model_format":"pytorch",
3824
+ "model_size_in_billions":3,
3825
+ "quantizations":[
3826
+ "none"
3827
+ ],
3828
+ "model_id":"openbmb/MiniCPM-V",
3829
+ "model_revision":"bec7d1cd1c9e804c064ec291163e40624825eaaa"
3830
+ },
3831
+ {
3832
+ "model_format":"pytorch",
3833
+ "model_size_in_billions":12,
3834
+ "quantizations":[
3835
+ "none"
3836
+ ],
3837
+ "model_id":"openbmb/OmniLMM-12B",
3838
+ "model_revision":"ef62bae5af34be653b9801037cd613e05ab24fdc"
3839
+ }
3840
+ ],
3841
+ "prompt_style":{
3842
+ "style_name":"OmniLMM",
3843
+ "system_prompt":"The role of first msg should be user",
3844
+ "roles":[
3845
+ "user",
3846
+ "assistant"
3847
+ ]
3848
+ }
3849
+ },
3591
3850
  {
3592
3851
  "version": 1,
3593
3852
  "context_length": 4096,
@@ -3814,5 +4073,447 @@
3814
4073
  "<start_of_turn>"
3815
4074
  ]
3816
4075
  }
4076
+ },
4077
+ {
4078
+ "version": 1,
4079
+ "context_length": 4096,
4080
+ "model_name": "platypus2-70b-instruct",
4081
+ "model_lang": [
4082
+ "en"
4083
+ ],
4084
+ "model_ability": [
4085
+ "generate"
4086
+ ],
4087
+ "model_description": "Platypus-70B-instruct is a merge of garage-bAInd/Platypus2-70B and upstage/Llama-2-70b-instruct-v2.",
4088
+ "model_specs": [
4089
+ {
4090
+ "model_format": "pytorch",
4091
+ "model_size_in_billions": 70,
4092
+ "quantizations": [
4093
+ "none"
4094
+ ],
4095
+ "model_id": "garage-bAInd/Platypus2-70B-instruct",
4096
+ "model_revision": "31389b50953688e4e542be53e6d2ab04d5c34e87"
4097
+ }
4098
+ ]
4099
+ },
4100
+ {
4101
+ "version": 1,
4102
+ "context_length": 2048,
4103
+ "model_name": "aquila2",
4104
+ "model_lang": [
4105
+ "zh"
4106
+ ],
4107
+ "model_ability": [
4108
+ "generate"
4109
+ ],
4110
+ "model_description": "Aquila2 series models are the base language models",
4111
+ "model_specs": [
4112
+ {
4113
+ "model_format": "pytorch",
4114
+ "model_size_in_billions": 7,
4115
+ "quantizations": [
4116
+ "none"
4117
+ ],
4118
+ "model_id": "BAAI/Aquila2-7B",
4119
+ "model_revision": "9c76e143c6e9621689ca76e078c465b0dee75eb8"
4120
+ },
4121
+ {
4122
+ "model_format": "pytorch",
4123
+ "model_size_in_billions": 34,
4124
+ "quantizations": [
4125
+ "none"
4126
+ ],
4127
+ "model_id": "BAAI/Aquila2-34B",
4128
+ "model_revision": "356733caf6221e9dd898cde8ff189a98175526ec"
4129
+ },
4130
+ {
4131
+ "model_format": "pytorch",
4132
+ "model_size_in_billions": 70,
4133
+ "quantizations": [
4134
+ "none"
4135
+ ],
4136
+ "model_id": "BAAI/Aquila2-70B-Expr",
4137
+ "model_revision": "32a2897235541b9f5238bbe88f8d76a19993c0ba"
4138
+ }
4139
+ ]
4140
+ },
4141
+ {
4142
+ "version": 1,
4143
+ "context_length": 2048,
4144
+ "model_name": "aquila2-chat",
4145
+ "model_lang": [
4146
+ "zh"
4147
+ ],
4148
+ "model_ability": [
4149
+ "chat"
4150
+ ],
4151
+ "model_description": "Aquila2-chat series models are the chat models",
4152
+ "model_specs": [
4153
+ {
4154
+ "model_format": "pytorch",
4155
+ "model_size_in_billions": 7,
4156
+ "quantizations": [
4157
+ "none"
4158
+ ],
4159
+ "model_id": "BAAI/AquilaChat2-7B",
4160
+ "model_revision": "0d060c4edeb4e0febd81130c17f6868653184fb3"
4161
+ },
4162
+ {
4163
+ "model_format": "ggufv2",
4164
+ "model_size_in_billions": 34,
4165
+ "quantizations": [
4166
+ "Q2_K",
4167
+ "Q3_K_L",
4168
+ "Q3_K_M",
4169
+ "Q3_K_S",
4170
+ "Q4_0",
4171
+ "Q4_K_M",
4172
+ "Q4_K_S",
4173
+ "Q5_0",
4174
+ "Q5_K_M",
4175
+ "Q5_K_S",
4176
+ "Q6_K",
4177
+ "Q8_0"
4178
+ ],
4179
+ "model_id": "TheBloke/AquilaChat2-34B-GGUF",
4180
+ "model_file_name_template": "aquilachat2-34b.{quantization}.gguf"
4181
+ },
4182
+ {
4183
+ "model_format": "gptq",
4184
+ "model_size_in_billions": 34,
4185
+ "quantizations": [
4186
+ "Int4"
4187
+ ],
4188
+ "model_id": "TheBloke/AquilaChat2-34B-GPTQ",
4189
+ "model_revision": "9a9d21424f7db608be51df769885514ab6e052db"
4190
+ },
4191
+ {
4192
+ "model_format": "awq",
4193
+ "model_size_in_billions": 34,
4194
+ "quantizations": [
4195
+ "Int4"
4196
+ ],
4197
+ "model_id": "TheBloke/AquilaChat2-34B-AWQ",
4198
+ "model_revision": "ad1dec1c8adb7fa6cb07b7e261aaa04fccf1c4c0"
4199
+ },
4200
+ {
4201
+ "model_format": "pytorch",
4202
+ "model_size_in_billions": 34,
4203
+ "quantizations": [
4204
+ "none"
4205
+ ],
4206
+ "model_id": "BAAI/AquilaChat2-34B",
4207
+ "model_revision": "b9cd9c7436435ab9cfa5e4f009be2b0354979ca8"
4208
+ },
4209
+ {
4210
+ "model_format": "pytorch",
4211
+ "model_size_in_billions": 70,
4212
+ "quantizations": [
4213
+ "none"
4214
+ ],
4215
+ "model_id": "BAAI/AquilaChat2-70B-Expr",
4216
+ "model_revision": "0df19b6e10f1a19ca663f7cc1141aae10f1825f4"
4217
+ }
4218
+ ],
4219
+ "prompt_style": {
4220
+ "style_name": "ADD_COLON_SINGLE",
4221
+ "intra_message_sep": "\n",
4222
+ "system_prompt": "",
4223
+ "roles": [
4224
+ "USER",
4225
+ "ASSISTANT"
4226
+ ],
4227
+ "stop_token_ids": [
4228
+ 100006,
4229
+ 100007
4230
+ ],
4231
+ "stop": [
4232
+ "[CLS]",
4233
+ "</s>"
4234
+ ]
4235
+ }
4236
+ },
4237
+ {
4238
+ "version": 1,
4239
+ "context_length": 16384,
4240
+ "model_name": "aquila2-chat-16k",
4241
+ "model_lang": [
4242
+ "zh"
4243
+ ],
4244
+ "model_ability": [
4245
+ "chat"
4246
+ ],
4247
+ "model_description": "AquilaChat2-16k series models are the long-text chat models",
4248
+ "model_specs": [
4249
+ {
4250
+ "model_format": "pytorch",
4251
+ "model_size_in_billions": 7,
4252
+ "quantizations": [
4253
+ "none"
4254
+ ],
4255
+ "model_id": "BAAI/AquilaChat2-7B-16K",
4256
+ "model_revision": "fb46d48479d05086ccf6952f19018322fcbb54cd"
4257
+ },
4258
+ {
4259
+ "model_format": "ggufv2",
4260
+ "model_size_in_billions": 34,
4261
+ "quantizations": [
4262
+ "Q2_K",
4263
+ "Q3_K_L",
4264
+ "Q3_K_M",
4265
+ "Q3_K_S",
4266
+ "Q4_0",
4267
+ "Q4_K_M",
4268
+ "Q4_K_S",
4269
+ "Q5_0",
4270
+ "Q5_K_M",
4271
+ "Q5_K_S",
4272
+ "Q6_K",
4273
+ "Q8_0"
4274
+ ],
4275
+ "model_id": "TheBloke/AquilaChat2-34B-16K-GGUF",
4276
+ "model_file_name_template": "aquilachat2-34b-16k.{quantization}.gguf"
4277
+ },
4278
+ {
4279
+ "model_format": "gptq",
4280
+ "model_size_in_billions": 34,
4281
+ "quantizations": [
4282
+ "Int4"
4283
+ ],
4284
+ "model_id": "TheBloke/AquilaChat2-34B-16K-GPTQ",
4285
+ "model_revision": "0afa1c2a55a4ee1a6f0dba81d9ec296dc7936b91"
4286
+ },
4287
+ {
4288
+ "model_format": "awq",
4289
+ "model_size_in_billions": 34,
4290
+ "quantizations": [
4291
+ "Int4"
4292
+ ],
4293
+ "model_id": "TheBloke/AquilaChat2-34B-16K-AWQ",
4294
+ "model_revision": "db7403ca492416903c84a7a38b11cb5506de48b1"
4295
+ },
4296
+ {
4297
+ "model_format": "pytorch",
4298
+ "model_size_in_billions": 34,
4299
+ "quantizations": [
4300
+ "none"
4301
+ ],
4302
+ "model_id": "BAAI/AquilaChat2-34B-16K",
4303
+ "model_revision": "a06fd164c7170714924d2881c61c8348425ebc94"
4304
+ }
4305
+ ],
4306
+ "prompt_style": {
4307
+ "style_name": "ADD_COLON_SINGLE",
4308
+ "intra_message_sep": "\n",
4309
+ "system_prompt": "",
4310
+ "roles": [
4311
+ "USER",
4312
+ "ASSISTANT"
4313
+ ],
4314
+ "stop_token_ids": [
4315
+ 100006,
4316
+ 100007
4317
+ ],
4318
+ "stop": [
4319
+ "[CLS]",
4320
+ "</s>"
4321
+ ]
4322
+ }
4323
+ },
4324
+ {
4325
+ "version": 1,
4326
+ "context_length": 4096,
4327
+ "model_name": "minicpm-2b-sft-bf16",
4328
+ "model_lang": [
4329
+ "zh"
4330
+ ],
4331
+ "model_ability": [
4332
+ "chat"
4333
+ ],
4334
+ "model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4335
+ "model_specs": [
4336
+ {
4337
+ "model_format": "pytorch",
4338
+ "model_size_in_billions": 2,
4339
+ "quantizations": [
4340
+ "none"
4341
+ ],
4342
+ "model_id": "openbmb/MiniCPM-2B-sft-bf16",
4343
+ "model_revision": "fe1d74027ebdd81cef5f815fa3a2d432a6b5de2a"
4344
+ }
4345
+ ],
4346
+ "prompt_style": {
4347
+ "style_name": "MINICPM-2B",
4348
+ "system_prompt": "",
4349
+ "roles": [
4350
+ "user",
4351
+ "assistant"
4352
+ ],
4353
+ "stop_token_ids": [
4354
+ 1,
4355
+ 2
4356
+ ],
4357
+ "stop": [
4358
+ "<s>",
4359
+ "</s>"
4360
+ ]
4361
+ }
4362
+ },
4363
+ {
4364
+ "version": 1,
4365
+ "context_length": 4096,
4366
+ "model_name": "minicpm-2b-sft-fp32",
4367
+ "model_lang": [
4368
+ "zh"
4369
+ ],
4370
+ "model_ability": [
4371
+ "chat"
4372
+ ],
4373
+ "model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4374
+ "model_specs": [
4375
+ {
4376
+ "model_format": "pytorch",
4377
+ "model_size_in_billions": 2,
4378
+ "quantizations": [
4379
+ "none"
4380
+ ],
4381
+ "model_id": "openbmb/MiniCPM-2B-sft-fp32",
4382
+ "model_revision": "35b90dd57d977b6e5bc4907986fa5b77aa15a82e"
4383
+ }
4384
+ ],
4385
+ "prompt_style": {
4386
+ "style_name": "MINICPM-2B",
4387
+ "system_prompt": "",
4388
+ "roles": [
4389
+ "user",
4390
+ "assistant"
4391
+ ],
4392
+ "stop_token_ids": [
4393
+ 1,
4394
+ 2
4395
+ ],
4396
+ "stop": [
4397
+ "<s>",
4398
+ "</s>"
4399
+ ]
4400
+ }
4401
+ },
4402
+ {
4403
+ "version": 1,
4404
+ "context_length": 4096,
4405
+ "model_name": "minicpm-2b-dpo-bf16",
4406
+ "model_lang": [
4407
+ "zh"
4408
+ ],
4409
+ "model_ability": [
4410
+ "chat"
4411
+ ],
4412
+ "model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4413
+ "model_specs": [
4414
+ {
4415
+ "model_format": "pytorch",
4416
+ "model_size_in_billions": 2,
4417
+ "quantizations": [
4418
+ "none"
4419
+ ],
4420
+ "model_id": "openbmb/MiniCPM-2B-dpo-bf16",
4421
+ "model_revision": "f4a3ba49f3f18695945c2a7c12400d4da99da498"
4422
+ }
4423
+ ],
4424
+ "prompt_style": {
4425
+ "style_name": "MINICPM-2B",
4426
+ "system_prompt": "",
4427
+ "roles": [
4428
+ "user",
4429
+ "assistant"
4430
+ ],
4431
+ "stop_token_ids": [
4432
+ 1,
4433
+ 2
4434
+ ],
4435
+ "stop": [
4436
+ "<s>",
4437
+ "</s>"
4438
+ ]
4439
+ }
4440
+ },
4441
+ {
4442
+ "version": 1,
4443
+ "context_length": 4096,
4444
+ "model_name": "minicpm-2b-dpo-fp16",
4445
+ "model_lang": [
4446
+ "zh"
4447
+ ],
4448
+ "model_ability": [
4449
+ "chat"
4450
+ ],
4451
+ "model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4452
+ "model_specs": [
4453
+ {
4454
+ "model_format": "pytorch",
4455
+ "model_size_in_billions": 2,
4456
+ "quantizations": [
4457
+ "none"
4458
+ ],
4459
+ "model_id": "openbmb/MiniCPM-2B-dpo-fp16",
4460
+ "model_revision": "e7a50289e4f839674cf8d4a5a2ce032ccacf64ac"
4461
+ }
4462
+ ],
4463
+ "prompt_style": {
4464
+ "style_name": "MINICPM-2B",
4465
+ "system_prompt": "",
4466
+ "roles": [
4467
+ "user",
4468
+ "assistant"
4469
+ ],
4470
+ "stop_token_ids": [
4471
+ 1,
4472
+ 2
4473
+ ],
4474
+ "stop": [
4475
+ "<s>",
4476
+ "</s>"
4477
+ ]
4478
+ }
4479
+ },
4480
+ {
4481
+ "version": 1,
4482
+ "context_length": 4096,
4483
+ "model_name": "minicpm-2b-dpo-fp32",
4484
+ "model_lang": [
4485
+ "zh"
4486
+ ],
4487
+ "model_ability": [
4488
+ "chat"
4489
+ ],
4490
+ "model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4491
+ "model_specs": [
4492
+ {
4493
+ "model_format": "pytorch",
4494
+ "model_size_in_billions": 2,
4495
+ "quantizations": [
4496
+ "none"
4497
+ ],
4498
+ "model_id": "openbmb/MiniCPM-2B-dpo-fp32",
4499
+ "model_revision": "b560a1593779b735a84a6daf72fba96ae38da288"
4500
+ }
4501
+ ],
4502
+ "prompt_style": {
4503
+ "style_name": "MINICPM-2B",
4504
+ "system_prompt": "",
4505
+ "roles": [
4506
+ "user",
4507
+ "assistant"
4508
+ ],
4509
+ "stop_token_ids": [
4510
+ 1,
4511
+ 2
4512
+ ],
4513
+ "stop": [
4514
+ "<s>",
4515
+ "</s>"
4516
+ ]
4517
+ }
3817
4518
  }
3818
4519
  ]