xinference 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (59) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +47 -18
  3. xinference/api/oauth2/types.py +1 -0
  4. xinference/api/restful_api.py +9 -1
  5. xinference/client/restful/restful_client.py +12 -2
  6. xinference/conftest.py +13 -2
  7. xinference/core/supervisor.py +32 -1
  8. xinference/core/worker.py +139 -20
  9. xinference/deploy/cmdline.py +119 -20
  10. xinference/model/llm/__init__.py +4 -0
  11. xinference/model/llm/llm_family.json +627 -0
  12. xinference/model/llm/llm_family_modelscope.json +471 -0
  13. xinference/model/llm/pytorch/core.py +2 -0
  14. xinference/model/llm/pytorch/deepseek_vl.py +232 -0
  15. xinference/model/llm/pytorch/omnilmm.py +153 -0
  16. xinference/model/llm/utils.py +11 -1
  17. xinference/model/llm/vllm/core.py +3 -0
  18. xinference/thirdparty/deepseek_vl/__init__.py +31 -0
  19. xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
  20. xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
  21. xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
  22. xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
  23. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
  24. xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
  25. xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
  26. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
  27. xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
  28. xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
  29. xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
  30. xinference/thirdparty/omnilmm/__init__.py +0 -0
  31. xinference/thirdparty/omnilmm/chat.py +216 -0
  32. xinference/thirdparty/omnilmm/constants.py +4 -0
  33. xinference/thirdparty/omnilmm/conversation.py +332 -0
  34. xinference/thirdparty/omnilmm/model/__init__.py +1 -0
  35. xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
  36. xinference/thirdparty/omnilmm/model/resampler.py +166 -0
  37. xinference/thirdparty/omnilmm/model/utils.py +563 -0
  38. xinference/thirdparty/omnilmm/train/__init__.py +13 -0
  39. xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
  40. xinference/thirdparty/omnilmm/utils.py +134 -0
  41. xinference/web/ui/build/asset-manifest.json +3 -3
  42. xinference/web/ui/build/index.html +1 -1
  43. xinference/web/ui/build/static/js/main.98516614.js +3 -0
  44. xinference/web/ui/build/static/js/main.98516614.js.map +1 -0
  45. xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +1 -0
  46. xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +1 -0
  47. xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +1 -0
  49. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/METADATA +18 -5
  50. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/RECORD +55 -28
  51. xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
  52. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
  53. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
  54. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
  55. /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.98516614.js.LICENSE.txt} +0 -0
  56. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/LICENSE +0 -0
  57. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/WHEEL +0 -0
  58. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/entry_points.txt +0 -0
  59. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/top_level.txt +0 -0
@@ -688,6 +688,49 @@
688
688
  ]
689
689
  }
690
690
  },
691
+ {
692
+ "version": 1,
693
+ "context_length": 131072,
694
+ "model_name": "chatglm3-128k",
695
+ "model_lang": [
696
+ "en",
697
+ "zh"
698
+ ],
699
+ "model_ability": [
700
+ "chat"
701
+ ],
702
+ "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
703
+ "model_specs": [
704
+ {
705
+ "model_format": "pytorch",
706
+ "model_size_in_billions": 6,
707
+ "quantizations": [
708
+ "4-bit",
709
+ "8-bit",
710
+ "none"
711
+ ],
712
+ "model_id": "THUDM/chatglm3-6b-128k",
713
+ "model_revision": "f0afbe671009abc9e31182170cf60636d5546cda"
714
+ }
715
+ ],
716
+ "prompt_style": {
717
+ "style_name": "CHATGLM3",
718
+ "system_prompt": "",
719
+ "roles": [
720
+ "user",
721
+ "assistant"
722
+ ],
723
+ "stop_token_ids": [
724
+ 64795,
725
+ 64797,
726
+ 2
727
+ ],
728
+ "stop": [
729
+ "<|user|>",
730
+ "<|observation|>"
731
+ ]
732
+ }
733
+ },
691
734
  {
692
735
  "version": 1,
693
736
  "context_length": 2048,
@@ -3354,6 +3397,106 @@
3354
3397
  "stop": []
3355
3398
  }
3356
3399
  },
3400
+ {
3401
+ "version": 1,
3402
+ "context_length": 4096,
3403
+ "model_name": "gorilla-openfunctions-v2",
3404
+ "model_lang": [
3405
+ "en"
3406
+ ],
3407
+ "model_ability": [
3408
+ "chat"
3409
+ ],
3410
+ "model_description": "OpenFunctions is designed to extend Large Language Model (LLM) Chat Completion feature to formulate executable APIs call given natural language instructions and API context.",
3411
+ "model_specs": [
3412
+ {
3413
+ "model_format": "pytorch",
3414
+ "model_size_in_billions": 7,
3415
+ "quantizations": [
3416
+ "none"
3417
+ ],
3418
+ "model_id": "gorilla-llm/gorilla-openfunctions-v2",
3419
+ "model_revision": "0f91d705e64b77fb55e35a7eab5d03bf965c9b5c"
3420
+ },
3421
+ {
3422
+ "model_format": "ggufv2",
3423
+ "model_size_in_billions": 7,
3424
+ "quantizations": [
3425
+ "Q2_K",
3426
+ "Q3_K_L",
3427
+ "Q3_K_M",
3428
+ "Q3_K_S",
3429
+ "Q4_0",
3430
+ "Q4_K_M",
3431
+ "Q4_K_S",
3432
+ "Q5_K_M",
3433
+ "Q5_K_S",
3434
+ "Q6_K"
3435
+ ],
3436
+ "model_id": "gorilla-llm/gorilla-openfunctions-v2-GGUF",
3437
+ "model_file_name_template": "gorilla-openfunctions-v2.{quantization}.gguf"
3438
+ }
3439
+ ],
3440
+ "prompt_style": {
3441
+ "style_name": "GORILLA_OPENFUNCTIONS",
3442
+ "system_prompt": "",
3443
+ "roles": [
3444
+ "",
3445
+ ""
3446
+ ],
3447
+ "intra_message_sep": "\n",
3448
+ "inter_message_sep": "\n",
3449
+ "stop_token_ids": [],
3450
+ "stop": []
3451
+ }
3452
+ },
3453
+ {
3454
+ "version": 1,
3455
+ "context_length": 4096,
3456
+ "model_name": "deepseek-vl-chat",
3457
+ "model_lang": [
3458
+ "en",
3459
+ "zh"
3460
+ ],
3461
+ "model_ability": [
3462
+ "chat",
3463
+ "vision"
3464
+ ],
3465
+ "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
3466
+ "model_specs": [
3467
+ {
3468
+ "model_format": "pytorch",
3469
+ "model_size_in_billions": "1_3",
3470
+ "quantizations": [
3471
+ "none"
3472
+ ],
3473
+ "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
3474
+ "model_revision": "8f13a8e00dbdc381d614a9d29d61b07e8fe91b3f"
3475
+ },
3476
+ {
3477
+ "model_format": "pytorch",
3478
+ "model_size_in_billions": 7,
3479
+ "quantizations": [
3480
+ "none"
3481
+ ],
3482
+ "model_id": "deepseek-ai/deepseek-vl-7b-chat",
3483
+ "model_revision": "6f16f00805f45b5249f709ce21820122eeb43556"
3484
+ }
3485
+ ],
3486
+ "prompt_style": {
3487
+ "style_name": "DEEPSEEK_CHAT",
3488
+ "system_prompt": "<|begin▁of▁sentence|>",
3489
+ "roles": [
3490
+ "User",
3491
+ "Assistant"
3492
+ ],
3493
+ "intra_message_sep": "\n\n",
3494
+ "inter_message_sep": "<|end▁of▁sentence|>",
3495
+ "stop": [
3496
+ "<|end▁of▁sentence|>"
3497
+ ]
3498
+ }
3499
+ },
3357
3500
  {
3358
3501
  "version": 1,
3359
3502
  "context_length": 4096,
@@ -3662,6 +3805,48 @@
3662
3805
  ]
3663
3806
  }
3664
3807
  },
3808
+ {
3809
+ "version":1,
3810
+ "context_length":2048,
3811
+ "model_name":"OmniLMM",
3812
+ "model_lang":[
3813
+ "en",
3814
+ "zh"
3815
+ ],
3816
+ "model_ability":[
3817
+ "chat",
3818
+ "vision"
3819
+ ],
3820
+ "model_description":"OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
3821
+ "model_specs":[
3822
+ {
3823
+ "model_format":"pytorch",
3824
+ "model_size_in_billions":3,
3825
+ "quantizations":[
3826
+ "none"
3827
+ ],
3828
+ "model_id":"openbmb/MiniCPM-V",
3829
+ "model_revision":"bec7d1cd1c9e804c064ec291163e40624825eaaa"
3830
+ },
3831
+ {
3832
+ "model_format":"pytorch",
3833
+ "model_size_in_billions":12,
3834
+ "quantizations":[
3835
+ "none"
3836
+ ],
3837
+ "model_id":"openbmb/OmniLMM-12B",
3838
+ "model_revision":"ef62bae5af34be653b9801037cd613e05ab24fdc"
3839
+ }
3840
+ ],
3841
+ "prompt_style":{
3842
+ "style_name":"OmniLMM",
3843
+ "system_prompt":"The role of first msg should be user",
3844
+ "roles":[
3845
+ "user",
3846
+ "assistant"
3847
+ ]
3848
+ }
3849
+ },
3665
3850
  {
3666
3851
  "version": 1,
3667
3852
  "context_length": 4096,
@@ -3888,5 +4073,447 @@
3888
4073
  "<start_of_turn>"
3889
4074
  ]
3890
4075
  }
4076
+ },
4077
+ {
4078
+ "version": 1,
4079
+ "context_length": 4096,
4080
+ "model_name": "platypus2-70b-instruct",
4081
+ "model_lang": [
4082
+ "en"
4083
+ ],
4084
+ "model_ability": [
4085
+ "generate"
4086
+ ],
4087
+ "model_description": "Platypus-70B-instruct is a merge of garage-bAInd/Platypus2-70B and upstage/Llama-2-70b-instruct-v2.",
4088
+ "model_specs": [
4089
+ {
4090
+ "model_format": "pytorch",
4091
+ "model_size_in_billions": 70,
4092
+ "quantizations": [
4093
+ "none"
4094
+ ],
4095
+ "model_id": "garage-bAInd/Platypus2-70B-instruct",
4096
+ "model_revision": "31389b50953688e4e542be53e6d2ab04d5c34e87"
4097
+ }
4098
+ ]
4099
+ },
4100
+ {
4101
+ "version": 1,
4102
+ "context_length": 2048,
4103
+ "model_name": "aquila2",
4104
+ "model_lang": [
4105
+ "zh"
4106
+ ],
4107
+ "model_ability": [
4108
+ "generate"
4109
+ ],
4110
+ "model_description": "Aquila2 series models are the base language models",
4111
+ "model_specs": [
4112
+ {
4113
+ "model_format": "pytorch",
4114
+ "model_size_in_billions": 7,
4115
+ "quantizations": [
4116
+ "none"
4117
+ ],
4118
+ "model_id": "BAAI/Aquila2-7B",
4119
+ "model_revision": "9c76e143c6e9621689ca76e078c465b0dee75eb8"
4120
+ },
4121
+ {
4122
+ "model_format": "pytorch",
4123
+ "model_size_in_billions": 34,
4124
+ "quantizations": [
4125
+ "none"
4126
+ ],
4127
+ "model_id": "BAAI/Aquila2-34B",
4128
+ "model_revision": "356733caf6221e9dd898cde8ff189a98175526ec"
4129
+ },
4130
+ {
4131
+ "model_format": "pytorch",
4132
+ "model_size_in_billions": 70,
4133
+ "quantizations": [
4134
+ "none"
4135
+ ],
4136
+ "model_id": "BAAI/Aquila2-70B-Expr",
4137
+ "model_revision": "32a2897235541b9f5238bbe88f8d76a19993c0ba"
4138
+ }
4139
+ ]
4140
+ },
4141
+ {
4142
+ "version": 1,
4143
+ "context_length": 2048,
4144
+ "model_name": "aquila2-chat",
4145
+ "model_lang": [
4146
+ "zh"
4147
+ ],
4148
+ "model_ability": [
4149
+ "chat"
4150
+ ],
4151
+ "model_description": "Aquila2-chat series models are the chat models",
4152
+ "model_specs": [
4153
+ {
4154
+ "model_format": "pytorch",
4155
+ "model_size_in_billions": 7,
4156
+ "quantizations": [
4157
+ "none"
4158
+ ],
4159
+ "model_id": "BAAI/AquilaChat2-7B",
4160
+ "model_revision": "0d060c4edeb4e0febd81130c17f6868653184fb3"
4161
+ },
4162
+ {
4163
+ "model_format": "ggufv2",
4164
+ "model_size_in_billions": 34,
4165
+ "quantizations": [
4166
+ "Q2_K",
4167
+ "Q3_K_L",
4168
+ "Q3_K_M",
4169
+ "Q3_K_S",
4170
+ "Q4_0",
4171
+ "Q4_K_M",
4172
+ "Q4_K_S",
4173
+ "Q5_0",
4174
+ "Q5_K_M",
4175
+ "Q5_K_S",
4176
+ "Q6_K",
4177
+ "Q8_0"
4178
+ ],
4179
+ "model_id": "TheBloke/AquilaChat2-34B-GGUF",
4180
+ "model_file_name_template": "aquilachat2-34b.{quantization}.gguf"
4181
+ },
4182
+ {
4183
+ "model_format": "gptq",
4184
+ "model_size_in_billions": 34,
4185
+ "quantizations": [
4186
+ "Int4"
4187
+ ],
4188
+ "model_id": "TheBloke/AquilaChat2-34B-GPTQ",
4189
+ "model_revision": "9a9d21424f7db608be51df769885514ab6e052db"
4190
+ },
4191
+ {
4192
+ "model_format": "awq",
4193
+ "model_size_in_billions": 34,
4194
+ "quantizations": [
4195
+ "Int4"
4196
+ ],
4197
+ "model_id": "TheBloke/AquilaChat2-34B-AWQ",
4198
+ "model_revision": "ad1dec1c8adb7fa6cb07b7e261aaa04fccf1c4c0"
4199
+ },
4200
+ {
4201
+ "model_format": "pytorch",
4202
+ "model_size_in_billions": 34,
4203
+ "quantizations": [
4204
+ "none"
4205
+ ],
4206
+ "model_id": "BAAI/AquilaChat2-34B",
4207
+ "model_revision": "b9cd9c7436435ab9cfa5e4f009be2b0354979ca8"
4208
+ },
4209
+ {
4210
+ "model_format": "pytorch",
4211
+ "model_size_in_billions": 70,
4212
+ "quantizations": [
4213
+ "none"
4214
+ ],
4215
+ "model_id": "BAAI/AquilaChat2-70B-Expr",
4216
+ "model_revision": "0df19b6e10f1a19ca663f7cc1141aae10f1825f4"
4217
+ }
4218
+ ],
4219
+ "prompt_style": {
4220
+ "style_name": "ADD_COLON_SINGLE",
4221
+ "intra_message_sep": "\n",
4222
+ "system_prompt": "",
4223
+ "roles": [
4224
+ "USER",
4225
+ "ASSISTANT"
4226
+ ],
4227
+ "stop_token_ids": [
4228
+ 100006,
4229
+ 100007
4230
+ ],
4231
+ "stop": [
4232
+ "[CLS]",
4233
+ "</s>"
4234
+ ]
4235
+ }
4236
+ },
4237
+ {
4238
+ "version": 1,
4239
+ "context_length": 16384,
4240
+ "model_name": "aquila2-chat-16k",
4241
+ "model_lang": [
4242
+ "zh"
4243
+ ],
4244
+ "model_ability": [
4245
+ "chat"
4246
+ ],
4247
+ "model_description": "AquilaChat2-16k series models are the long-text chat models",
4248
+ "model_specs": [
4249
+ {
4250
+ "model_format": "pytorch",
4251
+ "model_size_in_billions": 7,
4252
+ "quantizations": [
4253
+ "none"
4254
+ ],
4255
+ "model_id": "BAAI/AquilaChat2-7B-16K",
4256
+ "model_revision": "fb46d48479d05086ccf6952f19018322fcbb54cd"
4257
+ },
4258
+ {
4259
+ "model_format": "ggufv2",
4260
+ "model_size_in_billions": 34,
4261
+ "quantizations": [
4262
+ "Q2_K",
4263
+ "Q3_K_L",
4264
+ "Q3_K_M",
4265
+ "Q3_K_S",
4266
+ "Q4_0",
4267
+ "Q4_K_M",
4268
+ "Q4_K_S",
4269
+ "Q5_0",
4270
+ "Q5_K_M",
4271
+ "Q5_K_S",
4272
+ "Q6_K",
4273
+ "Q8_0"
4274
+ ],
4275
+ "model_id": "TheBloke/AquilaChat2-34B-16K-GGUF",
4276
+ "model_file_name_template": "aquilachat2-34b-16k.{quantization}.gguf"
4277
+ },
4278
+ {
4279
+ "model_format": "gptq",
4280
+ "model_size_in_billions": 34,
4281
+ "quantizations": [
4282
+ "Int4"
4283
+ ],
4284
+ "model_id": "TheBloke/AquilaChat2-34B-16K-GPTQ",
4285
+ "model_revision": "0afa1c2a55a4ee1a6f0dba81d9ec296dc7936b91"
4286
+ },
4287
+ {
4288
+ "model_format": "awq",
4289
+ "model_size_in_billions": 34,
4290
+ "quantizations": [
4291
+ "Int4"
4292
+ ],
4293
+ "model_id": "TheBloke/AquilaChat2-34B-16K-AWQ",
4294
+ "model_revision": "db7403ca492416903c84a7a38b11cb5506de48b1"
4295
+ },
4296
+ {
4297
+ "model_format": "pytorch",
4298
+ "model_size_in_billions": 34,
4299
+ "quantizations": [
4300
+ "none"
4301
+ ],
4302
+ "model_id": "BAAI/AquilaChat2-34B-16K",
4303
+ "model_revision": "a06fd164c7170714924d2881c61c8348425ebc94"
4304
+ }
4305
+ ],
4306
+ "prompt_style": {
4307
+ "style_name": "ADD_COLON_SINGLE",
4308
+ "intra_message_sep": "\n",
4309
+ "system_prompt": "",
4310
+ "roles": [
4311
+ "USER",
4312
+ "ASSISTANT"
4313
+ ],
4314
+ "stop_token_ids": [
4315
+ 100006,
4316
+ 100007
4317
+ ],
4318
+ "stop": [
4319
+ "[CLS]",
4320
+ "</s>"
4321
+ ]
4322
+ }
4323
+ },
4324
+ {
4325
+ "version": 1,
4326
+ "context_length": 4096,
4327
+ "model_name": "minicpm-2b-sft-bf16",
4328
+ "model_lang": [
4329
+ "zh"
4330
+ ],
4331
+ "model_ability": [
4332
+ "chat"
4333
+ ],
4334
+ "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4335
+ "model_specs": [
4336
+ {
4337
+ "model_format": "pytorch",
4338
+ "model_size_in_billions": 2,
4339
+ "quantizations": [
4340
+ "none"
4341
+ ],
4342
+ "model_id": "openbmb/MiniCPM-2B-sft-bf16",
4343
+ "model_revision": "fe1d74027ebdd81cef5f815fa3a2d432a6b5de2a"
4344
+ }
4345
+ ],
4346
+ "prompt_style": {
4347
+ "style_name": "MINICPM-2B",
4348
+ "system_prompt": "",
4349
+ "roles": [
4350
+ "user",
4351
+ "assistant"
4352
+ ],
4353
+ "stop_token_ids": [
4354
+ 1,
4355
+ 2
4356
+ ],
4357
+ "stop": [
4358
+ "<s>",
4359
+ "</s>"
4360
+ ]
4361
+ }
4362
+ },
4363
+ {
4364
+ "version": 1,
4365
+ "context_length": 4096,
4366
+ "model_name": "minicpm-2b-sft-fp32",
4367
+ "model_lang": [
4368
+ "zh"
4369
+ ],
4370
+ "model_ability": [
4371
+ "chat"
4372
+ ],
4373
+ "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4374
+ "model_specs": [
4375
+ {
4376
+ "model_format": "pytorch",
4377
+ "model_size_in_billions": 2,
4378
+ "quantizations": [
4379
+ "none"
4380
+ ],
4381
+ "model_id": "openbmb/MiniCPM-2B-sft-fp32",
4382
+ "model_revision": "35b90dd57d977b6e5bc4907986fa5b77aa15a82e"
4383
+ }
4384
+ ],
4385
+ "prompt_style": {
4386
+ "style_name": "MINICPM-2B",
4387
+ "system_prompt": "",
4388
+ "roles": [
4389
+ "user",
4390
+ "assistant"
4391
+ ],
4392
+ "stop_token_ids": [
4393
+ 1,
4394
+ 2
4395
+ ],
4396
+ "stop": [
4397
+ "<s>",
4398
+ "</s>"
4399
+ ]
4400
+ }
4401
+ },
4402
+ {
4403
+ "version": 1,
4404
+ "context_length": 4096,
4405
+ "model_name": "minicpm-2b-dpo-bf16",
4406
+ "model_lang": [
4407
+ "zh"
4408
+ ],
4409
+ "model_ability": [
4410
+ "chat"
4411
+ ],
4412
+ "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4413
+ "model_specs": [
4414
+ {
4415
+ "model_format": "pytorch",
4416
+ "model_size_in_billions": 2,
4417
+ "quantizations": [
4418
+ "none"
4419
+ ],
4420
+ "model_id": "openbmb/MiniCPM-2B-dpo-bf16",
4421
+ "model_revision": "f4a3ba49f3f18695945c2a7c12400d4da99da498"
4422
+ }
4423
+ ],
4424
+ "prompt_style": {
4425
+ "style_name": "MINICPM-2B",
4426
+ "system_prompt": "",
4427
+ "roles": [
4428
+ "user",
4429
+ "assistant"
4430
+ ],
4431
+ "stop_token_ids": [
4432
+ 1,
4433
+ 2
4434
+ ],
4435
+ "stop": [
4436
+ "<s>",
4437
+ "</s>"
4438
+ ]
4439
+ }
4440
+ },
4441
+ {
4442
+ "version": 1,
4443
+ "context_length": 4096,
4444
+ "model_name": "minicpm-2b-dpo-fp16",
4445
+ "model_lang": [
4446
+ "zh"
4447
+ ],
4448
+ "model_ability": [
4449
+ "chat"
4450
+ ],
4451
+ "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4452
+ "model_specs": [
4453
+ {
4454
+ "model_format": "pytorch",
4455
+ "model_size_in_billions": 2,
4456
+ "quantizations": [
4457
+ "none"
4458
+ ],
4459
+ "model_id": "openbmb/MiniCPM-2B-dpo-fp16",
4460
+ "model_revision": "e7a50289e4f839674cf8d4a5a2ce032ccacf64ac"
4461
+ }
4462
+ ],
4463
+ "prompt_style": {
4464
+ "style_name": "MINICPM-2B",
4465
+ "system_prompt": "",
4466
+ "roles": [
4467
+ "user",
4468
+ "assistant"
4469
+ ],
4470
+ "stop_token_ids": [
4471
+ 1,
4472
+ 2
4473
+ ],
4474
+ "stop": [
4475
+ "<s>",
4476
+ "</s>"
4477
+ ]
4478
+ }
4479
+ },
4480
+ {
4481
+ "version": 1,
4482
+ "context_length": 4096,
4483
+ "model_name": "minicpm-2b-dpo-fp32",
4484
+ "model_lang": [
4485
+ "zh"
4486
+ ],
4487
+ "model_ability": [
4488
+ "chat"
4489
+ ],
4490
+ "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4491
+ "model_specs": [
4492
+ {
4493
+ "model_format": "pytorch",
4494
+ "model_size_in_billions": 2,
4495
+ "quantizations": [
4496
+ "none"
4497
+ ],
4498
+ "model_id": "openbmb/MiniCPM-2B-dpo-fp32",
4499
+ "model_revision": "b560a1593779b735a84a6daf72fba96ae38da288"
4500
+ }
4501
+ ],
4502
+ "prompt_style": {
4503
+ "style_name": "MINICPM-2B",
4504
+ "system_prompt": "",
4505
+ "roles": [
4506
+ "user",
4507
+ "assistant"
4508
+ ],
4509
+ "stop_token_ids": [
4510
+ 1,
4511
+ 2
4512
+ ],
4513
+ "stop": [
4514
+ "<s>",
4515
+ "</s>"
4516
+ ]
4517
+ }
3891
4518
  }
3892
4519
  ]