xinference 0.11.2.post1__py3-none-any.whl → 0.11.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

@@ -3891,6 +3891,201 @@
3891
3891
  ],
3892
3892
  "model_id": "01-ai/Yi-1.5-34B-Chat",
3893
3893
  "model_revision": "fa695ee438bfcd0ec2b378fa1c7e0dea1b40393e"
3894
+ },
3895
+ {
3896
+ "model_format": "ggufv2",
3897
+ "model_size_in_billions": 6,
3898
+ "quantizations": [
3899
+ "Q3_K_L",
3900
+ "Q4_K_M",
3901
+ "Q5_K_M",
3902
+ "Q6_K",
3903
+ "Q8_0",
3904
+ "f32"
3905
+ ],
3906
+ "model_id": "lmstudio-community/Yi-1.5-6B-Chat-GGUF",
3907
+ "model_file_name_template": "Yi-1.5-6B-Chat-{quantization}.gguf"
3908
+ },
3909
+ {
3910
+ "model_format": "ggufv2",
3911
+ "model_size_in_billions": 9,
3912
+ "quantizations": [
3913
+ "Q3_K_L",
3914
+ "Q4_K_M",
3915
+ "Q5_K_M",
3916
+ "Q6_K",
3917
+ "Q8_0",
3918
+ "f32"
3919
+ ],
3920
+ "model_id": "lmstudio-community/Yi-1.5-9B-Chat-GGUF",
3921
+ "model_file_name_template": "Yi-1.5-9B-Chat-{quantization}.gguf"
3922
+ },
3923
+ {
3924
+ "model_format": "ggufv2",
3925
+ "model_size_in_billions": 34,
3926
+ "quantizations": [
3927
+ "Q2_K",
3928
+ "Q3_K_L",
3929
+ "Q4_K_M",
3930
+ "Q5_K_M",
3931
+ "Q6_K",
3932
+ "Q8_0"
3933
+ ],
3934
+ "model_id": "lmstudio-community/Yi-1.5-34B-Chat-GGUF",
3935
+ "model_file_name_template": "Yi-1.5-34B-Chat-{quantization}.gguf"
3936
+ },
3937
+ {
3938
+ "model_format": "gptq",
3939
+ "model_size_in_billions": 6,
3940
+ "quantizations": [
3941
+ "Int4"
3942
+ ],
3943
+ "model_id": "modelscope/Yi-1.5-6B-Chat-GPTQ",
3944
+ "model_revision": "2ad3a602e64d1c79e28e6e92beced2935047367c"
3945
+ },
3946
+ {
3947
+ "model_format": "gptq",
3948
+ "model_size_in_billions": 9,
3949
+ "quantizations": [
3950
+ "Int4"
3951
+ ],
3952
+ "model_id": "modelscope/Yi-1.5-9B-Chat-GPTQ",
3953
+ "model_revision": "76f47d16982923f7b6674c4e23ddac7c3b1d2e03"
3954
+ },
3955
+ {
3956
+ "model_format": "gptq",
3957
+ "model_size_in_billions": 34,
3958
+ "quantizations": [
3959
+ "Int4"
3960
+ ],
3961
+ "model_id": "modelscope/Yi-1.5-34B-Chat-GPTQ",
3962
+ "model_revision": "173fb4036265b2dac1d6296a8e2fd2f652c19968"
3963
+ },
3964
+ {
3965
+ "model_format": "awq",
3966
+ "model_size_in_billions": 6,
3967
+ "quantizations": [
3968
+ "Int4"
3969
+ ],
3970
+ "model_id": "modelscope/Yi-1.5-6B-Chat-AWQ",
3971
+ "model_revision": "23bf37f1666874e15e239422de0d3948d8735fa9"
3972
+ },
3973
+ {
3974
+ "model_format": "awq",
3975
+ "model_size_in_billions": 9,
3976
+ "quantizations": [
3977
+ "Int4"
3978
+ ],
3979
+ "model_id": "modelscope/Yi-1.5-9B-Chat-AWQ",
3980
+ "model_revision": "2605f388332672789eae1f422644add2901b433f"
3981
+ },
3982
+ {
3983
+ "model_format": "awq",
3984
+ "model_size_in_billions": 34,
3985
+ "quantizations": [
3986
+ "Int4"
3987
+ ],
3988
+ "model_id": "modelscope/Yi-1.5-34B-Chat-AWQ",
3989
+ "model_revision": "26234fea6ac49d456f32f8017289021fb1087a04"
3990
+ }
3991
+ ],
3992
+ "prompt_style": {
3993
+ "style_name": "CHATML",
3994
+ "system_prompt": "",
3995
+ "roles": [
3996
+ "<|im_start|>user",
3997
+ "<|im_start|>assistant"
3998
+ ],
3999
+ "intra_message_sep": "<|im_end|>",
4000
+ "inter_message_sep": "",
4001
+ "stop_token_ids": [
4002
+ 2,
4003
+ 6,
4004
+ 7,
4005
+ 8
4006
+ ],
4007
+ "stop": [
4008
+ "<|endoftext|>",
4009
+ "<|im_start|>",
4010
+ "<|im_end|>",
4011
+ "<|im_sep|>"
4012
+ ]
4013
+ }
4014
+ },
4015
+ {
4016
+ "version": 1,
4017
+ "context_length": 16384,
4018
+ "model_name": "Yi-1.5-chat-16k",
4019
+ "model_lang": [
4020
+ "en",
4021
+ "zh"
4022
+ ],
4023
+ "model_ability": [
4024
+ "chat"
4025
+ ],
4026
+ "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
4027
+ "model_specs": [
4028
+ {
4029
+ "model_format": "pytorch",
4030
+ "model_size_in_billions": 9,
4031
+ "quantizations": [
4032
+ "4-bit",
4033
+ "8-bit",
4034
+ "none"
4035
+ ],
4036
+ "model_id": "01-ai/Yi-1.5-9B-Chat-16K",
4037
+ "model_revision": "551220fb24d69b6bfec5defceeb160395ce5da8d"
4038
+ },
4039
+ {
4040
+ "model_format": "pytorch",
4041
+ "model_size_in_billions": 34,
4042
+ "quantizations": [
4043
+ "4-bit",
4044
+ "8-bit",
4045
+ "none"
4046
+ ],
4047
+ "model_id": "01-ai/Yi-1.5-34B-Chat-16K",
4048
+ "model_revision": "dfdbc67be750972bfcc1ac7ffd7fe48689c856fd"
4049
+ },
4050
+ {
4051
+ "model_format": "ggufv2",
4052
+ "model_size_in_billions": 9,
4053
+ "quantizations": [
4054
+ "Q2_K",
4055
+ "Q3_K_L",
4056
+ "Q3_K_M",
4057
+ "Q3_K_S",
4058
+ "Q4_0",
4059
+ "Q4_1",
4060
+ "Q4_K_M",
4061
+ "Q4_K_S",
4062
+ "Q5_0",
4063
+ "Q5_1",
4064
+ "Q5_K_M",
4065
+ "Q5_K_S",
4066
+ "Q6_K",
4067
+ "Q8_0"
4068
+ ],
4069
+ "model_id": "QuantFactory/Yi-1.5-9B-Chat-16K-GGUF",
4070
+ "model_file_name_template": "Yi-1.5-9B-Chat-16K.{quantization}.gguf"
4071
+ },
4072
+ {
4073
+ "model_format": "ggufv2",
4074
+ "model_size_in_billions": 34,
4075
+ "quantizations": [
4076
+ "Q2_K",
4077
+ "Q3_K_L",
4078
+ "Q3_K_M",
4079
+ "Q3_K_S",
4080
+ "Q4_K_M",
4081
+ "Q4_K_S",
4082
+ "Q5_K_M",
4083
+ "Q5_K_S",
4084
+ "Q6_K",
4085
+ "Q8_0"
4086
+ ],
4087
+ "model_id": "bartowski/Yi-1.5-34B-Chat-16K-GGUF",
4088
+ "model_file_name_template": "Yi-1.5-34B-Chat-16K-{quantization}.gguf"
3894
4089
  }
3895
4090
  ],
3896
4091
  "prompt_style": {
@@ -6009,23 +6204,32 @@
6009
6204
  ],
6010
6205
  "model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
6011
6206
  "model_specs": [
6207
+ {
6208
+ "model_format": "pytorch",
6209
+ "model_size_in_billions": 2,
6210
+ "quantizations": [
6211
+ "none"
6212
+ ],
6213
+ "model_id": "OpenGVLab/Mini-InternVL-Chat-2B-V1-5",
6214
+ "model_revision": "ce3f67acff17281bacbf4b156f402a0580fb9605"
6215
+ },
6012
6216
  {
6013
- "model_format": "pytorch",
6014
- "model_size_in_billions": 26,
6015
- "quantizations": [
6016
- "none"
6017
- ],
6018
- "model_id": "OpenGVLab/InternVL-Chat-V1-5",
6019
- "model_revision": "e822119e5806946ce128043023a73d715ecabf8d"
6217
+ "model_format": "pytorch",
6218
+ "model_size_in_billions": 26,
6219
+ "quantizations": [
6220
+ "none"
6221
+ ],
6222
+ "model_id": "OpenGVLab/InternVL-Chat-V1-5",
6223
+ "model_revision": "e822119e5806946ce128043023a73d715ecabf8d"
6020
6224
  },
6021
6225
  {
6022
- "model_format": "pytorch",
6023
- "model_size_in_billions": 26,
6024
- "quantizations": [
6025
- "Int8"
6026
- ],
6027
- "model_id": "OpenGVLab/InternVL-Chat-V1-5-{quantization}",
6028
- "model_revision": "acaaed06937c603ab04f084216ecb0268160f538"
6226
+ "model_format": "pytorch",
6227
+ "model_size_in_billions": 26,
6228
+ "quantizations": [
6229
+ "Int8"
6230
+ ],
6231
+ "model_id": "OpenGVLab/InternVL-Chat-V1-5-{quantization}",
6232
+ "model_revision": "acaaed06937c603ab04f084216ecb0268160f538"
6029
6233
  }
6030
6234
  ],
6031
6235
  "prompt_style": {
@@ -6043,11 +6247,11 @@
6043
6247
  "<|im_end|>"
6044
6248
  ]
6045
6249
  }
6046
- },
6250
+ },
6047
6251
  {
6048
6252
  "version": 1,
6049
- "context_length": 32768,
6050
- "model_name": "mini-internvl-chat",
6253
+ "context_length": 8192,
6254
+ "model_name": "cogvlm2",
6051
6255
  "model_lang": [
6052
6256
  "en",
6053
6257
  "zh"
@@ -6056,32 +6260,125 @@
6056
6260
  "chat",
6057
6261
  "vision"
6058
6262
  ],
6059
- "model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
6263
+ "model_description": "CogVLM2 have achieved good results in many lists compared to the previous generation of CogVLM open source models. Its excellent performance can compete with some non-open source models.",
6060
6264
  "model_specs": [
6061
- {
6062
- "model_format": "pytorch",
6063
- "model_size_in_billions": 2,
6064
- "quantizations": [
6065
- "none"
6066
- ],
6067
- "model_id": "OpenGVLab/Mini-InternVL-Chat-2B-V1-5",
6068
- "model_revision": "ce3f67acff17281bacbf4b156f402a0580fb9605"
6069
- }
6265
+ {
6266
+ "model_format": "pytorch",
6267
+ "model_size_in_billions": 20,
6268
+ "quantizations": [
6269
+ "none"
6270
+ ],
6271
+ "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B",
6272
+ "model_revision": "d88b352bce5ee58a289b1ac8328553eb31efa2ef"
6273
+ },
6274
+ {
6275
+ "model_format": "pytorch",
6276
+ "model_size_in_billions": 20,
6277
+ "quantizations": [
6278
+ "int4"
6279
+ ],
6280
+ "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B-{quantization}",
6281
+ "model_revision": "7863e362174f4718c2fe9cba4befd0b580a3194f"
6282
+ }
6070
6283
  ],
6071
6284
  "prompt_style": {
6072
- "style_name": "INTERNLM2",
6073
- "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
6074
- "roles": [
6075
- "<|im_start|>user",
6076
- "<|im_start|>assistant"
6285
+ "style_name": "LLAMA3",
6286
+ "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
6287
+ "roles": [
6288
+ "user",
6289
+ "assistant"
6290
+ ],
6291
+ "intra_message_sep": "\n\n",
6292
+ "inter_message_sep": "<|eot_id|>",
6293
+ "stop_token_ids": [
6294
+ 128001,
6295
+ 128009
6296
+ ],
6297
+ "stop": [
6298
+ "<|end_of_text|>",
6299
+ "<|eot_id|>"
6300
+ ]
6301
+ }
6302
+ },
6303
+ {
6304
+ "version": 1,
6305
+ "context_length": 8192,
6306
+ "model_name": "telechat",
6307
+ "model_lang": [
6308
+ "en",
6309
+ "zh"
6310
+ ],
6311
+ "model_ability": [
6312
+ "chat"
6313
+ ],
6314
+ "model_description": "The TeleChat is a large language model developed and trained by China Telecom Artificial Intelligence Technology Co., LTD. The 7B model base is trained with 1.5 trillion Tokens and 3 trillion Tokens and Chinese high-quality corpus.",
6315
+ "model_specs": [
6316
+ {
6317
+ "model_format": "pytorch",
6318
+ "model_size_in_billions": 7,
6319
+ "quantizations": [
6320
+ "4-bit",
6321
+ "8-bit",
6322
+ "none"
6077
6323
  ],
6078
- "intra_message_sep": "<|im_end|>",
6079
- "stop_token_ids": [
6080
- 92542
6324
+ "model_id": "Tele-AI/telechat-7B"
6325
+ },
6326
+ {
6327
+ "model_format": "gptq",
6328
+ "model_size_in_billions": 7,
6329
+ "quantizations": [
6330
+ "int4",
6331
+ "int8"
6081
6332
  ],
6082
- "stop": [
6083
- "<|im_end|>"
6084
- ]
6333
+ "model_id": "Tele-AI/telechat-7B-{quantization}"
6334
+ },
6335
+ {
6336
+ "model_format": "pytorch",
6337
+ "model_size_in_billions": 12,
6338
+ "quantizations": [
6339
+ "4-bit",
6340
+ "8-bit",
6341
+ "none"
6342
+ ],
6343
+ "model_id": "Tele-AI/TeleChat-12B"
6344
+ },
6345
+ {
6346
+ "model_format": "gptq",
6347
+ "model_size_in_billions": 12,
6348
+ "quantizations": [
6349
+ "int4",
6350
+ "int8"
6351
+ ],
6352
+ "model_id": "Tele-AI/TeleChat-12B-{quantization}"
6353
+ },
6354
+ {
6355
+ "model_format": "pytorch",
6356
+ "model_size_in_billions": 52,
6357
+ "quantizations": [
6358
+ "4-bit",
6359
+ "8-bit",
6360
+ "none"
6361
+ ],
6362
+ "model_id": "Tele-AI/TeleChat-52B"
6363
+ }
6364
+ ],
6365
+ "prompt_style": {
6366
+ "style_name": "NO_COLON_TWO",
6367
+ "system_prompt": "You are a helpful assistant.",
6368
+ "roles": [
6369
+ "<_user>",
6370
+ "<_bot>"
6371
+ ],
6372
+ "intra_message_sep": "",
6373
+ "inter_message_sep": "",
6374
+ "stop": [
6375
+ "<_end>",
6376
+ "<_start>"
6377
+ ],
6378
+ "stop_token_ids": [
6379
+ 160133,
6380
+ 160132
6381
+ ]
6085
6382
  }
6086
- }
6383
+ }
6087
6384
  ]
@@ -1496,6 +1496,127 @@
1496
1496
  "model_hub": "modelscope",
1497
1497
  "model_id": "01ai/Yi-1.5-34B-Chat",
1498
1498
  "model_revision": "master"
1499
+ },
1500
+ {
1501
+ "model_format": "gptq",
1502
+ "model_size_in_billions": 6,
1503
+ "quantizations": [
1504
+ "Int4"
1505
+ ],
1506
+ "model_id": "AI-ModelScope/Yi-1.5-6B-Chat-GPTQ",
1507
+ "model_hub": "modelscope",
1508
+ "model_revision": "master"
1509
+ },
1510
+ {
1511
+ "model_format": "gptq",
1512
+ "model_size_in_billions": 9,
1513
+ "quantizations": [
1514
+ "Int4"
1515
+ ],
1516
+ "model_id": "AI-ModelScope/Yi-1.5-9B-Chat-GPTQ",
1517
+ "model_hub": "modelscope",
1518
+ "model_revision": "master"
1519
+ },
1520
+ {
1521
+ "model_format": "gptq",
1522
+ "model_size_in_billions": 34,
1523
+ "quantizations": [
1524
+ "Int4"
1525
+ ],
1526
+ "model_id": "AI-ModelScope/Yi-1.5-34B-Chat-GPTQ",
1527
+ "model_hub": "modelscope",
1528
+ "model_revision": "master"
1529
+ },
1530
+ {
1531
+ "model_format": "awq",
1532
+ "model_size_in_billions": 6,
1533
+ "quantizations": [
1534
+ "Int4"
1535
+ ],
1536
+ "model_id": "AI-ModelScope/Yi-1.5-6B-Chat-AWQ",
1537
+ "model_hub": "modelscope",
1538
+ "model_revision": "master"
1539
+ },
1540
+ {
1541
+ "model_format": "awq",
1542
+ "model_size_in_billions": 9,
1543
+ "quantizations": [
1544
+ "Int4"
1545
+ ],
1546
+ "model_id": "AI-ModelScope/Yi-1.5-9B-Chat-AWQ",
1547
+ "model_hub": "modelscope",
1548
+ "model_revision": "master"
1549
+ },
1550
+ {
1551
+ "model_format": "awq",
1552
+ "model_size_in_billions": 34,
1553
+ "quantizations": [
1554
+ "Int4"
1555
+ ],
1556
+ "model_id": "AI-ModelScope/Yi-1.5-34B-Chat-AWQ",
1557
+ "model_hub": "modelscope",
1558
+ "model_revision": "master"
1559
+ }
1560
+ ],
1561
+ "prompt_style": {
1562
+ "style_name": "CHATML",
1563
+ "system_prompt": "",
1564
+ "roles": [
1565
+ "<|im_start|>user",
1566
+ "<|im_start|>assistant"
1567
+ ],
1568
+ "intra_message_sep": "<|im_end|>",
1569
+ "inter_message_sep": "",
1570
+ "stop_token_ids": [
1571
+ 2,
1572
+ 6,
1573
+ 7,
1574
+ 8
1575
+ ],
1576
+ "stop": [
1577
+ "<|endoftext|>",
1578
+ "<|im_start|>",
1579
+ "<|im_end|>",
1580
+ "<|im_sep|>"
1581
+ ]
1582
+ }
1583
+ },
1584
+ {
1585
+ "version": 1,
1586
+ "context_length": 16384,
1587
+ "model_name": "Yi-1.5-chat-16k",
1588
+ "model_lang": [
1589
+ "en",
1590
+ "zh"
1591
+ ],
1592
+ "model_ability": [
1593
+ "chat"
1594
+ ],
1595
+ "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
1596
+ "model_specs": [
1597
+ {
1598
+ "model_format": "pytorch",
1599
+ "model_size_in_billions": 9,
1600
+ "quantizations": [
1601
+ "4-bit",
1602
+ "8-bit",
1603
+ "none"
1604
+ ],
1605
+ "model_hub": "modelscope",
1606
+ "model_id": "01ai/Yi-1.5-9B-Chat-16K",
1607
+ "model_revision": "master"
1608
+ },
1609
+ {
1610
+ "model_format": "pytorch",
1611
+ "model_size_in_billions": 34,
1612
+ "quantizations": [
1613
+ "4-bit",
1614
+ "8-bit",
1615
+ "none"
1616
+ ],
1617
+ "model_hub": "modelscope",
1618
+ "model_id": "01ai/Yi-1.5-34B-Chat-16K",
1619
+ "model_revision": "master"
1499
1620
  }
1500
1621
  ],
1501
1622
  "prompt_style": {
@@ -3739,5 +3860,150 @@
3739
3860
  "<|im_end|>"
3740
3861
  ]
3741
3862
  }
3742
- }
3863
+ },
3864
+ {
3865
+ "version": 1,
3866
+ "context_length": 8192,
3867
+ "model_name": "cogvlm2",
3868
+ "model_lang": [
3869
+ "en",
3870
+ "zh"
3871
+ ],
3872
+ "model_ability": [
3873
+ "chat",
3874
+ "vision"
3875
+ ],
3876
+ "model_description": "CogVLM2 have achieved good results in many lists compared to the previous generation of CogVLM open source models. Its excellent performance can compete with some non-open source models.",
3877
+ "model_specs": [
3878
+ {
3879
+ "model_format": "pytorch",
3880
+ "model_size_in_billions": 20,
3881
+ "quantizations": [
3882
+ "none"
3883
+ ],
3884
+ "model_hub": "modelscope",
3885
+ "model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B",
3886
+ "model_revision": "master"
3887
+ },
3888
+ {
3889
+ "model_format": "pytorch",
3890
+ "model_size_in_billions": 20,
3891
+ "quantizations": [
3892
+ "int4"
3893
+ ],
3894
+ "model_hub": "modelscope",
3895
+ "model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B-{quantization}",
3896
+ "model_revision": "master"
3897
+ }
3898
+ ],
3899
+ "prompt_style": {
3900
+ "style_name": "LLAMA3",
3901
+ "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
3902
+ "roles": [
3903
+ "user",
3904
+ "assistant"
3905
+ ],
3906
+ "intra_message_sep": "\n\n",
3907
+ "inter_message_sep": "<|eot_id|>",
3908
+ "stop_token_ids": [
3909
+ 128001,
3910
+ 128009
3911
+ ],
3912
+ "stop": [
3913
+ "<|end_of_text|>",
3914
+ "<|eot_id|>"
3915
+ ]
3916
+ }
3917
+ },
3918
+ {
3919
+ "version": 1,
3920
+ "context_length": 8192,
3921
+ "model_name": "telechat",
3922
+ "model_lang": [
3923
+ "en",
3924
+ "zh"
3925
+ ],
3926
+ "model_ability": [
3927
+ "chat"
3928
+ ],
3929
+ "model_description": "The TeleChat is a large language model developed and trained by China Telecom Artificial Intelligence Technology Co., LTD. The 7B model base is trained with 1.5 trillion Tokens and 3 trillion Tokens and Chinese high-quality corpus.",
3930
+ "model_specs": [
3931
+ {
3932
+ "model_format": "pytorch",
3933
+ "model_size_in_billions": 7,
3934
+ "quantizations": [
3935
+ "4-bit",
3936
+ "8-bit",
3937
+ "none"
3938
+ ],
3939
+ "model_id": "TeleAI/telechat-7B",
3940
+ "model_hub": "modelscope",
3941
+ "model_revision": "master"
3942
+ },
3943
+ {
3944
+ "model_format": "gptq",
3945
+ "model_size_in_billions": 7,
3946
+ "quantizations": [
3947
+ "int4",
3948
+ "int8"
3949
+ ],
3950
+ "model_id": "TeleAI/telechat-7B-{quantization}",
3951
+ "model_hub": "modelscope",
3952
+ "model_revision": "master"
3953
+ },
3954
+ {
3955
+ "model_format": "pytorch",
3956
+ "model_size_in_billions": 12,
3957
+ "quantizations": [
3958
+ "4-bit",
3959
+ "8-bit",
3960
+ "none"
3961
+ ],
3962
+ "model_id": "TeleAI/TeleChat-12B",
3963
+ "model_hub": "modelscope",
3964
+ "model_revision": "master"
3965
+ },
3966
+ {
3967
+ "model_format": "gptq",
3968
+ "model_size_in_billions": 12,
3969
+ "quantizations": [
3970
+ "int4",
3971
+ "int8"
3972
+ ],
3973
+ "model_id": "TeleAI/TeleChat-12B-{quantization}",
3974
+ "model_hub": "modelscope",
3975
+ "model_revision": "master"
3976
+ },
3977
+ {
3978
+ "model_format": "pytorch",
3979
+ "model_size_in_billions": 52,
3980
+ "quantizations": [
3981
+ "4-bit",
3982
+ "8-bit",
3983
+ "none"
3984
+ ],
3985
+ "model_id": "TeleAI/TeleChat-52B",
3986
+ "model_hub": "modelscope",
3987
+ "model_revision": "master"
3988
+ }
3989
+ ],
3990
+ "prompt_style": {
3991
+ "style_name": "NO_COLON_TWO",
3992
+ "system_prompt": "You are a helpful assistant.",
3993
+ "roles": [
3994
+ "<_user>",
3995
+ "<_bot>"
3996
+ ],
3997
+ "intra_message_sep": "",
3998
+ "inter_message_sep": "",
3999
+ "stop": [
4000
+ "<_end>",
4001
+ "<_start>"
4002
+ ],
4003
+ "stop_token_ids": [
4004
+ 160133,
4005
+ 160132
4006
+ ]
4007
+ }
4008
+ }
3743
4009
  ]