xinference 0.9.4__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (103) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +47 -18
  3. xinference/api/oauth2/types.py +1 -0
  4. xinference/api/restful_api.py +34 -7
  5. xinference/client/oscar/actor_client.py +4 -3
  6. xinference/client/restful/restful_client.py +20 -4
  7. xinference/conftest.py +13 -2
  8. xinference/core/supervisor.py +48 -1
  9. xinference/core/worker.py +139 -20
  10. xinference/deploy/cmdline.py +119 -20
  11. xinference/model/embedding/core.py +1 -2
  12. xinference/model/llm/__init__.py +4 -6
  13. xinference/model/llm/ggml/llamacpp.py +2 -10
  14. xinference/model/llm/llm_family.json +877 -13
  15. xinference/model/llm/llm_family.py +15 -0
  16. xinference/model/llm/llm_family_modelscope.json +571 -0
  17. xinference/model/llm/pytorch/chatglm.py +2 -0
  18. xinference/model/llm/pytorch/core.py +22 -26
  19. xinference/model/llm/pytorch/deepseek_vl.py +232 -0
  20. xinference/model/llm/pytorch/internlm2.py +2 -0
  21. xinference/model/llm/pytorch/omnilmm.py +153 -0
  22. xinference/model/llm/pytorch/qwen_vl.py +2 -0
  23. xinference/model/llm/pytorch/yi_vl.py +4 -2
  24. xinference/model/llm/utils.py +53 -5
  25. xinference/model/llm/vllm/core.py +54 -6
  26. xinference/model/rerank/core.py +3 -0
  27. xinference/thirdparty/deepseek_vl/__init__.py +31 -0
  28. xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
  29. xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
  30. xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
  31. xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
  32. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
  33. xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
  34. xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
  35. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
  36. xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
  37. xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
  38. xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
  39. xinference/thirdparty/omnilmm/__init__.py +0 -0
  40. xinference/thirdparty/omnilmm/chat.py +216 -0
  41. xinference/thirdparty/omnilmm/constants.py +4 -0
  42. xinference/thirdparty/omnilmm/conversation.py +332 -0
  43. xinference/thirdparty/omnilmm/model/__init__.py +1 -0
  44. xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
  45. xinference/thirdparty/omnilmm/model/resampler.py +166 -0
  46. xinference/thirdparty/omnilmm/model/utils.py +563 -0
  47. xinference/thirdparty/omnilmm/train/__init__.py +13 -0
  48. xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
  49. xinference/thirdparty/omnilmm/utils.py +134 -0
  50. xinference/types.py +15 -19
  51. xinference/web/ui/build/asset-manifest.json +3 -3
  52. xinference/web/ui/build/index.html +1 -1
  53. xinference/web/ui/build/static/js/main.76ef2b17.js +3 -0
  54. xinference/web/ui/build/static/js/main.76ef2b17.js.map +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/15e2cf8cd8d0989719b6349428ff576f9009ff4c2dcc52378be0bd938e82495e.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/35d0e4a317e5582cbb79d901302e9d706520ac53f8a734c2fd8bfde6eb5a4f02.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/3c2f277c93c5f1638e08db38df0d0fb4e58d1c5571aea03241a5c04ff4094704.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/b9cbcb6d77ba21b22c6950b6fb5b305d23c19cf747f99f7d48b6b046f8f7b1b0.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/d076fd56cf3b15ed2433e3744b98c6b4e4410a19903d1db4de5bba0e1a1b3347.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/daad8131d91134f6d7aef895a0c9c32e1cb928277cb5aa66c01028126d215be0.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +1 -0
  69. xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +1 -0
  70. xinference/web/ui/node_modules/.cache/babel-loader/e6eccc9aa641e7da833492e27846dc965f9750281420977dc84654ca6ed221e4.json +1 -0
  71. xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +1 -0
  72. xinference/web/ui/node_modules/.cache/babel-loader/f16aec63602a77bd561d0e67fa00b76469ac54b8033754bba114ec5eb3257964.json +1 -0
  73. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/METADATA +25 -12
  74. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/RECORD +79 -58
  75. xinference/model/llm/ggml/ctransformers.py +0 -281
  76. xinference/model/llm/ggml/ctransformers_util.py +0 -161
  77. xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
  78. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
  79. xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +0 -1
  80. xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +0 -1
  81. xinference/web/ui/node_modules/.cache/babel-loader/18e5d5422e2464abf4a3e6d38164570e2e426e0a921e9a2628bbae81b18da353.json +0 -1
  82. xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +0 -1
  86. xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +0 -1
  87. xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +0 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +0 -1
  89. xinference/web/ui/node_modules/.cache/babel-loader/98b7ef307f436affe13d75a4f265b27e828ccc2b10ffae6513abe2681bc11971.json +0 -1
  90. xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +0 -1
  91. xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +0 -1
  92. xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +0 -1
  93. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
  94. xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +0 -1
  95. xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +0 -1
  96. xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +0 -1
  97. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
  98. xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +0 -1
  99. /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.76ef2b17.js.LICENSE.txt} +0 -0
  100. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/LICENSE +0 -0
  101. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/WHEEL +0 -0
  102. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/entry_points.txt +0 -0
  103. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/top_level.txt +0 -0
@@ -688,6 +688,49 @@
688
688
  ]
689
689
  }
690
690
  },
691
+ {
692
+ "version": 1,
693
+ "context_length": 131072,
694
+ "model_name": "chatglm3-128k",
695
+ "model_lang": [
696
+ "en",
697
+ "zh"
698
+ ],
699
+ "model_ability": [
700
+ "chat"
701
+ ],
702
+ "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
703
+ "model_specs": [
704
+ {
705
+ "model_format": "pytorch",
706
+ "model_size_in_billions": 6,
707
+ "quantizations": [
708
+ "4-bit",
709
+ "8-bit",
710
+ "none"
711
+ ],
712
+ "model_id": "THUDM/chatglm3-6b-128k",
713
+ "model_revision": "f0afbe671009abc9e31182170cf60636d5546cda"
714
+ }
715
+ ],
716
+ "prompt_style": {
717
+ "style_name": "CHATGLM3",
718
+ "system_prompt": "",
719
+ "roles": [
720
+ "user",
721
+ "assistant"
722
+ ],
723
+ "stop_token_ids": [
724
+ 64795,
725
+ 64797,
726
+ 2
727
+ ],
728
+ "stop": [
729
+ "<|user|>",
730
+ "<|observation|>"
731
+ ]
732
+ }
733
+ },
691
734
  {
692
735
  "version": 1,
693
736
  "context_length": 2048,
@@ -870,6 +913,38 @@
870
913
  "model_id": "meta-llama/Llama-2-7b-chat-hf",
871
914
  "model_revision": "08751db2aca9bf2f7f80d2e516117a53d7450235"
872
915
  },
916
+ {
917
+ "model_format": "gptq",
918
+ "model_size_in_billions": 7,
919
+ "quantizations": [
920
+ "Int4"
921
+ ],
922
+ "model_id": "TheBloke/Llama-2-7B-Chat-GPTQ"
923
+ },
924
+ {
925
+ "model_format": "gptq",
926
+ "model_size_in_billions": 70,
927
+ "quantizations": [
928
+ "Int4"
929
+ ],
930
+ "model_id": "TheBloke/Llama-2-70B-Chat-GPTQ"
931
+ },
932
+ {
933
+ "model_format": "awq",
934
+ "model_size_in_billions": 70,
935
+ "quantizations": [
936
+ "Int4"
937
+ ],
938
+ "model_id": "TheBloke/Llama-2-70B-Chat-AWQ"
939
+ },
940
+ {
941
+ "model_format": "awq",
942
+ "model_size_in_billions": 7,
943
+ "quantizations": [
944
+ "Int4"
945
+ ],
946
+ "model_id": "TheBloke/Llama-2-7B-Chat-AWQ"
947
+ },
873
948
  {
874
949
  "model_format": "pytorch",
875
950
  "model_size_in_billions": 13,
@@ -881,6 +956,22 @@
881
956
  "model_id": "meta-llama/Llama-2-13b-chat-hf",
882
957
  "model_revision": "0ba94ac9b9e1d5a0037780667e8b219adde1908c"
883
958
  },
959
+ {
960
+ "model_format": "gptq",
961
+ "model_size_in_billions": 13,
962
+ "quantizations": [
963
+ "Int4"
964
+ ],
965
+ "model_id": "TheBloke/Llama-2-13B-chat-GPTQ"
966
+ },
967
+ {
968
+ "model_format": "awq",
969
+ "model_size_in_billions": 13,
970
+ "quantizations": [
971
+ "Int4"
972
+ ],
973
+ "model_id": "TheBloke/Llama-2-13B-chat-AWQ"
974
+ },
884
975
  {
885
976
  "model_format": "pytorch",
886
977
  "model_size_in_billions": 70,
@@ -1002,6 +1093,22 @@
1002
1093
  "model_id": "TheBloke/Llama-2-7B-GGML",
1003
1094
  "model_file_name_template": "llama-2-7b.ggmlv3.{quantization}.bin"
1004
1095
  },
1096
+ {
1097
+ "model_format": "gptq",
1098
+ "model_size_in_billions": 7,
1099
+ "quantizations": [
1100
+ "Int4"
1101
+ ],
1102
+ "model_id": "TheBloke/Llama-2-7B-GPTQ"
1103
+ },
1104
+ {
1105
+ "model_format": "awq",
1106
+ "model_size_in_billions": 7,
1107
+ "quantizations": [
1108
+ "Int4"
1109
+ ],
1110
+ "model_id": "TheBloke/Llama-2-7B-AWQ"
1111
+ },
1005
1112
  {
1006
1113
  "model_format": "ggmlv3",
1007
1114
  "model_size_in_billions": 13,
@@ -1068,6 +1175,22 @@
1068
1175
  "model_id": "meta-llama/Llama-2-13b-hf",
1069
1176
  "model_revision": "db6b8eb1feabb38985fdf785a89895959e944936"
1070
1177
  },
1178
+ {
1179
+ "model_format": "gptq",
1180
+ "model_size_in_billions": 13,
1181
+ "quantizations": [
1182
+ "Int4"
1183
+ ],
1184
+ "model_id": "TheBloke/Llama-2-13B-GPTQ"
1185
+ },
1186
+ {
1187
+ "model_format": "awq",
1188
+ "model_size_in_billions": 13,
1189
+ "quantizations": [
1190
+ "Int4"
1191
+ ],
1192
+ "model_id": "TheBloke/Llama-2-13B-AWQ"
1193
+ },
1071
1194
  {
1072
1195
  "model_format": "pytorch",
1073
1196
  "model_size_in_billions": 70,
@@ -1078,6 +1201,22 @@
1078
1201
  ],
1079
1202
  "model_id": "meta-llama/Llama-2-70b-hf",
1080
1203
  "model_revision": "cc8aa03a000ff08b4d5c5b39673321a2a396c396"
1204
+ },
1205
+ {
1206
+ "model_format": "gptq",
1207
+ "model_size_in_billions": 70,
1208
+ "quantizations": [
1209
+ "Int4"
1210
+ ],
1211
+ "model_id": "TheBloke/Llama-2-70B-GPTQ"
1212
+ },
1213
+ {
1214
+ "model_format": "awq",
1215
+ "model_size_in_billions": 70,
1216
+ "quantizations": [
1217
+ "Int4"
1218
+ ],
1219
+ "model_id": "TheBloke/Llama-2-70B-AWQ"
1081
1220
  }
1082
1221
  ]
1083
1222
  },
@@ -1466,6 +1605,16 @@
1466
1605
  ],
1467
1606
  "model_id": "Qwen/Qwen1.5-14B-Chat"
1468
1607
  },
1608
+ {
1609
+ "model_format": "pytorch",
1610
+ "model_size_in_billions": 32,
1611
+ "quantizations": [
1612
+ "4-bit",
1613
+ "8-bit",
1614
+ "none"
1615
+ ],
1616
+ "model_id": "Qwen/Qwen1.5-32B-Chat"
1617
+ },
1469
1618
  {
1470
1619
  "model_format": "pytorch",
1471
1620
  "model_size_in_billions": 72,
@@ -1521,6 +1670,14 @@
1521
1670
  ],
1522
1671
  "model_id": "Qwen/Qwen1.5-14B-Chat-GPTQ-{quantization}"
1523
1672
  },
1673
+ {
1674
+ "model_format": "gptq",
1675
+ "model_size_in_billions": 32,
1676
+ "quantizations": [
1677
+ "Int4"
1678
+ ],
1679
+ "model_id": "Qwen/Qwen1.5-32B-Chat-GPTQ-{quantization}"
1680
+ },
1524
1681
  {
1525
1682
  "model_format": "gptq",
1526
1683
  "model_size_in_billions": 72,
@@ -1570,6 +1727,14 @@
1570
1727
  ],
1571
1728
  "model_id": "Qwen/Qwen1.5-14B-Chat-AWQ"
1572
1729
  },
1730
+ {
1731
+ "model_format": "awq",
1732
+ "model_size_in_billions": 32,
1733
+ "quantizations": [
1734
+ "Int4"
1735
+ ],
1736
+ "model_id": "Qwen/Qwen1.5-32B-Chat-AWQ"
1737
+ },
1573
1738
  {
1574
1739
  "model_format": "awq",
1575
1740
  "model_size_in_billions": 72,
@@ -1658,6 +1823,22 @@
1658
1823
  "model_id": "Qwen/Qwen1.5-14B-Chat-GGUF",
1659
1824
  "model_file_name_template": "qwen1_5-14b-chat-{quantization}.gguf"
1660
1825
  },
1826
+ {
1827
+ "model_format": "ggufv2",
1828
+ "model_size_in_billions": 32,
1829
+ "quantizations": [
1830
+ "q2_k",
1831
+ "q3_k_m",
1832
+ "q4_0",
1833
+ "q4_k_m",
1834
+ "q5_0",
1835
+ "q5_k_m",
1836
+ "q6_k",
1837
+ "q8_0"
1838
+ ],
1839
+ "model_id": "Qwen/Qwen1.5-32B-Chat-GGUF",
1840
+ "model_file_name_template": "qwen1_5-32b-chat-{quantization}.gguf"
1841
+ },
1661
1842
  {
1662
1843
  "model_format": "ggufv2",
1663
1844
  "model_size_in_billions": 72,
@@ -1697,6 +1878,58 @@
1697
1878
  ]
1698
1879
  }
1699
1880
  },
1881
+ {
1882
+ "version": 1,
1883
+ "context_length": 32768,
1884
+ "model_name": "qwen1.5-moe-chat",
1885
+ "model_lang": [
1886
+ "en",
1887
+ "zh"
1888
+ ],
1889
+ "model_ability": [
1890
+ "chat"
1891
+ ],
1892
+ "model_description": "Qwen1.5-MoE is a transformer-based MoE decoder-only language model pretrained on a large amount of data.",
1893
+ "model_specs": [
1894
+ {
1895
+ "model_format": "pytorch",
1896
+ "model_size_in_billions": "2_7",
1897
+ "quantizations": [
1898
+ "4-bit",
1899
+ "8-bit",
1900
+ "none"
1901
+ ],
1902
+ "model_id": "Qwen/Qwen1.5-MoE-A2.7B-Chat"
1903
+ },
1904
+ {
1905
+ "model_format": "gptq",
1906
+ "model_size_in_billions": "2_7",
1907
+ "quantizations": [
1908
+ "Int4"
1909
+ ],
1910
+ "model_id": "Qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4"
1911
+ }
1912
+ ],
1913
+ "prompt_style": {
1914
+ "style_name": "QWEN",
1915
+ "system_prompt": "You are a helpful assistant.",
1916
+ "roles": [
1917
+ "user",
1918
+ "assistant"
1919
+ ],
1920
+ "intra_message_sep": "\n",
1921
+ "stop_token_ids": [
1922
+ 151643,
1923
+ 151644,
1924
+ 151645
1925
+ ],
1926
+ "stop": [
1927
+ "<|endoftext|>",
1928
+ "<|im_start|>",
1929
+ "<|im_end|>"
1930
+ ]
1931
+ }
1932
+ },
1700
1933
  {
1701
1934
  "version": 1,
1702
1935
  "context_length": 8192,
@@ -1737,13 +1970,13 @@
1737
1970
  "model_description": "GPT-2 is a Transformer-based LLM that is trained on WebTest, a 40 GB dataset of Reddit posts with 3+ upvotes.",
1738
1971
  "model_specs": [
1739
1972
  {
1740
- "model_format": "ggmlv3",
1741
- "model_size_in_billions": 1,
1973
+ "model_format": "pytorch",
1974
+ "model_size_in_billions": "1_5",
1742
1975
  "quantizations": [
1743
1976
  "none"
1744
1977
  ],
1745
- "model_id": "marella/gpt-2-ggml",
1746
- "model_file_name_template": "ggml-model.bin"
1978
+ "model_id": "openai-community/gpt2",
1979
+ "model_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e"
1747
1980
  }
1748
1981
  ]
1749
1982
  },
@@ -2526,6 +2759,22 @@
2526
2759
  "model_id": "mistralai/Mistral-7B-Instruct-v0.1",
2527
2760
  "model_revision": "54766df6d50e4d3d7ccd66758e5341ba105a6d36"
2528
2761
  },
2762
+ {
2763
+ "model_format": "awq",
2764
+ "model_size_in_billions": 7,
2765
+ "quantizations": [
2766
+ "Int4"
2767
+ ],
2768
+ "model_id": "TheBloke/Mistral-7B-Instruct-v0.1-AWQ"
2769
+ },
2770
+ {
2771
+ "model_format": "gptq",
2772
+ "model_size_in_billions": 7,
2773
+ "quantizations": [
2774
+ "Int4"
2775
+ ],
2776
+ "model_id": "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
2777
+ },
2529
2778
  {
2530
2779
  "model_format": "ggufv2",
2531
2780
  "model_size_in_billions": 7,
@@ -2587,6 +2836,22 @@
2587
2836
  "model_id": "mistralai/Mistral-7B-Instruct-v0.2",
2588
2837
  "model_revision": "b70aa86578567ba3301b21c8a27bea4e8f6d6d61"
2589
2838
  },
2839
+ {
2840
+ "model_format": "gptq",
2841
+ "model_size_in_billions": 7,
2842
+ "quantizations": [
2843
+ "Int4"
2844
+ ],
2845
+ "model_id": "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"
2846
+ },
2847
+ {
2848
+ "model_format": "awq",
2849
+ "model_size_in_billions": 7,
2850
+ "quantizations": [
2851
+ "Int4"
2852
+ ],
2853
+ "model_id": "TheBloke/Mistral-7B-Instruct-v0.2-AWQ"
2854
+ },
2590
2855
  {
2591
2856
  "model_format": "ggufv2",
2592
2857
  "model_size_in_billions": 7,
@@ -2747,6 +3012,14 @@
2747
3012
  "model_id": "mistralai/Mixtral-8x7B-v0.1",
2748
3013
  "model_revision": "58301445dc1378584211722b7ebf8743ec4e192b"
2749
3014
  },
3015
+ {
3016
+ "model_format": "gptq",
3017
+ "model_size_in_billions": "46_7",
3018
+ "quantizations": [
3019
+ "Int4"
3020
+ ],
3021
+ "model_id": "TheBloke/Mixtral-8x7B-v0.1-GPTQ"
3022
+ },
2750
3023
  {
2751
3024
  "model_format": "ggufv2",
2752
3025
  "model_size_in_billions": "46_7",
@@ -2796,10 +3069,17 @@
2796
3069
  "model_format": "awq",
2797
3070
  "model_size_in_billions": "46_7",
2798
3071
  "quantizations": [
2799
- "4-bit"
3072
+ "Int4"
3073
+ ],
3074
+ "model_id": "TheBloke/Mixtral-8x7B-Instruct-v0.1-AWQ"
3075
+ },
3076
+ {
3077
+ "model_format": "gptq",
3078
+ "model_size_in_billions": "46_7",
3079
+ "quantizations": [
3080
+ "Int4"
2800
3081
  ],
2801
- "model_id": "TheBloke/Mixtral-8x7B-Instruct-v0.1-AWQ",
2802
- "model_revision": "9afb6f0a7d7fe9ecebdda1baa4ff4e13e73e97d7"
3082
+ "model_id": "TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ"
2803
3083
  },
2804
3084
  {
2805
3085
  "model_format": "ggufv2",
@@ -3357,22 +3637,122 @@
3357
3637
  {
3358
3638
  "version": 1,
3359
3639
  "context_length": 4096,
3360
- "model_name": "deepseek-chat",
3640
+ "model_name": "gorilla-openfunctions-v2",
3361
3641
  "model_lang": [
3362
- "en",
3363
- "zh"
3642
+ "en"
3364
3643
  ],
3365
3644
  "model_ability": [
3366
3645
  "chat"
3367
3646
  ],
3368
- "model_description": "DeepSeek LLM is an advanced language model comprising 67 billion parameters. It has been trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese.",
3647
+ "model_description": "OpenFunctions is designed to extend Large Language Model (LLM) Chat Completion feature to formulate executable APIs call given natural language instructions and API context.",
3369
3648
  "model_specs": [
3370
3649
  {
3371
3650
  "model_format": "pytorch",
3372
3651
  "model_size_in_billions": 7,
3373
3652
  "quantizations": [
3374
- "4-bit",
3375
- "8-bit",
3653
+ "none"
3654
+ ],
3655
+ "model_id": "gorilla-llm/gorilla-openfunctions-v2",
3656
+ "model_revision": "0f91d705e64b77fb55e35a7eab5d03bf965c9b5c"
3657
+ },
3658
+ {
3659
+ "model_format": "ggufv2",
3660
+ "model_size_in_billions": 7,
3661
+ "quantizations": [
3662
+ "Q2_K",
3663
+ "Q3_K_L",
3664
+ "Q3_K_M",
3665
+ "Q3_K_S",
3666
+ "Q4_0",
3667
+ "Q4_K_M",
3668
+ "Q4_K_S",
3669
+ "Q5_K_M",
3670
+ "Q5_K_S",
3671
+ "Q6_K"
3672
+ ],
3673
+ "model_id": "gorilla-llm//gorilla-openfunctions-v2-GGUF",
3674
+ "model_file_name_template": "gorilla-openfunctions-v2.{quantization}.gguf"
3675
+ }
3676
+ ],
3677
+ "prompt_style": {
3678
+ "style_name": "GORILLA_OPENFUNCTIONS",
3679
+ "system_prompt": "",
3680
+ "roles": [
3681
+ "",
3682
+ ""
3683
+ ],
3684
+ "intra_message_sep": "\n",
3685
+ "inter_message_sep": "\n",
3686
+ "stop_token_ids": [],
3687
+ "stop": []
3688
+ }
3689
+ },
3690
+ {
3691
+ "version": 1,
3692
+ "context_length": 4096,
3693
+ "model_name": "deepseek-vl-chat",
3694
+ "model_lang": [
3695
+ "en",
3696
+ "zh"
3697
+ ],
3698
+ "model_ability": [
3699
+ "chat",
3700
+ "vision"
3701
+ ],
3702
+ "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
3703
+ "model_specs": [
3704
+ {
3705
+ "model_format": "pytorch",
3706
+ "model_size_in_billions": "1_3",
3707
+ "quantizations": [
3708
+ "none"
3709
+ ],
3710
+ "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
3711
+ "model_revision": "8f13a8e00dbdc381d614a9d29d61b07e8fe91b3f"
3712
+ },
3713
+ {
3714
+ "model_format": "pytorch",
3715
+ "model_size_in_billions": 7,
3716
+ "quantizations": [
3717
+ "none"
3718
+ ],
3719
+ "model_id": "deepseek-ai/deepseek-vl-7b-chat",
3720
+ "model_revision": "6f16f00805f45b5249f709ce21820122eeb43556"
3721
+ }
3722
+ ],
3723
+ "prompt_style": {
3724
+ "style_name": "DEEPSEEK_CHAT",
3725
+ "system_prompt": "<|begin▁of▁sentence|>",
3726
+ "roles": [
3727
+ "User",
3728
+ "Assistant"
3729
+ ],
3730
+ "intra_message_sep": "\n\n",
3731
+ "inter_message_sep": "<|end▁of▁sentence|>",
3732
+ "stop": [
3733
+ "<|end▁of▁sentence|>"
3734
+ ]
3735
+ }
3736
+ },
3737
+ {
3738
+ "version": 1,
3739
+ "context_length": 4096,
3740
+ "model_name": "deepseek-chat",
3741
+ "model_lang": [
3742
+ "en",
3743
+ "zh"
3744
+ ],
3745
+ "model_ability": [
3746
+ "chat"
3747
+ ],
3748
+ "model_description": "DeepSeek LLM is an advanced language model comprising 67 billion parameters. It has been trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese.",
3749
+ "model_specs": [
3750
+ {
3751
+ "model_format": "pytorch",
3752
+ "model_size_in_billions": 7,
3753
+ "quantizations": [
3754
+ "4-bit",
3755
+ "8-bit",
3376
3756
  "none"
3377
3757
  ],
3378
3758
  "model_id": "deepseek-ai/deepseek-llm-7b-chat",
@@ -3662,6 +4042,48 @@
3662
4042
  ]
3663
4043
  }
3664
4044
  },
4045
+ {
4046
+ "version":1,
4047
+ "context_length":2048,
4048
+ "model_name":"OmniLMM",
4049
+ "model_lang":[
4050
+ "en",
4051
+ "zh"
4052
+ ],
4053
+ "model_ability":[
4054
+ "chat",
4055
+ "vision"
4056
+ ],
4057
+ "model_description":"OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
4058
+ "model_specs":[
4059
+ {
4060
+ "model_format":"pytorch",
4061
+ "model_size_in_billions":3,
4062
+ "quantizations":[
4063
+ "none"
4064
+ ],
4065
+ "model_id":"openbmb/MiniCPM-V",
4066
+ "model_revision":"bec7d1cd1c9e804c064ec291163e40624825eaaa"
4067
+ },
4068
+ {
4069
+ "model_format":"pytorch",
4070
+ "model_size_in_billions":12,
4071
+ "quantizations":[
4072
+ "none"
4073
+ ],
4074
+ "model_id":"openbmb/OmniLMM-12B",
4075
+ "model_revision":"ef62bae5af34be653b9801037cd613e05ab24fdc"
4076
+ }
4077
+ ],
4078
+ "prompt_style":{
4079
+ "style_name":"OmniLMM",
4080
+ "system_prompt":"The role of first msg should be user",
4081
+ "roles":[
4082
+ "user",
4083
+ "assistant"
4084
+ ]
4085
+ }
4086
+ },
3665
4087
  {
3666
4088
  "version": 1,
3667
4089
  "context_length": 4096,
@@ -3888,5 +4310,447 @@
3888
4310
  "<start_of_turn>"
3889
4311
  ]
3890
4312
  }
4313
+ },
4314
+ {
4315
+ "version": 1,
4316
+ "context_length": 4096,
4317
+ "model_name": "platypus2-70b-instruct",
4318
+ "model_lang": [
4319
+ "en"
4320
+ ],
4321
+ "model_ability": [
4322
+ "generate"
4323
+ ],
4324
+ "model_description": "Platypus-70B-instruct is a merge of garage-bAInd/Platypus2-70B and upstage/Llama-2-70b-instruct-v2.",
4325
+ "model_specs": [
4326
+ {
4327
+ "model_format": "pytorch",
4328
+ "model_size_in_billions": 70,
4329
+ "quantizations": [
4330
+ "none"
4331
+ ],
4332
+ "model_id": "garage-bAInd/Platypus2-70B-instruct",
4333
+ "model_revision": "31389b50953688e4e542be53e6d2ab04d5c34e87"
4334
+ }
4335
+ ]
4336
+ },
4337
+ {
4338
+ "version": 1,
4339
+ "context_length": 2048,
4340
+ "model_name": "aquila2",
4341
+ "model_lang": [
4342
+ "zh"
4343
+ ],
4344
+ "model_ability": [
4345
+ "generate"
4346
+ ],
4347
+ "model_description": "Aquila2 series models are the base language models",
4348
+ "model_specs": [
4349
+ {
4350
+ "model_format": "pytorch",
4351
+ "model_size_in_billions": 7,
4352
+ "quantizations": [
4353
+ "none"
4354
+ ],
4355
+ "model_id": "BAAI/Aquila2-7B",
4356
+ "model_revision": "9c76e143c6e9621689ca76e078c465b0dee75eb8"
4357
+ },
4358
+ {
4359
+ "model_format": "pytorch",
4360
+ "model_size_in_billions": 34,
4361
+ "quantizations": [
4362
+ "none"
4363
+ ],
4364
+ "model_id": "BAAI/Aquila2-34B",
4365
+ "model_revision": "356733caf6221e9dd898cde8ff189a98175526ec"
4366
+ },
4367
+ {
4368
+ "model_format": "pytorch",
4369
+ "model_size_in_billions": 70,
4370
+ "quantizations": [
4371
+ "none"
4372
+ ],
4373
+ "model_id": "BAAI/Aquila2-70B-Expr",
4374
+ "model_revision": "32a2897235541b9f5238bbe88f8d76a19993c0ba"
4375
+ }
4376
+ ]
4377
+ },
4378
+ {
4379
+ "version": 1,
4380
+ "context_length": 2048,
4381
+ "model_name": "aquila2-chat",
4382
+ "model_lang": [
4383
+ "zh"
4384
+ ],
4385
+ "model_ability": [
4386
+ "chat"
4387
+ ],
4388
+ "model_description": "Aquila2-chat series models are the chat models",
4389
+ "model_specs": [
4390
+ {
4391
+ "model_format": "pytorch",
4392
+ "model_size_in_billions": 7,
4393
+ "quantizations": [
4394
+ "none"
4395
+ ],
4396
+ "model_id": "BAAI/AquilaChat2-7B",
4397
+ "model_revision": "0d060c4edeb4e0febd81130c17f6868653184fb3"
4398
+ },
4399
+ {
4400
+ "model_format": "ggufv2",
4401
+ "model_size_in_billions": 34,
4402
+ "quantizations": [
4403
+ "Q2_K",
4404
+ "Q3_K_L",
4405
+ "Q3_K_M",
4406
+ "Q3_K_S",
4407
+ "Q4_0",
4408
+ "Q4_K_M",
4409
+ "Q4_K_S",
4410
+ "Q5_0",
4411
+ "Q5_K_M",
4412
+ "Q5_K_S",
4413
+ "Q6_K",
4414
+ "Q8_0"
4415
+ ],
4416
+ "model_id": "TheBloke/AquilaChat2-34B-GGUF",
4417
+ "model_file_name_template": "aquilachat2-34b.{quantization}.gguf"
4418
+ },
4419
+ {
4420
+ "model_format": "gptq",
4421
+ "model_size_in_billions": 34,
4422
+ "quantizations": [
4423
+ "Int4"
4424
+ ],
4425
+ "model_id": "TheBloke/AquilaChat2-34B-GPTQ",
4426
+ "model_revision": "9a9d21424f7db608be51df769885514ab6e052db"
4427
+ },
4428
+ {
4429
+ "model_format": "awq",
4430
+ "model_size_in_billions": "34",
4431
+ "quantizations": [
4432
+ "Int4"
4433
+ ],
4434
+ "model_id": "TheBloke/AquilaChat2-34B-AWQ",
4435
+ "model_revision": "ad1dec1c8adb7fa6cb07b7e261aaa04fccf1c4c0"
4436
+ },
4437
+ {
4438
+ "model_format": "pytorch",
4439
+ "model_size_in_billions": 34,
4440
+ "quantizations": [
4441
+ "none"
4442
+ ],
4443
+ "model_id": "BAAI/AquilaChat2-34B",
4444
+ "model_revision": "b9cd9c7436435ab9cfa5e4f009be2b0354979ca8"
4445
+ },
4446
+ {
4447
+ "model_format": "pytorch",
4448
+ "model_size_in_billions": 70,
4449
+ "quantizations": [
4450
+ "none"
4451
+ ],
4452
+ "model_id": "BAAI/AquilaChat2-70B-Expr",
4453
+ "model_revision": "0df19b6e10f1a19ca663f7cc1141aae10f1825f4"
4454
+ }
4455
+ ],
4456
+ "prompt_style": {
4457
+ "style_name": "ADD_COLON_SINGLE",
4458
+ "intra_message_sep": "\n",
4459
+ "system_prompt": "",
4460
+ "roles": [
4461
+ "USER",
4462
+ "ASSISTANT"
4463
+ ],
4464
+ "stop_token_ids": [
4465
+ 100006,
4466
+ 100007
4467
+ ],
4468
+ "stop": [
4469
+ "[CLS]",
4470
+ "</s>"
4471
+ ]
4472
+ }
4473
+ },
4474
+ {
4475
+ "version": 1,
4476
+ "context_length": 16384,
4477
+ "model_name": "aquila2-chat-16k",
4478
+ "model_lang": [
4479
+ "zh"
4480
+ ],
4481
+ "model_ability": [
4482
+ "chat"
4483
+ ],
4484
+ "model_description": "AquilaChat2-16k series models are the long-text chat models",
4485
+ "model_specs": [
4486
+ {
4487
+ "model_format": "pytorch",
4488
+ "model_size_in_billions": 7,
4489
+ "quantizations": [
4490
+ "none"
4491
+ ],
4492
+ "model_id": "BAAI/AquilaChat2-7B-16K",
4493
+ "model_revision": "fb46d48479d05086ccf6952f19018322fcbb54cd"
4494
+ },
4495
+ {
4496
+ "model_format": "ggufv2",
4497
+ "model_size_in_billions": 34,
4498
+ "quantizations": [
4499
+ "Q2_K",
4500
+ "Q3_K_L",
4501
+ "Q3_K_M",
4502
+ "Q3_K_S",
4503
+ "Q4_0",
4504
+ "Q4_K_M",
4505
+ "Q4_K_S",
4506
+ "Q5_0",
4507
+ "Q5_K_M",
4508
+ "Q5_K_S",
4509
+ "Q6_K",
4510
+ "Q8_0"
4511
+ ],
4512
+ "model_id": "TheBloke/AquilaChat2-34B-16K-GGUF",
4513
+ "model_file_name_template": "aquilachat2-34b-16k.{quantization}.gguf"
4514
+ },
4515
+ {
4516
+ "model_format": "gptq",
4517
+ "model_size_in_billions": 34,
4518
+ "quantizations": [
4519
+ "Int4"
4520
+ ],
4521
+ "model_id": "TheBloke/AquilaChat2-34B-16K-GPTQ",
4522
+ "model_revision": "0afa1c2a55a4ee1a6f0dba81d9ec296dc7936b91"
4523
+ },
4524
+ {
4525
+ "model_format": "awq",
4526
+ "model_size_in_billions": 34,
4527
+ "quantizations": [
4528
+ "Int4"
4529
+ ],
4530
+ "model_id": "TheBloke/AquilaChat2-34B-16K-AWQ",
4531
+ "model_revision": "db7403ca492416903c84a7a38b11cb5506de48b1"
4532
+ },
4533
+ {
4534
+ "model_format": "pytorch",
4535
+ "model_size_in_billions": 34,
4536
+ "quantizations": [
4537
+ "none"
4538
+ ],
4539
+ "model_id": "BAAI/AquilaChat2-34B-16K",
4540
+ "model_revision": "a06fd164c7170714924d2881c61c8348425ebc94"
4541
+ }
4542
+ ],
4543
+ "prompt_style": {
4544
+ "style_name": "ADD_COLON_SINGLE",
4545
+ "intra_message_sep": "\n",
4546
+ "system_prompt": "",
4547
+ "roles": [
4548
+ "USER",
4549
+ "ASSISTANT"
4550
+ ],
4551
+ "stop_token_ids": [
4552
+ 100006,
4553
+ 100007
4554
+ ],
4555
+ "stop": [
4556
+ "[CLS]",
4557
+ "</s>"
4558
+ ]
4559
+ }
4560
+ },
4561
+ {
4562
+ "version": 1,
4563
+ "context_length": 4096,
4564
+ "model_name": "minicpm-2b-sft-bf16",
4565
+ "model_lang": [
4566
+ "zh"
4567
+ ],
4568
+ "model_ability": [
4569
+ "chat"
4570
+ ],
4571
+ "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4572
+ "model_specs": [
4573
+ {
4574
+ "model_format": "pytorch",
4575
+ "model_size_in_billions": 2,
4576
+ "quantizations": [
4577
+ "none"
4578
+ ],
4579
+ "model_id": "openbmb/MiniCPM-2B-sft-bf16",
4580
+ "model_revision": "fe1d74027ebdd81cef5f815fa3a2d432a6b5de2a"
4581
+ }
4582
+ ],
4583
+ "prompt_style": {
4584
+ "style_name": "MINICPM-2B",
4585
+ "system_prompt": "",
4586
+ "roles": [
4587
+ "user",
4588
+ "assistant"
4589
+ ],
4590
+ "stop_token_ids": [
4591
+ 1,
4592
+ 2
4593
+ ],
4594
+ "stop": [
4595
+ "<s>",
4596
+ "</s>"
4597
+ ]
4598
+ }
4599
+ },
4600
+ {
4601
+ "version": 1,
4602
+ "context_length": 4096,
4603
+ "model_name": "minicpm-2b-sft-fp32",
4604
+ "model_lang": [
4605
+ "zh"
4606
+ ],
4607
+ "model_ability": [
4608
+ "chat"
4609
+ ],
4610
+ "model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4611
+ "model_specs": [
4612
+ {
4613
+ "model_format": "pytorch",
4614
+ "model_size_in_billions": 2,
4615
+ "quantizations": [
4616
+ "none"
4617
+ ],
4618
+ "model_id": "openbmb/MiniCPM-2B-sft-fp32",
4619
+ "model_revision": "35b90dd57d977b6e5bc4907986fa5b77aa15a82e"
4620
+ }
4621
+ ],
4622
+ "prompt_style": {
4623
+ "style_name": "MINICPM-2B",
4624
+ "system_prompt": "",
4625
+ "roles": [
4626
+ "user",
4627
+ "assistant"
4628
+ ],
4629
+ "stop_token_ids": [
4630
+ 1,
4631
+ 2
4632
+ ],
4633
+ "stop": [
4634
+ "<s>",
4635
+ "</s>"
4636
+ ]
4637
+ }
4638
+ },
4639
+ {
4640
+ "version": 1,
4641
+ "context_length": 4096,
4642
+ "model_name": "minicpm-2b-dpo-bf16",
4643
+ "model_lang": [
4644
+ "zh"
4645
+ ],
4646
+ "model_ability": [
4647
+ "chat"
4648
+ ],
4649
+ "model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4650
+ "model_specs": [
4651
+ {
4652
+ "model_format": "pytorch",
4653
+ "model_size_in_billions": 2,
4654
+ "quantizations": [
4655
+ "none"
4656
+ ],
4657
+ "model_id": "openbmb/MiniCPM-2B-dpo-bf16",
4658
+ "model_revision": "f4a3ba49f3f18695945c2a7c12400d4da99da498"
4659
+ }
4660
+ ],
4661
+ "prompt_style": {
4662
+ "style_name": "MINICPM-2B",
4663
+ "system_prompt": "",
4664
+ "roles": [
4665
+ "user",
4666
+ "assistant"
4667
+ ],
4668
+ "stop_token_ids": [
4669
+ 1,
4670
+ 2
4671
+ ],
4672
+ "stop": [
4673
+ "<s>",
4674
+ "</s>"
4675
+ ]
4676
+ }
4677
+ },
4678
+ {
4679
+ "version": 1,
4680
+ "context_length": 4096,
4681
+ "model_name": "minicpm-2b-dpo-fp16",
4682
+ "model_lang": [
4683
+ "zh"
4684
+ ],
4685
+ "model_ability": [
4686
+ "chat"
4687
+ ],
4688
+ "model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4689
+ "model_specs": [
4690
+ {
4691
+ "model_format": "pytorch",
4692
+ "model_size_in_billions": 2,
4693
+ "quantizations": [
4694
+ "none"
4695
+ ],
4696
+ "model_id": "openbmb/MiniCPM-2B-dpo-fp16",
4697
+ "model_revision": "e7a50289e4f839674cf8d4a5a2ce032ccacf64ac"
4698
+ }
4699
+ ],
4700
+ "prompt_style": {
4701
+ "style_name": "MINICPM-2B",
4702
+ "system_prompt": "",
4703
+ "roles": [
4704
+ "user",
4705
+ "assistant"
4706
+ ],
4707
+ "stop_token_ids": [
4708
+ 1,
4709
+ 2
4710
+ ],
4711
+ "stop": [
4712
+ "<s>",
4713
+ "</s>"
4714
+ ]
4715
+ }
4716
+ },
4717
+ {
4718
+ "version": 1,
4719
+ "context_length": 4096,
4720
+ "model_name": "minicpm-2b-dpo-fp32",
4721
+ "model_lang": [
4722
+ "zh"
4723
+ ],
4724
+ "model_ability": [
4725
+ "chat"
4726
+ ],
4727
+ "model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4728
+ "model_specs": [
4729
+ {
4730
+ "model_format": "pytorch",
4731
+ "model_size_in_billions": 2,
4732
+ "quantizations": [
4733
+ "none"
4734
+ ],
4735
+ "model_id": "openbmb/MiniCPM-2B-dpo-fp32",
4736
+ "model_revision": "b560a1593779b735a84a6daf72fba96ae38da288"
4737
+ }
4738
+ ],
4739
+ "prompt_style": {
4740
+ "style_name": "MINICPM-2B",
4741
+ "system_prompt": "",
4742
+ "roles": [
4743
+ "user",
4744
+ "assistant"
4745
+ ],
4746
+ "stop_token_ids": [
4747
+ 1,
4748
+ 2
4749
+ ],
4750
+ "stop": [
4751
+ "<s>",
4752
+ "</s>"
4753
+ ]
4754
+ }
3891
4755
  }
3892
4756
  ]