xinference 0.14.1.post1__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (194)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +15 -34
  3. xinference/client/restful/restful_client.py +2 -2
  4. xinference/core/chat_interface.py +45 -10
  5. xinference/core/image_interface.py +9 -0
  6. xinference/core/model.py +8 -5
  7. xinference/core/scheduler.py +1 -2
  8. xinference/core/worker.py +49 -42
  9. xinference/deploy/cmdline.py +2 -2
  10. xinference/deploy/test/test_cmdline.py +7 -7
  11. xinference/model/audio/chattts.py +24 -9
  12. xinference/model/audio/core.py +8 -2
  13. xinference/model/audio/fish_speech.py +228 -0
  14. xinference/model/audio/model_spec.json +8 -0
  15. xinference/model/embedding/core.py +23 -1
  16. xinference/model/image/model_spec.json +2 -1
  17. xinference/model/image/model_spec_modelscope.json +2 -1
  18. xinference/model/image/stable_diffusion/core.py +49 -1
  19. xinference/model/llm/__init__.py +26 -27
  20. xinference/model/llm/{ggml/llamacpp.py → llama_cpp/core.py} +2 -35
  21. xinference/model/llm/llm_family.json +606 -1266
  22. xinference/model/llm/llm_family.py +16 -139
  23. xinference/model/llm/llm_family_modelscope.json +276 -313
  24. xinference/model/llm/lmdeploy/__init__.py +0 -0
  25. xinference/model/llm/lmdeploy/core.py +557 -0
  26. xinference/model/llm/memory.py +9 -9
  27. xinference/model/llm/sglang/core.py +2 -2
  28. xinference/model/llm/{pytorch → transformers}/chatglm.py +6 -13
  29. xinference/model/llm/{pytorch → transformers}/cogvlm2.py +4 -45
  30. xinference/model/llm/transformers/cogvlm2_video.py +524 -0
  31. xinference/model/llm/{pytorch → transformers}/core.py +3 -10
  32. xinference/model/llm/{pytorch → transformers}/glm4v.py +2 -23
  33. xinference/model/llm/transformers/intern_vl.py +540 -0
  34. xinference/model/llm/{pytorch → transformers}/internlm2.py +4 -8
  35. xinference/model/llm/{pytorch → transformers}/minicpmv25.py +2 -23
  36. xinference/model/llm/{pytorch → transformers}/minicpmv26.py +66 -41
  37. xinference/model/llm/{pytorch → transformers}/utils.py +1 -2
  38. xinference/model/llm/{pytorch → transformers}/yi_vl.py +2 -24
  39. xinference/model/llm/utils.py +85 -70
  40. xinference/model/llm/vllm/core.py +110 -11
  41. xinference/model/utils.py +1 -95
  42. xinference/thirdparty/fish_speech/__init__.py +0 -0
  43. xinference/thirdparty/fish_speech/fish_speech/__init__.py +0 -0
  44. xinference/thirdparty/fish_speech/fish_speech/callbacks/__init__.py +3 -0
  45. xinference/thirdparty/fish_speech/fish_speech/callbacks/grad_norm.py +113 -0
  46. xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
  47. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  48. xinference/thirdparty/fish_speech/fish_speech/conversation.py +2 -0
  49. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  50. xinference/thirdparty/fish_speech/fish_speech/datasets/concat_repeat.py +53 -0
  51. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  52. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_pb2.py +33 -0
  53. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_stream.py +36 -0
  54. xinference/thirdparty/fish_speech/fish_speech/datasets/semantic.py +496 -0
  55. xinference/thirdparty/fish_speech/fish_speech/datasets/vqgan.py +147 -0
  56. xinference/thirdparty/fish_speech/fish_speech/i18n/__init__.py +3 -0
  57. xinference/thirdparty/fish_speech/fish_speech/i18n/core.py +40 -0
  58. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  59. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +122 -0
  60. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +122 -0
  61. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +123 -0
  62. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +133 -0
  63. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +122 -0
  64. xinference/thirdparty/fish_speech/fish_speech/i18n/scan.py +122 -0
  65. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  66. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/__init__.py +0 -0
  67. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lit_module.py +202 -0
  68. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +779 -0
  69. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lora.py +92 -0
  70. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +3 -0
  71. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +442 -0
  72. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  73. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +44 -0
  74. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +625 -0
  75. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +139 -0
  76. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +115 -0
  77. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +225 -0
  78. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/utils.py +94 -0
  79. xinference/thirdparty/fish_speech/fish_speech/scheduler.py +40 -0
  80. xinference/thirdparty/fish_speech/fish_speech/text/__init__.py +4 -0
  81. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/__init__.py +0 -0
  82. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_class.py +172 -0
  83. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_constant.py +30 -0
  84. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_util.py +342 -0
  85. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/cardinal.py +32 -0
  86. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/date.py +75 -0
  87. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/digit.py +32 -0
  88. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/fraction.py +35 -0
  89. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/money.py +43 -0
  90. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/percentage.py +33 -0
  91. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/telephone.py +51 -0
  92. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +177 -0
  93. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +69 -0
  94. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +130 -0
  95. xinference/thirdparty/fish_speech/fish_speech/train.py +139 -0
  96. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +23 -0
  97. xinference/thirdparty/fish_speech/fish_speech/utils/braceexpand.py +217 -0
  98. xinference/thirdparty/fish_speech/fish_speech/utils/context.py +13 -0
  99. xinference/thirdparty/fish_speech/fish_speech/utils/file.py +16 -0
  100. xinference/thirdparty/fish_speech/fish_speech/utils/instantiators.py +50 -0
  101. xinference/thirdparty/fish_speech/fish_speech/utils/logger.py +55 -0
  102. xinference/thirdparty/fish_speech/fish_speech/utils/logging_utils.py +48 -0
  103. xinference/thirdparty/fish_speech/fish_speech/utils/rich_utils.py +100 -0
  104. xinference/thirdparty/fish_speech/fish_speech/utils/spectrogram.py +122 -0
  105. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +114 -0
  106. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  107. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +120 -0
  108. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1237 -0
  109. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  110. xinference/thirdparty/fish_speech/tools/api.py +495 -0
  111. xinference/thirdparty/fish_speech/tools/auto_rerank.py +159 -0
  112. xinference/thirdparty/fish_speech/tools/download_models.py +55 -0
  113. xinference/thirdparty/fish_speech/tools/extract_model.py +21 -0
  114. xinference/thirdparty/fish_speech/tools/file.py +108 -0
  115. xinference/thirdparty/fish_speech/tools/gen_ref.py +36 -0
  116. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  117. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +169 -0
  118. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +171 -0
  119. xinference/thirdparty/fish_speech/tools/llama/generate.py +698 -0
  120. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +95 -0
  121. xinference/thirdparty/fish_speech/tools/llama/quantize.py +497 -0
  122. xinference/thirdparty/fish_speech/tools/llama/rebuild_tokenizer.py +57 -0
  123. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +55 -0
  124. xinference/thirdparty/fish_speech/tools/post_api.py +164 -0
  125. xinference/thirdparty/fish_speech/tools/sensevoice/__init__.py +0 -0
  126. xinference/thirdparty/fish_speech/tools/sensevoice/auto_model.py +573 -0
  127. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +332 -0
  128. xinference/thirdparty/fish_speech/tools/sensevoice/vad_utils.py +61 -0
  129. xinference/thirdparty/fish_speech/tools/smart_pad.py +47 -0
  130. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  131. xinference/thirdparty/fish_speech/tools/vqgan/create_train_split.py +83 -0
  132. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +227 -0
  133. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +120 -0
  134. xinference/thirdparty/fish_speech/tools/webui.py +619 -0
  135. xinference/thirdparty/fish_speech/tools/whisper_asr.py +176 -0
  136. xinference/thirdparty/internvl/__init__.py +0 -0
  137. xinference/thirdparty/internvl/conversation.py +393 -0
  138. xinference/thirdparty/omnilmm/model/utils.py +16 -1
  139. xinference/web/ui/build/asset-manifest.json +3 -3
  140. xinference/web/ui/build/index.html +1 -1
  141. xinference/web/ui/build/static/js/main.661c7b0a.js +3 -0
  142. xinference/web/ui/build/static/js/{main.17ca0398.js.map → main.661c7b0a.js.map} +1 -1
  143. xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +1 -0
  144. xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +1 -0
  145. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +1 -0
  146. xinference/web/ui/node_modules/.cache/babel-loader/5391543180fead1eeef5364300301498d58a7d91d62de3841a32768b67f4552f.json +1 -0
  147. xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +1 -0
  148. xinference/web/ui/node_modules/.cache/babel-loader/714c37ce0ec5b5c591033f02be2f3f491fdd70da3ef568ee4a4f94689a3d5ca2.json +1 -0
  149. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +1 -0
  150. xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +1 -0
  151. xinference/web/ui/node_modules/.cache/babel-loader/a797831de0dc74897f4b50b3426555d748f328b4c2cc391de709eadaf6a5f3e3.json +1 -0
  152. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +1 -0
  153. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +1 -0
  154. xinference/web/ui/node_modules/.cache/babel-loader/e91938976f229ce986b2907e51e1f00540b584ced0a315d498c172d13220739d.json +1 -0
  155. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +1 -0
  156. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/METADATA +22 -13
  157. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/RECORD +170 -79
  158. xinference/locale/utils.py +0 -39
  159. xinference/locale/zh_CN.json +0 -26
  160. xinference/model/llm/ggml/tools/__init__.py +0 -15
  161. xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py +0 -498
  162. xinference/model/llm/ggml/tools/gguf.py +0 -884
  163. xinference/model/llm/pytorch/__init__.py +0 -13
  164. xinference/model/llm/pytorch/baichuan.py +0 -81
  165. xinference/model/llm/pytorch/falcon.py +0 -138
  166. xinference/model/llm/pytorch/intern_vl.py +0 -352
  167. xinference/model/llm/pytorch/vicuna.py +0 -69
  168. xinference/web/ui/build/static/js/main.17ca0398.js +0 -3
  169. xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +0 -1
  170. xinference/web/ui/node_modules/.cache/babel-loader/2f40209b32e7e46a2eab6b8c8a355eb42c3caa8bc3228dd929f32fd2b3940294.json +0 -1
  171. xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +0 -1
  172. xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +0 -1
  173. xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +0 -1
  174. xinference/web/ui/node_modules/.cache/babel-loader/71684495d995c7e266eecc6a0ad8ea0284cc785f80abddf863789c57a6134969.json +0 -1
  175. xinference/web/ui/node_modules/.cache/babel-loader/80acd1edf31542ab1dcccfad02cb4b38f3325cff847a781fcce97500cfd6f878.json +0 -1
  176. xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +0 -1
  177. xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +0 -1
  178. xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +0 -1
  179. xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +0 -1
  180. xinference/web/ui/node_modules/.cache/babel-loader/f28b83886159d83b84f099b05d607a822dca4dd7f2d8aa6d56fe08bab0b5b086.json +0 -1
  181. xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +0 -1
  182. /xinference/{locale → model/llm/llama_cpp}/__init__.py +0 -0
  183. /xinference/model/llm/{ggml → transformers}/__init__.py +0 -0
  184. /xinference/model/llm/{pytorch → transformers}/compression.py +0 -0
  185. /xinference/model/llm/{pytorch → transformers}/deepseek_vl.py +0 -0
  186. /xinference/model/llm/{pytorch → transformers}/llama_2.py +0 -0
  187. /xinference/model/llm/{pytorch → transformers}/omnilmm.py +0 -0
  188. /xinference/model/llm/{pytorch → transformers}/qwen_vl.py +0 -0
  189. /xinference/model/llm/{pytorch → transformers}/tensorizer_utils.py +0 -0
  190. /xinference/web/ui/build/static/js/{main.17ca0398.js.LICENSE.txt → main.661c7b0a.js.LICENSE.txt} +0 -0
  191. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/LICENSE +0 -0
  192. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/WHEEL +0 -0
  193. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/entry_points.txt +0 -0
  194. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/top_level.txt +0 -0
@@ -1,103 +1,4 @@
1
1
  [
2
- {
3
- "version": 1,
4
- "context_length": 4096,
5
- "model_name": "baichuan",
6
- "model_lang": [
7
- "en",
8
- "zh"
9
- ],
10
- "model_ability": [
11
- "generate"
12
- ],
13
- "model_description": "Baichuan is an open-source Transformer based LLM that is trained on both Chinese and English data.",
14
- "model_specs": [
15
- {
16
- "model_format": "ggmlv3",
17
- "model_size_in_billions": 7,
18
- "quantizations": [
19
- "q2_K",
20
- "q3_K_L",
21
- "q3_K_M",
22
- "q3_K_S",
23
- "q4_0",
24
- "q4_1",
25
- "q4_K_M",
26
- "q4_K_S",
27
- "q5_0",
28
- "q5_1",
29
- "q5_K_M",
30
- "q5_K_S",
31
- "q6_K",
32
- "q8_0"
33
- ],
34
- "model_id": "TheBloke/baichuan-llama-7B-GGML",
35
- "model_file_name_template": "baichuan-llama-7b.ggmlv3.{quantization}.bin"
36
- },
37
- {
38
- "model_format": "pytorch",
39
- "model_size_in_billions": 7,
40
- "quantizations": [
41
- "4-bit",
42
- "8-bit",
43
- "none"
44
- ],
45
- "model_id": "baichuan-inc/Baichuan-7B",
46
- "model_revision": "c1a5c7d5b7f50ecc51bb0e08150a9f12e5656756"
47
- },
48
- {
49
- "model_format": "pytorch",
50
- "model_size_in_billions": 13,
51
- "quantizations": [
52
- "4-bit",
53
- "8-bit",
54
- "none"
55
- ],
56
- "model_id": "baichuan-inc/Baichuan-13B-Base",
57
- "model_revision": "0ef0739c7bdd34df954003ef76d80f3dabca2ff9"
58
- }
59
- ]
60
- },
61
- {
62
- "version": 1,
63
- "context_length": 4096,
64
- "model_name": "baichuan-chat",
65
- "model_lang": [
66
- "en",
67
- "zh"
68
- ],
69
- "model_ability": [
70
- "chat"
71
- ],
72
- "model_description": "Baichuan-chat is a fine-tuned version of the Baichuan LLM, specializing in chatting.",
73
- "model_specs": [
74
- {
75
- "model_format": "pytorch",
76
- "model_size_in_billions": 13,
77
- "quantizations": [
78
- "4-bit",
79
- "8-bit",
80
- "none"
81
- ],
82
- "model_id": "baichuan-inc/Baichuan-13B-Chat",
83
- "model_revision": "19ef51ba5bad8935b03acd20ff04a269210983bc"
84
- }
85
- ],
86
- "prompt_style": {
87
- "style_name": "NO_COLON_TWO",
88
- "system_prompt": "",
89
- "roles": [
90
- " <reserved_102> ",
91
- " <reserved_103> "
92
- ],
93
- "intra_message_sep": "",
94
- "inter_message_sep": "</s>",
95
- "stop_token_ids": [
96
- 2,
97
- 195
98
- ]
99
- }
100
- },
101
2
  {
102
3
  "version": 1,
103
4
  "context_length": 8194,
@@ -164,258 +65,6 @@
164
65
  ]
165
66
  }
166
67
  },
167
- {
168
- "version": 1,
169
- "context_length": 2048,
170
- "model_name": "wizardlm-v1.0",
171
- "model_lang": [
172
- "en"
173
- ],
174
- "model_ability": [
175
- "chat"
176
- ],
177
- "model_description": "WizardLM is an open-source LLM trained by fine-tuning LLaMA with Evol-Instruct.",
178
- "model_specs": [
179
- {
180
- "model_format": "ggmlv3",
181
- "model_size_in_billions": 7,
182
- "quantizations": [
183
- "q2_K",
184
- "q3_K_L",
185
- "q3_K_M",
186
- "q3_K_S",
187
- "q4_0",
188
- "q4_1",
189
- "q4_K_M",
190
- "q4_K_S",
191
- "q5_0",
192
- "q5_1",
193
- "q5_K_M",
194
- "q5_K_S",
195
- "q6_K",
196
- "q8_0"
197
- ],
198
- "model_id": "TheBloke/WizardLM-7B-V1.0-Uncensored-GGML",
199
- "model_file_name_template": "wizardlm-7b-v1.0-uncensored.ggmlv3.{quantization}.bin"
200
- },
201
- {
202
- "model_format": "ggmlv3",
203
- "model_size_in_billions": 13,
204
- "quantizations": [
205
- "q2_K",
206
- "q3_K_L",
207
- "q3_K_M",
208
- "q3_K_S",
209
- "q4_0",
210
- "q4_1",
211
- "q4_K_M",
212
- "q4_K_S",
213
- "q5_0",
214
- "q5_1",
215
- "q5_K_M",
216
- "q5_K_S",
217
- "q6_K",
218
- "q8_0"
219
- ],
220
- "model_id": "TheBloke/WizardLM-13B-V1.0-Uncensored-GGML",
221
- "model_file_name_template": "wizardlm-13b-v1.0-uncensored.ggmlv3.{quantization}.bin"
222
- }
223
- ],
224
- "prompt_style": {
225
- "style_name": "ADD_COLON_SINGLE",
226
- "system_prompt": "You are a helpful AI assistant.",
227
- "roles": [
228
- "USER",
229
- "ASSISTANT"
230
- ],
231
- "intra_message_sep": "\n"
232
- }
233
- },
234
- {
235
- "version": 1,
236
- "context_length": 2048,
237
- "model_name": "vicuna-v1.3",
238
- "model_lang": [
239
- "en"
240
- ],
241
- "model_ability": [
242
- "chat"
243
- ],
244
- "model_description": "Vicuna is an open-source LLM trained by fine-tuning LLaMA on data collected from ShareGPT.",
245
- "model_specs": [
246
- {
247
- "model_format": "ggmlv3",
248
- "model_size_in_billions": 7,
249
- "quantizations": [
250
- "q2_K",
251
- "q3_K_L",
252
- "q3_K_M",
253
- "q3_K_S",
254
- "q4_0",
255
- "q4_1",
256
- "q4_K_M",
257
- "q4_K_S",
258
- "q5_0",
259
- "q5_1",
260
- "q5_K_M",
261
- "q5_K_S",
262
- "q6_K",
263
- "q8_0"
264
- ],
265
- "model_id": "TheBloke/vicuna-7B-v1.3-GGML",
266
- "model_file_name_template": "vicuna-7b-v1.3.ggmlv3.{quantization}.bin"
267
- },
268
- {
269
- "model_format": "ggmlv3",
270
- "model_size_in_billions": 13,
271
- "quantizations": [
272
- "q2_K",
273
- "q3_K_L",
274
- "q3_K_M",
275
- "q3_K_S",
276
- "q4_0",
277
- "q4_1",
278
- "q4_K_M",
279
- "q4_K_S",
280
- "q5_0",
281
- "q5_1",
282
- "q5_K_M",
283
- "q5_K_S",
284
- "q6_K",
285
- "q8_0"
286
- ],
287
- "model_id": "TheBloke/vicuna-13b-v1.3.0-GGML",
288
- "model_file_name_template": "vicuna-13b-v1.3.0.ggmlv3.{quantization}.bin"
289
- },
290
- {
291
- "model_format": "ggmlv3",
292
- "model_size_in_billions": 33,
293
- "quantizations": [
294
- "q2_K",
295
- "q3_K_L",
296
- "q3_K_M",
297
- "q3_K_S",
298
- "q4_0",
299
- "q4_1",
300
- "q4_K_M",
301
- "q4_K_S",
302
- "q5_0",
303
- "q5_1",
304
- "q5_K_M",
305
- "q5_K_S",
306
- "q6_K",
307
- "q8_0"
308
- ],
309
- "model_id": "TheBloke/vicuna-33B-GGML",
310
- "model_file_name_template": "vicuna-33b.ggmlv3.{quantization}.bin"
311
- },
312
- {
313
- "model_format": "pytorch",
314
- "model_size_in_billions": 33,
315
- "quantizations": [
316
- "4-bit",
317
- "8-bit",
318
- "none"
319
- ],
320
- "model_id": "lmsys/vicuna-33b-v1.3",
321
- "model_revision": "ef8d6becf883fb3ce52e3706885f761819477ab4"
322
- },
323
- {
324
- "model_format": "pytorch",
325
- "model_size_in_billions": 13,
326
- "quantizations": [
327
- "4-bit",
328
- "8-bit",
329
- "none"
330
- ],
331
- "model_id": "lmsys/vicuna-13b-v1.3",
332
- "model_revision": "6566e9cb1787585d1147dcf4f9bc48f29e1328d2"
333
- },
334
- {
335
- "model_format": "pytorch",
336
- "model_size_in_billions": 7,
337
- "quantizations": [
338
- "4-bit",
339
- "8-bit",
340
- "none"
341
- ],
342
- "model_id": "lmsys/vicuna-7b-v1.3",
343
- "model_revision": "236eeeab96f0dc2e463f2bebb7bb49809279c6d6"
344
- }
345
- ],
346
- "prompt_style": {
347
- "style_name": "ADD_COLON_TWO",
348
- "system_prompt": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.",
349
- "roles": [
350
- "USER",
351
- "ASSISTANT"
352
- ],
353
- "intra_message_sep": " ",
354
- "inter_message_sep": "</s>"
355
- }
356
- },
357
- {
358
- "version": 1,
359
- "context_length": 2048,
360
- "model_name": "orca",
361
- "model_lang": [
362
- "en"
363
- ],
364
- "model_ability": [
365
- "chat"
366
- ],
367
- "model_description": "Orca is an LLM trained by fine-tuning LLaMA on explanation traces obtained from GPT-4.",
368
- "model_specs": [
369
- {
370
- "model_format": "ggmlv3",
371
- "model_size_in_billions": 3,
372
- "quantizations": [
373
- "q4_0",
374
- "q4_1",
375
- "q5_0",
376
- "q5_1",
377
- "q8_0"
378
- ],
379
- "model_id": "TheBloke/orca_mini_3B-GGML",
380
- "model_file_name_template": "orca-mini-3b.ggmlv3.{quantization}.bin"
381
- },
382
- {
383
- "model_format": "ggmlv3",
384
- "model_size_in_billions": 7,
385
- "quantizations": [
386
- "q4_0",
387
- "q4_1",
388
- "q5_0",
389
- "q5_1",
390
- "q8_0"
391
- ],
392
- "model_id": "TheBloke/orca_mini_7B-GGML",
393
- "model_file_name_template": "orca-mini-7b.ggmlv3.{quantization}.bin"
394
- },
395
- {
396
- "model_format": "ggmlv3",
397
- "model_size_in_billions": 13,
398
- "quantizations": [
399
- "q4_0",
400
- "q4_1",
401
- "q5_0",
402
- "q5_1",
403
- "q8_0"
404
- ],
405
- "model_id": "TheBloke/orca_mini_13B-GGML",
406
- "model_file_name_template": "orca-mini-13b.ggmlv3.{quantization}.bin"
407
- }
408
- ],
409
- "prompt_style": {
410
- "style_name": "ADD_COLON_SINGLE",
411
- "system_prompt": "You are an AI assistant that follows instruction extremely well. Help as much as you can.",
412
- "roles": [
413
- "User",
414
- "Response"
415
- ],
416
- "intra_message_sep": "\n\n### "
417
- }
418
- },
419
68
  {
420
69
  "version": 1,
421
70
  "context_length": 2048,
@@ -561,111 +210,6 @@
561
210
  ]
562
211
  }
563
212
  },
564
- {
565
- "version": 1,
566
- "context_length": 2048,
567
- "model_name": "chatglm",
568
- "model_lang": [
569
- "en",
570
- "zh"
571
- ],
572
- "model_ability": [
573
- "chat"
574
- ],
575
- "model_description": "ChatGLM is an open-source General Language Model (GLM) based LLM trained on both Chinese and English data.",
576
- "model_specs": [
577
- {
578
- "model_format": "pytorch",
579
- "model_size_in_billions": 6,
580
- "quantizations": [
581
- "4-bit",
582
- "8-bit",
583
- "none"
584
- ],
585
- "model_id": "THUDM/chatglm-6b",
586
- "model_revision": "8b7d33596d18c5e83e2da052d05ca4db02e60620"
587
- }
588
- ],
589
- "prompt_style": {
590
- "style_name": "CHATGLM",
591
- "system_prompt": "",
592
- "roles": [
593
- "问",
594
- "答"
595
- ],
596
- "intra_message_sep": "\n"
597
- }
598
- },
599
- {
600
- "version": 1,
601
- "context_length": 8192,
602
- "model_name": "chatglm2",
603
- "model_lang": [
604
- "en",
605
- "zh"
606
- ],
607
- "model_ability": [
608
- "chat"
609
- ],
610
- "model_description": "ChatGLM2 is the second generation of ChatGLM, still open-source and trained on Chinese and English data.",
611
- "model_specs": [
612
- {
613
- "model_format": "pytorch",
614
- "model_size_in_billions": 6,
615
- "quantizations": [
616
- "4-bit",
617
- "8-bit",
618
- "none"
619
- ],
620
- "model_id": "THUDM/chatglm2-6b",
621
- "model_revision": "7fabe56db91e085c9c027f56f1c654d137bdba40"
622
- }
623
- ],
624
- "prompt_style": {
625
- "style_name": "CHATGLM",
626
- "system_prompt": "",
627
- "roles": [
628
- "问",
629
- "答"
630
- ],
631
- "intra_message_sep": "\n\n"
632
- }
633
- },
634
- {
635
- "version": 1,
636
- "context_length": 32768,
637
- "model_name": "chatglm2-32k",
638
- "model_lang": [
639
- "en",
640
- "zh"
641
- ],
642
- "model_ability": [
643
- "chat"
644
- ],
645
- "model_description": "ChatGLM2-32k is a special version of ChatGLM2, with a context window of 32k tokens instead of 8k.",
646
- "model_specs": [
647
- {
648
- "model_format": "pytorch",
649
- "model_size_in_billions": 6,
650
- "quantizations": [
651
- "4-bit",
652
- "8-bit",
653
- "none"
654
- ],
655
- "model_id": "THUDM/chatglm2-6b-32k",
656
- "model_revision": "a2065f5dc8253f036a209e642d7220a942d92765"
657
- }
658
- ],
659
- "prompt_style": {
660
- "style_name": "CHATGLM",
661
- "system_prompt": "",
662
- "roles": [
663
- "问",
664
- "答"
665
- ],
666
- "intra_message_sep": "\n\n"
667
- }
668
- },
669
213
  {
670
214
  "version": 1,
671
215
  "context_length": 8192,
@@ -819,7 +363,7 @@
819
363
  "none"
820
364
  ],
821
365
  "model_id": "THUDM/glm-4-9b-chat",
822
- "model_revision": "76f3474a854145aa4a9ed2612fee9bc8d4a8966b"
366
+ "model_revision": "aae8bd74af5c6dff63a49d7fbdcc89349ebf87aa"
823
367
  },
824
368
  {
825
369
  "model_format": "ggufv2",
@@ -890,7 +434,7 @@
890
434
  "none"
891
435
  ],
892
436
  "model_id": "THUDM/glm-4-9b-chat-1m",
893
- "model_revision": "715ddbe91082f976ff6a4ca06d59e5bbff6c3642"
437
+ "model_revision": "0aa722c7e0745dd21453427dd44c257dd253304f"
894
438
  },
895
439
  {
896
440
  "model_format": "ggufv2",
@@ -1148,70 +692,73 @@
1148
692
  "model_description": "Llama-2-Chat is a fine-tuned version of the Llama-2 LLM, specializing in chatting.",
1149
693
  "model_specs": [
1150
694
  {
1151
- "model_format": "ggmlv3",
695
+ "model_format": "ggufv2",
1152
696
  "model_size_in_billions": 7,
1153
697
  "quantizations": [
1154
- "q2_K",
1155
- "q3_K_L",
1156
- "q3_K_M",
1157
- "q3_K_S",
1158
- "q4_0",
1159
- "q4_1",
1160
- "q4_K_M",
1161
- "q4_K_S",
1162
- "q5_0",
1163
- "q5_1",
1164
- "q5_K_M",
1165
- "q5_K_S",
1166
- "q6_K",
1167
- "q8_0"
698
+ "Q2_K",
699
+ "Q3_K_S",
700
+ "Q3_K_M",
701
+ "Q3_K_L",
702
+ "Q4_0",
703
+ "Q4_K_S",
704
+ "Q4_K_M",
705
+ "Q5_0",
706
+ "Q5_K_S",
707
+ "Q5_K_M",
708
+ "Q6_K",
709
+ "Q8_0"
1168
710
  ],
1169
- "model_id": "TheBloke/Llama-2-7B-Chat-GGML",
1170
- "model_file_name_template": "llama-2-7b-chat.ggmlv3.{quantization}.bin"
711
+ "model_id": "TheBloke/Llama-2-7B-Chat-GGUF",
712
+ "model_file_name_template": "llama-2-7b-chat.{quantization}.gguf"
1171
713
  },
1172
714
  {
1173
- "model_format": "ggmlv3",
715
+ "model_format": "ggufv2",
1174
716
  "model_size_in_billions": 13,
1175
717
  "quantizations": [
1176
- "q2_K",
1177
- "q3_K_L",
1178
- "q3_K_M",
1179
- "q3_K_S",
1180
- "q4_0",
1181
- "q4_1",
1182
- "q4_K_M",
1183
- "q4_K_S",
1184
- "q5_0",
1185
- "q5_1",
1186
- "q5_K_M",
1187
- "q5_K_S",
1188
- "q6_K",
1189
- "q8_0"
718
+ "Q2_K",
719
+ "Q3_K_S",
720
+ "Q3_K_M",
721
+ "Q3_K_L",
722
+ "Q4_0",
723
+ "Q4_K_S",
724
+ "Q4_K_M",
725
+ "Q5_0",
726
+ "Q5_K_S",
727
+ "Q5_K_M",
728
+ "Q6_K",
729
+ "Q8_0"
1190
730
  ],
1191
- "model_id": "TheBloke/Llama-2-13B-chat-GGML",
1192
- "model_file_name_template": "llama-2-13b-chat.ggmlv3.{quantization}.bin"
731
+ "model_id": "TheBloke/Llama-2-13B-chat-GGUF",
732
+ "model_file_name_template": "llama-2-13b-chat.{quantization}.gguf"
1193
733
  },
1194
734
  {
1195
- "model_format": "ggmlv3",
735
+ "model_format": "ggufv2",
1196
736
  "model_size_in_billions": 70,
1197
737
  "quantizations": [
1198
- "q2_K",
1199
- "q3_K_L",
1200
- "q3_K_M",
1201
- "q3_K_S",
1202
- "q4_0",
1203
- "q4_1",
1204
- "q4_K_M",
1205
- "q4_K_S",
1206
- "q5_0",
1207
- "q5_1",
1208
- "q5_K_M",
1209
- "q5_K_S",
1210
- "q6_K",
1211
- "q8_0"
738
+ "Q2_K",
739
+ "Q3_K_S",
740
+ "Q3_K_M",
741
+ "Q3_K_L",
742
+ "Q4_0",
743
+ "Q4_K_S",
744
+ "Q4_K_M",
745
+ "Q5_0",
746
+ "Q5_K_S",
747
+ "Q5_K_M"
1212
748
  ],
1213
- "model_id": "TheBloke/Llama-2-70B-Chat-GGML",
1214
- "model_file_name_template": "llama-2-70b-chat.ggmlv3.{quantization}.bin"
749
+ "quantization_parts": {
750
+ "Q6_K": [
751
+ "split-a",
752
+ "split-b"
753
+ ],
754
+ "Q8_0": [
755
+ "split-a",
756
+ "split-b"
757
+ ]
758
+ },
759
+ "model_id": "TheBloke/Llama-2-70B-Chat-GGUF",
760
+ "model_file_name_template": "llama-2-70b-chat.{quantization}.gguf",
761
+ "model_file_name_split_template": "llama-2-70b-chat.{quantization}.gguf-{part}"
1215
762
  },
1216
763
  {
1217
764
  "model_format": "pytorch",
@@ -1293,64 +840,6 @@
1293
840
  ],
1294
841
  "model_id": "meta-llama/Llama-2-70b-chat-hf",
1295
842
  "model_revision": "36d9a7388cc80e5f4b3e9701ca2f250d21a96c30"
1296
- },
1297
- {
1298
- "model_format": "ggufv2",
1299
- "model_size_in_billions": 7,
1300
- "quantizations": [
1301
- "Q2_K",
1302
- "Q3_K_S",
1303
- "Q3_K_M",
1304
- "Q3_K_L",
1305
- "Q4_0",
1306
- "Q4_K_S",
1307
- "Q4_K_M",
1308
- "Q5_0",
1309
- "Q5_K_S",
1310
- "Q5_K_M",
1311
- "Q6_K",
1312
- "Q8_0"
1313
- ],
1314
- "model_id": "TheBloke/Llama-2-7B-Chat-GGUF",
1315
- "model_file_name_template": "llama-2-7b-chat.{quantization}.gguf"
1316
- },
1317
- {
1318
- "model_format": "ggufv2",
1319
- "model_size_in_billions": 13,
1320
- "quantizations": [
1321
- "Q2_K",
1322
- "Q3_K_S",
1323
- "Q3_K_M",
1324
- "Q3_K_L",
1325
- "Q4_0",
1326
- "Q4_K_S",
1327
- "Q4_K_M",
1328
- "Q5_0",
1329
- "Q5_K_S",
1330
- "Q5_K_M",
1331
- "Q6_K",
1332
- "Q8_0"
1333
- ],
1334
- "model_id": "TheBloke/Llama-2-13B-chat-GGUF",
1335
- "model_file_name_template": "llama-2-13b-chat.{quantization}.gguf"
1336
- },
1337
- {
1338
- "model_format": "ggufv2",
1339
- "model_size_in_billions": 70,
1340
- "quantizations": [
1341
- "Q2_K",
1342
- "Q3_K_S",
1343
- "Q3_K_M",
1344
- "Q3_K_L",
1345
- "Q4_0",
1346
- "Q4_K_S",
1347
- "Q4_K_M",
1348
- "Q5_0",
1349
- "Q5_K_S",
1350
- "Q5_K_M"
1351
- ],
1352
- "model_id": "TheBloke/Llama-2-70B-Chat-GGUF",
1353
- "model_file_name_template": "llama-2-70b-chat.{quantization}.gguf"
1354
843
  }
1355
844
  ],
1356
845
  "prompt_style": {
@@ -1383,26 +872,24 @@
1383
872
  "model_description": "Llama-2 is the second generation of Llama, open-source and trained on a larger amount of data.",
1384
873
  "model_specs": [
1385
874
  {
1386
- "model_format": "ggmlv3",
875
+ "model_format": "ggufv2",
1387
876
  "model_size_in_billions": 7,
1388
877
  "quantizations": [
1389
- "q2_K",
1390
- "q3_K_L",
1391
- "q3_K_M",
1392
- "q3_K_S",
1393
- "q4_0",
1394
- "q4_1",
1395
- "q4_K_M",
1396
- "q4_K_S",
1397
- "q5_0",
1398
- "q5_1",
1399
- "q5_K_M",
1400
- "q5_K_S",
1401
- "q6_K",
1402
- "q8_0"
878
+ "Q2_K",
879
+ "Q3_K_S",
880
+ "Q3_K_M",
881
+ "Q3_K_L",
882
+ "Q4_0",
883
+ "Q4_K_S",
884
+ "Q4_K_M",
885
+ "Q5_0",
886
+ "Q5_K_S",
887
+ "Q5_K_M",
888
+ "Q6_K",
889
+ "Q8_0"
1403
890
  ],
1404
- "model_id": "TheBloke/Llama-2-7B-GGML",
1405
- "model_file_name_template": "llama-2-7b.ggmlv3.{quantization}.bin"
891
+ "model_id": "TheBloke/Llama-2-7B-GGUF",
892
+ "model_file_name_template": "llama-2-7b.{quantization}.gguf"
1406
893
  },
1407
894
  {
1408
895
  "model_format": "gptq",
@@ -1421,48 +908,53 @@
1421
908
  "model_id": "TheBloke/Llama-2-7B-AWQ"
1422
909
  },
1423
910
  {
1424
- "model_format": "ggmlv3",
911
+ "model_format": "ggufv2",
1425
912
  "model_size_in_billions": 13,
1426
913
  "quantizations": [
1427
- "q2_K",
1428
- "q3_K_L",
1429
- "q3_K_M",
1430
- "q3_K_S",
1431
- "q4_0",
1432
- "q4_1",
1433
- "q4_K_M",
1434
- "q4_K_S",
1435
- "q5_0",
1436
- "q5_1",
1437
- "q5_K_M",
1438
- "q5_K_S",
1439
- "q6_K",
1440
- "q8_0"
914
+ "Q2_K",
915
+ "Q3_K_S",
916
+ "Q3_K_M",
917
+ "Q3_K_L",
918
+ "Q4_0",
919
+ "Q4_K_S",
920
+ "Q4_K_M",
921
+ "Q5_0",
922
+ "Q5_K_S",
923
+ "Q5_K_M",
924
+ "Q6_K",
925
+ "Q8_0"
1441
926
  ],
1442
- "model_id": "TheBloke/Llama-2-13B-GGML",
1443
- "model_file_name_template": "llama-2-13b.ggmlv3.{quantization}.bin"
927
+ "model_id": "TheBloke/Llama-2-13B-GGUF",
928
+ "model_file_name_template": "llama-2-13b.{quantization}.gguf"
1444
929
  },
1445
930
  {
1446
- "model_format": "ggmlv3",
931
+ "model_format": "ggufv2",
1447
932
  "model_size_in_billions": 70,
1448
933
  "quantizations": [
1449
- "q2_K",
1450
- "q3_K_L",
1451
- "q3_K_M",
1452
- "q3_K_S",
1453
- "q4_0",
1454
- "q4_1",
1455
- "q4_K_M",
1456
- "q4_K_S",
1457
- "q5_0",
1458
- "q5_1",
1459
- "q5_K_M",
1460
- "q5_K_S",
1461
- "q6_K",
1462
- "q8_0"
934
+ "Q2_K",
935
+ "Q3_K_S",
936
+ "Q3_K_M",
937
+ "Q3_K_L",
938
+ "Q4_0",
939
+ "Q4_K_S",
940
+ "Q4_K_M",
941
+ "Q5_0",
942
+ "Q5_K_S",
943
+ "Q5_K_M"
1463
944
  ],
1464
- "model_id": "TheBloke/Llama-2-70B-GGML",
1465
- "model_file_name_template": "llama-2-70b.ggmlv3.{quantization}.bin"
945
+ "quantization_parts": {
946
+ "Q6_K": [
947
+ "split-a",
948
+ "split-b"
949
+ ],
950
+ "Q8_0": [
951
+ "split-a",
952
+ "split-b"
953
+ ]
954
+ },
955
+ "model_id": "TheBloke/Llama-2-70B-GGUF",
956
+ "model_file_name_template": "llama-2-70b.{quantization}.gguf",
957
+ "model_file_name_split_template": "llama-2-70b.{quantization}.gguf-{part}"
1466
958
  },
1467
959
  {
1468
960
  "model_format": "pytorch",
@@ -2015,210 +1507,47 @@
2015
1507
  ],
2016
1508
  "prompt_style": {
2017
1509
  "style_name": "LLAMA3",
2018
- "system_prompt": "You are a helpful assistant.",
2019
- "roles": [
2020
- "user",
2021
- "assistant"
2022
- ],
2023
- "intra_message_sep": "\n\n",
2024
- "inter_message_sep": "<|eot_id|>",
2025
- "stop_token_ids": [
2026
- 128001,
2027
- 128009
2028
- ],
2029
- "stop": [
2030
- "<|end_of_text|>",
2031
- "<|eot_id|>"
2032
- ]
2033
- }
2034
- },
2035
- {
2036
- "version": 1,
2037
- "context_length": 2048,
2038
- "model_name": "opt",
2039
- "model_lang": [
2040
- "en"
2041
- ],
2042
- "model_ability": [
2043
- "generate"
2044
- ],
2045
- "model_description": "Opt is an open-source, decoder-only, Transformer based LLM that was designed to replicate GPT-3.",
2046
- "model_specs": [
2047
- {
2048
- "model_format": "pytorch",
2049
- "model_size_in_billions": 1,
2050
- "quantizations": [
2051
- "4-bit",
2052
- "8-bit",
2053
- "none"
2054
- ],
2055
- "model_id": "facebook/opt-125m",
2056
- "model_revision": "3d2b5f275bdf882b8775f902e1bfdb790e2cfc32"
2057
- }
2058
- ]
2059
- },
2060
- {
2061
- "version": 1,
2062
- "context_length": 2048,
2063
- "model_name": "falcon",
2064
- "model_lang": [
2065
- "en"
2066
- ],
2067
- "model_ability": [
2068
- "generate"
2069
- ],
2070
- "model_description": "Falcon is an open-source Transformer based LLM trained on the RefinedWeb dataset.",
2071
- "model_specs": [
2072
- {
2073
- "model_format": "pytorch",
2074
- "model_size_in_billions": 40,
2075
- "quantizations": [
2076
- "4-bit",
2077
- "8-bit",
2078
- "none"
2079
- ],
2080
- "model_id": "tiiuae/falcon-40b",
2081
- "model_revision": "561820f7eef0cc56a31ea38af15ca1acb07fab5d"
2082
- },
2083
- {
2084
- "model_format": "pytorch",
2085
- "model_size_in_billions": 7,
2086
- "quantizations": [
2087
- "4-bit",
2088
- "8-bit",
2089
- "none"
2090
- ],
2091
- "model_id": "tiiuae/falcon-7b",
2092
- "model_revision": "378337427557d1df3e742264a2901a49f25d4eb1"
2093
- }
2094
- ]
2095
- },
2096
- {
2097
- "version": 1,
2098
- "context_length": 2048,
2099
- "model_name": "falcon-instruct",
2100
- "model_lang": [
2101
- "en"
2102
- ],
2103
- "model_ability": [
2104
- "chat"
2105
- ],
2106
- "model_description": "Falcon-instruct is a fine-tuned version of the Falcon LLM, specializing in chatting.",
2107
- "model_specs": [
2108
- {
2109
- "model_format": "pytorch",
2110
- "model_size_in_billions": 7,
2111
- "quantizations": [
2112
- "4-bit",
2113
- "8-bit",
2114
- "none"
2115
- ],
2116
- "model_id": "tiiuae/falcon-7b-instruct",
2117
- "model_revision": "eb410fb6ffa9028e97adb801f0d6ec46d02f8b07"
2118
- },
2119
- {
2120
- "model_format": "pytorch",
2121
- "model_size_in_billions": 40,
2122
- "quantizations": [
2123
- "4-bit",
2124
- "8-bit",
2125
- "none"
2126
- ],
2127
- "model_id": "tiiuae/falcon-40b-instruct",
2128
- "model_revision": "ca78eac0ed45bf64445ff0687fabba1598daebf3"
2129
- }
2130
- ],
2131
- "prompt_style": {
2132
- "style_name": "FALCON",
2133
- "system_prompt": "",
2134
- "roles": [
2135
- "User",
2136
- "Assistant"
2137
- ],
2138
- "intra_message_sep": "\n",
2139
- "inter_message_sep": "<|endoftext|>",
2140
- "stop": [
2141
- "\nUser"
2142
- ],
2143
- "stop_token_ids": [
2144
- 0,
2145
- 1,
2146
- 2,
2147
- 3,
2148
- 4,
2149
- 5,
2150
- 6,
2151
- 7,
2152
- 8,
2153
- 9,
2154
- 10,
2155
- 11
2156
- ]
2157
- }
2158
- },
2159
- {
2160
- "version": 1,
2161
- "context_length": 8192,
2162
- "model_name": "starcoderplus",
2163
- "model_lang": [
2164
- "en"
2165
- ],
2166
- "model_ability": [
2167
- "generate"
2168
- ],
2169
- "model_description": "Starcoderplus is an open-source LLM trained by fine-tuning Starcoder on RedefinedWeb and StarCoderData datasets.",
2170
- "model_specs": [
2171
- {
2172
- "model_format": "pytorch",
2173
- "model_size_in_billions": 16,
2174
- "quantizations": [
2175
- "4-bit",
2176
- "8-bit",
2177
- "none"
2178
- ],
2179
- "model_id": "bigcode/starcoderplus",
2180
- "model_revision": "95be82087c33f14ee9941c812a154a9dd66efe72"
2181
- }
2182
- ],
2183
- "prompt_style": null
1510
+ "system_prompt": "You are a helpful assistant.",
1511
+ "roles": [
1512
+ "user",
1513
+ "assistant"
1514
+ ],
1515
+ "intra_message_sep": "\n\n",
1516
+ "inter_message_sep": "<|eot_id|>",
1517
+ "stop_token_ids": [
1518
+ 128001,
1519
+ 128009
1520
+ ],
1521
+ "stop": [
1522
+ "<|end_of_text|>",
1523
+ "<|eot_id|>"
1524
+ ]
1525
+ }
2184
1526
  },
2185
1527
  {
2186
1528
  "version": 1,
2187
- "context_length": 8192,
2188
- "model_name": "starchat-beta",
1529
+ "context_length": 2048,
1530
+ "model_name": "opt",
2189
1531
  "model_lang": [
2190
1532
  "en"
2191
1533
  ],
2192
1534
  "model_ability": [
2193
- "chat"
1535
+ "generate"
2194
1536
  ],
2195
- "model_description": "Starchat-beta is a fine-tuned version of the Starcoderplus LLM, specializing in coding assistance.",
1537
+ "model_description": "Opt is an open-source, decoder-only, Transformer based LLM that was designed to replicate GPT-3.",
2196
1538
  "model_specs": [
2197
1539
  {
2198
1540
  "model_format": "pytorch",
2199
- "model_size_in_billions": 16,
1541
+ "model_size_in_billions": 1,
2200
1542
  "quantizations": [
2201
1543
  "4-bit",
2202
1544
  "8-bit",
2203
1545
  "none"
2204
1546
  ],
2205
- "model_id": "HuggingFaceH4/starchat-beta",
2206
- "model_revision": "b1bcda690655777373f57ea6614eb095ec2c886f"
1547
+ "model_id": "facebook/opt-125m",
1548
+ "model_revision": "3d2b5f275bdf882b8775f902e1bfdb790e2cfc32"
2207
1549
  }
2208
- ],
2209
- "prompt_style": {
2210
- "style_name": "CHATML",
2211
- "system_prompt": "<system>{system_message}\n",
2212
- "roles": [
2213
- "<|user|>",
2214
- "<|assistant|>"
2215
- ],
2216
- "intra_message_sep": "<|end|>",
2217
- "stop_token_ids": [
2218
- 0,
2219
- 49155
2220
- ]
2221
- }
1550
+ ]
2222
1551
  },
2223
1552
  {
2224
1553
  "version": 1,
@@ -2984,6 +2313,46 @@
2984
2313
  ],
2985
2314
  "model_id": "Qwen/Qwen2-72B-Instruct-AWQ"
2986
2315
  },
2316
+ {
2317
+ "model_format": "fp8",
2318
+ "model_size_in_billions": "0_5",
2319
+ "quantizations": [
2320
+ "fp8"
2321
+ ],
2322
+ "model_id": "neuralmagic/Qwen2-0.5B-Instruct-FP8"
2323
+ },
2324
+ {
2325
+ "model_format": "fp8",
2326
+ "model_size_in_billions": "0_5",
2327
+ "quantizations": [
2328
+ "fp8"
2329
+ ],
2330
+ "model_id": "neuralmagic/Qwen2-0.5B-Instruct-FP8"
2331
+ },
2332
+ {
2333
+ "model_format": "fp8",
2334
+ "model_size_in_billions": "1_5",
2335
+ "quantizations": [
2336
+ "fp8"
2337
+ ],
2338
+ "model_id": "neuralmagic/Qwen2-1.5B-Instruct-FP8"
2339
+ },
2340
+ {
2341
+ "model_format": "fp8",
2342
+ "model_size_in_billions": 7,
2343
+ "quantizations": [
2344
+ "fp8"
2345
+ ],
2346
+ "model_id": "neuralmagic/Qwen2-7B-Instruct-FP8"
2347
+ },
2348
+ {
2349
+ "model_format": "fp8",
2350
+ "model_size_in_billions": 72,
2351
+ "quantizations": [
2352
+ "fp8"
2353
+ ],
2354
+ "model_id": "neuralmagic/Qwen2-72B-Instruct-FP8"
2355
+ },
2987
2356
  {
2988
2357
  "model_format": "mlx",
2989
2358
  "model_size_in_billions": "0_5",
@@ -3098,398 +2467,141 @@
3098
2467
  "00002-of-00002"
3099
2468
  ],
3100
2469
  "q8_0": [
3101
- "00001-of-00002",
3102
- "00002-of-00002"
3103
- ],
3104
- "fp16": [
3105
- "00001-of-00004",
3106
- "00002-of-00004",
3107
- "00003-of-00004",
3108
- "00004-of-00004"
3109
- ]
3110
- }
3111
- }
3112
- ],
3113
- "prompt_style": {
3114
- "style_name": "QWEN",
3115
- "system_prompt": "You are a helpful assistant.",
3116
- "roles": [
3117
- "user",
3118
- "assistant"
3119
- ],
3120
- "intra_message_sep": "\n",
3121
- "stop_token_ids": [
3122
- 151643,
3123
- 151644,
3124
- 151645
3125
- ],
3126
- "stop": [
3127
- "<|endoftext|>",
3128
- "<|im_start|>",
3129
- "<|im_end|>"
3130
- ]
3131
- }
3132
- },
3133
- {
3134
- "version": 1,
3135
- "context_length": 32768,
3136
- "model_name": "qwen2-moe-instruct",
3137
- "model_lang": [
3138
- "en",
3139
- "zh"
3140
- ],
3141
- "model_ability": [
3142
- "chat",
3143
- "tools"
3144
- ],
3145
- "model_description": "Qwen2 is the new series of Qwen large language models. ",
3146
- "model_specs": [
3147
- {
3148
- "model_format": "pytorch",
3149
- "model_size_in_billions": 14,
3150
- "quantizations": [
3151
- "4-bit",
3152
- "8-bit",
3153
- "none"
3154
- ],
3155
- "model_id": "Qwen/Qwen2-57B-A14B-Instruct"
3156
- },
3157
- {
3158
- "model_format": "gptq",
3159
- "model_size_in_billions": 14,
3160
- "quantizations": [
3161
- "Int4"
3162
- ],
3163
- "model_id": "Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4"
3164
- },
3165
- {
3166
- "model_format": "ggufv2",
3167
- "model_size_in_billions": 14,
3168
- "quantizations": [
3169
- "q3_k_m",
3170
- "q4_0",
3171
- "q4_k_m",
3172
- "q5_0",
3173
- "q5_k_m",
3174
- "q6_k",
3175
- "q8_0",
3176
- "fp16"
3177
- ],
3178
- "model_id": "Qwen/Qwen2-57B-A14B-Instruct-GGUF",
3179
- "model_file_name_template": "qwen2-57b-a14b-instruct-{quantization}.gguf",
3180
- "model_file_name_split_template": "qwen2-57b-a14b-instruct-{quantization}-{part}.gguf",
3181
- "quantization_parts": {
3182
- "q8_0": [
3183
- "00001-of-00002",
3184
- "00002-of-00002"
3185
- ],
3186
- "fp16": [
3187
- "00001-of-00003",
3188
- "00002-of-00003",
3189
- "00003-of-00003"
3190
- ]
3191
- }
3192
- }
3193
- ],
3194
- "prompt_style": {
3195
- "style_name": "QWEN",
3196
- "system_prompt": "You are a helpful assistant.",
3197
- "roles": [
3198
- "user",
3199
- "assistant"
3200
- ],
3201
- "intra_message_sep": "\n",
3202
- "stop_token_ids": [
3203
- 151643,
3204
- 151644,
3205
- 151645
3206
- ],
3207
- "stop": [
3208
- "<|endoftext|>",
3209
- "<|im_start|>",
3210
- "<|im_end|>"
3211
- ]
3212
- }
3213
- },
3214
- {
3215
- "version": 1,
3216
- "context_length": 8192,
3217
- "model_name": "starcoder",
3218
- "model_lang": [
3219
- "en"
3220
- ],
3221
- "model_ability": [
3222
- "generate"
3223
- ],
3224
- "model_description": "Starcoder is an open-source Transformer based LLM that is trained on permissively licensed data from GitHub.",
3225
- "model_specs": [
3226
- {
3227
- "model_format": "ggmlv3",
3228
- "model_size_in_billions": 16,
3229
- "quantizations": [
3230
- "q4_0",
3231
- "q4_1",
3232
- "q5_0",
3233
- "q5_1",
3234
- "q8_0"
3235
- ],
3236
- "model_id": "TheBloke/starcoder-GGML",
3237
- "model_file_name_template": "starcoder.ggmlv3.{quantization}.bin"
3238
- }
3239
- ]
3240
- },
3241
- {
3242
- "version": 1,
3243
- "context_length": 1024,
3244
- "model_name": "gpt-2",
3245
- "model_lang": [
3246
- "en"
3247
- ],
3248
- "model_ability": [
3249
- "generate"
3250
- ],
3251
- "model_description": "GPT-2 is a Transformer-based LLM that is trained on WebTest, a 40 GB dataset of Reddit posts with 3+ upvotes.",
3252
- "model_specs": [
3253
- {
3254
- "model_format": "pytorch",
3255
- "model_size_in_billions": "1_5",
3256
- "quantizations": [
3257
- "none"
3258
- ],
3259
- "model_id": "openai-community/gpt2",
3260
- "model_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e"
3261
- }
3262
- ]
3263
- },
3264
- {
3265
- "version": 1,
3266
- "context_length": 8192,
3267
- "model_name": "internlm-7b",
3268
- "model_lang": [
3269
- "en",
3270
- "zh"
3271
- ],
3272
- "model_ability": [
3273
- "generate"
3274
- ],
3275
- "model_description": "InternLM is a Transformer-based LLM that is trained on both Chinese and English data, focusing on practical scenarios.",
3276
- "model_specs": [
3277
- {
3278
- "model_format": "pytorch",
3279
- "model_size_in_billions": 7,
3280
- "quantizations": [
3281
- "4-bit",
3282
- "8-bit",
3283
- "none"
3284
- ],
3285
- "model_id": "internlm/internlm-7b",
3286
- "model_revision": "592b0efc83be3eb1cba8990c4caf41ce604b958c"
3287
- }
3288
- ]
3289
- },
3290
- {
3291
- "version": 1,
3292
- "context_length": 4096,
3293
- "model_name": "internlm-chat-7b",
3294
- "model_lang": [
3295
- "en",
3296
- "zh"
3297
- ],
3298
- "model_ability": [
3299
- "chat"
3300
- ],
3301
- "model_description": "Internlm-chat is a fine-tuned version of the Internlm LLM, specializing in chatting.",
3302
- "model_specs": [
3303
- {
3304
- "model_format": "pytorch",
3305
- "model_size_in_billions": 7,
3306
- "quantizations": [
3307
- "4-bit",
3308
- "8-bit",
3309
- "none"
3310
- ],
3311
- "model_id": "internlm/internlm-chat-7b",
3312
- "model_revision": "d4fa2dbcbd2fa4edfa6735aa2ba0f0577fed6a62"
2470
+ "00001-of-00002",
2471
+ "00002-of-00002"
2472
+ ],
2473
+ "fp16": [
2474
+ "00001-of-00004",
2475
+ "00002-of-00004",
2476
+ "00003-of-00004",
2477
+ "00004-of-00004"
2478
+ ]
2479
+ }
3313
2480
  }
3314
2481
  ],
3315
2482
  "prompt_style": {
3316
- "style_name": "INTERNLM",
3317
- "system_prompt": "",
2483
+ "style_name": "QWEN",
2484
+ "system_prompt": "You are a helpful assistant.",
3318
2485
  "roles": [
3319
- "<|User|>",
3320
- "<|Bot|>"
2486
+ "user",
2487
+ "assistant"
3321
2488
  ],
3322
- "intra_message_sep": "<eoh>\n",
3323
- "inter_message_sep": "<eoa>\n",
2489
+ "intra_message_sep": "\n",
3324
2490
  "stop_token_ids": [
3325
- 1,
3326
- 103028
2491
+ 151643,
2492
+ 151644,
2493
+ 151645
3327
2494
  ],
3328
2495
  "stop": [
3329
- "<eoa>"
2496
+ "<|endoftext|>",
2497
+ "<|im_start|>",
2498
+ "<|im_end|>"
3330
2499
  ]
3331
2500
  }
3332
2501
  },
3333
2502
  {
3334
2503
  "version": 1,
3335
- "context_length": 16384,
3336
- "model_name": "internlm-20b",
3337
- "model_lang": [
3338
- "en",
3339
- "zh"
3340
- ],
3341
- "model_ability": [
3342
- "generate"
3343
- ],
3344
- "model_description": "Pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data.",
3345
- "model_specs": [
3346
- {
3347
- "model_format": "pytorch",
3348
- "model_size_in_billions": 20,
3349
- "quantizations": [
3350
- "4-bit",
3351
- "8-bit",
3352
- "none"
3353
- ],
3354
- "model_id": "internlm/internlm-20b",
3355
- "model_revision": "c56a72957239b490ea206ea857e86611b3f65f3a"
3356
- }
3357
- ]
3358
- },
3359
- {
3360
- "version": 1,
3361
- "context_length": 16384,
3362
- "model_name": "internlm-chat-20b",
2504
+ "context_length": 32768,
2505
+ "model_name": "qwen2-moe-instruct",
3363
2506
  "model_lang": [
3364
2507
  "en",
3365
2508
  "zh"
3366
2509
  ],
3367
2510
  "model_ability": [
3368
- "chat"
2511
+ "chat",
2512
+ "tools"
3369
2513
  ],
3370
- "model_description": "Pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data. The Chat version has undergone SFT and RLHF training.",
2514
+ "model_description": "Qwen2 is the new series of Qwen large language models. ",
3371
2515
  "model_specs": [
3372
2516
  {
3373
2517
  "model_format": "pytorch",
3374
- "model_size_in_billions": 20,
2518
+ "model_size_in_billions": 14,
3375
2519
  "quantizations": [
3376
2520
  "4-bit",
3377
2521
  "8-bit",
3378
2522
  "none"
3379
2523
  ],
3380
- "model_id": "internlm/internlm-chat-20b",
3381
- "model_revision": "c67e80e42c4950ebae18a955c9fe138c5ceb5b10"
3382
- }
3383
- ],
3384
- "prompt_style": {
3385
- "style_name": "INTERNLM",
3386
- "system_prompt": "",
3387
- "roles": [
3388
- "<|User|>",
3389
- "<|Bot|>"
3390
- ],
3391
- "intra_message_sep": "<eoh>\n",
3392
- "inter_message_sep": "<eoa>\n",
3393
- "stop_token_ids": [
3394
- 1,
3395
- 103028
3396
- ],
3397
- "stop": [
3398
- "<eoa>"
3399
- ]
3400
- }
3401
- },
3402
- {
3403
- "version": 1,
3404
- "context_length": 4096,
3405
- "model_name": "vicuna-v1.5",
3406
- "model_lang": [
3407
- "en"
3408
- ],
3409
- "model_ability": [
3410
- "chat"
3411
- ],
3412
- "model_description": "Vicuna is an open-source LLM trained by fine-tuning LLaMA on data collected from ShareGPT.",
3413
- "model_specs": [
2524
+ "model_id": "Qwen/Qwen2-57B-A14B-Instruct"
2525
+ },
3414
2526
  {
3415
- "model_format": "pytorch",
3416
- "model_size_in_billions": 7,
2527
+ "model_format": "gptq",
2528
+ "model_size_in_billions": 14,
3417
2529
  "quantizations": [
3418
- "4-bit",
3419
- "8-bit",
3420
- "none"
2530
+ "Int4"
3421
2531
  ],
3422
- "model_id": "lmsys/vicuna-7b-v1.5",
3423
- "model_revision": "de56c35b1763eaae20f4d60efd64af0a9091ebe5"
2532
+ "model_id": "Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4"
3424
2533
  },
3425
2534
  {
3426
- "model_format": "pytorch",
3427
- "model_size_in_billions": 13,
2535
+ "model_format": "ggufv2",
2536
+ "model_size_in_billions": 14,
3428
2537
  "quantizations": [
3429
- "4-bit",
3430
- "8-bit",
3431
- "none"
2538
+ "q3_k_m",
2539
+ "q4_0",
2540
+ "q4_k_m",
2541
+ "q5_0",
2542
+ "q5_k_m",
2543
+ "q6_k",
2544
+ "q8_0",
2545
+ "fp16"
3432
2546
  ],
3433
- "model_id": "lmsys/vicuna-13b-v1.5",
3434
- "model_revision": "3deb0106f72a3a433f0c6ea0cb978bdf14bcd3a6"
2547
+ "model_id": "Qwen/Qwen2-57B-A14B-Instruct-GGUF",
2548
+ "model_file_name_template": "qwen2-57b-a14b-instruct-{quantization}.gguf",
2549
+ "model_file_name_split_template": "qwen2-57b-a14b-instruct-{quantization}-{part}.gguf",
2550
+ "quantization_parts": {
2551
+ "q8_0": [
2552
+ "00001-of-00002",
2553
+ "00002-of-00002"
2554
+ ],
2555
+ "fp16": [
2556
+ "00001-of-00003",
2557
+ "00002-of-00003",
2558
+ "00003-of-00003"
2559
+ ]
2560
+ }
3435
2561
  }
3436
2562
  ],
3437
2563
  "prompt_style": {
3438
- "style_name": "ADD_COLON_TWO",
3439
- "system_prompt": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.",
2564
+ "style_name": "QWEN",
2565
+ "system_prompt": "You are a helpful assistant.",
3440
2566
  "roles": [
3441
- "USER",
3442
- "ASSISTANT"
2567
+ "user",
2568
+ "assistant"
3443
2569
  ],
3444
- "intra_message_sep": " ",
3445
- "inter_message_sep": "</s>"
2570
+ "intra_message_sep": "\n",
2571
+ "stop_token_ids": [
2572
+ 151643,
2573
+ 151644,
2574
+ 151645
2575
+ ],
2576
+ "stop": [
2577
+ "<|endoftext|>",
2578
+ "<|im_start|>",
2579
+ "<|im_end|>"
2580
+ ]
3446
2581
  }
3447
2582
  },
3448
2583
  {
3449
2584
  "version": 1,
3450
- "context_length": 16384,
3451
- "model_name": "vicuna-v1.5-16k",
2585
+ "context_length": 1024,
2586
+ "model_name": "gpt-2",
3452
2587
  "model_lang": [
3453
2588
  "en"
3454
2589
  ],
3455
2590
  "model_ability": [
3456
- "chat"
2591
+ "generate"
3457
2592
  ],
3458
- "model_description": "Vicuna-v1.5-16k is a special version of Vicuna-v1.5, with a context window of 16k tokens instead of 4k.",
2593
+ "model_description": "GPT-2 is a Transformer-based LLM that is trained on WebTest, a 40 GB dataset of Reddit posts with 3+ upvotes.",
3459
2594
  "model_specs": [
3460
2595
  {
3461
2596
  "model_format": "pytorch",
3462
- "model_size_in_billions": 7,
3463
- "quantizations": [
3464
- "4-bit",
3465
- "8-bit",
3466
- "none"
3467
- ],
3468
- "model_id": "lmsys/vicuna-7b-v1.5-16k",
3469
- "model_revision": "9a93d7d11fac7f3f9074510b80092b53bc1a5bec"
3470
- },
3471
- {
3472
- "model_format": "pytorch",
3473
- "model_size_in_billions": 13,
2597
+ "model_size_in_billions": "1_5",
3474
2598
  "quantizations": [
3475
- "4-bit",
3476
- "8-bit",
3477
2599
  "none"
3478
2600
  ],
3479
- "model_id": "lmsys/vicuna-13b-v1.5-16k",
3480
- "model_revision": "277697af19d4b267626ebc9f4e078d19a9a0fddf"
2601
+ "model_id": "openai-community/gpt2",
2602
+ "model_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e"
3481
2603
  }
3482
- ],
3483
- "prompt_style": {
3484
- "style_name": "ADD_COLON_TWO",
3485
- "system_prompt": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.",
3486
- "roles": [
3487
- "USER",
3488
- "ASSISTANT"
3489
- ],
3490
- "intra_message_sep": " ",
3491
- "inter_message_sep": "</s>"
3492
- }
2604
+ ]
3493
2605
  },
3494
2606
  {
3495
2607
  "version": 1,
@@ -5463,131 +4575,44 @@
  "model_file_name_template": "Yi-1.5-9B-Chat-16K.{quantization}.gguf"
  },
  {
- "model_format": "ggufv2",
- "model_size_in_billions": 34,
- "quantizations": [
- "Q2_K",
- "Q3_K_L",
- "Q3_K_M",
- "Q3_K_S",
- "Q4_K_M",
- "Q4_K_S",
- "Q5_K_M",
- "Q5_K_S",
- "Q6_K",
- "Q8_0"
- ],
- "model_id": "bartowski/Yi-1.5-34B-Chat-16K-GGUF",
- "model_file_name_template": "Yi-1.5-34B-Chat-16K-{quantization}.gguf"
- }
- ],
- "prompt_style": {
- "style_name": "CHATML",
- "system_prompt": "",
- "roles": [
- "<|im_start|>user",
- "<|im_start|>assistant"
- ],
- "intra_message_sep": "<|im_end|>",
- "inter_message_sep": "",
- "stop_token_ids": [
- 2,
- 6,
- 7,
- 8
- ],
- "stop": [
- "<|endoftext|>",
- "<|im_start|>",
- "<|im_end|>",
- "<|im_sep|>"
- ]
- }
- },
- {
- "version": 1,
- "context_length": 2048,
- "model_name": "OpenBuddy",
- "model_lang": [
- "en"
- ],
- "model_ability": [
- "chat"
- ],
- "model_description": "OpenBuddy is a powerful open multilingual chatbot model aimed at global users.",
- "model_specs": [
- {
- "model_format": "ggmlv3",
- "model_size_in_billions": 13,
- "quantizations": [
- "Q2_K",
- "Q3_K_S",
- "Q3_K_M",
- "Q3_K_L",
- "Q4_0",
- "Q4_1",
- "Q4_K_S",
- "Q4_K_M",
- "Q5_0",
- "Q5_1",
- "Q5_K_S",
- "Q5_K_M",
- "Q6_K",
- "Q8_0"
- ],
- "model_id": "TheBloke/OpenBuddy-Llama2-13B-v11.1-GGML",
- "model_file_name_template": "openbuddy-llama2-13b-v11.1.ggmlv3.{quantization}.bin"
- }
- ],
- "prompt_style": {
- "style_name": "INSTRUCTION",
- "system_prompt": "You are a professional translator. Be faithful or accurate in translation. Make the translation readable or intelligible. Be elegant or natural in translation. Do not translate person's name. Do not add any additional text to the translation. Do not give me any comments or suggestions.\nUser:\n\n{0}\nAssistant:",
- "roles": [
- "User",
- "Assistant"
- ],
- "intra_message_sep": "",
- "inter_message_sep": ""
- }
- },
- {
- "version": 1,
- "context_length": 16384,
- "model_name": "glaive-coder",
- "model_description": "A code model trained on a dataset of ~140k programming related problems and solutions generated from Glaive’s synthetic data generation platform.",
- "model_lang": [
- "en"
- ],
- "model_ability": [
- "chat"
- ],
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 7,
+ "model_format": "ggufv2",
+ "model_size_in_billions": 34,
  "quantizations": [
- "4-bit",
- "8-bit",
- "none"
+ "Q2_K",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
  ],
- "model_id": "glaiveai/glaive-coder-7b",
- "model_revision": "72a255a58480ef0713eed988312fe82f77f94f37"
+ "model_id": "bartowski/Yi-1.5-34B-Chat-16K-GGUF",
+ "model_file_name_template": "Yi-1.5-34B-Chat-16K-{quantization}.gguf"
  }
  ],
  "prompt_style": {
- "style_name": "LLAMA2",
- "system_prompt": "<s>[INST] <<SYS>>\nWrite code to solve the following coding problem that obeys the constraints and passes the example test cases. Please wrap your code answer using ```:\n<</SYS>>\n\n",
+ "style_name": "CHATML",
+ "system_prompt": "",
  "roles": [
- "[INST]",
- "[/INST]"
+ "<|im_start|>user",
+ "<|im_start|>assistant"
  ],
- "intra_message_sep": " ",
- "inter_message_sep": " </s><s>",
+ "intra_message_sep": "<|im_end|>",
+ "inter_message_sep": "",
  "stop_token_ids": [
- 2
+ 2,
+ 6,
+ 7,
+ 8
  ],
  "stop": [
- "</s>"
+ "<|endoftext|>",
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|im_sep|>"
  ]
  }
  },
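For readers unfamiliar with the CHATML fields that now apply to the Yi-1.5 GGUF entry above: the roles carry their own "<|im_start|>" markers and every turn closes with intra_message_sep. The renderer below is illustrative only, not the package's implementation, and the "<|im_start|>system" line is an assumption borrowed from standard ChatML:

def render_chatml(system_prompt, roles, intra_message_sep, history):
    # history is a list of (role, text) pairs using the roles above; the
    # prompt ends with an open assistant turn for the model to complete.
    parts = []
    if system_prompt:
        parts.append(f"<|im_start|>system\n{system_prompt}{intra_message_sep}")
    for role, text in history:
        parts.append(f"{role}\n{text}{intra_message_sep}")
    parts.append(f"{roles[1]}\n")
    return "\n".join(parts)

prompt = render_chatml(
    "", ["<|im_start|>user", "<|im_start|>assistant"],
    "<|im_end|>", [("<|im_start|>user", "Hello")],
)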
@@ -6624,6 +5649,15 @@
  ],
  "model_description": "InternLM2.5 series of the InternLM model.",
  "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "1_8",
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "internlm/internlm2_5-1_8b-chat",
+ "model_revision": "4426f00b854561fa60d555d2b628064b56bcb758"
+ },
  {
  "model_format": "pytorch",
  "model_size_in_billions": 7,
@@ -6633,6 +5667,15 @@
  "model_id": "internlm/internlm2_5-7b-chat",
  "model_revision": "9dc8536a922ab4954726aad1b37fa199004a291a"
  },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 20,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "internlm/internlm2_5-20b-chat",
+ "model_revision": "ef17bde929761255fee76d95e2c25969ccd93b0d"
+ },
  {
  "model_format": "gptq",
  "model_size_in_billions": 7,
@@ -6642,6 +5685,23 @@
  "model_id": "ModelCloud/internlm-2.5-7b-chat-gptq-4bit",
  "model_revision": "2e2dda735c326544921a4035bbeb6c6e316a8254"
  },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "1_8",
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0",
+ "fp16"
+ ],
+ "model_id": "internlm/internlm2_5-1_8b-chat-gguf",
+ "model_file_name_template": "internlm2_5-1_8b-chat-{quantization}.gguf"
+ },
  {
  "model_format": "ggufv2",
  "model_size_in_billions": 7,
@@ -6659,6 +5719,23 @@
  "model_id": "internlm/internlm2_5-7b-chat-gguf",
  "model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf"
  },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 20,
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0",
+ "fp16"
+ ],
+ "model_id": "internlm/internlm2_5-20b-chat-gguf",
+ "model_file_name_template": "internlm2_5-20b-chat-{quantization}.gguf"
+ },
  {
  "model_format": "mlx",
  "model_size_in_billions": 7,
@@ -7142,6 +6219,16 @@
  ],
  "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
  "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 2,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-2-2b-it"
+ },
  {
  "model_format": "pytorch",
  "model_size_in_billions": 9,
@@ -7162,6 +6249,23 @@
  ],
  "model_id": "google/gemma-2-27b-it"
  },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 2,
+ "quantizations": [
+ "Q3_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "f32"
+ ],
+ "model_id": "bartowski/gemma-2-2b-it-GGUF",
+ "model_file_name_template": "gemma-2-2b-it-{quantization}.gguf"
+ },
  {
  "model_format": "ggufv2",
  "model_size_in_billions": 9,
@@ -7208,6 +6312,30 @@
  "model_id": "bartowski/gemma-2-27b-it-GGUF",
  "model_file_name_template": "gemma-2-27b-it-{quantization}.gguf"
  },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 2,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/gemma-2-2b-it-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 2,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/gemma-2-2b-it-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 2,
+ "quantizations": [
+ "None"
+ ],
+ "model_id": "mlx-community/gemma-2-2b-it"
+ },
  {
  "model_format": "mlx",
  "model_size_in_billions": 9,
@@ -7955,32 +7083,195 @@
  "model_format": "pytorch",
  "model_size_in_billions": 2,
  "quantizations": [
- "none"
+ "4-bit",
+ "8-bit",
+ "none"
  ],
  "model_id": "OpenGVLab/Mini-InternVL-Chat-2B-V1-5",
- "model_revision": "ce3f67acff17281bacbf4b156f402a0580fb9605"
+ "model_revision": "ecbbd21dcf38caa74d925967b997167b0c7b3f47"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "OpenGVLab/Mini-InternVL-Chat-4B-V1-5",
+ "model_revision": "ce1559ddf9d87f5130aa5233b0e93b95e4e4161a"
  },
  {
  "model_format": "pytorch",
  "model_size_in_billions": 26,
  "quantizations": [
- "none"
+ "4-bit",
+ "8-bit",
+ "none"
  ],
  "model_id": "OpenGVLab/InternVL-Chat-V1-5",
- "model_revision": "e822119e5806946ce128043023a73d715ecabf8d"
+ "model_revision": "9db32d9127cac0c85961e169d75da57a18a847b1"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "INTERNVL",
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
+ "roles": [
+ "<|im_start|>user",
+ "<|im_start|>assistant"
+ ],
+ "intra_message_sep": "<|im_end|>",
+ "stop_token_ids": [
+ 2,
+ 92543,
+ 92542
+ ],
+ "stop": [
+ "</s>",
+ "<|im_end|>",
+ "<|im_start|>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "internvl2",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "InternVL 2 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "OpenGVLab/InternVL2-1B",
+ "model_revision": "a9fc14aea824b6ea1d44f8778cad6b35512c4ce1"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 2,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "OpenGVLab/InternVL2-2B",
+ "model_revision": "422ad7c6335917bfb514958233955512338485a6"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 2,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "OpenGVLab/InternVL2-2B-AWQ",
+ "model_revision": "701bc3fc098a8a3b686b3b4135cfb77202be89e0"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "OpenGVLab/InternVL2-4B",
+ "model_revision": "b50544dafada6c41e80bfde2f57cc9b0140fc21c"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "OpenGVLab/InternVL2-8B",
+ "model_revision": "3bfd3664dea4f3da628785f5125d30f889701253"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "OpenGVLab/InternVL2-8B-AWQ",
+ "model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
  },
  {
  "model_format": "pytorch",
  "model_size_in_billions": 26,
  "quantizations": [
- "Int8"
+ "4-bit",
+ "8-bit",
+ "none"
  ],
- "model_id": "OpenGVLab/InternVL-Chat-V1-5-{quantization}",
- "model_revision": "acaaed06937c603ab04f084216ecb0268160f538"
+ "model_id": "OpenGVLab/InternVL2-26B",
+ "model_revision": "b9f3c7e6d575b0115e076a3ffc46fd20b7586899"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 26,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "OpenGVLab/InternVL2-26B-AWQ",
+ "model_revision": "469e0019ffd251e22ff6501a5c2321964e86ef0d"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 40,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "OpenGVLab/InternVL2-40B",
+ "model_revision": "725a12063bb855c966e30a0617d0ccd9e870d772"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 40,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "OpenGVLab/InternVL2-40B-AWQ",
+ "model_revision": "d92e140f6dfe8ea9679924c6a31898f42c4e1846"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 76,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "OpenGVLab/InternVL2-Llama3-76B",
+ "model_revision": "cf7914905f78e9e3560ddbd6f5dfc39becac494f"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 76,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
+ "model_revision": "1bc796bf80f2ebc7d6a14c15f55217a4600d50a4"
  }
  ],
  "prompt_style": {
- "style_name": "INTERNLM2",
+ "style_name": "INTERNVL",
  "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
  "roles": [
  "<|im_start|>user",
@@ -7988,10 +7279,14 @@
  ],
  "intra_message_sep": "<|im_end|>",
  "stop_token_ids": [
+ 2,
+ 92543,
  92542
  ],
  "stop": [
- "<|im_end|>"
+ "</s>",
+ "<|im_end|>",
+ "<|im_start|>"
  ]
  }
  },
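The widened stop set in this hunk guards decoding at both the token level (ids 2, 92543, 92542) and the text level (the literal strings). Illustrative only, not the package's generation loop: trimming emitted text at the first stop string.

def truncate_at_stop(text: str, stop: list) -> str:
    # Cut at the earliest occurrence of any stop string, if present.
    cut = len(text)
    for s in stop:
        idx = text.find(s)
        if idx != -1:
            cut = min(cut, idx)
    return text[:cut]

out = truncate_at_stop("Hi!<|im_end|>leftover", ["</s>", "<|im_end|>", "<|im_start|>"])
assert out == "Hi!"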
@@ -8047,6 +7342,51 @@
  ]
  }
  },
+ {
+ "version": 1,
+ "context_length": 8192,
+ "model_name": "cogvlm2-video-llama3-chat",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/cogvlm2-video-llama3-chat",
+ "model_revision": "f375ead7d8202ebe2c3d09f1068abdddeb2929fa"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "LLAMA3",
+ "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
+ "roles": [
+ "user",
+ "assistant"
+ ],
+ "intra_message_sep": "\n\n",
+ "inter_message_sep": "<|eot_id|>",
+ "stop_token_ids": [
+ 128001,
+ 128009
+ ],
+ "stop": [
+ "<|end_of_text|>",
+ "<|eot_id|>"
+ ]
+ }
+ },
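The cogvlm2-video-llama3-chat entry reuses the LLAMA3 prompt_style: plain "user"/"assistant" role names, "\n\n" between a role header and its text, and "<|eot_id|>" closing every turn. A rough sketch of the assembly using only the fields listed above; the layout is simplified and is not the package's renderer:

def render_llama3(system_prompt, intra_sep, inter_sep, history):
    # Every turn, including the system turn, is closed with inter_sep;
    # the prompt ends with an open assistant header.
    out = system_prompt + inter_sep
    for role, text in history:
        out += f"{role}{intra_sep}{text}{inter_sep}"
    return out + "assistant" + intra_sep

p = render_llama3(
    "A chat between a curious user and an artificial intelligence assistant.",
    "\n\n", "<|eot_id|>", [("user", "Describe the video.")],
)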
  {
  "version": 1,
  "context_length": 8192,