xinference 0.14.1.post1__py3-none-any.whl → 0.14.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +15 -34
- xinference/client/restful/restful_client.py +2 -2
- xinference/core/chat_interface.py +45 -10
- xinference/core/image_interface.py +9 -0
- xinference/core/model.py +8 -5
- xinference/core/scheduler.py +1 -2
- xinference/core/worker.py +49 -42
- xinference/deploy/cmdline.py +2 -2
- xinference/deploy/test/test_cmdline.py +7 -7
- xinference/model/audio/chattts.py +24 -9
- xinference/model/audio/core.py +8 -2
- xinference/model/audio/fish_speech.py +228 -0
- xinference/model/audio/model_spec.json +8 -0
- xinference/model/embedding/core.py +23 -1
- xinference/model/image/model_spec.json +2 -1
- xinference/model/image/model_spec_modelscope.json +2 -1
- xinference/model/image/stable_diffusion/core.py +49 -1
- xinference/model/llm/__init__.py +26 -27
- xinference/model/llm/{ggml/llamacpp.py → llama_cpp/core.py} +2 -35
- xinference/model/llm/llm_family.json +606 -1266
- xinference/model/llm/llm_family.py +16 -139
- xinference/model/llm/llm_family_modelscope.json +276 -313
- xinference/model/llm/lmdeploy/__init__.py +0 -0
- xinference/model/llm/lmdeploy/core.py +557 -0
- xinference/model/llm/memory.py +9 -9
- xinference/model/llm/sglang/core.py +2 -2
- xinference/model/llm/{pytorch → transformers}/chatglm.py +6 -13
- xinference/model/llm/{pytorch → transformers}/cogvlm2.py +4 -45
- xinference/model/llm/transformers/cogvlm2_video.py +524 -0
- xinference/model/llm/{pytorch → transformers}/core.py +3 -10
- xinference/model/llm/{pytorch → transformers}/glm4v.py +2 -23
- xinference/model/llm/transformers/intern_vl.py +540 -0
- xinference/model/llm/{pytorch → transformers}/internlm2.py +4 -8
- xinference/model/llm/{pytorch → transformers}/minicpmv25.py +2 -23
- xinference/model/llm/{pytorch → transformers}/minicpmv26.py +66 -41
- xinference/model/llm/{pytorch → transformers}/utils.py +1 -2
- xinference/model/llm/{pytorch → transformers}/yi_vl.py +2 -24
- xinference/model/llm/utils.py +85 -70
- xinference/model/llm/vllm/core.py +110 -11
- xinference/model/utils.py +1 -95
- xinference/thirdparty/fish_speech/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/callbacks/__init__.py +3 -0
- xinference/thirdparty/fish_speech/fish_speech/callbacks/grad_norm.py +113 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +2 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/concat_repeat.py +53 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_pb2.py +33 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_stream.py +36 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/semantic.py +496 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/vqgan.py +147 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/__init__.py +3 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/core.py +40 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +122 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +122 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +123 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +133 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +122 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/scan.py +122 -0
- xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lit_module.py +202 -0
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +779 -0
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lora.py +92 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +3 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +442 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +44 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +625 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +139 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +115 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +225 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/utils.py +94 -0
- xinference/thirdparty/fish_speech/fish_speech/scheduler.py +40 -0
- xinference/thirdparty/fish_speech/fish_speech/text/__init__.py +4 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_class.py +172 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_constant.py +30 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_util.py +342 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/cardinal.py +32 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/date.py +75 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/digit.py +32 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/fraction.py +35 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/money.py +43 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/percentage.py +33 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/telephone.py +51 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +177 -0
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +69 -0
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +130 -0
- xinference/thirdparty/fish_speech/fish_speech/train.py +139 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +23 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/braceexpand.py +217 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/context.py +13 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/file.py +16 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/instantiators.py +50 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/logger.py +55 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/logging_utils.py +48 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/rich_utils.py +100 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/spectrogram.py +122 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +114 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +120 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1237 -0
- xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/api.py +495 -0
- xinference/thirdparty/fish_speech/tools/auto_rerank.py +159 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +55 -0
- xinference/thirdparty/fish_speech/tools/extract_model.py +21 -0
- xinference/thirdparty/fish_speech/tools/file.py +108 -0
- xinference/thirdparty/fish_speech/tools/gen_ref.py +36 -0
- xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +169 -0
- xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +171 -0
- xinference/thirdparty/fish_speech/tools/llama/generate.py +698 -0
- xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +95 -0
- xinference/thirdparty/fish_speech/tools/llama/quantize.py +497 -0
- xinference/thirdparty/fish_speech/tools/llama/rebuild_tokenizer.py +57 -0
- xinference/thirdparty/fish_speech/tools/merge_asr_files.py +55 -0
- xinference/thirdparty/fish_speech/tools/post_api.py +164 -0
- xinference/thirdparty/fish_speech/tools/sensevoice/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/sensevoice/auto_model.py +573 -0
- xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +332 -0
- xinference/thirdparty/fish_speech/tools/sensevoice/vad_utils.py +61 -0
- xinference/thirdparty/fish_speech/tools/smart_pad.py +47 -0
- xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/vqgan/create_train_split.py +83 -0
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +227 -0
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +120 -0
- xinference/thirdparty/fish_speech/tools/webui.py +619 -0
- xinference/thirdparty/fish_speech/tools/whisper_asr.py +176 -0
- xinference/thirdparty/internvl/__init__.py +0 -0
- xinference/thirdparty/internvl/conversation.py +393 -0
- xinference/thirdparty/omnilmm/model/utils.py +16 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.661c7b0a.js +3 -0
- xinference/web/ui/build/static/js/{main.17ca0398.js.map → main.661c7b0a.js.map} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5391543180fead1eeef5364300301498d58a7d91d62de3841a32768b67f4552f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/714c37ce0ec5b5c591033f02be2f3f491fdd70da3ef568ee4a4f94689a3d5ca2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a797831de0dc74897f4b50b3426555d748f328b4c2cc391de709eadaf6a5f3e3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e91938976f229ce986b2907e51e1f00540b584ced0a315d498c172d13220739d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +1 -0
- {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/METADATA +22 -13
- {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/RECORD +170 -79
- xinference/locale/utils.py +0 -39
- xinference/locale/zh_CN.json +0 -26
- xinference/model/llm/ggml/tools/__init__.py +0 -15
- xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py +0 -498
- xinference/model/llm/ggml/tools/gguf.py +0 -884
- xinference/model/llm/pytorch/__init__.py +0 -13
- xinference/model/llm/pytorch/baichuan.py +0 -81
- xinference/model/llm/pytorch/falcon.py +0 -138
- xinference/model/llm/pytorch/intern_vl.py +0 -352
- xinference/model/llm/pytorch/vicuna.py +0 -69
- xinference/web/ui/build/static/js/main.17ca0398.js +0 -3
- xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2f40209b32e7e46a2eab6b8c8a355eb42c3caa8bc3228dd929f32fd2b3940294.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/71684495d995c7e266eecc6a0ad8ea0284cc785f80abddf863789c57a6134969.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/80acd1edf31542ab1dcccfad02cb4b38f3325cff847a781fcce97500cfd6f878.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f28b83886159d83b84f099b05d607a822dca4dd7f2d8aa6d56fe08bab0b5b086.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +0 -1
- /xinference/{locale → model/llm/llama_cpp}/__init__.py +0 -0
- /xinference/model/llm/{ggml → transformers}/__init__.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/compression.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/deepseek_vl.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/llama_2.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/omnilmm.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/qwen_vl.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/tensorizer_utils.py +0 -0
- /xinference/web/ui/build/static/js/{main.17ca0398.js.LICENSE.txt → main.661c7b0a.js.LICENSE.txt} +0 -0
- {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/LICENSE +0 -0
- {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/WHEEL +0 -0
- {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/entry_points.txt +0 -0
- {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/top_level.txt +0 -0
|
@@ -1,103 +1,4 @@
|
|
|
1
1
|
[
|
|
2
|
-
{
|
|
3
|
-
"version": 1,
|
|
4
|
-
"context_length": 4096,
|
|
5
|
-
"model_name": "baichuan",
|
|
6
|
-
"model_lang": [
|
|
7
|
-
"en",
|
|
8
|
-
"zh"
|
|
9
|
-
],
|
|
10
|
-
"model_ability": [
|
|
11
|
-
"generate"
|
|
12
|
-
],
|
|
13
|
-
"model_description": "Baichuan is an open-source Transformer based LLM that is trained on both Chinese and English data.",
|
|
14
|
-
"model_specs": [
|
|
15
|
-
{
|
|
16
|
-
"model_format": "ggmlv3",
|
|
17
|
-
"model_size_in_billions": 7,
|
|
18
|
-
"quantizations": [
|
|
19
|
-
"q2_K",
|
|
20
|
-
"q3_K_L",
|
|
21
|
-
"q3_K_M",
|
|
22
|
-
"q3_K_S",
|
|
23
|
-
"q4_0",
|
|
24
|
-
"q4_1",
|
|
25
|
-
"q4_K_M",
|
|
26
|
-
"q4_K_S",
|
|
27
|
-
"q5_0",
|
|
28
|
-
"q5_1",
|
|
29
|
-
"q5_K_M",
|
|
30
|
-
"q5_K_S",
|
|
31
|
-
"q6_K",
|
|
32
|
-
"q8_0"
|
|
33
|
-
],
|
|
34
|
-
"model_id": "TheBloke/baichuan-llama-7B-GGML",
|
|
35
|
-
"model_file_name_template": "baichuan-llama-7b.ggmlv3.{quantization}.bin"
|
|
36
|
-
},
|
|
37
|
-
{
|
|
38
|
-
"model_format": "pytorch",
|
|
39
|
-
"model_size_in_billions": 7,
|
|
40
|
-
"quantizations": [
|
|
41
|
-
"4-bit",
|
|
42
|
-
"8-bit",
|
|
43
|
-
"none"
|
|
44
|
-
],
|
|
45
|
-
"model_id": "baichuan-inc/Baichuan-7B",
|
|
46
|
-
"model_revision": "c1a5c7d5b7f50ecc51bb0e08150a9f12e5656756"
|
|
47
|
-
},
|
|
48
|
-
{
|
|
49
|
-
"model_format": "pytorch",
|
|
50
|
-
"model_size_in_billions": 13,
|
|
51
|
-
"quantizations": [
|
|
52
|
-
"4-bit",
|
|
53
|
-
"8-bit",
|
|
54
|
-
"none"
|
|
55
|
-
],
|
|
56
|
-
"model_id": "baichuan-inc/Baichuan-13B-Base",
|
|
57
|
-
"model_revision": "0ef0739c7bdd34df954003ef76d80f3dabca2ff9"
|
|
58
|
-
}
|
|
59
|
-
]
|
|
60
|
-
},
|
|
61
|
-
{
|
|
62
|
-
"version": 1,
|
|
63
|
-
"context_length": 4096,
|
|
64
|
-
"model_name": "baichuan-chat",
|
|
65
|
-
"model_lang": [
|
|
66
|
-
"en",
|
|
67
|
-
"zh"
|
|
68
|
-
],
|
|
69
|
-
"model_ability": [
|
|
70
|
-
"chat"
|
|
71
|
-
],
|
|
72
|
-
"model_description": "Baichuan-chat is a fine-tuned version of the Baichuan LLM, specializing in chatting.",
|
|
73
|
-
"model_specs": [
|
|
74
|
-
{
|
|
75
|
-
"model_format": "pytorch",
|
|
76
|
-
"model_size_in_billions": 13,
|
|
77
|
-
"quantizations": [
|
|
78
|
-
"4-bit",
|
|
79
|
-
"8-bit",
|
|
80
|
-
"none"
|
|
81
|
-
],
|
|
82
|
-
"model_id": "baichuan-inc/Baichuan-13B-Chat",
|
|
83
|
-
"model_revision": "19ef51ba5bad8935b03acd20ff04a269210983bc"
|
|
84
|
-
}
|
|
85
|
-
],
|
|
86
|
-
"prompt_style": {
|
|
87
|
-
"style_name": "NO_COLON_TWO",
|
|
88
|
-
"system_prompt": "",
|
|
89
|
-
"roles": [
|
|
90
|
-
" <reserved_102> ",
|
|
91
|
-
" <reserved_103> "
|
|
92
|
-
],
|
|
93
|
-
"intra_message_sep": "",
|
|
94
|
-
"inter_message_sep": "</s>",
|
|
95
|
-
"stop_token_ids": [
|
|
96
|
-
2,
|
|
97
|
-
195
|
|
98
|
-
]
|
|
99
|
-
}
|
|
100
|
-
},
|
|
101
2
|
{
|
|
102
3
|
"version": 1,
|
|
103
4
|
"context_length": 8194,
|
|
@@ -164,258 +65,6 @@
|
|
|
164
65
|
]
|
|
165
66
|
}
|
|
166
67
|
},
|
|
167
|
-
{
|
|
168
|
-
"version": 1,
|
|
169
|
-
"context_length": 2048,
|
|
170
|
-
"model_name": "wizardlm-v1.0",
|
|
171
|
-
"model_lang": [
|
|
172
|
-
"en"
|
|
173
|
-
],
|
|
174
|
-
"model_ability": [
|
|
175
|
-
"chat"
|
|
176
|
-
],
|
|
177
|
-
"model_description": "WizardLM is an open-source LLM trained by fine-tuning LLaMA with Evol-Instruct.",
|
|
178
|
-
"model_specs": [
|
|
179
|
-
{
|
|
180
|
-
"model_format": "ggmlv3",
|
|
181
|
-
"model_size_in_billions": 7,
|
|
182
|
-
"quantizations": [
|
|
183
|
-
"q2_K",
|
|
184
|
-
"q3_K_L",
|
|
185
|
-
"q3_K_M",
|
|
186
|
-
"q3_K_S",
|
|
187
|
-
"q4_0",
|
|
188
|
-
"q4_1",
|
|
189
|
-
"q4_K_M",
|
|
190
|
-
"q4_K_S",
|
|
191
|
-
"q5_0",
|
|
192
|
-
"q5_1",
|
|
193
|
-
"q5_K_M",
|
|
194
|
-
"q5_K_S",
|
|
195
|
-
"q6_K",
|
|
196
|
-
"q8_0"
|
|
197
|
-
],
|
|
198
|
-
"model_id": "TheBloke/WizardLM-7B-V1.0-Uncensored-GGML",
|
|
199
|
-
"model_file_name_template": "wizardlm-7b-v1.0-uncensored.ggmlv3.{quantization}.bin"
|
|
200
|
-
},
|
|
201
|
-
{
|
|
202
|
-
"model_format": "ggmlv3",
|
|
203
|
-
"model_size_in_billions": 13,
|
|
204
|
-
"quantizations": [
|
|
205
|
-
"q2_K",
|
|
206
|
-
"q3_K_L",
|
|
207
|
-
"q3_K_M",
|
|
208
|
-
"q3_K_S",
|
|
209
|
-
"q4_0",
|
|
210
|
-
"q4_1",
|
|
211
|
-
"q4_K_M",
|
|
212
|
-
"q4_K_S",
|
|
213
|
-
"q5_0",
|
|
214
|
-
"q5_1",
|
|
215
|
-
"q5_K_M",
|
|
216
|
-
"q5_K_S",
|
|
217
|
-
"q6_K",
|
|
218
|
-
"q8_0"
|
|
219
|
-
],
|
|
220
|
-
"model_id": "TheBloke/WizardLM-13B-V1.0-Uncensored-GGML",
|
|
221
|
-
"model_file_name_template": "wizardlm-13b-v1.0-uncensored.ggmlv3.{quantization}.bin"
|
|
222
|
-
}
|
|
223
|
-
],
|
|
224
|
-
"prompt_style": {
|
|
225
|
-
"style_name": "ADD_COLON_SINGLE",
|
|
226
|
-
"system_prompt": "You are a helpful AI assistant.",
|
|
227
|
-
"roles": [
|
|
228
|
-
"USER",
|
|
229
|
-
"ASSISTANT"
|
|
230
|
-
],
|
|
231
|
-
"intra_message_sep": "\n"
|
|
232
|
-
}
|
|
233
|
-
},
|
|
234
|
-
{
|
|
235
|
-
"version": 1,
|
|
236
|
-
"context_length": 2048,
|
|
237
|
-
"model_name": "vicuna-v1.3",
|
|
238
|
-
"model_lang": [
|
|
239
|
-
"en"
|
|
240
|
-
],
|
|
241
|
-
"model_ability": [
|
|
242
|
-
"chat"
|
|
243
|
-
],
|
|
244
|
-
"model_description": "Vicuna is an open-source LLM trained by fine-tuning LLaMA on data collected from ShareGPT.",
|
|
245
|
-
"model_specs": [
|
|
246
|
-
{
|
|
247
|
-
"model_format": "ggmlv3",
|
|
248
|
-
"model_size_in_billions": 7,
|
|
249
|
-
"quantizations": [
|
|
250
|
-
"q2_K",
|
|
251
|
-
"q3_K_L",
|
|
252
|
-
"q3_K_M",
|
|
253
|
-
"q3_K_S",
|
|
254
|
-
"q4_0",
|
|
255
|
-
"q4_1",
|
|
256
|
-
"q4_K_M",
|
|
257
|
-
"q4_K_S",
|
|
258
|
-
"q5_0",
|
|
259
|
-
"q5_1",
|
|
260
|
-
"q5_K_M",
|
|
261
|
-
"q5_K_S",
|
|
262
|
-
"q6_K",
|
|
263
|
-
"q8_0"
|
|
264
|
-
],
|
|
265
|
-
"model_id": "TheBloke/vicuna-7B-v1.3-GGML",
|
|
266
|
-
"model_file_name_template": "vicuna-7b-v1.3.ggmlv3.{quantization}.bin"
|
|
267
|
-
},
|
|
268
|
-
{
|
|
269
|
-
"model_format": "ggmlv3",
|
|
270
|
-
"model_size_in_billions": 13,
|
|
271
|
-
"quantizations": [
|
|
272
|
-
"q2_K",
|
|
273
|
-
"q3_K_L",
|
|
274
|
-
"q3_K_M",
|
|
275
|
-
"q3_K_S",
|
|
276
|
-
"q4_0",
|
|
277
|
-
"q4_1",
|
|
278
|
-
"q4_K_M",
|
|
279
|
-
"q4_K_S",
|
|
280
|
-
"q5_0",
|
|
281
|
-
"q5_1",
|
|
282
|
-
"q5_K_M",
|
|
283
|
-
"q5_K_S",
|
|
284
|
-
"q6_K",
|
|
285
|
-
"q8_0"
|
|
286
|
-
],
|
|
287
|
-
"model_id": "TheBloke/vicuna-13b-v1.3.0-GGML",
|
|
288
|
-
"model_file_name_template": "vicuna-13b-v1.3.0.ggmlv3.{quantization}.bin"
|
|
289
|
-
},
|
|
290
|
-
{
|
|
291
|
-
"model_format": "ggmlv3",
|
|
292
|
-
"model_size_in_billions": 33,
|
|
293
|
-
"quantizations": [
|
|
294
|
-
"q2_K",
|
|
295
|
-
"q3_K_L",
|
|
296
|
-
"q3_K_M",
|
|
297
|
-
"q3_K_S",
|
|
298
|
-
"q4_0",
|
|
299
|
-
"q4_1",
|
|
300
|
-
"q4_K_M",
|
|
301
|
-
"q4_K_S",
|
|
302
|
-
"q5_0",
|
|
303
|
-
"q5_1",
|
|
304
|
-
"q5_K_M",
|
|
305
|
-
"q5_K_S",
|
|
306
|
-
"q6_K",
|
|
307
|
-
"q8_0"
|
|
308
|
-
],
|
|
309
|
-
"model_id": "TheBloke/vicuna-33B-GGML",
|
|
310
|
-
"model_file_name_template": "vicuna-33b.ggmlv3.{quantization}.bin"
|
|
311
|
-
},
|
|
312
|
-
{
|
|
313
|
-
"model_format": "pytorch",
|
|
314
|
-
"model_size_in_billions": 33,
|
|
315
|
-
"quantizations": [
|
|
316
|
-
"4-bit",
|
|
317
|
-
"8-bit",
|
|
318
|
-
"none"
|
|
319
|
-
],
|
|
320
|
-
"model_id": "lmsys/vicuna-33b-v1.3",
|
|
321
|
-
"model_revision": "ef8d6becf883fb3ce52e3706885f761819477ab4"
|
|
322
|
-
},
|
|
323
|
-
{
|
|
324
|
-
"model_format": "pytorch",
|
|
325
|
-
"model_size_in_billions": 13,
|
|
326
|
-
"quantizations": [
|
|
327
|
-
"4-bit",
|
|
328
|
-
"8-bit",
|
|
329
|
-
"none"
|
|
330
|
-
],
|
|
331
|
-
"model_id": "lmsys/vicuna-13b-v1.3",
|
|
332
|
-
"model_revision": "6566e9cb1787585d1147dcf4f9bc48f29e1328d2"
|
|
333
|
-
},
|
|
334
|
-
{
|
|
335
|
-
"model_format": "pytorch",
|
|
336
|
-
"model_size_in_billions": 7,
|
|
337
|
-
"quantizations": [
|
|
338
|
-
"4-bit",
|
|
339
|
-
"8-bit",
|
|
340
|
-
"none"
|
|
341
|
-
],
|
|
342
|
-
"model_id": "lmsys/vicuna-7b-v1.3",
|
|
343
|
-
"model_revision": "236eeeab96f0dc2e463f2bebb7bb49809279c6d6"
|
|
344
|
-
}
|
|
345
|
-
],
|
|
346
|
-
"prompt_style": {
|
|
347
|
-
"style_name": "ADD_COLON_TWO",
|
|
348
|
-
"system_prompt": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.",
|
|
349
|
-
"roles": [
|
|
350
|
-
"USER",
|
|
351
|
-
"ASSISTANT"
|
|
352
|
-
],
|
|
353
|
-
"intra_message_sep": " ",
|
|
354
|
-
"inter_message_sep": "</s>"
|
|
355
|
-
}
|
|
356
|
-
},
|
|
357
|
-
{
|
|
358
|
-
"version": 1,
|
|
359
|
-
"context_length": 2048,
|
|
360
|
-
"model_name": "orca",
|
|
361
|
-
"model_lang": [
|
|
362
|
-
"en"
|
|
363
|
-
],
|
|
364
|
-
"model_ability": [
|
|
365
|
-
"chat"
|
|
366
|
-
],
|
|
367
|
-
"model_description": "Orca is an LLM trained by fine-tuning LLaMA on explanation traces obtained from GPT-4.",
|
|
368
|
-
"model_specs": [
|
|
369
|
-
{
|
|
370
|
-
"model_format": "ggmlv3",
|
|
371
|
-
"model_size_in_billions": 3,
|
|
372
|
-
"quantizations": [
|
|
373
|
-
"q4_0",
|
|
374
|
-
"q4_1",
|
|
375
|
-
"q5_0",
|
|
376
|
-
"q5_1",
|
|
377
|
-
"q8_0"
|
|
378
|
-
],
|
|
379
|
-
"model_id": "TheBloke/orca_mini_3B-GGML",
|
|
380
|
-
"model_file_name_template": "orca-mini-3b.ggmlv3.{quantization}.bin"
|
|
381
|
-
},
|
|
382
|
-
{
|
|
383
|
-
"model_format": "ggmlv3",
|
|
384
|
-
"model_size_in_billions": 7,
|
|
385
|
-
"quantizations": [
|
|
386
|
-
"q4_0",
|
|
387
|
-
"q4_1",
|
|
388
|
-
"q5_0",
|
|
389
|
-
"q5_1",
|
|
390
|
-
"q8_0"
|
|
391
|
-
],
|
|
392
|
-
"model_id": "TheBloke/orca_mini_7B-GGML",
|
|
393
|
-
"model_file_name_template": "orca-mini-7b.ggmlv3.{quantization}.bin"
|
|
394
|
-
},
|
|
395
|
-
{
|
|
396
|
-
"model_format": "ggmlv3",
|
|
397
|
-
"model_size_in_billions": 13,
|
|
398
|
-
"quantizations": [
|
|
399
|
-
"q4_0",
|
|
400
|
-
"q4_1",
|
|
401
|
-
"q5_0",
|
|
402
|
-
"q5_1",
|
|
403
|
-
"q8_0"
|
|
404
|
-
],
|
|
405
|
-
"model_id": "TheBloke/orca_mini_13B-GGML",
|
|
406
|
-
"model_file_name_template": "orca-mini-13b.ggmlv3.{quantization}.bin"
|
|
407
|
-
}
|
|
408
|
-
],
|
|
409
|
-
"prompt_style": {
|
|
410
|
-
"style_name": "ADD_COLON_SINGLE",
|
|
411
|
-
"system_prompt": "You are an AI assistant that follows instruction extremely well. Help as much as you can.",
|
|
412
|
-
"roles": [
|
|
413
|
-
"User",
|
|
414
|
-
"Response"
|
|
415
|
-
],
|
|
416
|
-
"intra_message_sep": "\n\n### "
|
|
417
|
-
}
|
|
418
|
-
},
|
|
419
68
|
{
|
|
420
69
|
"version": 1,
|
|
421
70
|
"context_length": 2048,
|
|
@@ -561,111 +210,6 @@
|
|
|
561
210
|
]
|
|
562
211
|
}
|
|
563
212
|
},
|
|
564
|
-
{
|
|
565
|
-
"version": 1,
|
|
566
|
-
"context_length": 2048,
|
|
567
|
-
"model_name": "chatglm",
|
|
568
|
-
"model_lang": [
|
|
569
|
-
"en",
|
|
570
|
-
"zh"
|
|
571
|
-
],
|
|
572
|
-
"model_ability": [
|
|
573
|
-
"chat"
|
|
574
|
-
],
|
|
575
|
-
"model_description": "ChatGLM is an open-source General Language Model (GLM) based LLM trained on both Chinese and English data.",
|
|
576
|
-
"model_specs": [
|
|
577
|
-
{
|
|
578
|
-
"model_format": "pytorch",
|
|
579
|
-
"model_size_in_billions": 6,
|
|
580
|
-
"quantizations": [
|
|
581
|
-
"4-bit",
|
|
582
|
-
"8-bit",
|
|
583
|
-
"none"
|
|
584
|
-
],
|
|
585
|
-
"model_id": "THUDM/chatglm-6b",
|
|
586
|
-
"model_revision": "8b7d33596d18c5e83e2da052d05ca4db02e60620"
|
|
587
|
-
}
|
|
588
|
-
],
|
|
589
|
-
"prompt_style": {
|
|
590
|
-
"style_name": "CHATGLM",
|
|
591
|
-
"system_prompt": "",
|
|
592
|
-
"roles": [
|
|
593
|
-
"问",
|
|
594
|
-
"答"
|
|
595
|
-
],
|
|
596
|
-
"intra_message_sep": "\n"
|
|
597
|
-
}
|
|
598
|
-
},
|
|
599
|
-
{
|
|
600
|
-
"version": 1,
|
|
601
|
-
"context_length": 8192,
|
|
602
|
-
"model_name": "chatglm2",
|
|
603
|
-
"model_lang": [
|
|
604
|
-
"en",
|
|
605
|
-
"zh"
|
|
606
|
-
],
|
|
607
|
-
"model_ability": [
|
|
608
|
-
"chat"
|
|
609
|
-
],
|
|
610
|
-
"model_description": "ChatGLM2 is the second generation of ChatGLM, still open-source and trained on Chinese and English data.",
|
|
611
|
-
"model_specs": [
|
|
612
|
-
{
|
|
613
|
-
"model_format": "pytorch",
|
|
614
|
-
"model_size_in_billions": 6,
|
|
615
|
-
"quantizations": [
|
|
616
|
-
"4-bit",
|
|
617
|
-
"8-bit",
|
|
618
|
-
"none"
|
|
619
|
-
],
|
|
620
|
-
"model_id": "THUDM/chatglm2-6b",
|
|
621
|
-
"model_revision": "7fabe56db91e085c9c027f56f1c654d137bdba40"
|
|
622
|
-
}
|
|
623
|
-
],
|
|
624
|
-
"prompt_style": {
|
|
625
|
-
"style_name": "CHATGLM",
|
|
626
|
-
"system_prompt": "",
|
|
627
|
-
"roles": [
|
|
628
|
-
"问",
|
|
629
|
-
"答"
|
|
630
|
-
],
|
|
631
|
-
"intra_message_sep": "\n\n"
|
|
632
|
-
}
|
|
633
|
-
},
|
|
634
|
-
{
|
|
635
|
-
"version": 1,
|
|
636
|
-
"context_length": 32768,
|
|
637
|
-
"model_name": "chatglm2-32k",
|
|
638
|
-
"model_lang": [
|
|
639
|
-
"en",
|
|
640
|
-
"zh"
|
|
641
|
-
],
|
|
642
|
-
"model_ability": [
|
|
643
|
-
"chat"
|
|
644
|
-
],
|
|
645
|
-
"model_description": "ChatGLM2-32k is a special version of ChatGLM2, with a context window of 32k tokens instead of 8k.",
|
|
646
|
-
"model_specs": [
|
|
647
|
-
{
|
|
648
|
-
"model_format": "pytorch",
|
|
649
|
-
"model_size_in_billions": 6,
|
|
650
|
-
"quantizations": [
|
|
651
|
-
"4-bit",
|
|
652
|
-
"8-bit",
|
|
653
|
-
"none"
|
|
654
|
-
],
|
|
655
|
-
"model_id": "THUDM/chatglm2-6b-32k",
|
|
656
|
-
"model_revision": "a2065f5dc8253f036a209e642d7220a942d92765"
|
|
657
|
-
}
|
|
658
|
-
],
|
|
659
|
-
"prompt_style": {
|
|
660
|
-
"style_name": "CHATGLM",
|
|
661
|
-
"system_prompt": "",
|
|
662
|
-
"roles": [
|
|
663
|
-
"问",
|
|
664
|
-
"答"
|
|
665
|
-
],
|
|
666
|
-
"intra_message_sep": "\n\n"
|
|
667
|
-
}
|
|
668
|
-
},
|
|
669
213
|
{
|
|
670
214
|
"version": 1,
|
|
671
215
|
"context_length": 8192,
|
|
@@ -819,7 +363,7 @@
|
|
|
819
363
|
"none"
|
|
820
364
|
],
|
|
821
365
|
"model_id": "THUDM/glm-4-9b-chat",
|
|
822
|
-
"model_revision": "
|
|
366
|
+
"model_revision": "aae8bd74af5c6dff63a49d7fbdcc89349ebf87aa"
|
|
823
367
|
},
|
|
824
368
|
{
|
|
825
369
|
"model_format": "ggufv2",
|
|
@@ -890,7 +434,7 @@
|
|
|
890
434
|
"none"
|
|
891
435
|
],
|
|
892
436
|
"model_id": "THUDM/glm-4-9b-chat-1m",
|
|
893
|
-
"model_revision": "
|
|
437
|
+
"model_revision": "0aa722c7e0745dd21453427dd44c257dd253304f"
|
|
894
438
|
},
|
|
895
439
|
{
|
|
896
440
|
"model_format": "ggufv2",
|
|
@@ -1148,70 +692,73 @@
|
|
|
1148
692
|
"model_description": "Llama-2-Chat is a fine-tuned version of the Llama-2 LLM, specializing in chatting.",
|
|
1149
693
|
"model_specs": [
|
|
1150
694
|
{
|
|
1151
|
-
"model_format": "
|
|
695
|
+
"model_format": "ggufv2",
|
|
1152
696
|
"model_size_in_billions": 7,
|
|
1153
697
|
"quantizations": [
|
|
1154
|
-
"
|
|
1155
|
-
"
|
|
1156
|
-
"
|
|
1157
|
-
"
|
|
1158
|
-
"
|
|
1159
|
-
"
|
|
1160
|
-
"
|
|
1161
|
-
"
|
|
1162
|
-
"
|
|
1163
|
-
"
|
|
1164
|
-
"
|
|
1165
|
-
"
|
|
1166
|
-
"q6_K",
|
|
1167
|
-
"q8_0"
|
|
698
|
+
"Q2_K",
|
|
699
|
+
"Q3_K_S",
|
|
700
|
+
"Q3_K_M",
|
|
701
|
+
"Q3_K_L",
|
|
702
|
+
"Q4_0",
|
|
703
|
+
"Q4_K_S",
|
|
704
|
+
"Q4_K_M",
|
|
705
|
+
"Q5_0",
|
|
706
|
+
"Q5_K_S",
|
|
707
|
+
"Q5_K_M",
|
|
708
|
+
"Q6_K",
|
|
709
|
+
"Q8_0"
|
|
1168
710
|
],
|
|
1169
|
-
"model_id": "TheBloke/Llama-2-7B-Chat-
|
|
1170
|
-
"model_file_name_template": "llama-2-7b-chat.
|
|
711
|
+
"model_id": "TheBloke/Llama-2-7B-Chat-GGUF",
|
|
712
|
+
"model_file_name_template": "llama-2-7b-chat.{quantization}.gguf"
|
|
1171
713
|
},
|
|
1172
714
|
{
|
|
1173
|
-
"model_format": "
|
|
715
|
+
"model_format": "ggufv2",
|
|
1174
716
|
"model_size_in_billions": 13,
|
|
1175
717
|
"quantizations": [
|
|
1176
|
-
"
|
|
1177
|
-
"
|
|
1178
|
-
"
|
|
1179
|
-
"
|
|
1180
|
-
"
|
|
1181
|
-
"
|
|
1182
|
-
"
|
|
1183
|
-
"
|
|
1184
|
-
"
|
|
1185
|
-
"
|
|
1186
|
-
"
|
|
1187
|
-
"
|
|
1188
|
-
"q6_K",
|
|
1189
|
-
"q8_0"
|
|
718
|
+
"Q2_K",
|
|
719
|
+
"Q3_K_S",
|
|
720
|
+
"Q3_K_M",
|
|
721
|
+
"Q3_K_L",
|
|
722
|
+
"Q4_0",
|
|
723
|
+
"Q4_K_S",
|
|
724
|
+
"Q4_K_M",
|
|
725
|
+
"Q5_0",
|
|
726
|
+
"Q5_K_S",
|
|
727
|
+
"Q5_K_M",
|
|
728
|
+
"Q6_K",
|
|
729
|
+
"Q8_0"
|
|
1190
730
|
],
|
|
1191
|
-
"model_id": "TheBloke/Llama-2-13B-chat-
|
|
1192
|
-
"model_file_name_template": "llama-2-13b-chat.
|
|
731
|
+
"model_id": "TheBloke/Llama-2-13B-chat-GGUF",
|
|
732
|
+
"model_file_name_template": "llama-2-13b-chat.{quantization}.gguf"
|
|
1193
733
|
},
|
|
1194
734
|
{
|
|
1195
|
-
"model_format": "
|
|
735
|
+
"model_format": "ggufv2",
|
|
1196
736
|
"model_size_in_billions": 70,
|
|
1197
737
|
"quantizations": [
|
|
1198
|
-
"
|
|
1199
|
-
"
|
|
1200
|
-
"
|
|
1201
|
-
"
|
|
1202
|
-
"
|
|
1203
|
-
"
|
|
1204
|
-
"
|
|
1205
|
-
"
|
|
1206
|
-
"
|
|
1207
|
-
"
|
|
1208
|
-
"q5_K_M",
|
|
1209
|
-
"q5_K_S",
|
|
1210
|
-
"q6_K",
|
|
1211
|
-
"q8_0"
|
|
738
|
+
"Q2_K",
|
|
739
|
+
"Q3_K_S",
|
|
740
|
+
"Q3_K_M",
|
|
741
|
+
"Q3_K_L",
|
|
742
|
+
"Q4_0",
|
|
743
|
+
"Q4_K_S",
|
|
744
|
+
"Q4_K_M",
|
|
745
|
+
"Q5_0",
|
|
746
|
+
"Q5_K_S",
|
|
747
|
+
"Q5_K_M"
|
|
1212
748
|
],
|
|
1213
|
-
"
|
|
1214
|
-
|
|
749
|
+
"quantization_parts": {
|
|
750
|
+
"Q6_K": [
|
|
751
|
+
"split-a",
|
|
752
|
+
"split-b"
|
|
753
|
+
],
|
|
754
|
+
"Q8_0": [
|
|
755
|
+
"split-a",
|
|
756
|
+
"split-b"
|
|
757
|
+
]
|
|
758
|
+
},
|
|
759
|
+
"model_id": "TheBloke/Llama-2-70B-Chat-GGUF",
|
|
760
|
+
"model_file_name_template": "llama-2-70b-chat.{quantization}.gguf",
|
|
761
|
+
"model_file_name_split_template": "llama-2-70b-chat.{quantization}.gguf-{part}"
|
|
1215
762
|
},
|
|
1216
763
|
{
|
|
1217
764
|
"model_format": "pytorch",
|
|
@@ -1293,64 +840,6 @@
|
|
|
1293
840
|
],
|
|
1294
841
|
"model_id": "meta-llama/Llama-2-70b-chat-hf",
|
|
1295
842
|
"model_revision": "36d9a7388cc80e5f4b3e9701ca2f250d21a96c30"
|
|
1296
|
-
},
|
|
1297
|
-
{
|
|
1298
|
-
"model_format": "ggufv2",
|
|
1299
|
-
"model_size_in_billions": 7,
|
|
1300
|
-
"quantizations": [
|
|
1301
|
-
"Q2_K",
|
|
1302
|
-
"Q3_K_S",
|
|
1303
|
-
"Q3_K_M",
|
|
1304
|
-
"Q3_K_L",
|
|
1305
|
-
"Q4_0",
|
|
1306
|
-
"Q4_K_S",
|
|
1307
|
-
"Q4_K_M",
|
|
1308
|
-
"Q5_0",
|
|
1309
|
-
"Q5_K_S",
|
|
1310
|
-
"Q5_K_M",
|
|
1311
|
-
"Q6_K",
|
|
1312
|
-
"Q8_0"
|
|
1313
|
-
],
|
|
1314
|
-
"model_id": "TheBloke/Llama-2-7B-Chat-GGUF",
|
|
1315
|
-
"model_file_name_template": "llama-2-7b-chat.{quantization}.gguf"
|
|
1316
|
-
},
|
|
1317
|
-
{
|
|
1318
|
-
"model_format": "ggufv2",
|
|
1319
|
-
"model_size_in_billions": 13,
|
|
1320
|
-
"quantizations": [
|
|
1321
|
-
"Q2_K",
|
|
1322
|
-
"Q3_K_S",
|
|
1323
|
-
"Q3_K_M",
|
|
1324
|
-
"Q3_K_L",
|
|
1325
|
-
"Q4_0",
|
|
1326
|
-
"Q4_K_S",
|
|
1327
|
-
"Q4_K_M",
|
|
1328
|
-
"Q5_0",
|
|
1329
|
-
"Q5_K_S",
|
|
1330
|
-
"Q5_K_M",
|
|
1331
|
-
"Q6_K",
|
|
1332
|
-
"Q8_0"
|
|
1333
|
-
],
|
|
1334
|
-
"model_id": "TheBloke/Llama-2-13B-chat-GGUF",
|
|
1335
|
-
"model_file_name_template": "llama-2-13b-chat.{quantization}.gguf"
|
|
1336
|
-
},
|
|
1337
|
-
{
|
|
1338
|
-
"model_format": "ggufv2",
|
|
1339
|
-
"model_size_in_billions": 70,
|
|
1340
|
-
"quantizations": [
|
|
1341
|
-
"Q2_K",
|
|
1342
|
-
"Q3_K_S",
|
|
1343
|
-
"Q3_K_M",
|
|
1344
|
-
"Q3_K_L",
|
|
1345
|
-
"Q4_0",
|
|
1346
|
-
"Q4_K_S",
|
|
1347
|
-
"Q4_K_M",
|
|
1348
|
-
"Q5_0",
|
|
1349
|
-
"Q5_K_S",
|
|
1350
|
-
"Q5_K_M"
|
|
1351
|
-
],
|
|
1352
|
-
"model_id": "TheBloke/Llama-2-70B-Chat-GGUF",
|
|
1353
|
-
"model_file_name_template": "llama-2-70b-chat.{quantization}.gguf"
|
|
1354
843
|
}
|
|
1355
844
|
],
|
|
1356
845
|
"prompt_style": {
|
|
@@ -1383,26 +872,24 @@
|
|
|
1383
872
|
"model_description": "Llama-2 is the second generation of Llama, open-source and trained on a larger amount of data.",
|
|
1384
873
|
"model_specs": [
|
|
1385
874
|
{
|
|
1386
|
-
"model_format": "
|
|
875
|
+
"model_format": "ggufv2",
|
|
1387
876
|
"model_size_in_billions": 7,
|
|
1388
877
|
"quantizations": [
|
|
1389
|
-
"
|
|
1390
|
-
"
|
|
1391
|
-
"
|
|
1392
|
-
"
|
|
1393
|
-
"
|
|
1394
|
-
"
|
|
1395
|
-
"
|
|
1396
|
-
"
|
|
1397
|
-
"
|
|
1398
|
-
"
|
|
1399
|
-
"
|
|
1400
|
-
"
|
|
1401
|
-
"q6_K",
|
|
1402
|
-
"q8_0"
|
|
878
|
+
"Q2_K",
|
|
879
|
+
"Q3_K_S",
|
|
880
|
+
"Q3_K_M",
|
|
881
|
+
"Q3_K_L",
|
|
882
|
+
"Q4_0",
|
|
883
|
+
"Q4_K_S",
|
|
884
|
+
"Q4_K_M",
|
|
885
|
+
"Q5_0",
|
|
886
|
+
"Q5_K_S",
|
|
887
|
+
"Q5_K_M",
|
|
888
|
+
"Q6_K",
|
|
889
|
+
"Q8_0"
|
|
1403
890
|
],
|
|
1404
|
-
"model_id": "TheBloke/Llama-2-7B-
|
|
1405
|
-
"model_file_name_template": "llama-2-7b.
|
|
891
|
+
"model_id": "TheBloke/Llama-2-7B-GGUF",
|
|
892
|
+
"model_file_name_template": "llama-2-7b.{quantization}.gguf"
|
|
1406
893
|
},
|
|
1407
894
|
{
|
|
1408
895
|
"model_format": "gptq",
|
|
@@ -1421,48 +908,53 @@
|
|
|
1421
908
|
"model_id": "TheBloke/Llama-2-7B-AWQ"
|
|
1422
909
|
},
|
|
1423
910
|
{
|
|
1424
|
-
"model_format": "
|
|
911
|
+
"model_format": "ggufv2",
|
|
1425
912
|
"model_size_in_billions": 13,
|
|
1426
913
|
"quantizations": [
|
|
1427
|
-
"
|
|
1428
|
-
"
|
|
1429
|
-
"
|
|
1430
|
-
"
|
|
1431
|
-
"
|
|
1432
|
-
"
|
|
1433
|
-
"
|
|
1434
|
-
"
|
|
1435
|
-
"
|
|
1436
|
-
"
|
|
1437
|
-
"
|
|
1438
|
-
"
|
|
1439
|
-
"q6_K",
|
|
1440
|
-
"q8_0"
|
|
914
|
+
"Q2_K",
|
|
915
|
+
"Q3_K_S",
|
|
916
|
+
"Q3_K_M",
|
|
917
|
+
"Q3_K_L",
|
|
918
|
+
"Q4_0",
|
|
919
|
+
"Q4_K_S",
|
|
920
|
+
"Q4_K_M",
|
|
921
|
+
"Q5_0",
|
|
922
|
+
"Q5_K_S",
|
|
923
|
+
"Q5_K_M",
|
|
924
|
+
"Q6_K",
|
|
925
|
+
"Q8_0"
|
|
1441
926
|
],
|
|
1442
|
-
"model_id": "TheBloke/Llama-2-13B-
|
|
1443
|
-
"model_file_name_template": "llama-2-13b.
|
|
927
|
+
"model_id": "TheBloke/Llama-2-13B-GGUF",
|
|
928
|
+
"model_file_name_template": "llama-2-13b.{quantization}.gguf"
|
|
1444
929
|
},
|
|
1445
930
|
{
|
|
1446
|
-
"model_format": "
|
|
931
|
+
"model_format": "ggufv2",
|
|
1447
932
|
"model_size_in_billions": 70,
|
|
1448
933
|
"quantizations": [
|
|
1449
|
-
"
|
|
1450
|
-
"
|
|
1451
|
-
"
|
|
1452
|
-
"
|
|
1453
|
-
"
|
|
1454
|
-
"
|
|
1455
|
-
"
|
|
1456
|
-
"
|
|
1457
|
-
"
|
|
1458
|
-
"
|
|
1459
|
-
"q5_K_M",
|
|
1460
|
-
"q5_K_S",
|
|
1461
|
-
"q6_K",
|
|
1462
|
-
"q8_0"
|
|
934
|
+
"Q2_K",
|
|
935
|
+
"Q3_K_S",
|
|
936
|
+
"Q3_K_M",
|
|
937
|
+
"Q3_K_L",
|
|
938
|
+
"Q4_0",
|
|
939
|
+
"Q4_K_S",
|
|
940
|
+
"Q4_K_M",
|
|
941
|
+
"Q5_0",
|
|
942
|
+
"Q5_K_S",
|
|
943
|
+
"Q5_K_M"
|
|
1463
944
|
],
|
|
1464
|
-
"
|
|
1465
|
-
|
|
945
|
+
"quantization_parts": {
|
|
946
|
+
"Q6_K": [
|
|
947
|
+
"split-a",
|
|
948
|
+
"split-b"
|
|
949
|
+
],
|
|
950
|
+
"Q8_0": [
|
|
951
|
+
"split-a",
|
|
952
|
+
"split-b"
|
|
953
|
+
]
|
|
954
|
+
},
|
|
955
|
+
"model_id": "TheBloke/Llama-2-70B-GGUF",
|
|
956
|
+
"model_file_name_template": "llama-2-70b.{quantization}.gguf",
|
|
957
|
+
"model_file_name_split_template": "llama-2-70b.{quantization}.gguf-{part}"
|
|
1466
958
|
},
|
|
1467
959
|
{
|
|
1468
960
|
"model_format": "pytorch",
|
|
@@ -2015,210 +1507,47 @@
|
|
|
2015
1507
|
],
|
|
2016
1508
|
"prompt_style": {
|
|
2017
1509
|
"style_name": "LLAMA3",
|
|
2018
|
-
"system_prompt": "You are a helpful assistant.",
|
|
2019
|
-
"roles": [
|
|
2020
|
-
"user",
|
|
2021
|
-
"assistant"
|
|
2022
|
-
],
|
|
2023
|
-
"intra_message_sep": "\n\n",
|
|
2024
|
-
"inter_message_sep": "<|eot_id|>",
|
|
2025
|
-
"stop_token_ids": [
|
|
2026
|
-
128001,
|
|
2027
|
-
128009
|
|
2028
|
-
],
|
|
2029
|
-
"stop": [
|
|
2030
|
-
"<|end_of_text|>",
|
|
2031
|
-
"<|eot_id|>"
|
|
2032
|
-
]
|
|
2033
|
-
}
|
|
2034
|
-
},
|
|
2035
|
-
{
|
|
2036
|
-
"version": 1,
|
|
2037
|
-
"context_length": 2048,
|
|
2038
|
-
"model_name": "opt",
|
|
2039
|
-
"model_lang": [
|
|
2040
|
-
"en"
|
|
2041
|
-
],
|
|
2042
|
-
"model_ability": [
|
|
2043
|
-
"generate"
|
|
2044
|
-
],
|
|
2045
|
-
"model_description": "Opt is an open-source, decoder-only, Transformer based LLM that was designed to replicate GPT-3.",
|
|
2046
|
-
"model_specs": [
|
|
2047
|
-
{
|
|
2048
|
-
"model_format": "pytorch",
|
|
2049
|
-
"model_size_in_billions": 1,
|
|
2050
|
-
"quantizations": [
|
|
2051
|
-
"4-bit",
|
|
2052
|
-
"8-bit",
|
|
2053
|
-
"none"
|
|
2054
|
-
],
|
|
2055
|
-
"model_id": "facebook/opt-125m",
|
|
2056
|
-
"model_revision": "3d2b5f275bdf882b8775f902e1bfdb790e2cfc32"
|
|
2057
|
-
}
|
|
2058
|
-
]
|
|
2059
|
-
},
|
|
2060
|
-
{
|
|
2061
|
-
"version": 1,
|
|
2062
|
-
"context_length": 2048,
|
|
2063
|
-
"model_name": "falcon",
|
|
2064
|
-
"model_lang": [
|
|
2065
|
-
"en"
|
|
2066
|
-
],
|
|
2067
|
-
"model_ability": [
|
|
2068
|
-
"generate"
|
|
2069
|
-
],
|
|
2070
|
-
"model_description": "Falcon is an open-source Transformer based LLM trained on the RefinedWeb dataset.",
|
|
2071
|
-
"model_specs": [
|
|
2072
|
-
{
|
|
2073
|
-
"model_format": "pytorch",
|
|
2074
|
-
"model_size_in_billions": 40,
|
|
2075
|
-
"quantizations": [
|
|
2076
|
-
"4-bit",
|
|
2077
|
-
"8-bit",
|
|
2078
|
-
"none"
|
|
2079
|
-
],
|
|
2080
|
-
"model_id": "tiiuae/falcon-40b",
|
|
2081
|
-
"model_revision": "561820f7eef0cc56a31ea38af15ca1acb07fab5d"
|
|
2082
|
-
},
|
|
2083
|
-
{
|
|
2084
|
-
"model_format": "pytorch",
|
|
2085
|
-
"model_size_in_billions": 7,
|
|
2086
|
-
"quantizations": [
|
|
2087
|
-
"4-bit",
|
|
2088
|
-
"8-bit",
|
|
2089
|
-
"none"
|
|
2090
|
-
],
|
|
2091
|
-
"model_id": "tiiuae/falcon-7b",
|
|
2092
|
-
"model_revision": "378337427557d1df3e742264a2901a49f25d4eb1"
|
|
2093
|
-
}
|
|
2094
|
-
]
|
|
2095
|
-
},
|
|
2096
|
-
{
|
|
2097
|
-
"version": 1,
|
|
2098
|
-
"context_length": 2048,
|
|
2099
|
-
"model_name": "falcon-instruct",
|
|
2100
|
-
"model_lang": [
|
|
2101
|
-
"en"
|
|
2102
|
-
],
|
|
2103
|
-
"model_ability": [
|
|
2104
|
-
"chat"
|
|
2105
|
-
],
|
|
2106
|
-
"model_description": "Falcon-instruct is a fine-tuned version of the Falcon LLM, specializing in chatting.",
|
|
2107
|
-
"model_specs": [
|
|
2108
|
-
{
|
|
2109
|
-
"model_format": "pytorch",
|
|
2110
|
-
"model_size_in_billions": 7,
|
|
2111
|
-
"quantizations": [
|
|
2112
|
-
"4-bit",
|
|
2113
|
-
"8-bit",
|
|
2114
|
-
"none"
|
|
2115
|
-
],
|
|
2116
|
-
"model_id": "tiiuae/falcon-7b-instruct",
|
|
2117
|
-
"model_revision": "eb410fb6ffa9028e97adb801f0d6ec46d02f8b07"
|
|
2118
|
-
},
|
|
2119
|
-
{
|
|
2120
|
-
"model_format": "pytorch",
|
|
2121
|
-
"model_size_in_billions": 40,
|
|
2122
|
-
"quantizations": [
|
|
2123
|
-
"4-bit",
|
|
2124
|
-
"8-bit",
|
|
2125
|
-
"none"
|
|
2126
|
-
],
|
|
2127
|
-
"model_id": "tiiuae/falcon-40b-instruct",
|
|
2128
|
-
"model_revision": "ca78eac0ed45bf64445ff0687fabba1598daebf3"
|
|
2129
|
-
}
|
|
2130
|
-
],
|
|
2131
|
-
"prompt_style": {
|
|
2132
|
-
"style_name": "FALCON",
|
|
2133
|
-
"system_prompt": "",
|
|
2134
|
-
"roles": [
|
|
2135
|
-
"User",
|
|
2136
|
-
"Assistant"
|
|
2137
|
-
],
|
|
2138
|
-
"intra_message_sep": "\n",
|
|
2139
|
-
"inter_message_sep": "<|endoftext|>",
|
|
2140
|
-
"stop": [
|
|
2141
|
-
"\nUser"
|
|
2142
|
-
],
|
|
2143
|
-
"stop_token_ids": [
|
|
2144
|
-
0,
|
|
2145
|
-
1,
|
|
2146
|
-
2,
|
|
2147
|
-
3,
|
|
2148
|
-
4,
|
|
2149
|
-
5,
|
|
2150
|
-
6,
|
|
2151
|
-
7,
|
|
2152
|
-
8,
|
|
2153
|
-
9,
|
|
2154
|
-
10,
|
|
2155
|
-
11
|
|
2156
|
-
]
|
|
2157
|
-
}
|
|
2158
|
-
},
|
|
2159
|
-
{
|
|
2160
|
-
"version": 1,
|
|
2161
|
-
"context_length": 8192,
|
|
2162
|
-
"model_name": "starcoderplus",
|
|
2163
|
-
"model_lang": [
|
|
2164
|
-
"en"
|
|
2165
|
-
],
|
|
2166
|
-
"model_ability": [
|
|
2167
|
-
"generate"
|
|
2168
|
-
],
|
|
2169
|
-
"model_description": "Starcoderplus is an open-source LLM trained by fine-tuning Starcoder on RedefinedWeb and StarCoderData datasets.",
|
|
2170
|
-
"model_specs": [
|
|
2171
|
-
{
|
|
2172
|
-
"model_format": "pytorch",
|
|
2173
|
-
"model_size_in_billions": 16,
|
|
2174
|
-
"quantizations": [
|
|
2175
|
-
"4-bit",
|
|
2176
|
-
"8-bit",
|
|
2177
|
-
"none"
|
|
2178
|
-
],
|
|
2179
|
-
"model_id": "bigcode/starcoderplus",
|
|
2180
|
-
"model_revision": "95be82087c33f14ee9941c812a154a9dd66efe72"
|
|
2181
|
-
}
|
|
2182
|
-
],
|
|
2183
|
-
"prompt_style": null
|
|
1510
|
+
"system_prompt": "You are a helpful assistant.",
|
|
1511
|
+
"roles": [
|
|
1512
|
+
"user",
|
|
1513
|
+
"assistant"
|
|
1514
|
+
],
|
|
1515
|
+
"intra_message_sep": "\n\n",
|
|
1516
|
+
"inter_message_sep": "<|eot_id|>",
|
|
1517
|
+
"stop_token_ids": [
|
|
1518
|
+
128001,
|
|
1519
|
+
128009
|
|
1520
|
+
],
|
|
1521
|
+
"stop": [
|
|
1522
|
+
"<|end_of_text|>",
|
|
1523
|
+
"<|eot_id|>"
|
|
1524
|
+
]
|
|
1525
|
+
}
|
|
2184
1526
|
},
|
|
2185
1527
|
{
|
|
2186
1528
|
"version": 1,
|
|
2187
|
-
"context_length":
|
|
2188
|
-
"model_name": "
|
|
1529
|
+
"context_length": 2048,
|
|
1530
|
+
"model_name": "opt",
|
|
2189
1531
|
"model_lang": [
|
|
2190
1532
|
"en"
|
|
2191
1533
|
],
|
|
2192
1534
|
"model_ability": [
|
|
2193
|
-
"
|
|
1535
|
+
"generate"
|
|
2194
1536
|
],
|
|
2195
|
-
"model_description": "
|
|
1537
|
+
"model_description": "Opt is an open-source, decoder-only, Transformer based LLM that was designed to replicate GPT-3.",
|
|
2196
1538
|
"model_specs": [
|
|
2197
1539
|
{
|
|
2198
1540
|
"model_format": "pytorch",
|
|
2199
|
-
"model_size_in_billions":
|
|
1541
|
+
"model_size_in_billions": 1,
|
|
2200
1542
|
"quantizations": [
|
|
2201
1543
|
"4-bit",
|
|
2202
1544
|
"8-bit",
|
|
2203
1545
|
"none"
|
|
2204
1546
|
],
|
|
2205
|
-
"model_id": "
|
|
2206
|
-
"model_revision": "
|
|
1547
|
+
"model_id": "facebook/opt-125m",
|
|
1548
|
+
"model_revision": "3d2b5f275bdf882b8775f902e1bfdb790e2cfc32"
|
|
2207
1549
|
}
|
|
2208
|
-
]
|
|
2209
|
-
"prompt_style": {
|
|
2210
|
-
"style_name": "CHATML",
|
|
2211
|
-
"system_prompt": "<system>{system_message}\n",
|
|
2212
|
-
"roles": [
|
|
2213
|
-
"<|user|>",
|
|
2214
|
-
"<|assistant|>"
|
|
2215
|
-
],
|
|
2216
|
-
"intra_message_sep": "<|end|>",
|
|
2217
|
-
"stop_token_ids": [
|
|
2218
|
-
0,
|
|
2219
|
-
49155
|
|
2220
|
-
]
|
|
2221
|
-
}
|
|
1550
|
+
]
|
|
2222
1551
|
},
|
|
2223
1552
|
{
|
|
2224
1553
|
"version": 1,
|
|
@@ -2984,6 +2313,46 @@
|
|
|
2984
2313
|
],
|
|
2985
2314
|
"model_id": "Qwen/Qwen2-72B-Instruct-AWQ"
|
|
2986
2315
|
},
|
|
2316
|
+
{
|
|
2317
|
+
"model_format": "fp8",
|
|
2318
|
+
"model_size_in_billions": "0_5",
|
|
2319
|
+
"quantizations": [
|
|
2320
|
+
"fp8"
|
|
2321
|
+
],
|
|
2322
|
+
"model_id": "neuralmagic/Qwen2-0.5B-Instruct-FP8"
|
|
2323
|
+
},
|
|
2324
|
+
{
|
|
2325
|
+
"model_format": "fp8",
|
|
2326
|
+
"model_size_in_billions": "0_5",
|
|
2327
|
+
"quantizations": [
|
|
2328
|
+
"fp8"
|
|
2329
|
+
],
|
|
2330
|
+
"model_id": "neuralmagic/Qwen2-0.5B-Instruct-FP8"
|
|
2331
|
+
},
|
|
2332
|
+
{
|
|
2333
|
+
"model_format": "fp8",
|
|
2334
|
+
"model_size_in_billions": "1_5",
|
|
2335
|
+
"quantizations": [
|
|
2336
|
+
"fp8"
|
|
2337
|
+
],
|
|
2338
|
+
"model_id": "neuralmagic/Qwen2-1.5B-Instruct-FP8"
|
|
2339
|
+
},
|
|
2340
|
+
{
|
|
2341
|
+
"model_format": "fp8",
|
|
2342
|
+
"model_size_in_billions": 7,
|
|
2343
|
+
"quantizations": [
|
|
2344
|
+
"fp8"
|
|
2345
|
+
],
|
|
2346
|
+
"model_id": "neuralmagic/Qwen2-7B-Instruct-FP8"
|
|
2347
|
+
},
|
|
2348
|
+
{
|
|
2349
|
+
"model_format": "fp8",
|
|
2350
|
+
"model_size_in_billions": 72,
|
|
2351
|
+
"quantizations": [
|
|
2352
|
+
"fp8"
|
|
2353
|
+
],
|
|
2354
|
+
"model_id": "neuralmagic/Qwen2-72B-Instruct-FP8"
|
|
2355
|
+
},
|
|
2987
2356
|
{
|
|
2988
2357
|
"model_format": "mlx",
|
|
2989
2358
|
"model_size_in_billions": "0_5",
|
|
@@ -3098,398 +2467,141 @@
|
|
|
3098
2467
|
"00002-of-00002"
|
|
3099
2468
|
],
|
|
3100
2469
|
"q8_0": [
|
|
3101
|
-
"00001-of-00002",
|
|
3102
|
-
"00002-of-00002"
|
|
3103
|
-
],
|
|
3104
|
-
"fp16": [
|
|
3105
|
-
"00001-of-00004",
|
|
3106
|
-
"00002-of-00004",
|
|
3107
|
-
"00003-of-00004",
|
|
3108
|
-
"00004-of-00004"
|
|
3109
|
-
]
|
|
3110
|
-
}
|
|
3111
|
-
}
|
|
3112
|
-
],
|
|
3113
|
-
"prompt_style": {
|
|
3114
|
-
"style_name": "QWEN",
|
|
3115
|
-
"system_prompt": "You are a helpful assistant.",
|
|
3116
|
-
"roles": [
|
|
3117
|
-
"user",
|
|
3118
|
-
"assistant"
|
|
3119
|
-
],
|
|
3120
|
-
"intra_message_sep": "\n",
|
|
3121
|
-
"stop_token_ids": [
|
|
3122
|
-
151643,
|
|
3123
|
-
151644,
|
|
3124
|
-
151645
|
|
3125
|
-
],
|
|
3126
|
-
"stop": [
|
|
3127
|
-
"<|endoftext|>",
|
|
3128
|
-
"<|im_start|>",
|
|
3129
|
-
"<|im_end|>"
|
|
3130
|
-
]
|
|
3131
|
-
}
|
|
3132
|
-
},
|
|
3133
|
-
{
|
|
3134
|
-
"version": 1,
|
|
3135
|
-
"context_length": 32768,
|
|
3136
|
-
"model_name": "qwen2-moe-instruct",
|
|
3137
|
-
"model_lang": [
|
|
3138
|
-
"en",
|
|
3139
|
-
"zh"
|
|
3140
|
-
],
|
|
3141
|
-
"model_ability": [
|
|
3142
|
-
"chat",
|
|
3143
|
-
"tools"
|
|
3144
|
-
],
|
|
3145
|
-
"model_description": "Qwen2 is the new series of Qwen large language models. ",
|
|
3146
|
-
"model_specs": [
|
|
3147
|
-
{
|
|
3148
|
-
"model_format": "pytorch",
|
|
3149
|
-
"model_size_in_billions": 14,
|
|
3150
|
-
"quantizations": [
|
|
3151
|
-
"4-bit",
|
|
3152
|
-
"8-bit",
|
|
3153
|
-
"none"
|
|
3154
|
-
],
|
|
3155
|
-
"model_id": "Qwen/Qwen2-57B-A14B-Instruct"
|
|
3156
|
-
},
|
|
3157
|
-
{
|
|
3158
|
-
"model_format": "gptq",
|
|
3159
|
-
"model_size_in_billions": 14,
|
|
3160
|
-
"quantizations": [
|
|
3161
|
-
"Int4"
|
|
3162
|
-
],
|
|
3163
|
-
"model_id": "Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4"
|
|
3164
|
-
},
|
|
3165
|
-
{
|
|
3166
|
-
"model_format": "ggufv2",
|
|
3167
|
-
"model_size_in_billions": 14,
|
|
3168
|
-
"quantizations": [
|
|
3169
|
-
"q3_k_m",
|
|
3170
|
-
"q4_0",
|
|
3171
|
-
"q4_k_m",
|
|
3172
|
-
"q5_0",
|
|
3173
|
-
"q5_k_m",
|
|
3174
|
-
"q6_k",
|
|
3175
|
-
"q8_0",
|
|
3176
|
-
"fp16"
|
|
3177
|
-
],
|
|
3178
|
-
"model_id": "Qwen/Qwen2-57B-A14B-Instruct-GGUF",
|
|
3179
|
-
"model_file_name_template": "qwen2-57b-a14b-instruct-{quantization}.gguf",
|
|
3180
|
-
"model_file_name_split_template": "qwen2-57b-a14b-instruct-{quantization}-{part}.gguf",
|
|
3181
|
-
"quantization_parts": {
|
|
3182
|
-
"q8_0": [
|
|
3183
|
-
"00001-of-00002",
|
|
3184
|
-
"00002-of-00002"
|
|
3185
|
-
],
|
|
3186
|
-
"fp16": [
|
|
3187
|
-
"00001-of-00003",
|
|
3188
|
-
"00002-of-00003",
|
|
3189
|
-
"00003-of-00003"
|
|
3190
|
-
]
|
|
3191
|
-
}
|
|
3192
|
-
}
|
|
3193
|
-
],
|
|
3194
|
-
"prompt_style": {
|
|
3195
|
-
"style_name": "QWEN",
|
|
3196
|
-
"system_prompt": "You are a helpful assistant.",
|
|
3197
|
-
"roles": [
|
|
3198
|
-
"user",
|
|
3199
|
-
"assistant"
|
|
3200
|
-
],
|
|
3201
|
-
"intra_message_sep": "\n",
|
|
3202
|
-
"stop_token_ids": [
|
|
3203
|
-
151643,
|
|
3204
|
-
151644,
|
|
3205
|
-
151645
|
|
3206
|
-
],
|
|
3207
|
-
"stop": [
|
|
3208
|
-
"<|endoftext|>",
|
|
3209
|
-
"<|im_start|>",
|
|
3210
|
-
"<|im_end|>"
|
|
3211
|
-
]
|
|
3212
|
-
}
|
|
3213
|
-
},
|
|
3214
|
-
{
|
|
3215
|
-
"version": 1,
|
|
3216
|
-
"context_length": 8192,
|
|
3217
|
-
"model_name": "starcoder",
|
|
3218
|
-
"model_lang": [
|
|
3219
|
-
"en"
|
|
3220
|
-
],
|
|
3221
|
-
"model_ability": [
|
|
3222
|
-
"generate"
|
|
3223
|
-
],
|
|
3224
|
-
"model_description": "Starcoder is an open-source Transformer based LLM that is trained on permissively licensed data from GitHub.",
|
|
3225
|
-
"model_specs": [
|
|
3226
|
-
{
|
|
3227
|
-
"model_format": "ggmlv3",
|
|
3228
|
-
"model_size_in_billions": 16,
|
|
3229
|
-
"quantizations": [
|
|
3230
|
-
"q4_0",
|
|
3231
|
-
"q4_1",
|
|
3232
|
-
"q5_0",
|
|
3233
|
-
"q5_1",
|
|
3234
|
-
"q8_0"
|
|
3235
|
-
],
|
|
3236
|
-
"model_id": "TheBloke/starcoder-GGML",
|
|
3237
|
-
"model_file_name_template": "starcoder.ggmlv3.{quantization}.bin"
|
|
3238
|
-
}
|
|
3239
|
-
]
|
|
3240
|
-
},
|
|
3241
|
-
{
|
|
3242
|
-
"version": 1,
|
|
3243
|
-
"context_length": 1024,
|
|
3244
|
-
"model_name": "gpt-2",
|
|
3245
|
-
"model_lang": [
|
|
3246
|
-
"en"
|
|
3247
|
-
],
|
|
3248
|
-
"model_ability": [
|
|
3249
|
-
"generate"
|
|
3250
|
-
],
|
|
3251
|
-
"model_description": "GPT-2 is a Transformer-based LLM that is trained on WebTest, a 40 GB dataset of Reddit posts with 3+ upvotes.",
|
|
3252
|
-
"model_specs": [
|
|
3253
|
-
{
|
|
3254
|
-
"model_format": "pytorch",
|
|
3255
|
-
"model_size_in_billions": "1_5",
|
|
3256
|
-
"quantizations": [
|
|
3257
|
-
"none"
|
|
3258
|
-
],
|
|
3259
|
-
"model_id": "openai-community/gpt2",
|
|
3260
|
-
"model_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e"
|
|
3261
|
-
}
|
|
3262
|
-
]
|
|
3263
|
-
},
|
|
3264
|
-
{
|
|
3265
|
-
"version": 1,
|
|
3266
|
-
"context_length": 8192,
|
|
3267
|
-
"model_name": "internlm-7b",
|
|
3268
|
-
"model_lang": [
|
|
3269
|
-
"en",
|
|
3270
|
-
"zh"
|
|
3271
|
-
],
|
|
3272
|
-
"model_ability": [
|
|
3273
|
-
"generate"
|
|
3274
|
-
],
|
|
3275
|
-
"model_description": "InternLM is a Transformer-based LLM that is trained on both Chinese and English data, focusing on practical scenarios.",
|
|
3276
|
-
"model_specs": [
|
|
3277
|
-
{
|
|
3278
|
-
"model_format": "pytorch",
|
|
3279
|
-
"model_size_in_billions": 7,
|
|
3280
|
-
"quantizations": [
|
|
3281
|
-
"4-bit",
|
|
3282
|
-
"8-bit",
|
|
3283
|
-
"none"
|
|
3284
|
-
],
|
|
3285
|
-
"model_id": "internlm/internlm-7b",
|
|
3286
|
-
"model_revision": "592b0efc83be3eb1cba8990c4caf41ce604b958c"
|
|
3287
|
-
}
|
|
3288
|
-
]
|
|
3289
|
-
},
|
|
3290
|
-
{
|
|
3291
|
-
"version": 1,
|
|
3292
|
-
"context_length": 4096,
|
|
3293
|
-
"model_name": "internlm-chat-7b",
|
|
3294
|
-
"model_lang": [
|
|
3295
|
-
"en",
|
|
3296
|
-
"zh"
|
|
3297
|
-
],
|
|
3298
|
-
"model_ability": [
|
|
3299
|
-
"chat"
|
|
3300
|
-
],
|
|
3301
|
-
"model_description": "Internlm-chat is a fine-tuned version of the Internlm LLM, specializing in chatting.",
|
|
3302
|
-
"model_specs": [
|
|
3303
|
-
{
|
|
3304
|
-
"model_format": "pytorch",
|
|
3305
|
-
"model_size_in_billions": 7,
|
|
3306
|
-
"quantizations": [
|
|
3307
|
-
"4-bit",
|
|
3308
|
-
"8-bit",
|
|
3309
|
-
"none"
|
|
3310
|
-
],
|
|
3311
|
-
"model_id": "internlm/internlm-chat-7b",
|
|
3312
|
-
"model_revision": "d4fa2dbcbd2fa4edfa6735aa2ba0f0577fed6a62"
|
|
2470
|
+
"00001-of-00002",
|
|
2471
|
+
"00002-of-00002"
|
|
2472
|
+
],
|
|
2473
|
+
"fp16": [
|
|
2474
|
+
"00001-of-00004",
|
|
2475
|
+
"00002-of-00004",
|
|
2476
|
+
"00003-of-00004",
|
|
2477
|
+
"00004-of-00004"
|
|
2478
|
+
]
|
|
2479
|
+
}
|
|
3313
2480
|
}
|
|
3314
2481
|
],
|
|
3315
2482
|
"prompt_style": {
|
|
3316
|
-
"style_name": "
|
|
3317
|
-
"system_prompt": "",
|
|
2483
|
+
"style_name": "QWEN",
|
|
2484
|
+
"system_prompt": "You are a helpful assistant.",
|
|
3318
2485
|
"roles": [
|
|
3319
|
-
"
|
|
3320
|
-
"
|
|
2486
|
+
"user",
|
|
2487
|
+
"assistant"
|
|
3321
2488
|
],
|
|
3322
|
-
"intra_message_sep": "
|
|
3323
|
-
"inter_message_sep": "<eoa>\n",
|
|
2489
|
+
"intra_message_sep": "\n",
|
|
3324
2490
|
"stop_token_ids": [
|
|
3325
|
-
|
|
3326
|
-
|
|
2491
|
+
151643,
|
|
2492
|
+
151644,
|
|
2493
|
+
151645
|
|
3327
2494
|
],
|
|
3328
2495
|
"stop": [
|
|
3329
|
-
"
|
|
2496
|
+
"<|endoftext|>",
|
|
2497
|
+
"<|im_start|>",
|
|
2498
|
+
"<|im_end|>"
|
|
3330
2499
|
]
|
|
3331
2500
|
}
|
|
3332
2501
|
},
|
|
3333
2502
|
{
|
|
3334
2503
|
"version": 1,
|
|
3335
|
-
"context_length":
|
|
3336
|
-
"model_name": "
|
|
3337
|
-
"model_lang": [
|
|
3338
|
-
"en",
|
|
3339
|
-
"zh"
|
|
3340
|
-
],
|
|
3341
|
-
"model_ability": [
|
|
3342
|
-
"generate"
|
|
3343
|
-
],
|
|
3344
|
-
"model_description": "Pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data.",
|
|
3345
|
-
"model_specs": [
|
|
3346
|
-
{
|
|
3347
|
-
"model_format": "pytorch",
|
|
3348
|
-
"model_size_in_billions": 20,
|
|
3349
|
-
"quantizations": [
|
|
3350
|
-
"4-bit",
|
|
3351
|
-
"8-bit",
|
|
3352
|
-
"none"
|
|
3353
|
-
],
|
|
3354
|
-
"model_id": "internlm/internlm-20b",
|
|
3355
|
-
"model_revision": "c56a72957239b490ea206ea857e86611b3f65f3a"
|
|
3356
|
-
}
|
|
3357
|
-
]
|
|
3358
|
-
},
|
|
3359
|
-
{
|
|
3360
|
-
"version": 1,
|
|
3361
|
-
"context_length": 16384,
|
|
3362
|
-
"model_name": "internlm-chat-20b",
|
|
2504
|
+
"context_length": 32768,
|
|
2505
|
+
"model_name": "qwen2-moe-instruct",
|
|
3363
2506
|
"model_lang": [
|
|
3364
2507
|
"en",
|
|
3365
2508
|
"zh"
|
|
3366
2509
|
],
|
|
3367
2510
|
"model_ability": [
|
|
3368
|
-
"chat"
|
|
2511
|
+
"chat",
|
|
2512
|
+
"tools"
|
|
3369
2513
|
],
|
|
3370
|
-
"model_description": "
|
|
2514
|
+
"model_description": "Qwen2 is the new series of Qwen large language models. ",
|
|
3371
2515
|
"model_specs": [
|
|
3372
2516
|
{
|
|
3373
2517
|
"model_format": "pytorch",
|
|
3374
|
-
"model_size_in_billions":
|
|
2518
|
+
"model_size_in_billions": 14,
|
|
3375
2519
|
"quantizations": [
|
|
3376
2520
|
"4-bit",
|
|
3377
2521
|
"8-bit",
|
|
3378
2522
|
"none"
|
|
3379
2523
|
],
|
|
3380
|
-
"model_id": "
|
|
3381
|
-
|
|
3382
|
-
}
|
|
3383
|
-
],
|
|
3384
|
-
"prompt_style": {
|
|
3385
|
-
"style_name": "INTERNLM",
|
|
3386
|
-
"system_prompt": "",
|
|
3387
|
-
"roles": [
|
|
3388
|
-
"<|User|>",
|
|
3389
|
-
"<|Bot|>"
|
|
3390
|
-
],
|
|
3391
|
-
"intra_message_sep": "<eoh>\n",
|
|
3392
|
-
"inter_message_sep": "<eoa>\n",
|
|
3393
|
-
"stop_token_ids": [
|
|
3394
|
-
1,
|
|
3395
|
-
103028
|
|
3396
|
-
],
|
|
3397
|
-
"stop": [
|
|
3398
|
-
"<eoa>"
|
|
3399
|
-
]
|
|
3400
|
-
}
|
|
3401
|
-
},
|
|
3402
|
-
{
|
|
3403
|
-
"version": 1,
|
|
3404
|
-
"context_length": 4096,
|
|
3405
|
-
"model_name": "vicuna-v1.5",
|
|
3406
|
-
"model_lang": [
|
|
3407
|
-
"en"
|
|
3408
|
-
],
|
|
3409
|
-
"model_ability": [
|
|
3410
|
-
"chat"
|
|
3411
|
-
],
|
|
3412
|
-
"model_description": "Vicuna is an open-source LLM trained by fine-tuning LLaMA on data collected from ShareGPT.",
|
|
3413
|
-
"model_specs": [
|
|
2524
|
+
"model_id": "Qwen/Qwen2-57B-A14B-Instruct"
|
|
2525
|
+
},
|
|
3414
2526
|
{
|
|
3415
|
-
"model_format": "
|
|
3416
|
-
"model_size_in_billions":
|
|
2527
|
+
"model_format": "gptq",
|
|
2528
|
+
"model_size_in_billions": 14,
|
|
3417
2529
|
"quantizations": [
|
|
3418
|
-
"
|
|
3419
|
-
"8-bit",
|
|
3420
|
-
"none"
|
|
2530
|
+
"Int4"
|
|
3421
2531
|
],
|
|
3422
|
-
"model_id": "
|
|
3423
|
-
"model_revision": "de56c35b1763eaae20f4d60efd64af0a9091ebe5"
|
|
2532
|
+
"model_id": "Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4"
|
|
3424
2533
|
},
|
|
3425
2534
|
{
|
|
3426
|
-
"model_format": "
|
|
3427
|
-
"model_size_in_billions":
|
|
2535
|
+
"model_format": "ggufv2",
|
|
2536
|
+
"model_size_in_billions": 14,
|
|
3428
2537
|
"quantizations": [
|
|
3429
|
-
"
|
|
3430
|
-
"
|
|
3431
|
-
"
|
|
2538
|
+
"q3_k_m",
|
|
2539
|
+
"q4_0",
|
|
2540
|
+
"q4_k_m",
|
|
2541
|
+
"q5_0",
|
|
2542
|
+
"q5_k_m",
|
|
2543
|
+
"q6_k",
|
|
2544
|
+
"q8_0",
|
|
2545
|
+
"fp16"
|
|
3432
2546
|
],
|
|
3433
|
-
"model_id": "
|
|
3434
|
-
"
|
|
2547
|
+
"model_id": "Qwen/Qwen2-57B-A14B-Instruct-GGUF",
|
|
2548
|
+
"model_file_name_template": "qwen2-57b-a14b-instruct-{quantization}.gguf",
|
|
2549
|
+
"model_file_name_split_template": "qwen2-57b-a14b-instruct-{quantization}-{part}.gguf",
|
|
2550
|
+
"quantization_parts": {
|
|
2551
|
+
"q8_0": [
|
|
2552
|
+
"00001-of-00002",
|
|
2553
|
+
"00002-of-00002"
|
|
2554
|
+
],
|
|
2555
|
+
"fp16": [
|
|
2556
|
+
"00001-of-00003",
|
|
2557
|
+
"00002-of-00003",
|
|
2558
|
+
"00003-of-00003"
|
|
2559
|
+
]
|
|
2560
|
+
}
|
|
3435
2561
|
}
|
|
3436
2562
|
],
|
|
3437
2563
|
"prompt_style": {
|
|
3438
|
-
"style_name": "
|
|
3439
|
-
"system_prompt": "
|
|
2564
|
+
"style_name": "QWEN",
|
|
2565
|
+
"system_prompt": "You are a helpful assistant.",
|
|
3440
2566
|
"roles": [
|
|
3441
|
-
"
|
|
3442
|
-
"
|
|
2567
|
+
"user",
|
|
2568
|
+
"assistant"
|
|
3443
2569
|
],
|
|
3444
|
-
"intra_message_sep": "
|
|
3445
|
-
"
|
|
2570
|
+
"intra_message_sep": "\n",
|
|
2571
|
+
"stop_token_ids": [
|
|
2572
|
+
151643,
|
|
2573
|
+
151644,
|
|
2574
|
+
151645
|
|
2575
|
+
],
|
|
2576
|
+
"stop": [
|
|
2577
|
+
"<|endoftext|>",
|
|
2578
|
+
"<|im_start|>",
|
|
2579
|
+
"<|im_end|>"
|
|
2580
|
+
]
|
|
3446
2581
|
}
|
|
3447
2582
|
},
|
|
3448
2583
|
{
|
|
3449
2584
|
"version": 1,
|
|
3450
|
-
"context_length":
|
|
3451
|
-
"model_name": "
|
|
2585
|
+
"context_length": 1024,
|
|
2586
|
+
"model_name": "gpt-2",
|
|
3452
2587
|
"model_lang": [
|
|
3453
2588
|
"en"
|
|
3454
2589
|
],
|
|
3455
2590
|
"model_ability": [
|
|
3456
|
-
"
|
|
2591
|
+
"generate"
|
|
3457
2592
|
],
|
|
3458
|
-
"model_description": "
|
|
2593
|
+
"model_description": "GPT-2 is a Transformer-based LLM that is trained on WebTest, a 40 GB dataset of Reddit posts with 3+ upvotes.",
|
|
3459
2594
|
"model_specs": [
|
|
3460
2595
|
{
|
|
3461
2596
|
"model_format": "pytorch",
|
|
3462
|
-
"model_size_in_billions":
|
|
3463
|
-
"quantizations": [
|
|
3464
|
-
"4-bit",
|
|
3465
|
-
"8-bit",
|
|
3466
|
-
"none"
|
|
3467
|
-
],
|
|
3468
|
-
"model_id": "lmsys/vicuna-7b-v1.5-16k",
|
|
3469
|
-
"model_revision": "9a93d7d11fac7f3f9074510b80092b53bc1a5bec"
|
|
3470
|
-
},
|
|
3471
|
-
{
|
|
3472
|
-
"model_format": "pytorch",
|
|
3473
|
-
"model_size_in_billions": 13,
|
|
2597
|
+
"model_size_in_billions": "1_5",
|
|
3474
2598
|
"quantizations": [
|
|
3475
|
-
"4-bit",
|
|
3476
|
-
"8-bit",
|
|
3477
2599
|
"none"
|
|
3478
2600
|
],
|
|
3479
|
-
"model_id": "
|
|
3480
|
-
"model_revision": "
|
|
2601
|
+
"model_id": "openai-community/gpt2",
|
|
2602
|
+
"model_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e"
|
|
3481
2603
|
}
|
|
3482
|
-
]
|
|
3483
|
-
"prompt_style": {
|
|
3484
|
-
"style_name": "ADD_COLON_TWO",
|
|
3485
|
-
"system_prompt": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.",
|
|
3486
|
-
"roles": [
|
|
3487
|
-
"USER",
|
|
3488
|
-
"ASSISTANT"
|
|
3489
|
-
],
|
|
3490
|
-
"intra_message_sep": " ",
|
|
3491
|
-
"inter_message_sep": "</s>"
|
|
3492
|
-
}
|
|
2604
|
+
]
|
|
3493
2605
|
},
|
|
3494
2606
|
{
|
|
3495
2607
|
"version": 1,
|
|
@@ -5463,131 +4575,44 @@
|
|
|
5463
4575
|
"model_file_name_template": "Yi-1.5-9B-Chat-16K.{quantization}.gguf"
|
|
5464
4576
|
},
|
|
5465
4577
|
{
|
|
5466
|
-
"model_format": "ggufv2",
|
|
5467
|
-
"model_size_in_billions": 34,
|
|
5468
|
-
"quantizations": [
|
|
5469
|
-
"Q2_K",
|
|
5470
|
-
"Q3_K_L",
|
|
5471
|
-
"Q3_K_M",
|
|
5472
|
-
"Q3_K_S",
|
|
5473
|
-
"Q4_K_M",
|
|
5474
|
-
"Q4_K_S",
|
|
5475
|
-
"Q5_K_M",
|
|
5476
|
-
"Q5_K_S",
|
|
5477
|
-
"Q6_K",
|
|
5478
|
-
"Q8_0"
|
|
5479
|
-
],
|
|
5480
|
-
"model_id": "bartowski/Yi-1.5-34B-Chat-16K-GGUF",
|
|
5481
|
-
"model_file_name_template": "Yi-1.5-34B-Chat-16K-{quantization}.gguf"
|
|
5482
|
-
}
|
|
5483
|
-
],
|
|
5484
|
-
"prompt_style": {
|
|
5485
|
-
"style_name": "CHATML",
|
|
5486
|
-
"system_prompt": "",
|
|
5487
|
-
"roles": [
|
|
5488
|
-
"<|im_start|>user",
|
|
5489
|
-
"<|im_start|>assistant"
|
|
5490
|
-
],
|
|
5491
|
-
"intra_message_sep": "<|im_end|>",
|
|
5492
|
-
"inter_message_sep": "",
|
|
5493
|
-
"stop_token_ids": [
|
|
5494
|
-
2,
|
|
5495
|
-
6,
|
|
5496
|
-
7,
|
|
5497
|
-
8
|
|
5498
|
-
],
|
|
5499
|
-
"stop": [
|
|
5500
|
-
"<|endoftext|>",
|
|
5501
|
-
"<|im_start|>",
|
|
5502
|
-
"<|im_end|>",
|
|
5503
|
-
"<|im_sep|>"
|
|
5504
|
-
]
|
|
5505
|
-
}
|
|
5506
|
-
},
|
|
5507
|
-
{
|
|
5508
|
-
"version": 1,
|
|
5509
|
-
"context_length": 2048,
|
|
5510
|
-
"model_name": "OpenBuddy",
|
|
5511
|
-
"model_lang": [
|
|
5512
|
-
"en"
|
|
5513
|
-
],
|
|
5514
|
-
"model_ability": [
|
|
5515
|
-
"chat"
|
|
5516
|
-
],
|
|
5517
|
-
"model_description": "OpenBuddy is a powerful open multilingual chatbot model aimed at global users.",
|
|
5518
|
-
"model_specs": [
|
|
5519
|
-
{
|
|
5520
|
-
"model_format": "ggmlv3",
|
|
5521
|
-
"model_size_in_billions": 13,
|
|
5522
|
-
"quantizations": [
|
|
5523
|
-
"Q2_K",
|
|
5524
|
-
"Q3_K_S",
|
|
5525
|
-
"Q3_K_M",
|
|
5526
|
-
"Q3_K_L",
|
|
5527
|
-
"Q4_0",
|
|
5528
|
-
"Q4_1",
|
|
5529
|
-
"Q4_K_S",
|
|
5530
|
-
"Q4_K_M",
|
|
5531
|
-
"Q5_0",
|
|
5532
|
-
"Q5_1",
|
|
5533
|
-
"Q5_K_S",
|
|
5534
|
-
"Q5_K_M",
|
|
5535
|
-
"Q6_K",
|
|
5536
|
-
"Q8_0"
|
|
5537
|
-
],
|
|
5538
|
-
"model_id": "TheBloke/OpenBuddy-Llama2-13B-v11.1-GGML",
|
|
5539
|
-
"model_file_name_template": "openbuddy-llama2-13b-v11.1.ggmlv3.{quantization}.bin"
|
|
5540
|
-
}
|
|
5541
|
-
],
|
|
5542
|
-
"prompt_style": {
|
|
5543
|
-
"style_name": "INSTRUCTION",
|
|
5544
|
-
"system_prompt": "You are a professional translator. Be faithful or accurate in translation. Make the translation readable or intelligible. Be elegant or natural in translation. Do not translate person's name. Do not add any additional text to the translation. Do not give me any comments or suggestions.\nUser:\n\n{0}\nAssistant:",
|
|
5545
|
-
"roles": [
|
|
5546
|
-
"User",
|
|
5547
|
-
"Assistant"
|
|
5548
|
-
],
|
|
5549
|
-
"intra_message_sep": "",
|
|
5550
|
-
"inter_message_sep": ""
|
|
5551
|
-
}
|
|
5552
|
-
},
|
|
5553
|
-
{
|
|
5554
|
-
"version": 1,
|
|
5555
|
-
"context_length": 16384,
|
|
5556
|
-
"model_name": "glaive-coder",
|
|
5557
|
-
"model_description": "A code model trained on a dataset of ~140k programming related problems and solutions generated from Glaive’s synthetic data generation platform.",
|
|
5558
|
-
"model_lang": [
|
|
5559
|
-
"en"
|
|
5560
|
-
],
|
|
5561
|
-
"model_ability": [
|
|
5562
|
-
"chat"
|
|
5563
|
-
],
|
|
5564
|
-
"model_specs": [
|
|
5565
|
-
{
|
|
5566
|
-
"model_format": "pytorch",
|
|
5567
|
-
"model_size_in_billions": 7,
|
|
4578
|
+
"model_format": "ggufv2",
|
|
4579
|
+
"model_size_in_billions": 34,
|
|
5568
4580
|
"quantizations": [
|
|
5569
|
-
"
|
|
5570
|
-
"
|
|
5571
|
-
"
|
|
4581
|
+
"Q2_K",
|
|
4582
|
+
"Q3_K_L",
|
|
4583
|
+
"Q3_K_M",
|
|
4584
|
+
"Q3_K_S",
|
|
4585
|
+
"Q4_K_M",
|
|
4586
|
+
"Q4_K_S",
|
|
4587
|
+
"Q5_K_M",
|
|
4588
|
+
"Q5_K_S",
|
|
4589
|
+
"Q6_K",
|
|
4590
|
+
"Q8_0"
|
|
5572
4591
|
],
|
|
5573
|
-
"model_id": "
|
|
5574
|
-
"
|
|
4592
|
+
"model_id": "bartowski/Yi-1.5-34B-Chat-16K-GGUF",
|
|
4593
|
+
"model_file_name_template": "Yi-1.5-34B-Chat-16K-{quantization}.gguf"
|
|
5575
4594
|
}
|
|
5576
4595
|
],
|
|
5577
4596
|
"prompt_style": {
|
|
5578
|
-
"style_name": "
|
|
5579
|
-
"system_prompt": "
|
|
4597
|
+
"style_name": "CHATML",
|
|
4598
|
+
"system_prompt": "",
|
|
5580
4599
|
"roles": [
|
|
5581
|
-
"
|
|
5582
|
-
"
|
|
4600
|
+
"<|im_start|>user",
|
|
4601
|
+
"<|im_start|>assistant"
|
|
5583
4602
|
],
|
|
5584
|
-
"intra_message_sep": "
|
|
5585
|
-
"inter_message_sep": "
|
|
4603
|
+
"intra_message_sep": "<|im_end|>",
|
|
4604
|
+
"inter_message_sep": "",
|
|
5586
4605
|
"stop_token_ids": [
|
|
5587
|
-
2
|
|
4606
|
+
2,
|
|
4607
|
+
6,
|
|
4608
|
+
7,
|
|
4609
|
+
8
|
|
5588
4610
|
],
|
|
5589
4611
|
"stop": [
|
|
5590
|
-
"
|
|
4612
|
+
"<|endoftext|>",
|
|
4613
|
+
"<|im_start|>",
|
|
4614
|
+
"<|im_end|>",
|
|
4615
|
+
"<|im_sep|>"
|
|
5591
4616
|
]
|
|
5592
4617
|
}
|
|
5593
4618
|
},
|
|
@@ -6624,6 +5649,15 @@
|
|
|
6624
5649
|
],
|
|
6625
5650
|
"model_description": "InternLM2.5 series of the InternLM model.",
|
|
6626
5651
|
"model_specs": [
|
|
5652
|
+
{
|
|
5653
|
+
"model_format": "pytorch",
|
|
5654
|
+
"model_size_in_billions": "1_8",
|
|
5655
|
+
"quantizations": [
|
|
5656
|
+
"none"
|
|
5657
|
+
],
|
|
5658
|
+
"model_id": "internlm/internlm2_5-1_8b-chat",
|
|
5659
|
+
"model_revision": "4426f00b854561fa60d555d2b628064b56bcb758"
|
|
5660
|
+
},
|
|
6627
5661
|
{
|
|
6628
5662
|
"model_format": "pytorch",
|
|
6629
5663
|
"model_size_in_billions": 7,
|
|
@@ -6633,6 +5667,15 @@
|
|
|
6633
5667
|
"model_id": "internlm/internlm2_5-7b-chat",
|
|
6634
5668
|
"model_revision": "9dc8536a922ab4954726aad1b37fa199004a291a"
|
|
6635
5669
|
},
|
|
5670
|
+
{
|
|
5671
|
+
"model_format": "pytorch",
|
|
5672
|
+
"model_size_in_billions": 20,
|
|
5673
|
+
"quantizations": [
|
|
5674
|
+
"none"
|
|
5675
|
+
],
|
|
5676
|
+
"model_id": "internlm/internlm2_5-20b-chat",
|
|
5677
|
+
"model_revision": "ef17bde929761255fee76d95e2c25969ccd93b0d"
|
|
5678
|
+
},
|
|
6636
5679
|
{
|
|
6637
5680
|
"model_format": "gptq",
|
|
6638
5681
|
"model_size_in_billions": 7,
|
|
@@ -6642,6 +5685,23 @@
|
|
|
6642
5685
|
"model_id": "ModelCloud/internlm-2.5-7b-chat-gptq-4bit",
|
|
6643
5686
|
"model_revision": "2e2dda735c326544921a4035bbeb6c6e316a8254"
|
|
6644
5687
|
},
|
|
5688
|
+
{
|
|
5689
|
+
"model_format": "ggufv2",
|
|
5690
|
+
"model_size_in_billions": "1_8",
|
|
5691
|
+
"quantizations": [
|
|
5692
|
+
"q2_k",
|
|
5693
|
+
"q3_k_m",
|
|
5694
|
+
"q4_0",
|
|
5695
|
+
"q4_k_m",
|
|
5696
|
+
"q5_0",
|
|
5697
|
+
"q5_k_m",
|
|
5698
|
+
"q6_k",
|
|
5699
|
+
"q8_0",
|
|
5700
|
+
"fp16"
|
|
5701
|
+
],
|
|
5702
|
+
"model_id": "internlm/internlm2_5-1_8b-chat-gguf",
|
|
5703
|
+
"model_file_name_template": "internlm2_5-1_8b-chat-{quantization}.gguf"
|
|
5704
|
+
},
|
|
6645
5705
|
{
|
|
6646
5706
|
"model_format": "ggufv2",
|
|
6647
5707
|
"model_size_in_billions": 7,
|
|
@@ -6659,6 +5719,23 @@
|
|
|
6659
5719
|
"model_id": "internlm/internlm2_5-7b-chat-gguf",
|
|
6660
5720
|
"model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf"
|
|
6661
5721
|
},
|
|
5722
|
+
{
|
|
5723
|
+
"model_format": "ggufv2",
|
|
5724
|
+
"model_size_in_billions": 20,
|
|
5725
|
+
"quantizations": [
|
|
5726
|
+
"q2_k",
|
|
5727
|
+
"q3_k_m",
|
|
5728
|
+
"q4_0",
|
|
5729
|
+
"q4_k_m",
|
|
5730
|
+
"q5_0",
|
|
5731
|
+
"q5_k_m",
|
|
5732
|
+
"q6_k",
|
|
5733
|
+
"q8_0",
|
|
5734
|
+
"fp16"
|
|
5735
|
+
],
|
|
5736
|
+
"model_id": "internlm/internlm2_5-20b-chat-gguf",
|
|
5737
|
+
"model_file_name_template": "internlm2_5-20b-chat-{quantization}.gguf"
|
|
5738
|
+
},
|
|
6662
5739
|
{
|
|
6663
5740
|
"model_format": "mlx",
|
|
6664
5741
|
"model_size_in_billions": 7,
|
|
@@ -7142,6 +6219,16 @@
|
|
|
7142
6219
|
],
|
|
7143
6220
|
"model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
|
|
7144
6221
|
"model_specs": [
|
|
6222
|
+
{
|
|
6223
|
+
"model_format": "pytorch",
|
|
6224
|
+
"model_size_in_billions": 2,
|
|
6225
|
+
"quantizations": [
|
|
6226
|
+
"none",
|
|
6227
|
+
"4-bit",
|
|
6228
|
+
"8-bit"
|
|
6229
|
+
],
|
|
6230
|
+
"model_id": "google/gemma-2-2b-it"
|
|
6231
|
+
},
|
|
7145
6232
|
{
|
|
7146
6233
|
"model_format": "pytorch",
|
|
7147
6234
|
"model_size_in_billions": 9,
|
|
@@ -7162,6 +6249,23 @@
|
|
|
7162
6249
|
],
|
|
7163
6250
|
"model_id": "google/gemma-2-27b-it"
|
|
7164
6251
|
},
|
|
6252
|
+
{
|
|
6253
|
+
"model_format": "ggufv2",
|
|
6254
|
+
"model_size_in_billions": 2,
|
|
6255
|
+
"quantizations": [
|
|
6256
|
+
"Q3_K_L",
|
|
6257
|
+
"Q4_K_M",
|
|
6258
|
+
"Q4_K_S",
|
|
6259
|
+
"Q5_K_M",
|
|
6260
|
+
"Q5_K_S",
|
|
6261
|
+
"Q6_K",
|
|
6262
|
+
"Q6_K_L",
|
|
6263
|
+
"Q8_0",
|
|
6264
|
+
"f32"
|
|
6265
|
+
],
|
|
6266
|
+
"model_id": "bartowski/gemma-2-2b-it-GGUF",
|
|
6267
|
+
"model_file_name_template": "gemma-2-2b-it-{quantization}.gguf"
|
|
6268
|
+
},
|
|
7165
6269
|
{
|
|
7166
6270
|
"model_format": "ggufv2",
|
|
7167
6271
|
"model_size_in_billions": 9,
|
|
@@ -7208,6 +6312,30 @@
|
|
|
7208
6312
|
"model_id": "bartowski/gemma-2-27b-it-GGUF",
|
|
7209
6313
|
"model_file_name_template": "gemma-2-27b-it-{quantization}.gguf"
|
|
7210
6314
|
},
|
|
6315
|
+
{
|
|
6316
|
+
"model_format": "mlx",
|
|
6317
|
+
"model_size_in_billions": 2,
|
|
6318
|
+
"quantizations": [
|
|
6319
|
+
"4-bit"
|
|
6320
|
+
],
|
|
6321
|
+
"model_id": "mlx-community/gemma-2-2b-it-4bit"
|
|
6322
|
+
},
|
|
6323
|
+
{
|
|
6324
|
+
"model_format": "mlx",
|
|
6325
|
+
"model_size_in_billions": 2,
|
|
6326
|
+
"quantizations": [
|
|
6327
|
+
"8-bit"
|
|
6328
|
+
],
|
|
6329
|
+
"model_id": "mlx-community/gemma-2-2b-it-8bit"
|
|
6330
|
+
},
|
|
6331
|
+
{
|
|
6332
|
+
"model_format": "mlx",
|
|
6333
|
+
"model_size_in_billions": 2,
|
|
6334
|
+
"quantizations": [
|
|
6335
|
+
"None"
|
|
6336
|
+
],
|
|
6337
|
+
"model_id": "mlx-community/gemma-2-2b-it"
|
|
6338
|
+
},
|
|
7211
6339
|
{
|
|
7212
6340
|
"model_format": "mlx",
|
|
7213
6341
|
"model_size_in_billions": 9,
|
|
@@ -7955,32 +7083,195 @@
|
|
|
7955
7083
|
"model_format": "pytorch",
|
|
7956
7084
|
"model_size_in_billions": 2,
|
|
7957
7085
|
"quantizations": [
|
|
7958
|
-
|
|
7086
|
+
"4-bit",
|
|
7087
|
+
"8-bit",
|
|
7088
|
+
"none"
|
|
7959
7089
|
],
|
|
7960
7090
|
"model_id": "OpenGVLab/Mini-InternVL-Chat-2B-V1-5",
|
|
7961
|
-
"model_revision": "
|
|
7091
|
+
"model_revision": "ecbbd21dcf38caa74d925967b997167b0c7b3f47"
|
|
7092
|
+
},
|
|
7093
|
+
{
|
|
7094
|
+
"model_format": "pytorch",
|
|
7095
|
+
"model_size_in_billions": 4,
|
|
7096
|
+
"quantizations": [
|
|
7097
|
+
"4-bit",
|
|
7098
|
+
"8-bit",
|
|
7099
|
+
"none"
|
|
7100
|
+
],
|
|
7101
|
+
"model_id": "OpenGVLab/Mini-InternVL-Chat-4B-V1-5",
|
|
7102
|
+
"model_revision": "ce1559ddf9d87f5130aa5233b0e93b95e4e4161a"
|
|
7962
7103
|
},
|
|
7963
7104
|
{
|
|
7964
7105
|
"model_format": "pytorch",
|
|
7965
7106
|
"model_size_in_billions": 26,
|
|
7966
7107
|
"quantizations": [
|
|
7967
|
-
|
|
7108
|
+
"4-bit",
|
|
7109
|
+
"8-bit",
|
|
7110
|
+
"none"
|
|
7968
7111
|
],
|
|
7969
7112
|
"model_id": "OpenGVLab/InternVL-Chat-V1-5",
|
|
7970
|
-
"model_revision": "
|
|
7113
|
+
"model_revision": "9db32d9127cac0c85961e169d75da57a18a847b1"
|
|
7114
|
+
}
|
|
7115
|
+
],
|
|
7116
|
+
"prompt_style": {
|
|
7117
|
+
"style_name": "INTERNVL",
|
|
7118
|
+
"system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
|
|
7119
|
+
"roles": [
|
|
7120
|
+
"<|im_start|>user",
|
|
7121
|
+
"<|im_start|>assistant"
|
|
7122
|
+
],
|
|
7123
|
+
"intra_message_sep": "<|im_end|>",
|
|
7124
|
+
"stop_token_ids": [
|
|
7125
|
+
2,
|
|
7126
|
+
92543,
|
|
7127
|
+
92542
|
|
7128
|
+
],
|
|
7129
|
+
"stop": [
|
|
7130
|
+
"</s>",
|
|
7131
|
+
"<|im_end|>",
|
|
7132
|
+
"<|im_start|>"
|
|
7133
|
+
]
|
|
7134
|
+
}
|
|
7135
|
+
},
|
|
7136
|
+
{
|
|
7137
|
+
"version": 1,
|
|
7138
|
+
"context_length": 32768,
|
|
7139
|
+
"model_name": "internvl2",
|
|
7140
|
+
"model_lang": [
|
|
7141
|
+
"en",
|
|
7142
|
+
"zh"
|
|
7143
|
+
],
|
|
7144
|
+
"model_ability": [
|
|
7145
|
+
"chat",
|
|
7146
|
+
"vision"
|
|
7147
|
+
],
|
|
7148
|
+
"model_description": "InternVL 2 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
|
|
7149
|
+
"model_specs": [
|
|
7150
|
+
{
|
|
7151
|
+
"model_format": "pytorch",
|
|
7152
|
+
"model_size_in_billions": 1,
|
|
7153
|
+
"quantizations": [
|
|
7154
|
+
"4-bit",
|
|
7155
|
+
"8-bit",
|
|
7156
|
+
"none"
|
|
7157
|
+
],
|
|
7158
|
+
"model_id": "OpenGVLab/InternVL2-1B",
|
|
7159
|
+
"model_revision": "a9fc14aea824b6ea1d44f8778cad6b35512c4ce1"
|
|
7160
|
+
},
|
|
7161
|
+
{
|
|
7162
|
+
"model_format": "pytorch",
|
|
7163
|
+
"model_size_in_billions": 2,
|
|
7164
|
+
"quantizations": [
|
|
7165
|
+
"4-bit",
|
|
7166
|
+
"8-bit",
|
|
7167
|
+
"none"
|
|
7168
|
+
],
|
|
7169
|
+
"model_id": "OpenGVLab/InternVL2-2B",
|
|
7170
|
+
"model_revision": "422ad7c6335917bfb514958233955512338485a6"
|
|
7171
|
+
},
|
|
7172
|
+
{
|
|
7173
|
+
"model_format": "awq",
|
|
7174
|
+
"model_size_in_billions": 2,
|
|
7175
|
+
"quantizations": [
|
|
7176
|
+
"Int4"
|
|
7177
|
+
],
|
|
7178
|
+
"model_id": "OpenGVLab/InternVL2-2B-AWQ",
|
|
7179
|
+
"model_revision": "701bc3fc098a8a3b686b3b4135cfb77202be89e0"
|
|
7180
|
+
},
|
|
7181
|
+
{
|
|
7182
|
+
"model_format": "pytorch",
|
|
7183
|
+
"model_size_in_billions": 4,
|
|
7184
|
+
"quantizations": [
|
|
7185
|
+
"4-bit",
|
|
7186
|
+
"8-bit",
|
|
7187
|
+
"none"
|
|
7188
|
+
],
|
|
7189
|
+
"model_id": "OpenGVLab/InternVL2-4B",
|
|
7190
|
+
"model_revision": "b50544dafada6c41e80bfde2f57cc9b0140fc21c"
|
|
7191
|
+
},
|
|
7192
|
+
{
|
|
7193
|
+
"model_format": "pytorch",
|
|
7194
|
+
"model_size_in_billions": 8,
|
|
7195
|
+
"quantizations": [
|
|
7196
|
+
"4-bit",
|
|
7197
|
+
"8-bit",
|
|
7198
|
+
"none"
|
|
7199
|
+
],
|
|
7200
|
+
"model_id": "OpenGVLab/InternVL2-8B",
|
|
7201
|
+
"model_revision": "3bfd3664dea4f3da628785f5125d30f889701253"
|
|
7202
|
+
},
|
|
7203
|
+
{
|
|
7204
|
+
"model_format": "awq",
|
|
7205
|
+
"model_size_in_billions": 8,
|
|
7206
|
+
"quantizations": [
|
|
7207
|
+
"Int4"
|
|
7208
|
+
],
|
|
7209
|
+
"model_id": "OpenGVLab/InternVL2-8B-AWQ",
|
|
7210
|
+
"model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
|
|
7971
7211
|
},
|
|
7972
7212
|
{
|
|
7973
7213
|
"model_format": "pytorch",
|
|
7974
7214
|
"model_size_in_billions": 26,
|
|
7975
7215
|
"quantizations": [
|
|
7976
|
-
|
|
7216
|
+
"4-bit",
|
|
7217
|
+
"8-bit",
|
|
7218
|
+
"none"
|
|
7977
7219
|
],
|
|
7978
|
-
"model_id": "OpenGVLab/
|
|
7979
|
-
"model_revision": "
|
|
7220
|
+
"model_id": "OpenGVLab/InternVL2-26B",
|
|
7221
|
+
"model_revision": "b9f3c7e6d575b0115e076a3ffc46fd20b7586899"
|
|
7222
|
+
},
|
|
7223
|
+
{
|
|
7224
|
+
"model_format": "awq",
|
|
7225
|
+
"model_size_in_billions": 26,
|
|
7226
|
+
"quantizations": [
|
|
7227
|
+
"Int4"
|
|
7228
|
+
],
|
|
7229
|
+
"model_id": "OpenGVLab/InternVL2-26B-AWQ",
|
|
7230
|
+
"model_revision": "469e0019ffd251e22ff6501a5c2321964e86ef0d"
|
|
7231
|
+
},
|
|
7232
|
+
{
|
|
7233
|
+
"model_format": "pytorch",
|
|
7234
|
+
"model_size_in_billions": 40,
|
|
7235
|
+
"quantizations": [
|
|
7236
|
+
"4-bit",
|
|
7237
|
+
"8-bit",
|
|
7238
|
+
"none"
|
|
7239
|
+
],
|
|
7240
|
+
"model_id": "OpenGVLab/InternVL2-40B",
|
|
7241
|
+
"model_revision": "725a12063bb855c966e30a0617d0ccd9e870d772"
|
|
7242
|
+
},
|
|
7243
|
+
{
|
|
7244
|
+
"model_format": "awq",
|
|
7245
|
+
"model_size_in_billions": 40,
|
|
7246
|
+
"quantizations": [
|
|
7247
|
+
"Int4"
|
|
7248
|
+
],
|
|
7249
|
+
"model_id": "OpenGVLab/InternVL2-40B-AWQ",
|
|
7250
|
+
"model_revision": "d92e140f6dfe8ea9679924c6a31898f42c4e1846"
|
|
7251
|
+
},
|
|
7252
|
+
{
|
|
7253
|
+
"model_format": "pytorch",
|
|
7254
|
+
"model_size_in_billions": 76,
|
|
7255
|
+
"quantizations": [
|
|
7256
|
+
"4-bit",
|
|
7257
|
+
"8-bit",
|
|
7258
|
+
"none"
|
|
7259
|
+
],
|
|
7260
|
+
"model_id": "OpenGVLab/InternVL2-Llama3-76B",
|
|
7261
|
+
"model_revision": "cf7914905f78e9e3560ddbd6f5dfc39becac494f"
|
|
7262
|
+
},
|
|
7263
|
+
{
|
|
7264
|
+
"model_format": "awq",
|
|
7265
|
+
"model_size_in_billions": 76,
|
|
7266
|
+
"quantizations": [
|
|
7267
|
+
"Int4"
|
|
7268
|
+
],
|
|
7269
|
+
"model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
|
|
7270
|
+
"model_revision": "1bc796bf80f2ebc7d6a14c15f55217a4600d50a4"
|
|
7980
7271
|
}
|
|
7981
7272
|
],
|
|
7982
7273
|
"prompt_style": {
|
|
7983
|
-
"style_name": "
|
|
7274
|
+
"style_name": "INTERNVL",
|
|
7984
7275
|
"system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
|
|
7985
7276
|
"roles": [
|
|
7986
7277
|
"<|im_start|>user",
|
|
@@ -7988,10 +7279,14 @@
|
|
|
7988
7279
|
],
|
|
7989
7280
|
"intra_message_sep": "<|im_end|>",
|
|
7990
7281
|
"stop_token_ids": [
|
|
7282
|
+
2,
|
|
7283
|
+
92543,
|
|
7991
7284
|
92542
|
|
7992
7285
|
],
|
|
7993
7286
|
"stop": [
|
|
7994
|
-
"
|
|
7287
|
+
"</s>",
|
|
7288
|
+
"<|im_end|>",
|
|
7289
|
+
"<|im_start|>"
|
|
7995
7290
|
]
|
|
7996
7291
|
}
|
|
7997
7292
|
},
|
|
@@ -8047,6 +7342,51 @@
|
|
|
8047
7342
|
]
|
|
8048
7343
|
}
|
|
8049
7344
|
},
|
|
7345
|
+
{
|
|
7346
|
+
"version": 1,
|
|
7347
|
+
"context_length": 8192,
|
|
7348
|
+
"model_name": "cogvlm2-video-llama3-chat",
|
|
7349
|
+
"model_lang": [
|
|
7350
|
+
"en",
|
|
7351
|
+
"zh"
|
|
7352
|
+
],
|
|
7353
|
+
"model_ability": [
|
|
7354
|
+
"chat",
|
|
7355
|
+
"vision"
|
|
7356
|
+
],
|
|
7357
|
+
"model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
|
|
7358
|
+
"model_specs": [
|
|
7359
|
+
{
|
|
7360
|
+
"model_format": "pytorch",
|
|
7361
|
+
"model_size_in_billions": 12,
|
|
7362
|
+
"quantizations": [
|
|
7363
|
+
"4-bit",
|
|
7364
|
+
"8-bit",
|
|
7365
|
+
"none"
|
|
7366
|
+
],
|
|
7367
|
+
"model_id": "THUDM/cogvlm2-video-llama3-chat",
|
|
7368
|
+
"model_revision": "f375ead7d8202ebe2c3d09f1068abdddeb2929fa"
|
|
7369
|
+
}
|
|
7370
|
+
],
|
|
7371
|
+
"prompt_style": {
|
|
7372
|
+
"style_name": "LLAMA3",
|
|
7373
|
+
"system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
|
|
7374
|
+
"roles": [
|
|
7375
|
+
"user",
|
|
7376
|
+
"assistant"
|
|
7377
|
+
],
|
|
7378
|
+
"intra_message_sep": "\n\n",
|
|
7379
|
+
"inter_message_sep": "<|eot_id|>",
|
|
7380
|
+
"stop_token_ids": [
|
|
7381
|
+
128001,
|
|
7382
|
+
128009
|
|
7383
|
+
],
|
|
7384
|
+
"stop": [
|
|
7385
|
+
"<|end_of_text|>",
|
|
7386
|
+
"<|eot_id|>"
|
|
7387
|
+
]
|
|
7388
|
+
}
|
|
7389
|
+
},
|
|
8050
7390
|
{
|
|
8051
7391
|
"version": 1,
|
|
8052
7392
|
"context_length": 8192,
|