xinference 0.14.1.post1__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic; see the advisory details on the registry page for more information.

Files changed (194)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +15 -34
  3. xinference/client/restful/restful_client.py +2 -2
  4. xinference/core/chat_interface.py +45 -10
  5. xinference/core/image_interface.py +9 -0
  6. xinference/core/model.py +8 -5
  7. xinference/core/scheduler.py +1 -2
  8. xinference/core/worker.py +49 -42
  9. xinference/deploy/cmdline.py +2 -2
  10. xinference/deploy/test/test_cmdline.py +7 -7
  11. xinference/model/audio/chattts.py +24 -9
  12. xinference/model/audio/core.py +8 -2
  13. xinference/model/audio/fish_speech.py +228 -0
  14. xinference/model/audio/model_spec.json +8 -0
  15. xinference/model/embedding/core.py +23 -1
  16. xinference/model/image/model_spec.json +2 -1
  17. xinference/model/image/model_spec_modelscope.json +2 -1
  18. xinference/model/image/stable_diffusion/core.py +49 -1
  19. xinference/model/llm/__init__.py +26 -27
  20. xinference/model/llm/{ggml/llamacpp.py → llama_cpp/core.py} +2 -35
  21. xinference/model/llm/llm_family.json +606 -1266
  22. xinference/model/llm/llm_family.py +16 -139
  23. xinference/model/llm/llm_family_modelscope.json +276 -313
  24. xinference/model/llm/lmdeploy/__init__.py +0 -0
  25. xinference/model/llm/lmdeploy/core.py +557 -0
  26. xinference/model/llm/memory.py +9 -9
  27. xinference/model/llm/sglang/core.py +2 -2
  28. xinference/model/llm/{pytorch → transformers}/chatglm.py +6 -13
  29. xinference/model/llm/{pytorch → transformers}/cogvlm2.py +4 -45
  30. xinference/model/llm/transformers/cogvlm2_video.py +524 -0
  31. xinference/model/llm/{pytorch → transformers}/core.py +3 -10
  32. xinference/model/llm/{pytorch → transformers}/glm4v.py +2 -23
  33. xinference/model/llm/transformers/intern_vl.py +540 -0
  34. xinference/model/llm/{pytorch → transformers}/internlm2.py +4 -8
  35. xinference/model/llm/{pytorch → transformers}/minicpmv25.py +2 -23
  36. xinference/model/llm/{pytorch → transformers}/minicpmv26.py +66 -41
  37. xinference/model/llm/{pytorch → transformers}/utils.py +1 -2
  38. xinference/model/llm/{pytorch → transformers}/yi_vl.py +2 -24
  39. xinference/model/llm/utils.py +85 -70
  40. xinference/model/llm/vllm/core.py +110 -11
  41. xinference/model/utils.py +1 -95
  42. xinference/thirdparty/fish_speech/__init__.py +0 -0
  43. xinference/thirdparty/fish_speech/fish_speech/__init__.py +0 -0
  44. xinference/thirdparty/fish_speech/fish_speech/callbacks/__init__.py +3 -0
  45. xinference/thirdparty/fish_speech/fish_speech/callbacks/grad_norm.py +113 -0
  46. xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
  47. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  48. xinference/thirdparty/fish_speech/fish_speech/conversation.py +2 -0
  49. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  50. xinference/thirdparty/fish_speech/fish_speech/datasets/concat_repeat.py +53 -0
  51. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  52. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_pb2.py +33 -0
  53. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_stream.py +36 -0
  54. xinference/thirdparty/fish_speech/fish_speech/datasets/semantic.py +496 -0
  55. xinference/thirdparty/fish_speech/fish_speech/datasets/vqgan.py +147 -0
  56. xinference/thirdparty/fish_speech/fish_speech/i18n/__init__.py +3 -0
  57. xinference/thirdparty/fish_speech/fish_speech/i18n/core.py +40 -0
  58. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  59. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +122 -0
  60. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +122 -0
  61. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +123 -0
  62. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +133 -0
  63. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +122 -0
  64. xinference/thirdparty/fish_speech/fish_speech/i18n/scan.py +122 -0
  65. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  66. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/__init__.py +0 -0
  67. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lit_module.py +202 -0
  68. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +779 -0
  69. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lora.py +92 -0
  70. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +3 -0
  71. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +442 -0
  72. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  73. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +44 -0
  74. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +625 -0
  75. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +139 -0
  76. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +115 -0
  77. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +225 -0
  78. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/utils.py +94 -0
  79. xinference/thirdparty/fish_speech/fish_speech/scheduler.py +40 -0
  80. xinference/thirdparty/fish_speech/fish_speech/text/__init__.py +4 -0
  81. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/__init__.py +0 -0
  82. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_class.py +172 -0
  83. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_constant.py +30 -0
  84. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_util.py +342 -0
  85. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/cardinal.py +32 -0
  86. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/date.py +75 -0
  87. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/digit.py +32 -0
  88. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/fraction.py +35 -0
  89. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/money.py +43 -0
  90. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/percentage.py +33 -0
  91. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/telephone.py +51 -0
  92. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +177 -0
  93. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +69 -0
  94. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +130 -0
  95. xinference/thirdparty/fish_speech/fish_speech/train.py +139 -0
  96. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +23 -0
  97. xinference/thirdparty/fish_speech/fish_speech/utils/braceexpand.py +217 -0
  98. xinference/thirdparty/fish_speech/fish_speech/utils/context.py +13 -0
  99. xinference/thirdparty/fish_speech/fish_speech/utils/file.py +16 -0
  100. xinference/thirdparty/fish_speech/fish_speech/utils/instantiators.py +50 -0
  101. xinference/thirdparty/fish_speech/fish_speech/utils/logger.py +55 -0
  102. xinference/thirdparty/fish_speech/fish_speech/utils/logging_utils.py +48 -0
  103. xinference/thirdparty/fish_speech/fish_speech/utils/rich_utils.py +100 -0
  104. xinference/thirdparty/fish_speech/fish_speech/utils/spectrogram.py +122 -0
  105. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +114 -0
  106. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  107. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +120 -0
  108. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1237 -0
  109. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  110. xinference/thirdparty/fish_speech/tools/api.py +495 -0
  111. xinference/thirdparty/fish_speech/tools/auto_rerank.py +159 -0
  112. xinference/thirdparty/fish_speech/tools/download_models.py +55 -0
  113. xinference/thirdparty/fish_speech/tools/extract_model.py +21 -0
  114. xinference/thirdparty/fish_speech/tools/file.py +108 -0
  115. xinference/thirdparty/fish_speech/tools/gen_ref.py +36 -0
  116. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  117. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +169 -0
  118. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +171 -0
  119. xinference/thirdparty/fish_speech/tools/llama/generate.py +698 -0
  120. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +95 -0
  121. xinference/thirdparty/fish_speech/tools/llama/quantize.py +497 -0
  122. xinference/thirdparty/fish_speech/tools/llama/rebuild_tokenizer.py +57 -0
  123. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +55 -0
  124. xinference/thirdparty/fish_speech/tools/post_api.py +164 -0
  125. xinference/thirdparty/fish_speech/tools/sensevoice/__init__.py +0 -0
  126. xinference/thirdparty/fish_speech/tools/sensevoice/auto_model.py +573 -0
  127. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +332 -0
  128. xinference/thirdparty/fish_speech/tools/sensevoice/vad_utils.py +61 -0
  129. xinference/thirdparty/fish_speech/tools/smart_pad.py +47 -0
  130. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  131. xinference/thirdparty/fish_speech/tools/vqgan/create_train_split.py +83 -0
  132. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +227 -0
  133. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +120 -0
  134. xinference/thirdparty/fish_speech/tools/webui.py +619 -0
  135. xinference/thirdparty/fish_speech/tools/whisper_asr.py +176 -0
  136. xinference/thirdparty/internvl/__init__.py +0 -0
  137. xinference/thirdparty/internvl/conversation.py +393 -0
  138. xinference/thirdparty/omnilmm/model/utils.py +16 -1
  139. xinference/web/ui/build/asset-manifest.json +3 -3
  140. xinference/web/ui/build/index.html +1 -1
  141. xinference/web/ui/build/static/js/main.661c7b0a.js +3 -0
  142. xinference/web/ui/build/static/js/{main.17ca0398.js.map → main.661c7b0a.js.map} +1 -1
  143. xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +1 -0
  144. xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +1 -0
  145. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +1 -0
  146. xinference/web/ui/node_modules/.cache/babel-loader/5391543180fead1eeef5364300301498d58a7d91d62de3841a32768b67f4552f.json +1 -0
  147. xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +1 -0
  148. xinference/web/ui/node_modules/.cache/babel-loader/714c37ce0ec5b5c591033f02be2f3f491fdd70da3ef568ee4a4f94689a3d5ca2.json +1 -0
  149. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +1 -0
  150. xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +1 -0
  151. xinference/web/ui/node_modules/.cache/babel-loader/a797831de0dc74897f4b50b3426555d748f328b4c2cc391de709eadaf6a5f3e3.json +1 -0
  152. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +1 -0
  153. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +1 -0
  154. xinference/web/ui/node_modules/.cache/babel-loader/e91938976f229ce986b2907e51e1f00540b584ced0a315d498c172d13220739d.json +1 -0
  155. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +1 -0
  156. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/METADATA +22 -13
  157. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/RECORD +170 -79
  158. xinference/locale/utils.py +0 -39
  159. xinference/locale/zh_CN.json +0 -26
  160. xinference/model/llm/ggml/tools/__init__.py +0 -15
  161. xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py +0 -498
  162. xinference/model/llm/ggml/tools/gguf.py +0 -884
  163. xinference/model/llm/pytorch/__init__.py +0 -13
  164. xinference/model/llm/pytorch/baichuan.py +0 -81
  165. xinference/model/llm/pytorch/falcon.py +0 -138
  166. xinference/model/llm/pytorch/intern_vl.py +0 -352
  167. xinference/model/llm/pytorch/vicuna.py +0 -69
  168. xinference/web/ui/build/static/js/main.17ca0398.js +0 -3
  169. xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +0 -1
  170. xinference/web/ui/node_modules/.cache/babel-loader/2f40209b32e7e46a2eab6b8c8a355eb42c3caa8bc3228dd929f32fd2b3940294.json +0 -1
  171. xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +0 -1
  172. xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +0 -1
  173. xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +0 -1
  174. xinference/web/ui/node_modules/.cache/babel-loader/71684495d995c7e266eecc6a0ad8ea0284cc785f80abddf863789c57a6134969.json +0 -1
  175. xinference/web/ui/node_modules/.cache/babel-loader/80acd1edf31542ab1dcccfad02cb4b38f3325cff847a781fcce97500cfd6f878.json +0 -1
  176. xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +0 -1
  177. xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +0 -1
  178. xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +0 -1
  179. xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +0 -1
  180. xinference/web/ui/node_modules/.cache/babel-loader/f28b83886159d83b84f099b05d607a822dca4dd7f2d8aa6d56fe08bab0b5b086.json +0 -1
  181. xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +0 -1
  182. /xinference/{locale → model/llm/llama_cpp}/__init__.py +0 -0
  183. /xinference/model/llm/{ggml → transformers}/__init__.py +0 -0
  184. /xinference/model/llm/{pytorch → transformers}/compression.py +0 -0
  185. /xinference/model/llm/{pytorch → transformers}/deepseek_vl.py +0 -0
  186. /xinference/model/llm/{pytorch → transformers}/llama_2.py +0 -0
  187. /xinference/model/llm/{pytorch → transformers}/omnilmm.py +0 -0
  188. /xinference/model/llm/{pytorch → transformers}/qwen_vl.py +0 -0
  189. /xinference/model/llm/{pytorch → transformers}/tensorizer_utils.py +0 -0
  190. /xinference/web/ui/build/static/js/{main.17ca0398.js.LICENSE.txt → main.661c7b0a.js.LICENSE.txt} +0 -0
  191. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/LICENSE +0 -0
  192. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/WHEEL +0 -0
  193. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/entry_points.txt +0 -0
  194. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/top_level.txt +0 -0
@@ -503,78 +503,6 @@
503
503
  }
504
504
  ]
505
505
  },
506
- {
507
- "version": 1,
508
- "context_length": 8192,
509
- "model_name": "chatglm2",
510
- "model_lang": [
511
- "en",
512
- "zh"
513
- ],
514
- "model_ability": [
515
- "chat"
516
- ],
517
- "model_description": "ChatGLM2 is the second generation of ChatGLM, still open-source and trained on Chinese and English data.",
518
- "model_specs": [
519
- {
520
- "model_format": "pytorch",
521
- "model_size_in_billions": 6,
522
- "quantizations": [
523
- "4-bit",
524
- "8-bit",
525
- "none"
526
- ],
527
- "model_hub": "modelscope",
528
- "model_id": "ZhipuAI/chatglm2-6b",
529
- "model_revision": "v1.0.12"
530
- }
531
- ],
532
- "prompt_style": {
533
- "style_name": "CHATGLM",
534
- "system_prompt": "",
535
- "roles": [
536
- "问",
537
- "答"
538
- ],
539
- "intra_message_sep": "\n\n"
540
- }
541
- },
542
- {
543
- "version": 1,
544
- "context_length": 32768,
545
- "model_name": "chatglm2-32k",
546
- "model_lang": [
547
- "en",
548
- "zh"
549
- ],
550
- "model_ability": [
551
- "chat"
552
- ],
553
- "model_description": "ChatGLM2-32k is a special version of ChatGLM2, with a context window of 32k tokens instead of 8k.",
554
- "model_specs": [
555
- {
556
- "model_format": "pytorch",
557
- "model_size_in_billions": 6,
558
- "quantizations": [
559
- "4-bit",
560
- "8-bit",
561
- "none"
562
- ],
563
- "model_hub": "modelscope",
564
- "model_id": "ZhipuAI/chatglm2-6b-32k",
565
- "model_revision": "v1.0.2"
566
- }
567
- ],
568
- "prompt_style": {
569
- "style_name": "CHATGLM",
570
- "system_prompt": "",
571
- "roles": [
572
- "问",
573
- "答"
574
- ],
575
- "intra_message_sep": "\n\n"
576
- }
577
- },
578
506
  {
579
507
  "version": 1,
580
508
  "context_length": 8192,
@@ -1060,166 +988,60 @@
1060
988
  },
1061
989
  {
1062
990
  "version": 1,
1063
- "context_length": 8192,
1064
- "model_name": "internlm-7b",
991
+ "context_length": 32768,
992
+ "model_name": "internlm2.5-chat",
1065
993
  "model_lang": [
1066
994
  "en",
1067
995
  "zh"
1068
996
  ],
1069
997
  "model_ability": [
1070
- "generate"
998
+ "chat"
1071
999
  ],
1072
- "model_description": "InternLM is a Transformer-based LLM that is trained on both Chinese and English data, focusing on practical scenarios.",
1000
+ "model_description": "InternLM2.5 series of the InternLM model.",
1073
1001
  "model_specs": [
1074
1002
  {
1075
1003
  "model_format": "pytorch",
1076
- "model_size_in_billions": 7,
1004
+ "model_size_in_billions": "1_8",
1077
1005
  "quantizations": [
1078
- "4-bit",
1079
- "8-bit",
1080
1006
  "none"
1081
1007
  ],
1082
- "model_id": "Shanghai_AI_Laboratory/internlm-7b",
1083
- "model_hub": "modelscope",
1084
- "model_revision": "v1.0.1"
1085
- }
1086
- ]
1087
- },
1088
- {
1089
- "version": 1,
1090
- "context_length": 4096,
1091
- "model_name": "internlm-chat-7b",
1092
- "model_lang": [
1093
- "en",
1094
- "zh"
1095
- ],
1096
- "model_ability": [
1097
- "chat"
1098
- ],
1099
- "model_description": "Internlm-chat is a fine-tuned version of the Internlm LLM, specializing in chatting.",
1100
- "model_specs": [
1008
+ "model_id": "Shanghai_AI_Laboratory/internlm2_5-1_8b-chat",
1009
+ "model_hub": "modelscope"
1010
+ },
1101
1011
  {
1102
1012
  "model_format": "pytorch",
1103
1013
  "model_size_in_billions": 7,
1104
1014
  "quantizations": [
1105
- "4-bit",
1106
- "8-bit",
1107
1015
  "none"
1108
1016
  ],
1109
- "model_id": "Shanghai_AI_Laboratory/internlm-chat-7b",
1110
- "model_hub": "modelscope",
1111
- "model_revision": "v1.0.1"
1112
- }
1113
- ],
1114
- "prompt_style": {
1115
- "style_name": "INTERNLM",
1116
- "system_prompt": "",
1117
- "roles": [
1118
- "<|User|>",
1119
- "<|Bot|>"
1120
- ],
1121
- "intra_message_sep": "<eoh>\n",
1122
- "inter_message_sep": "<eoa>\n",
1123
- "stop_token_ids": [
1124
- 1,
1125
- 103028
1126
- ],
1127
- "stop": [
1128
- "<eoa>"
1129
- ]
1130
- }
1131
- },
1132
- {
1133
- "version": 1,
1134
- "context_length": 16384,
1135
- "model_name": "internlm-20b",
1136
- "model_lang": [
1137
- "en",
1138
- "zh"
1139
- ],
1140
- "model_ability": [
1141
- "generate"
1142
- ],
1143
- "model_description": "Pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data.",
1144
- "model_specs": [
1017
+ "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat",
1018
+ "model_hub": "modelscope"
1019
+ },
1145
1020
  {
1146
- "model_format": "pytorch",
1147
- "model_size_in_billions": 20,
1021
+ "model_format": "ggufv2",
1022
+ "model_size_in_billions": 7,
1148
1023
  "quantizations": [
1149
- "4-bit",
1150
- "8-bit",
1151
- "none"
1024
+ "q2_k",
1025
+ "q3_k_m",
1026
+ "q4_0",
1027
+ "q4_k_m",
1028
+ "q5_0",
1029
+ "q5_k_m",
1030
+ "q6_k",
1031
+ "q8_0",
1032
+ "fp16"
1152
1033
  ],
1153
- "model_id": "Shanghai_AI_Laboratory/internlm-20b",
1154
- "model_hub": "modelscope",
1155
- "model_revision": "v1.0.1"
1156
- }
1157
- ]
1158
- },
1159
- {
1160
- "version": 1,
1161
- "context_length": 16384,
1162
- "model_name": "internlm-chat-20b",
1163
- "model_lang": [
1164
- "en",
1165
- "zh"
1166
- ],
1167
- "model_ability": [
1168
- "chat"
1169
- ],
1170
- "model_description": "Pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data. The Chat version has undergone SFT and RLHF training.",
1171
- "model_specs": [
1034
+ "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat-gguf",
1035
+ "model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf",
1036
+ "model_hub": "modelscope"
1037
+ },
1172
1038
  {
1173
1039
  "model_format": "pytorch",
1174
1040
  "model_size_in_billions": 20,
1175
- "quantizations": [
1176
- "4-bit",
1177
- "8-bit",
1178
- "none"
1179
- ],
1180
- "model_id": "Shanghai_AI_Laboratory/internlm-chat-20b",
1181
- "model_hub": "modelscope",
1182
- "model_revision": "v1.0.1"
1183
- }
1184
- ],
1185
- "prompt_style": {
1186
- "style_name": "INTERNLM",
1187
- "system_prompt": "",
1188
- "roles": [
1189
- "<|User|>",
1190
- "<|Bot|>"
1191
- ],
1192
- "intra_message_sep": "<eoh>\n",
1193
- "inter_message_sep": "<eoa>\n",
1194
- "stop_token_ids": [
1195
- 1,
1196
- 103028
1197
- ],
1198
- "stop": [
1199
- "<eoa>"
1200
- ]
1201
- }
1202
- },
1203
- {
1204
- "version": 1,
1205
- "context_length": 32768,
1206
- "model_name": "internlm2.5-chat",
1207
- "model_lang": [
1208
- "en",
1209
- "zh"
1210
- ],
1211
- "model_ability": [
1212
- "chat"
1213
- ],
1214
- "model_description": "InternLM2.5 series of the InternLM model.",
1215
- "model_specs": [
1216
- {
1217
- "model_format": "pytorch",
1218
- "model_size_in_billions": 7,
1219
1041
  "quantizations": [
1220
1042
  "none"
1221
1043
  ],
1222
- "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat",
1044
+ "model_id": "Shanghai_AI_Laboratory/internlm2_5-20b-chat",
1223
1045
  "model_hub": "modelscope"
1224
1046
  }
1225
1047
  ],
@@ -2403,59 +2225,6 @@
2403
2225
  ]
2404
2226
  }
2405
2227
  },
2406
- {
2407
- "version": 1,
2408
- "context_length": 2048,
2409
- "model_name": "falcon-instruct",
2410
- "model_lang": [
2411
- "en"
2412
- ],
2413
- "model_ability": [
2414
- "chat"
2415
- ],
2416
- "model_description": "Falcon-instruct is a fine-tuned version of the Falcon LLM, specializing in chatting.",
2417
- "model_specs": [
2418
- {
2419
- "model_format": "pytorch",
2420
- "model_size_in_billions": 7,
2421
- "quantizations": [
2422
- "4-bit",
2423
- "8-bit",
2424
- "none"
2425
- ],
2426
- "model_hub": "modelscope",
2427
- "model_id": "Xorbits/falcon-7b-instruct",
2428
- "model_revision": "v1.0.0"
2429
- }
2430
- ],
2431
- "prompt_style": {
2432
- "style_name": "FALCON",
2433
- "system_prompt": "",
2434
- "roles": [
2435
- "User",
2436
- "Assistant"
2437
- ],
2438
- "intra_message_sep": "\n",
2439
- "inter_message_sep": "<|endoftext|>",
2440
- "stop": [
2441
- "\nUser"
2442
- ],
2443
- "stop_token_ids": [
2444
- 0,
2445
- 1,
2446
- 2,
2447
- 3,
2448
- 4,
2449
- 5,
2450
- 6,
2451
- 7,
2452
- 8,
2453
- 9,
2454
- 10,
2455
- 11
2456
- ]
2457
- }
2458
- },
2459
2228
  {
2460
2229
  "version": 1,
2461
2230
  "context_length": 8192,
@@ -2540,53 +2309,6 @@
2540
2309
  ]
2541
2310
  }
2542
2311
  },
2543
- {
2544
- "version": 1,
2545
- "context_length": 2048,
2546
- "model_name": "OpenBuddy",
2547
- "model_lang": [
2548
- "en"
2549
- ],
2550
- "model_ability": [
2551
- "chat"
2552
- ],
2553
- "model_description": "OpenBuddy is a powerful open multilingual chatbot model aimed at global users.",
2554
- "model_specs": [
2555
- {
2556
- "model_format": "ggmlv3",
2557
- "model_size_in_billions": 13,
2558
- "quantizations": [
2559
- "Q2_K",
2560
- "Q3_K_S",
2561
- "Q3_K_M",
2562
- "Q3_K_L",
2563
- "Q4_0",
2564
- "Q4_1",
2565
- "Q4_K_S",
2566
- "Q4_K_M",
2567
- "Q5_0",
2568
- "Q5_1",
2569
- "Q5_K_S",
2570
- "Q5_K_M",
2571
- "Q6_K",
2572
- "Q8_0"
2573
- ],
2574
- "model_hub": "modelscope",
2575
- "model_id": "Xorbits/OpenBuddy-Llama2-13B-v11.1-GGML",
2576
- "model_file_name_template": "openbuddy-llama2-13b-v11.1.ggmlv3.{quantization}.bin"
2577
- }
2578
- ],
2579
- "prompt_style": {
2580
- "style_name": "INSTRUCTION",
2581
- "system_prompt": "You are a professional translator. Be faithful or accurate in translation. Make the translation readable or intelligible. Be elegant or natural in translation. Do not translate person's name. Do not add any additional text to the translation. Do not give me any comments or suggestions.\nUser:\n\n{0}\nAssistant:",
2582
- "roles": [
2583
- "User",
2584
- "Assistant"
2585
- ],
2586
- "intra_message_sep": "",
2587
- "inter_message_sep": ""
2588
- }
2589
- },
2590
2312
  {
2591
2313
  "version": 1,
2592
2314
  "context_length": 32768,
@@ -3416,6 +3138,24 @@
3416
3138
  "model_id": "qwen/Qwen2-72B-Instruct-AWQ",
3417
3139
  "model_hub": "modelscope"
3418
3140
  },
3141
+ {
3142
+ "model_format": "fp8",
3143
+ "model_size_in_billions": 7,
3144
+ "quantizations": [
3145
+ "fp8"
3146
+ ],
3147
+ "model_id": "liuzhenghua/Qwen2-7B-FP8-Instruct",
3148
+ "model_hub": "modelscope"
3149
+ },
3150
+ {
3151
+ "model_format": "fp8",
3152
+ "model_size_in_billions": 72,
3153
+ "quantizations": [
3154
+ "fp8"
3155
+ ],
3156
+ "model_id": "liuzhenghua/Qwen2-72B-FP8-Instruct",
3157
+ "model_hub": "modelscope"
3158
+ },
3419
3159
  {
3420
3160
  "model_format": "mlx",
3421
3161
  "model_size_in_billions": "0_5",
@@ -4245,6 +3985,17 @@
4245
3985
  ],
4246
3986
  "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
4247
3987
  "model_specs": [
3988
+ {
3989
+ "model_format": "pytorch",
3990
+ "model_size_in_billions": 2,
3991
+ "quantizations": [
3992
+ "none",
3993
+ "4-bit",
3994
+ "8-bit"
3995
+ ],
3996
+ "model_id": "LLM-Research/gemma-2-2b-it",
3997
+ "model_hub": "modelscope"
3998
+ },
4248
3999
  {
4249
4000
  "model_format": "pytorch",
4250
4001
  "model_size_in_billions": 9,
@@ -4958,25 +4709,187 @@
4958
4709
  "model_format": "pytorch",
4959
4710
  "model_size_in_billions": 26,
4960
4711
  "quantizations": [
4961
- "none"
4712
+ "4-bit",
4713
+ "8-bit",
4714
+ "none"
4962
4715
  ],
4963
- "model_hub": "modelscope",
4964
- "model_id": "AI-ModelScope/InternVL-Chat-V1-5",
4716
+ "model_hub": "modelscope",
4717
+ "model_id": "OpenGVLab/InternVL-Chat-V1-5",
4718
+ "model_revision": "master"
4719
+ }
4720
+ ],
4721
+ "prompt_style": {
4722
+ "style_name": "INTERNVL",
4723
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
4724
+ "roles": [
4725
+ "<|im_start|>user",
4726
+ "<|im_start|>assistant"
4727
+ ],
4728
+ "intra_message_sep": "<|im_end|>",
4729
+ "stop_token_ids": [
4730
+ 2,
4731
+ 92543,
4732
+ 92542
4733
+ ],
4734
+ "stop": [
4735
+ "</s>",
4736
+ "<|im_end|>",
4737
+ "<|im_start|>"
4738
+ ]
4739
+ }
4740
+ },
4741
+ {
4742
+ "version": 1,
4743
+ "context_length": 32768,
4744
+ "model_name": "internvl2",
4745
+ "model_lang": [
4746
+ "en",
4747
+ "zh"
4748
+ ],
4749
+ "model_ability": [
4750
+ "chat",
4751
+ "vision"
4752
+ ],
4753
+ "model_description": "InternVL 2 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
4754
+ "model_specs": [
4755
+
4756
+ {
4757
+ "model_format": "pytorch",
4758
+ "model_size_in_billions": 1,
4759
+ "quantizations": [
4760
+ "4-bit",
4761
+ "8-bit",
4762
+ "none"
4763
+ ],
4764
+ "model_hub": "modelscope",
4765
+ "model_id": "OpenGVLab/InternVL2-1B",
4766
+ "model_revision": "master"
4767
+ },
4768
+ {
4769
+ "model_format": "pytorch",
4770
+ "model_size_in_billions": 2,
4771
+ "quantizations": [
4772
+ "4-bit",
4773
+ "8-bit",
4774
+ "none"
4775
+ ],
4776
+ "model_hub": "modelscope",
4777
+ "model_id": "OpenGVLab/InternVL2-2B",
4778
+ "model_revision": "master"
4779
+ },
4780
+ {
4781
+ "model_format": "awq",
4782
+ "model_size_in_billions": 2,
4783
+ "quantizations": [
4784
+ "Int4"
4785
+ ],
4786
+ "model_hub": "modelscope",
4787
+ "model_id": "OpenGVLab/InternVL2-2B-AWQ",
4788
+ "model_revision": "master"
4789
+ },
4790
+ {
4791
+ "model_format": "pytorch",
4792
+ "model_size_in_billions": 4,
4793
+ "quantizations": [
4794
+ "4-bit",
4795
+ "8-bit",
4796
+ "none"
4797
+ ],
4798
+ "model_hub": "modelscope",
4799
+ "model_id": "OpenGVLab/InternVL2-4B",
4800
+ "model_revision": "master"
4801
+ },
4802
+ {
4803
+ "model_format": "pytorch",
4804
+ "model_size_in_billions": 8,
4805
+ "quantizations": [
4806
+ "4-bit",
4807
+ "8-bit",
4808
+ "none"
4809
+ ],
4810
+ "model_hub": "modelscope",
4811
+ "model_id": "OpenGVLab/InternVL2-8B",
4812
+ "model_revision": "master"
4813
+ },
4814
+ {
4815
+ "model_format": "awq",
4816
+ "model_size_in_billions": 8,
4817
+ "quantizations": [
4818
+ "Int4"
4819
+ ],
4820
+ "model_hub": "modelscope",
4821
+ "model_id": "OpenGVLab/InternVL2-8B-AWQ",
4965
4822
  "model_revision": "master"
4966
4823
  },
4967
4824
  {
4968
4825
  "model_format": "pytorch",
4969
4826
  "model_size_in_billions": 26,
4970
4827
  "quantizations": [
4971
- "Int8"
4828
+ "4-bit",
4829
+ "8-bit",
4830
+ "none"
4831
+ ],
4832
+ "model_hub": "modelscope",
4833
+ "model_id": "OpenGVLab/InternVL2-26B",
4834
+ "model_revision": "master"
4835
+ },
4836
+ {
4837
+ "model_format": "awq",
4838
+ "model_size_in_billions": 26,
4839
+ "quantizations": [
4840
+ "Int4"
4841
+ ],
4842
+ "model_hub": "modelscope",
4843
+ "model_id": "OpenGVLab/InternVL2-26B-AWQ",
4844
+ "model_revision": "master"
4845
+ },
4846
+ {
4847
+ "model_format": "pytorch",
4848
+ "model_size_in_billions": 40,
4849
+ "quantizations": [
4850
+ "4-bit",
4851
+ "8-bit",
4852
+ "none"
4853
+ ],
4854
+ "model_hub": "modelscope",
4855
+ "model_id": "OpenGVLab/InternVL2-40B",
4856
+ "model_revision": "master"
4857
+ },
4858
+ {
4859
+ "model_format": "awq",
4860
+ "model_size_in_billions": 40,
4861
+ "quantizations": [
4862
+ "Int4"
4863
+ ],
4864
+ "model_hub": "modelscope",
4865
+ "model_id": "OpenGVLab/InternVL2-40B-AWQ",
4866
+ "model_revision": "master"
4867
+ },
4868
+ {
4869
+ "model_format": "pytorch",
4870
+ "model_size_in_billions": 76,
4871
+ "quantizations": [
4872
+ "4-bit",
4873
+ "8-bit",
4874
+ "none"
4972
4875
  ],
4973
- "model_hub": "modelscope",
4974
- "model_id": "AI-ModelScope/InternVL-Chat-V1-5-{quantization}",
4876
+ "model_hub": "modelscope",
4877
+ "model_id": "OpenGVLab/InternVL2-Llama3-76B",
4878
+ "model_revision": "master"
4879
+ },
4880
+ {
4881
+ "model_format": "awq",
4882
+ "model_size_in_billions": 76,
4883
+ "quantizations": [
4884
+ "Int4"
4885
+ ],
4886
+ "model_hub": "modelscope",
4887
+ "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
4975
4888
  "model_revision": "master"
4976
4889
  }
4977
4890
  ],
4978
4891
  "prompt_style": {
4979
- "style_name": "INTERNLM2",
4892
+ "style_name": "INTERNVL",
4980
4893
  "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
4981
4894
  "roles": [
4982
4895
  "<|im_start|>user",
@@ -4984,10 +4897,14 @@
4984
4897
  ],
4985
4898
  "intra_message_sep": "<|im_end|>",
4986
4899
  "stop_token_ids": [
4900
+ 2,
4901
+ 92543,
4987
4902
  92542
4988
4903
  ],
4989
4904
  "stop": [
4990
- "<|im_end|>"
4905
+ "</s>",
4906
+ "<|im_end|>",
4907
+ "<|im_start|>"
4991
4908
  ]
4992
4909
  }
4993
4910
  },
@@ -5045,6 +4962,52 @@
5045
4962
  ]
5046
4963
  }
5047
4964
  },
4965
+ {
4966
+ "version": 1,
4967
+ "context_length": 8192,
4968
+ "model_name": "cogvlm2-video-llama3-chat",
4969
+ "model_lang": [
4970
+ "en",
4971
+ "zh"
4972
+ ],
4973
+ "model_ability": [
4974
+ "chat",
4975
+ "vision"
4976
+ ],
4977
+ "model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
4978
+ "model_specs": [
4979
+ {
4980
+ "model_format": "pytorch",
4981
+ "model_size_in_billions": 12,
4982
+ "quantizations": [
4983
+ "4-bit",
4984
+ "8-bit",
4985
+ "none"
4986
+ ],
4987
+ "model_hub": "modelscope",
4988
+ "model_id": "ZhipuAI/cogvlm2-video-llama3-chat",
4989
+ "model_revision": "master"
4990
+ }
4991
+ ],
4992
+ "prompt_style": {
4993
+ "style_name": "LLAMA3",
4994
+ "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
4995
+ "roles": [
4996
+ "user",
4997
+ "assistant"
4998
+ ],
4999
+ "intra_message_sep": "\n\n",
5000
+ "inter_message_sep": "<|eot_id|>",
5001
+ "stop_token_ids": [
5002
+ 128001,
5003
+ 128009
5004
+ ],
5005
+ "stop": [
5006
+ "<|end_of_text|>",
5007
+ "<|eot_id|>"
5008
+ ]
5009
+ }
5010
+ },
5048
5011
  {
5049
5012
  "version": 1,
5050
5013
  "context_length": 8192,
File without changes