xinference 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (210)
  1. xinference/_compat.py +2 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +72 -66
  4. xinference/core/model.py +78 -25
  5. xinference/core/supervisor.py +81 -10
  6. xinference/core/utils.py +12 -8
  7. xinference/core/worker.py +32 -0
  8. xinference/model/audio/core.py +5 -0
  9. xinference/model/audio/cosyvoice.py +25 -3
  10. xinference/model/audio/f5tts.py +15 -10
  11. xinference/model/audio/f5tts_mlx.py +260 -0
  12. xinference/model/audio/fish_speech.py +35 -111
  13. xinference/model/audio/model_spec.json +19 -3
  14. xinference/model/audio/model_spec_modelscope.json +9 -0
  15. xinference/model/audio/utils.py +32 -0
  16. xinference/model/image/core.py +69 -1
  17. xinference/model/image/model_spec.json +145 -4
  18. xinference/model/image/model_spec_modelscope.json +150 -4
  19. xinference/model/image/stable_diffusion/core.py +45 -13
  20. xinference/model/llm/__init__.py +2 -0
  21. xinference/model/llm/llm_family.json +143 -0
  22. xinference/model/llm/llm_family.py +15 -36
  23. xinference/model/llm/llm_family_modelscope.json +148 -0
  24. xinference/model/llm/mlx/core.py +37 -32
  25. xinference/model/llm/transformers/cogagent.py +272 -0
  26. xinference/model/llm/transformers/core.py +2 -0
  27. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  28. xinference/model/llm/utils.py +28 -3
  29. xinference/model/llm/vllm/core.py +48 -9
  30. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  31. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  32. xinference/model/llm/vllm/xavier/block.py +112 -0
  33. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  34. xinference/model/llm/vllm/xavier/block_tracker.py +116 -0
  35. xinference/model/llm/vllm/xavier/engine.py +247 -0
  36. xinference/model/llm/vllm/xavier/executor.py +132 -0
  37. xinference/model/llm/vllm/xavier/scheduler.py +422 -0
  38. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  39. xinference/model/llm/vllm/xavier/test/test_xavier.py +122 -0
  40. xinference/model/llm/vllm/xavier/transfer.py +298 -0
  41. xinference/model/video/diffusers.py +14 -0
  42. xinference/model/video/model_spec.json +15 -0
  43. xinference/model/video/model_spec_modelscope.json +16 -0
  44. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  45. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  46. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  47. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  48. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  49. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  50. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  51. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  52. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  53. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  54. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  55. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  56. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  57. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  58. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  59. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  60. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  61. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  62. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  63. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  64. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  65. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  66. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  67. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  68. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  69. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  70. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  71. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  72. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  73. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  74. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  75. xinference/thirdparty/fish_speech/fish_speech/conversation.py +94 -83
  76. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +63 -20
  77. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +1 -26
  78. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  79. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  80. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  81. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
  82. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +7 -13
  83. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  84. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  85. xinference/thirdparty/fish_speech/tools/fish_e2e.py +2 -2
  86. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  87. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  88. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  89. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  90. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  91. xinference/thirdparty/fish_speech/tools/llama/generate.py +117 -89
  92. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  93. xinference/thirdparty/fish_speech/tools/schema.py +11 -28
  94. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  95. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  96. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  97. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  98. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  99. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  100. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  101. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  102. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  103. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  104. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  105. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  106. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  107. xinference/thirdparty/matcha/utils/utils.py +2 -2
  108. xinference/types.py +13 -0
  109. xinference/web/ui/build/asset-manifest.json +6 -6
  110. xinference/web/ui/build/index.html +1 -1
  111. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  112. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  113. xinference/web/ui/build/static/js/main.1eb206d1.js +3 -0
  114. xinference/web/ui/build/static/js/main.1eb206d1.js.map +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  116. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  117. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  118. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  119. xinference/web/ui/node_modules/.cache/babel-loader/2213d49de260e1f67c888081b18f120f5225462b829ae57c9e05a05cec83689d.json +1 -0
  120. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  121. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  122. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  123. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  124. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  125. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  126. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  127. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  128. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  129. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  130. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  131. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  132. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  133. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  134. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  135. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  136. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  137. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  138. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  139. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  140. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  141. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  142. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  143. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  144. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  145. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  146. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  147. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  148. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  149. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  150. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  151. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  152. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  153. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  154. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  155. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  156. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  157. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  158. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  159. xinference/web/ui/node_modules/.package-lock.json +67 -3
  160. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  161. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  162. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  163. xinference/web/ui/node_modules/i18next/package.json +129 -0
  164. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  165. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  166. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  167. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  168. xinference/web/ui/package-lock.json +69 -3
  169. xinference/web/ui/package.json +2 -0
  170. xinference/web/ui/src/locales/en.json +186 -0
  171. xinference/web/ui/src/locales/zh.json +186 -0
  172. {xinference-1.1.0.dist-info → xinference-1.2.0.dist-info}/METADATA +19 -11
  173. {xinference-1.1.0.dist-info → xinference-1.2.0.dist-info}/RECORD +178 -111
  174. xinference/thirdparty/cosyvoice/bin/__init__.py +0 -0
  175. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  176. xinference/thirdparty/cosyvoice/flow/__init__.py +0 -0
  177. xinference/thirdparty/cosyvoice/hifigan/__init__.py +0 -0
  178. xinference/thirdparty/cosyvoice/llm/__init__.py +0 -0
  179. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  180. xinference/thirdparty/fish_speech/tools/api.py +0 -943
  181. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -95
  182. xinference/thirdparty/fish_speech/tools/webui.py +0 -548
  183. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  184. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  185. xinference/web/ui/build/static/js/main.4eb4ee80.js +0 -3
  186. xinference/web/ui/build/static/js/main.4eb4ee80.js.map +0 -1
  187. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  188. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  189. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  190. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  191. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  192. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  193. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  194. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  195. xinference/web/ui/node_modules/.cache/babel-loader/8c5eeb02f772d02cbe8b89c05428d0dd41a97866f75f7dc1c2164a67f5a1cf98.json +0 -1
  196. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  197. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  198. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  199. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  200. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  201. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  202. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  203. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  204. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  205. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  206. /xinference/web/ui/build/static/js/{main.4eb4ee80.js.LICENSE.txt → main.1eb206d1.js.LICENSE.txt} +0 -0
  207. {xinference-1.1.0.dist-info → xinference-1.2.0.dist-info}/LICENSE +0 -0
  208. {xinference-1.1.0.dist-info → xinference-1.2.0.dist-info}/WHEEL +0 -0
  209. {xinference-1.1.0.dist-info → xinference-1.2.0.dist-info}/entry_points.txt +0 -0
  210. {xinference-1.1.0.dist-info → xinference-1.2.0.dist-info}/top_level.txt +0 -0

xinference/model/image/model_spec_modelscope.json

@@ -12,8 +12,24 @@
         ],
         "default_model_config": {
             "quantize": true,
-            "quantize_text_encoder": "text_encoder_2"
-        }
+            "quantize_text_encoder": "text_encoder_2",
+            "torch_dtype": "bfloat16"
+        },
+        "gguf_model_id": "Xorbits/FLUX.1-schnell-gguf",
+        "gguf_quantizations": [
+            "F16",
+            "Q2_K",
+            "Q3_K_S",
+            "Q4_0",
+            "Q4_1",
+            "Q4_K_S",
+            "Q5_0",
+            "Q5_1",
+            "Q5_K_S",
+            "Q6_K",
+            "Q8_0"
+        ],
+        "gguf_model_file_name_template": "flux1-schnell-{quantization}.gguf"
     },
     {
         "model_name": "FLUX.1-dev",
@@ -28,8 +44,24 @@
         ],
         "default_model_config": {
             "quantize": true,
-            "quantize_text_encoder": "text_encoder_2"
-        }
+            "quantize_text_encoder": "text_encoder_2",
+            "torch_dtype": "bfloat16"
+        },
+        "gguf_model_id": "AI-ModelScope/FLUX.1-dev-gguf",
+        "gguf_quantizations": [
+            "F16",
+            "Q2_K",
+            "Q3_K_S",
+            "Q4_0",
+            "Q4_1",
+            "Q4_K_S",
+            "Q5_0",
+            "Q5_1",
+            "Q5_K_S",
+            "Q6_K",
+            "Q8_0"
+        ],
+        "gguf_model_file_name_template": "flux1-dev-{quantization}.gguf"
     },
     {
         "model_name": "sd3-medium",
@@ -47,6 +79,120 @@
             "quantize_text_encoder": "text_encoder_3"
         }
     },
+    {
+        "model_name": "sd3.5-medium",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/stable-diffusion-3.5-medium",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image",
+            "image2image",
+            "inpainting"
+        ],
+        "default_model_config": {
+            "quantize": true,
+            "quantize_text_encoder": "text_encoder_3",
+            "torch_dtype": "bfloat16"
+        },
+        "gguf_model_id": "Xorbits/stable-diffusion-3.5-medium-gguf",
+        "gguf_quantizations": [
+            "F16",
+            "Q3_K_M",
+            "Q3_K_S",
+            "Q4_0",
+            "Q4_1",
+            "Q4_K_M",
+            "Q4_K_S",
+            "Q5_0",
+            "Q5_1",
+            "Q5_K_M",
+            "Q5_K_S",
+            "Q6_K",
+            "Q8_0"
+        ],
+        "gguf_model_file_name_template": "sd3.5_medium-{quantization}.gguf"
+    },
+    {
+        "model_name": "sd3.5-large",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/stable-diffusion-3.5-large",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image",
+            "image2image",
+            "inpainting"
+        ],
+        "default_model_config": {
+            "quantize": true,
+            "quantize_text_encoder": "text_encoder_3",
+            "torch_dtype": "bfloat16",
+            "transformer_nf4": true
+        },
+        "gguf_model_id": "Xorbits/stable-diffusion-3.5-large-gguf",
+        "gguf_quantizations": [
+            "F16",
+            "Q4_0",
+            "Q4_1",
+            "Q5_0",
+            "Q5_1",
+            "Q8_0"
+        ],
+        "gguf_model_file_name_template": "sd3.5_large-{quantization}.gguf"
+    },
+    {
+        "model_name": "sd3.5-large-turbo",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/stable-diffusion-3.5-large-turbo",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image",
+            "image2image",
+            "inpainting"
+        ],
+        "default_model_config": {
+            "quantize": true,
+            "quantize_text_encoder": "text_encoder_3",
+            "torch_dtype": "bfloat16",
+            "transformer_nf4": true
+        },
+        "default_generate_config": {
+            "guidance_scale": 1.0,
+            "num_inference_steps": 4
+        },
+        "gguf_model_id": "Xorbits/stable-diffusion-3.5-large-turbo-gguf",
+        "gguf_quantizations": [
+            "F16",
+            "Q4_0",
+            "Q4_1",
+            "Q5_0",
+            "Q5_1",
+            "Q8_0"
+        ],
+        "gguf_model_file_name_template": "sd3.5_large_turbo-{quantization}.gguf"
+    },
+    {
+        "model_name": "HunyuanDiT-v1.2",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "Xorbits/HunyuanDiT-v1.2-Diffusers",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image"
+        ]
+    },
+    {
+        "model_name": "HunyuanDiT-v1.2-Distilled",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "Xorbits/HunyuanDiT-v1.2-Diffusers-Distilled",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image"
+        ]
+    },
     {
         "model_name": "sd-turbo",
         "model_family": "stable_diffusion",

xinference/model/image/stable_diffusion/core.py

@@ -14,8 +14,10 @@
 
 import contextlib
 import gc
+import importlib
 import inspect
 import itertools
+import json
 import logging
 import os
 import re
@@ -86,6 +88,7 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         lora_load_kwargs: Optional[Dict] = None,
         lora_fuse_kwargs: Optional[Dict] = None,
         model_spec: Optional["ImageModelFamilyV1"] = None,
+        gguf_model_path: Optional[str] = None,
         **kwargs,
     ):
         self._model_uid = model_uid
@@ -109,6 +112,8 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         self._model_spec = model_spec
         self._abilities = model_spec.model_ability or []  # type: ignore
         self._kwargs = kwargs
+        # gguf
+        self._gguf_model_path = gguf_model_path
 
     @property
     def model_ability(self):
@@ -184,7 +189,17 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             self._model.fuse_lora(**self._lora_fuse_kwargs)
             logger.info(f"Successfully loaded the LoRA for model {self._model_uid}.")
 
+    def _get_layer_cls(self, layer: str):
+        with open(os.path.join(self._model_path, "model_index.json")) as f:  # type: ignore
+            model_index = json.load(f)
+            layer_info = model_index[layer]
+            module_name, class_name = layer_info
+            module = importlib.import_module(module_name)
+            return getattr(module, class_name)
+
     def load(self):
+        from transformers import BitsAndBytesConfig, T5EncoderModel
+
         if "text2image" in self._abilities or "image2image" in self._abilities:
             from diffusers import AutoPipelineForText2Image as AutoPipelineModel
         elif "inpainting" in self._abilities:
@@ -200,7 +215,9 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             glob(os.path.join(self._model_path, "*/*.safetensors"))
         )
         if isinstance(torch_dtype, str):
-            self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)
+            self._torch_dtype = torch_dtype = self._kwargs["torch_dtype"] = getattr(
+                torch, torch_dtype
+            )
 
         controlnet = self._kwargs.get("controlnet")
         if controlnet is not None:
@@ -212,18 +229,7 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             ]
 
         quantize_text_encoder = self._kwargs.pop("quantize_text_encoder", None)
-        if quantize_text_encoder:
-            try:
-                from transformers import BitsAndBytesConfig, T5EncoderModel
-            except ImportError:
-                error_message = "Failed to import module 'transformers'"
-                installation_guide = [
-                    "Please make sure 'transformers' is installed. ",
-                    "You can install it by `pip install transformers`\n",
-                ]
-
-                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
-
+        if quantize_text_encoder and not self._gguf_model_path:
             try:
                 import bitsandbytes  # noqa: F401
             except ImportError:
@@ -249,6 +255,32 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
                 self._kwargs[text_encoder_name] = text_encoder
                 self._kwargs["device_map"] = "balanced"
 
+        if self._gguf_model_path:
+            from diffusers import GGUFQuantizationConfig
+
+            # GGUF transformer
+            self._kwargs["transformer"] = self._get_layer_cls(
+                "transformer"
+            ).from_single_file(
+                self._gguf_model_path,
+                quantization_config=GGUFQuantizationConfig(compute_dtype=torch_dtype),
+                torch_dtype=torch_dtype,
+                config=os.path.join(self._model_path, "transformer"),
+            )
+        elif self._kwargs.get("transformer_nf4"):
+            nf4_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_compute_dtype=torch_dtype,
+            )
+            model_nf4 = self._get_layer_cls("transformer").from_pretrained(
+                self._model_path,
+                subfolder="transformer",
+                quantization_config=nf4_config,
+                torch_dtype=torch_dtype,
+            )
+            self._kwargs["transformer"] = model_nf4
+
         logger.debug(
             "Loading model from %s, kwargs: %s", self._model_path, self._kwargs
         )
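
Note: the GGUF branch above is a thin wrapper around diffusers' documented GGUF support (available since diffusers 0.32). A standalone sketch under two simplifications: the transformer class is hardcoded to FluxTransformer2DModel where core.py resolves it dynamically from model_index.json, and the paths are placeholders:

    import torch
    from diffusers import FluxPipeline, FluxTransformer2DModel, GGUFQuantizationConfig

    gguf_path = "flux1-schnell-Q4_K_S.gguf"        # placeholder local file
    base_repo = "black-forest-labs/FLUX.1-schnell" # supplies the remaining components

    # Load only the transformer from the GGUF file; weights stay quantized and
    # are dequantized to bfloat16 on the fly at compute time.
    transformer = FluxTransformer2DModel.from_single_file(
        gguf_path,
        quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
        torch_dtype=torch.bfloat16,
    )

    # Assemble text encoders, VAE, and scheduler from the base repo around it.
    pipe = FluxPipeline.from_pretrained(
        base_repo, transformer=transformer, torch_dtype=torch.bfloat16
    )
    pipe.enable_model_cpu_offload()
    image = pipe("a photo of a cat", guidance_scale=0.0, num_inference_steps=4).images[0]
    image.save("cat.png")

The transformer_nf4 branch is the bitsandbytes analogue of the same idea: instead of reading pre-quantized GGUF weights, the transformer is quantized to 4-bit NF4 at load time via BitsAndBytesConfig, which is why the sd3.5-large and sd3.5-large-turbo specs enable it by default.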

xinference/model/llm/__init__.py

@@ -134,6 +134,7 @@ def _install():
     from .mlx.core import MLXChatModel, MLXModel, MLXVisionModel
     from .sglang.core import SGLANGChatModel, SGLANGModel
     from .transformers.chatglm import ChatglmPytorchChatModel
+    from .transformers.cogagent import CogAgentChatModel
     from .transformers.cogvlm2 import CogVLM2Model
     from .transformers.cogvlm2_video import CogVLM2VideoModel
     from .transformers.core import PytorchChatModel, PytorchModel
@@ -195,6 +196,7 @@ def _install():
             DeepSeekV2PytorchChatModel,
             OptPytorchModel,
             GlmEdgeVModel,
+            CogAgentChatModel,
         ]
     )
     if OmniLMMModel:  # type: ignore

xinference/model/llm/llm_family.json

@@ -8942,5 +8942,148 @@
             "<|user|>",
             "<|observation|>"
         ]
+    },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "QvQ-72B-Preview",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 72,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/QVQ-72B-Preview"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 72,
+                "quantizations": [
+                    "3bit",
+                    "4bit",
+                    "6bit",
+                    "8bit",
+                    "bf16"
+                ],
+                "model_id": "mlx-community/QVQ-72B-Preview-{quantization}"
+            }
+        ],
+        "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+        "stop_token_ids": [
+            151645,
+            151643
+        ],
+        "stop": [
+            "<|im_end|>",
+            "<|endoftext|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "marco-o1",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "tools"
+        ],
+        "model_description": "Marco-o1: Towards Open Reasoning Models for Open-Ended Solutions",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "AIDC-AI/Marco-o1"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "Q2_K",
+                    "Q3_K_L",
+                    "Q3_K_M",
+                    "Q3_K_S",
+                    "Q4_0",
+                    "Q4_1",
+                    "Q4_K_M",
+                    "Q4_K_S",
+                    "Q5_0",
+                    "Q5_1",
+                    "Q5_K_M",
+                    "Q5_K_S",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_id": "QuantFactory/Marco-o1-GGUF",
+                "model_file_name_template": "Marco-o1.{quantization}.gguf"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在<Thought>内完成,<Output>内输出你的结果。\n<Thought>应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,<Output>内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            151643,
+            151644,
+            151645
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|im_start|>",
+            "<|im_end|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "cogagent",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "The CogAgent-9B-20241220 model is based on GLM-4V-9B, a bilingual open-source VLM base model. Through data collection and optimization, multi-stage training, and strategy improvements, CogAgent-9B-20241220 achieves significant advancements in GUI perception, inference prediction accuracy, action space completeness, and task generalizability. ",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "9",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "THUDM/cogagent-9b-20241220"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            151329,
+            151336,
+            151338
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|user|>",
+            "<|observation|>"
+        ]
     }
 ]
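
Note: with these entries registered, the new models can be launched through the standard client API. A sketch for the marco-o1 ggufv2 spec (the endpoint is a placeholder; "llama.cpp" is the usual engine name for ggufv2 weights, and the quantization names come from the spec above):

    from xinference.client import Client

    client = Client("http://localhost:9997")  # placeholder endpoint

    model_uid = client.launch_model(
        model_name="marco-o1",
        model_engine="llama.cpp",
        model_format="ggufv2",
        quantization="Q4_K_M",
    )
    model = client.get_model(model_uid)
    reply = model.chat(
        messages=[{"role": "user", "content": "How many r's are in 'strawberry'?"}]
    )
    print(reply["choices"][0]["message"]["content"])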

xinference/model/llm/llm_family.py

@@ -972,46 +972,25 @@ def match_llm(
         return spec
 
     # priority: download_hub > download_from_modelscope() and download_from_csghub()
-    if download_hub == "modelscope":
-        all_families = (
-            BUILTIN_MODELSCOPE_LLM_FAMILIES
-            + BUILTIN_LLM_FAMILIES
-            + user_defined_llm_families
-        )
-    elif download_hub == "openmind_hub":
-        all_families = (
-            BUILTIN_OPENMIND_HUB_LLM_FAMILIES
-            + BUILTIN_LLM_FAMILIES
-            + user_defined_llm_families
-        )
-    elif download_hub == "csghub":
-        all_families = (
-            BUILTIN_CSGHUB_LLM_FAMILIES
-            + BUILTIN_LLM_FAMILIES
-            + user_defined_llm_families
-        )
-    elif download_hub == "huggingface":
-        all_families = BUILTIN_LLM_FAMILIES + user_defined_llm_families
+    # set base model
+    base_families = BUILTIN_LLM_FAMILIES + user_defined_llm_families
+    hub_families_map = {
+        "modelscope": BUILTIN_MODELSCOPE_LLM_FAMILIES,
+        "openmind_hub": BUILTIN_OPENMIND_HUB_LLM_FAMILIES,
+        "csghub": BUILTIN_CSGHUB_LLM_FAMILIES,
+    }
+    if download_hub == "huggingface":
+        all_families = base_families
+    elif download_hub in hub_families_map:
+        all_families = hub_families_map[download_hub] + base_families
     elif download_from_modelscope():
-        all_families = (
-            BUILTIN_MODELSCOPE_LLM_FAMILIES
-            + BUILTIN_LLM_FAMILIES
-            + user_defined_llm_families
-        )
+        all_families = BUILTIN_MODELSCOPE_LLM_FAMILIES + base_families
     elif download_from_openmind_hub():
-        all_families = (
-            BUILTIN_OPENMIND_HUB_LLM_FAMILIES
-            + BUILTIN_LLM_FAMILIES
-            + user_defined_llm_families
-        )
+        all_families = BUILTIN_OPENMIND_HUB_LLM_FAMILIES + base_families
     elif download_from_csghub():
-        all_families = (
-            BUILTIN_CSGHUB_LLM_FAMILIES
-            + BUILTIN_LLM_FAMILIES
-            + user_defined_llm_families
-        )
+        all_families = BUILTIN_CSGHUB_LLM_FAMILIES + base_families
     else:
-        all_families = BUILTIN_LLM_FAMILIES + user_defined_llm_families
+        all_families = base_families
 
     for family in all_families:
         if model_name != family.model_name:
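
Note: the refactor keeps the stated priority (explicit download_hub first, then the environment probes, then plain Hugging Face) while collapsing the repeated list concatenations. A condensed, self-contained sketch of the same resolution order, with the module-level lists and the download_from_*() probes stubbed out as parameters:

    from typing import Dict, List, Optional

    def resolve_all_families(
        download_hub: Optional[str],
        base_families: List,       # stands in for BUILTIN_LLM_FAMILIES + user_defined_llm_families
        hub_map: Dict[str, List],  # stands in for the modelscope/openmind_hub/csghub lists
        env_hub: Optional[str],    # hub implied by the download_from_*() probes, if any
    ) -> List:
        # 1. An explicit download_hub always wins; "huggingface" means builtins only.
        if download_hub == "huggingface":
            return base_families
        if download_hub in hub_map:
            return hub_map[download_hub] + base_families
        # 2. Otherwise fall back to the hub selected by the environment.
        if env_hub in hub_map:
            return hub_map[env_hub] + base_families
        # 3. Default: Hugging Face builtins plus user-defined families.
        return base_families

Hub-specific families are prepended, so when a model name exists on both hubs the hub-specific spec is matched first by the loop that follows.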

xinference/model/llm/llm_family_modelscope.json

@@ -6673,5 +6673,153 @@
             "<|user|>",
             "<|observation|>"
         ]
+    },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "QvQ-72B-Preview",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 72,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/QVQ-72B-Preview",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 72,
+                "quantizations": [
+                    "3bit",
+                    "4bit",
+                    "6bit",
+                    "8bit",
+                    "bf16"
+                ],
+                "model_id": "mlx-community/QVQ-72B-Preview-{quantization}",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+        "stop_token_ids": [
+            151645,
+            151643
+        ],
+        "stop": [
+            "<|im_end|>",
+            "<|endoftext|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "marco-o1",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "tools"
+        ],
+        "model_description": "Marco-o1: Towards Open Reasoning Models for Open-Ended Solutions",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "AIDC-AI/Marco-o1",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "Q2_K",
+                    "Q3_K_L",
+                    "Q3_K_M",
+                    "Q3_K_S",
+                    "Q4_0",
+                    "Q4_1",
+                    "Q4_K_M",
+                    "Q4_K_S",
+                    "Q5_0",
+                    "Q5_1",
+                    "Q5_K_M",
+                    "Q5_K_S",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_file_name_template": "Marco-o1.{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "QuantFactory/Marco-o1-GGUF"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在<Thought>内完成,<Output>内输出你的结果。\n<Thought>应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,<Output>内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            151643,
+            151644,
+            151645
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|im_start|>",
+            "<|im_end|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "cogagent",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "The CogAgent-9B-20241220 model is based on GLM-4V-9B, a bilingual open-source VLM base model. Through data collection and optimization, multi-stage training, and strategy improvements, CogAgent-9B-20241220 achieves significant advancements in GUI perception, inference prediction accuracy, action space completeness, and task generalizability. ",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "9",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "ZhipuAI/cogagent-9b-20241220",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            151329,
+            151336,
+            151338
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|user|>",
+            "<|observation|>"
+        ]
     }
 ]
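
Note: these ModelScope specs share model names with their Hugging Face counterparts above, so the explicit download_hub option handled in match_llm is the way to pin one side. A sketch for the ModelScope cogagent spec (placeholder endpoint; kwargs follow the 1.x client API):

    from xinference.client import Client

    client = Client("http://localhost:9997")  # placeholder endpoint

    # Pins ZhipuAI/cogagent-9b-20241220 even when the environment would
    # otherwise resolve to the Hugging Face hub.
    model_uid = client.launch_model(
        model_name="cogagent",
        model_engine="Transformers",
        model_format="pytorch",
        model_size_in_billions=9,
        quantization="none",
        download_hub="modelscope",
    )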