xinference-1.0.1-py3-none-any.whl → xinference-1.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (170)
  1. xinference/_compat.py +2 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +28 -6
  4. xinference/core/utils.py +10 -6
  5. xinference/deploy/cmdline.py +3 -1
  6. xinference/deploy/test/test_cmdline.py +56 -0
  7. xinference/isolation.py +24 -0
  8. xinference/model/audio/core.py +10 -0
  9. xinference/model/audio/cosyvoice.py +25 -3
  10. xinference/model/audio/f5tts.py +200 -0
  11. xinference/model/audio/f5tts_mlx.py +260 -0
  12. xinference/model/audio/fish_speech.py +36 -111
  13. xinference/model/audio/model_spec.json +27 -3
  14. xinference/model/audio/model_spec_modelscope.json +18 -0
  15. xinference/model/audio/utils.py +32 -0
  16. xinference/model/embedding/core.py +203 -142
  17. xinference/model/embedding/model_spec.json +7 -0
  18. xinference/model/embedding/model_spec_modelscope.json +8 -0
  19. xinference/model/image/core.py +69 -1
  20. xinference/model/image/model_spec.json +127 -4
  21. xinference/model/image/model_spec_modelscope.json +130 -4
  22. xinference/model/image/stable_diffusion/core.py +45 -13
  23. xinference/model/llm/__init__.py +2 -2
  24. xinference/model/llm/llm_family.json +219 -53
  25. xinference/model/llm/llm_family.py +15 -36
  26. xinference/model/llm/llm_family_modelscope.json +167 -20
  27. xinference/model/llm/mlx/core.py +287 -51
  28. xinference/model/llm/sglang/core.py +1 -0
  29. xinference/model/llm/transformers/chatglm.py +9 -5
  30. xinference/model/llm/transformers/core.py +1 -0
  31. xinference/model/llm/transformers/qwen2_vl.py +2 -0
  32. xinference/model/llm/transformers/utils.py +16 -8
  33. xinference/model/llm/utils.py +5 -1
  34. xinference/model/llm/vllm/core.py +16 -2
  35. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  36. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  37. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  38. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  39. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  40. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  41. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  42. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  43. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  44. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  45. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  46. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  47. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  48. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  49. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  50. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  51. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  52. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  53. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  54. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  55. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  56. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  57. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  58. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  59. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  60. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  61. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  62. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  63. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  64. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  65. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  66. xinference/thirdparty/f5_tts/api.py +166 -0
  67. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  68. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  69. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  70. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  71. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  72. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  73. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  74. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  75. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  76. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  77. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  78. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  79. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  80. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  81. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  82. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  83. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  84. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  85. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  86. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  87. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  88. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  89. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  90. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  91. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  92. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  93. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  94. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  95. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  96. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  97. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  98. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  99. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  100. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  101. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  102. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  103. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  104. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  105. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  106. xinference/thirdparty/f5_tts/train/README.md +77 -0
  107. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  108. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  109. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  110. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  111. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  112. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  113. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  114. xinference/thirdparty/f5_tts/train/train.py +75 -0
  115. xinference/thirdparty/fish_speech/fish_speech/conversation.py +94 -83
  116. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +63 -20
  117. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +1 -26
  118. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  119. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  120. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  121. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
  122. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +7 -13
  123. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  124. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  125. xinference/thirdparty/fish_speech/tools/fish_e2e.py +2 -2
  126. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  127. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  128. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  129. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  130. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  131. xinference/thirdparty/fish_speech/tools/llama/generate.py +117 -89
  132. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  133. xinference/thirdparty/fish_speech/tools/schema.py +11 -28
  134. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  135. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  136. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  137. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  138. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  139. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  140. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  141. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  142. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  143. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  144. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  145. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  146. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  147. xinference/thirdparty/matcha/utils/utils.py +2 -2
  148. xinference/web/ui/build/asset-manifest.json +3 -3
  149. xinference/web/ui/build/index.html +1 -1
  150. xinference/web/ui/build/static/js/{main.2f269bb3.js → main.4eb4ee80.js} +3 -3
  151. xinference/web/ui/build/static/js/main.4eb4ee80.js.map +1 -0
  152. xinference/web/ui/node_modules/.cache/babel-loader/8c5eeb02f772d02cbe8b89c05428d0dd41a97866f75f7dc1c2164a67f5a1cf98.json +1 -0
  153. {xinference-1.0.1.dist-info → xinference-1.1.1.dist-info}/METADATA +41 -17
  154. {xinference-1.0.1.dist-info → xinference-1.1.1.dist-info}/RECORD +160 -88
  155. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  156. xinference/thirdparty/cosyvoice/flow/__init__.py +0 -0
  157. xinference/thirdparty/cosyvoice/hifigan/__init__.py +0 -0
  158. xinference/thirdparty/cosyvoice/llm/__init__.py +0 -0
  159. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  160. xinference/thirdparty/fish_speech/tools/api.py +0 -943
  161. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -95
  162. xinference/thirdparty/fish_speech/tools/webui.py +0 -548
  163. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  164. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  165. /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
  166. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.4eb4ee80.js.LICENSE.txt} +0 -0
  167. {xinference-1.0.1.dist-info → xinference-1.1.1.dist-info}/LICENSE +0 -0
  168. {xinference-1.0.1.dist-info → xinference-1.1.1.dist-info}/WHEEL +0 -0
  169. {xinference-1.0.1.dist-info → xinference-1.1.1.dist-info}/entry_points.txt +0 -0
  170. {xinference-1.0.1.dist-info → xinference-1.1.1.dist-info}/top_level.txt +0 -0
xinference/model/image/model_spec.json

@@ -11,8 +11,24 @@
  ],
  "default_model_config": {
  "quantize": true,
- "quantize_text_encoder": "text_encoder_2"
- }
+ "quantize_text_encoder": "text_encoder_2",
+ "torch_dtype": "bfloat16"
+ },
+ "gguf_model_id": "city96/FLUX.1-schnell-gguf",
+ "gguf_quantizations": [
+ "F16",
+ "Q2_K",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "gguf_model_file_name_template": "flux1-schnell-{quantization}.gguf"
  },
  {
  "model_name": "FLUX.1-dev",
@@ -26,8 +42,24 @@
  ],
  "default_model_config": {
  "quantize": true,
- "quantize_text_encoder": "text_encoder_2"
- }
+ "quantize_text_encoder": "text_encoder_2",
+ "torch_dtype": "bfloat16"
+ },
+ "gguf_model_id": "city96/FLUX.1-dev-gguf",
+ "gguf_quantizations": [
+ "F16",
+ "Q2_K",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "gguf_model_file_name_template": "flux1-dev-{quantization}.gguf"
  },
  {
  "model_name": "sd3-medium",
@@ -44,6 +76,97 @@
  "quantize_text_encoder": "text_encoder_3"
  }
  },
+ {
+ "model_name": "sd3.5-medium",
+ "model_family": "stable_diffusion",
+ "model_id": "stabilityai/stable-diffusion-3.5-medium",
+ "model_revision": "94b13ccbe959c51e8159d91f562c58f29fac971a",
+ "model_ability": [
+ "text2image",
+ "image2image",
+ "inpainting"
+ ],
+ "default_model_config": {
+ "quantize": true,
+ "quantize_text_encoder": "text_encoder_3",
+ "torch_dtype": "bfloat16"
+ },
+ "gguf_model_id": "city96/stable-diffusion-3.5-medium-gguf",
+ "gguf_quantizations": [
+ "F16",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "gguf_model_file_name_template": "sd3.5_medium-{quantization}.gguf"
+ },
+ {
+ "model_name": "sd3.5-large",
+ "model_family": "stable_diffusion",
+ "model_id": "stabilityai/stable-diffusion-3.5-large",
+ "model_revision": "ceddf0a7fdf2064ea28e2213e3b84e4afa170a0f",
+ "model_ability": [
+ "text2image",
+ "image2image",
+ "inpainting"
+ ],
+ "default_model_config": {
+ "quantize": true,
+ "quantize_text_encoder": "text_encoder_3",
+ "torch_dtype": "bfloat16",
+ "transformer_nf4": true
+ },
+ "gguf_model_id": "city96/stable-diffusion-3.5-large-gguf",
+ "gguf_quantizations": [
+ "F16",
+ "Q4_0",
+ "Q4_1",
+ "Q5_0",
+ "Q5_1",
+ "Q8_0"
+ ],
+ "gguf_model_file_name_template": "sd3.5_large-{quantization}.gguf"
+ },
+ {
+ "model_name": "sd3.5-large-turbo",
+ "model_family": "stable_diffusion",
+ "model_id": "stabilityai/stable-diffusion-3.5-large-turbo",
+ "model_revision": "ec07796fc06b096cc56de9762974a28f4c632eda",
+ "model_ability": [
+ "text2image",
+ "image2image",
+ "inpainting"
+ ],
+ "default_model_config": {
+ "quantize": true,
+ "quantize_text_encoder": "text_encoder_3",
+ "torch_dtype": "bfloat16",
+ "transformer_nf4": true
+ },
+ "default_generate_config": {
+ "guidance_scale": 1.0,
+ "num_inference_steps": 4
+ },
+ "gguf_model_id": "city96/stable-diffusion-3.5-large-turbo-gguf",
+ "gguf_quantizations": [
+ "F16",
+ "Q4_0",
+ "Q4_1",
+ "Q5_0",
+ "Q5_1",
+ "Q8_0"
+ ],
+ "gguf_model_file_name_template": "sd3.5_large_turbo-{quantization}.gguf"
+ },
  {
  "model_name": "sd-turbo",
  "model_family": "stable_diffusion",
xinference/model/image/model_spec_modelscope.json

@@ -12,8 +12,24 @@
  ],
  "default_model_config": {
  "quantize": true,
- "quantize_text_encoder": "text_encoder_2"
- }
+ "quantize_text_encoder": "text_encoder_2",
+ "torch_dtype": "bfloat16"
+ },
+ "gguf_model_id": "Xorbits/FLUX.1-schnell-gguf",
+ "gguf_quantizations": [
+ "F16",
+ "Q2_K",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "gguf_model_file_name_template": "flux1-schnell-{quantization}.gguf"
  },
  {
  "model_name": "FLUX.1-dev",
@@ -28,8 +44,24 @@
  ],
  "default_model_config": {
  "quantize": true,
- "quantize_text_encoder": "text_encoder_2"
- }
+ "quantize_text_encoder": "text_encoder_2",
+ "torch_dtype": "bfloat16"
+ },
+ "gguf_model_id": "AI-ModelScope/FLUX.1-dev-gguf",
+ "gguf_quantizations": [
+ "F16",
+ "Q2_K",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "gguf_model_file_name_template": "flux1-dev-{quantization}.gguf"
  },
  {
  "model_name": "sd3-medium",
@@ -47,6 +79,100 @@
  "quantize_text_encoder": "text_encoder_3"
  }
  },
+ {
+ "model_name": "sd3.5-medium",
+ "model_family": "stable_diffusion",
+ "model_hub": "modelscope",
+ "model_id": "AI-ModelScope/stable-diffusion-3.5-medium",
+ "model_revision": "master",
+ "model_ability": [
+ "text2image",
+ "image2image",
+ "inpainting"
+ ],
+ "default_model_config": {
+ "quantize": true,
+ "quantize_text_encoder": "text_encoder_3",
+ "torch_dtype": "bfloat16"
+ },
+ "gguf_model_id": "Xorbits/stable-diffusion-3.5-medium-gguf",
+ "gguf_quantizations": [
+ "F16",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "gguf_model_file_name_template": "sd3.5_medium-{quantization}.gguf"
+ },
+ {
+ "model_name": "sd3.5-large",
+ "model_family": "stable_diffusion",
+ "model_hub": "modelscope",
+ "model_id": "AI-ModelScope/stable-diffusion-3.5-large",
+ "model_revision": "master",
+ "model_ability": [
+ "text2image",
+ "image2image",
+ "inpainting"
+ ],
+ "default_model_config": {
+ "quantize": true,
+ "quantize_text_encoder": "text_encoder_3",
+ "torch_dtype": "bfloat16",
+ "transformer_nf4": true
+ },
+ "gguf_model_id": "Xorbits/stable-diffusion-3.5-large-gguf",
+ "gguf_quantizations": [
+ "F16",
+ "Q4_0",
+ "Q4_1",
+ "Q5_0",
+ "Q5_1",
+ "Q8_0"
+ ],
+ "gguf_model_file_name_template": "sd3.5_large-{quantization}.gguf"
+ },
+ {
+ "model_name": "sd3.5-large-turbo",
+ "model_family": "stable_diffusion",
+ "model_hub": "modelscope",
+ "model_id": "AI-ModelScope/stable-diffusion-3.5-large-turbo",
+ "model_revision": "master",
+ "model_ability": [
+ "text2image",
+ "image2image",
+ "inpainting"
+ ],
+ "default_model_config": {
+ "quantize": true,
+ "quantize_text_encoder": "text_encoder_3",
+ "torch_dtype": "bfloat16",
+ "transformer_nf4": true
+ },
+ "default_generate_config": {
+ "guidance_scale": 1.0,
+ "num_inference_steps": 4
+ },
+ "gguf_model_id": "Xorbits/stable-diffusion-3.5-large-turbo-gguf",
+ "gguf_quantizations": [
+ "F16",
+ "Q4_0",
+ "Q4_1",
+ "Q5_0",
+ "Q5_1",
+ "Q8_0"
+ ],
+ "gguf_model_file_name_template": "sd3.5_large_turbo-{quantization}.gguf"
+ },
  {
  "model_name": "sd-turbo",
  "model_family": "stable_diffusion",
xinference/model/image/stable_diffusion/core.py

@@ -14,8 +14,10 @@

  import contextlib
  import gc
+ import importlib
  import inspect
  import itertools
+ import json
  import logging
  import os
  import re
@@ -86,6 +88,7 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
  lora_load_kwargs: Optional[Dict] = None,
  lora_fuse_kwargs: Optional[Dict] = None,
  model_spec: Optional["ImageModelFamilyV1"] = None,
+ gguf_model_path: Optional[str] = None,
  **kwargs,
  ):
  self._model_uid = model_uid
@@ -109,6 +112,8 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
  self._model_spec = model_spec
  self._abilities = model_spec.model_ability or []  # type: ignore
  self._kwargs = kwargs
+ # gguf
+ self._gguf_model_path = gguf_model_path

  @property
  def model_ability(self):
@@ -184,7 +189,17 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
  self._model.fuse_lora(**self._lora_fuse_kwargs)
  logger.info(f"Successfully loaded the LoRA for model {self._model_uid}.")

+ def _get_layer_cls(self, layer: str):
+ with open(os.path.join(self._model_path, "model_index.json")) as f:  # type: ignore
+ model_index = json.load(f)
+ layer_info = model_index[layer]
+ module_name, class_name = layer_info
+ module = importlib.import_module(module_name)
+ return getattr(module, class_name)
+
  def load(self):
+ from transformers import BitsAndBytesConfig, T5EncoderModel
+
  if "text2image" in self._abilities or "image2image" in self._abilities:
  from diffusers import AutoPipelineForText2Image as AutoPipelineModel
  elif "inpainting" in self._abilities:
@@ -200,7 +215,9 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
  glob(os.path.join(self._model_path, "*/*.safetensors"))
  )
  if isinstance(torch_dtype, str):
- self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)
+ self._torch_dtype = torch_dtype = self._kwargs["torch_dtype"] = getattr(
+ torch, torch_dtype
+ )

  controlnet = self._kwargs.get("controlnet")
  if controlnet is not None:
@@ -212,18 +229,7 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
  ]

  quantize_text_encoder = self._kwargs.pop("quantize_text_encoder", None)
- if quantize_text_encoder:
- try:
- from transformers import BitsAndBytesConfig, T5EncoderModel
- except ImportError:
- error_message = "Failed to import module 'transformers'"
- installation_guide = [
- "Please make sure 'transformers' is installed. ",
- "You can install it by `pip install transformers`\n",
- ]
-
- raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
-
+ if quantize_text_encoder and not self._gguf_model_path:
  try:
  import bitsandbytes  # noqa: F401
  except ImportError:
@@ -249,6 +255,32 @@
  self._kwargs[text_encoder_name] = text_encoder
  self._kwargs["device_map"] = "balanced"

+ if self._gguf_model_path:
+ from diffusers import GGUFQuantizationConfig
+
+ # GGUF transformer
+ self._kwargs["transformer"] = self._get_layer_cls(
+ "transformer"
+ ).from_single_file(
+ self._gguf_model_path,
+ quantization_config=GGUFQuantizationConfig(compute_dtype=torch_dtype),
+ torch_dtype=torch_dtype,
+ config=os.path.join(self._model_path, "transformer"),
+ )
+ elif self._kwargs.get("transformer_nf4"):
+ nf4_config = BitsAndBytesConfig(
+ load_in_4bit=True,
+ bnb_4bit_quant_type="nf4",
+ bnb_4bit_compute_dtype=torch_dtype,
+ )
+ model_nf4 = self._get_layer_cls("transformer").from_pretrained(
+ self._model_path,
+ subfolder="transformer",
+ quantization_config=nf4_config,
+ torch_dtype=torch_dtype,
+ )
+ self._kwargs["transformer"] = model_nf4
+
  logger.debug(
  "Loading model from %s, kwargs: %s", self._model_path, self._kwargs
  )
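Outside of xinference, the two quantized-transformer branches above correspond to standard diffusers usage: load a single-file GGUF transformer with GGUFQuantizationConfig, or load the regular checkpoint with 4-bit NF4 weights via BitsAndBytesConfig. A hedged standalone sketch (paths are placeholders, the FLUX transformer class is an assumption about the checkpoint being loaded, and a diffusers release new enough to ship GGUF support is required):

# Standalone sketch of the two branches; paths are placeholders and the
# FLUX class is an assumption, not taken from the diff above.
import torch
from diffusers import (
    BitsAndBytesConfig,
    FluxTransformer2DModel,
    GGUFQuantizationConfig,
)

dtype = torch.bfloat16

# GGUF path: quantized single-file transformer, dequantized on the fly.
gguf_transformer = FluxTransformer2DModel.from_single_file(
    "/models/flux1-dev-Q4_0.gguf",
    quantization_config=GGUFQuantizationConfig(compute_dtype=dtype),
    torch_dtype=dtype,
    config="/models/FLUX.1-dev/transformer",
)

# NF4 path: regular checkpoint, weights quantized to 4-bit at load time.
nf4_transformer = FluxTransformer2DModel.from_pretrained(
    "/models/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=dtype,
    ),
    torch_dtype=dtype,
)

# Either object is then passed to the pipeline constructor as transformer=...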
xinference/model/llm/__init__.py

@@ -131,7 +131,7 @@ def register_custom_model():
  def _install():
  from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel
  from .lmdeploy.core import LMDeployChatModel, LMDeployModel
- from .mlx.core import MLXChatModel, MLXModel
+ from .mlx.core import MLXChatModel, MLXModel, MLXVisionModel
  from .sglang.core import SGLANGChatModel, SGLANGModel
  from .transformers.chatglm import ChatglmPytorchChatModel
  from .transformers.cogvlm2 import CogVLM2Model
@@ -172,7 +172,7 @@ def _install():
  )
  SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
  VLLM_CLASSES.extend([VLLMModel, VLLMChatModel, VLLMVisionModel])
- MLX_CLASSES.extend([MLXModel, MLXChatModel])
+ MLX_CLASSES.extend([MLXModel, MLXChatModel, MLXVisionModel])
  LMDEPLOY_CLASSES.extend([LMDeployModel, LMDeployChatModel])
  TRANSFORMERS_CLASSES.extend(
  [