xinference-0.15.3-py3-none-any.whl → xinference-0.15.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +29 -2
- xinference/client/restful/restful_client.py +10 -0
- xinference/constants.py +4 -0
- xinference/core/image_interface.py +76 -23
- xinference/core/model.py +80 -39
- xinference/core/progress_tracker.py +187 -0
- xinference/core/supervisor.py +11 -0
- xinference/core/worker.py +1 -0
- xinference/model/audio/chattts.py +2 -1
- xinference/model/audio/core.py +0 -2
- xinference/model/audio/model_spec.json +8 -0
- xinference/model/audio/model_spec_modelscope.json +9 -0
- xinference/model/image/core.py +6 -7
- xinference/model/image/sdapi.py +35 -4
- xinference/model/image/stable_diffusion/core.py +208 -78
- xinference/model/llm/llm_family.json +16 -16
- xinference/model/llm/llm_family_modelscope.json +16 -12
- xinference/model/llm/transformers/cogvlm2.py +2 -1
- xinference/model/llm/transformers/cogvlm2_video.py +2 -0
- xinference/model/llm/transformers/core.py +6 -2
- xinference/model/llm/transformers/deepseek_vl.py +2 -0
- xinference/model/llm/transformers/glm4v.py +2 -1
- xinference/model/llm/transformers/intern_vl.py +2 -0
- xinference/model/llm/transformers/minicpmv25.py +2 -0
- xinference/model/llm/transformers/minicpmv26.py +2 -0
- xinference/model/llm/transformers/omnilmm.py +2 -0
- xinference/model/llm/transformers/qwen2_audio.py +11 -4
- xinference/model/llm/transformers/qwen2_vl.py +2 -28
- xinference/model/llm/transformers/qwen_vl.py +2 -1
- xinference/model/llm/transformers/utils.py +35 -2
- xinference/model/llm/transformers/yi_vl.py +2 -0
- xinference/model/llm/utils.py +58 -14
- xinference/model/llm/vllm/core.py +52 -8
- xinference/model/llm/vllm/utils.py +0 -1
- xinference/model/utils.py +7 -4
- xinference/model/video/core.py +0 -2
- {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/METADATA +3 -3
- {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/RECORD +43 -42
- {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/LICENSE +0 -0
- {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/WHEEL +0 -0
- {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/entry_points.txt +0 -0
- {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/top_level.txt +0 -0
xinference/model/audio/model_spec.json
CHANGED
@@ -71,6 +71,14 @@
     "model_ability": "audio-to-text",
     "multilingual": true
   },
+  {
+    "model_name": "whisper-large-v3-turbo",
+    "model_family": "whisper",
+    "model_id": "openai/whisper-large-v3-turbo",
+    "model_revision": "41f01f3fe87f28c78e2fbf8b568835947dd65ed9",
+    "model_ability": "audio-to-text",
+    "multilingual": true
+  },
   {
     "model_name": "Belle-distilwhisper-large-v2-zh",
     "model_family": "whisper",
xinference/model/audio/model_spec_modelscope.json
CHANGED
@@ -8,6 +8,15 @@
     "model_ability": "audio-to-text",
     "multilingual": true
   },
+  {
+    "model_name": "whisper-large-v3-turbo",
+    "model_family": "whisper",
+    "model_hub": "modelscope",
+    "model_id": "AI-ModelScope/whisper-large-v3-turbo",
+    "model_revision": "master",
+    "model_ability": "audio-to-text",
+    "multilingual": true
+  },
   {
     "model_name": "SenseVoiceSmall",
     "model_family": "funasr",
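Both spec files register the new turbo checkpoint as an ordinary Whisper-family audio model, so it should be usable through the regular client flow once the worker can reach the corresponding hub. A minimal, hedged usage sketch (the endpoint, audio file, and result keys below are assumptions, not taken from this diff):

# Hypothetical usage sketch for the newly listed whisper-large-v3-turbo model.
# Endpoint, file path, and result keys are placeholders.
from xinference.client import Client

client = Client("http://127.0.0.1:9997")
uid = client.launch_model(model_name="whisper-large-v3-turbo", model_type="audio")
model = client.get_model(uid)

with open("sample.wav", "rb") as f:
    result = model.transcriptions(f.read())
print(result.get("text"))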
xinference/model/image/core.py
CHANGED
@@ -23,8 +23,6 @@ from ..core import CacheableModelSpec, ModelDescription
 from ..utils import valid_model_revision
 from .stable_diffusion.core import DiffusionModel
 
-MAX_ATTEMPTS = 3
-
 logger = logging.getLogger(__name__)
 
 MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
@@ -210,18 +208,19 @@ def create_image_model_instance(
         for name in controlnet:
             for cn_model_spec in model_spec.controlnet:
                 if cn_model_spec.model_name == name:
-
-
-                    controlnet_model_paths.append(model_path)
+                    controlnet_model_path = cache(cn_model_spec)
+                    controlnet_model_paths.append(controlnet_model_path)
                     break
             else:
                 raise ValueError(
                     f"controlnet `{name}` is not supported for model `{model_name}`."
                 )
         if len(controlnet_model_paths) == 1:
-            kwargs["controlnet"] = controlnet_model_paths[0]
+            kwargs["controlnet"] = (controlnet[0], controlnet_model_paths[0])
         else:
-            kwargs["controlnet"] =
+            kwargs["controlnet"] = [
+                (n, path) for n, path in zip(controlnet, controlnet_model_paths)
+            ]
     if not model_path:
         model_path = cache(model_spec)
     if peft_model_config is not None:
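The practical effect is that kwargs["controlnet"] now carries (name, cached_path) pairs instead of bare paths, which lets DiffusionModel build and cache one ControlNetModel per name (see _get_controlnet_model further down). Roughly, the two shapes handed to the model are (names and paths here are illustrative, not real cache locations):

# Illustrative values only; real names come from the model spec, paths from cache().
kwargs["controlnet"] = ("canny", "/cache/controlnet-canny")      # single controlnet
kwargs["controlnet"] = [                                         # multiple controlnets
    ("canny", "/cache/controlnet-canny"),
    ("depth", "/cache/controlnet-depth"),
]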
xinference/model/image/sdapi.py
CHANGED
@@ -11,11 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import base64
 import io
 import warnings
 
-from PIL import Image
+from PIL import Image, ImageOps
 
 
 class SDAPIToDiffusersConverter:
@@ -26,11 +27,12 @@ class SDAPIToDiffusersConverter:
         "width",
         "height",
         "sampler_name",
+        "progressor",
     }
     txt2img_arg_mapping = {
         "steps": "num_inference_steps",
         "cfg_scale": "guidance_scale",
-
+        "denoising_strength": "strength",
     }
     img2img_identical_args = {
         "prompt",
@@ -39,12 +41,15 @@ class SDAPIToDiffusersConverter:
         "width",
         "height",
         "sampler_name",
+        "progressor",
     }
     img2img_arg_mapping = {
         "init_images": "image",
+        "mask": "mask_image",
         "steps": "num_inference_steps",
         "cfg_scale": "guidance_scale",
         "denoising_strength": "strength",
+        "inpaint_full_res_padding": "padding_mask_crop",
     }
 
     @staticmethod
@@ -121,12 +126,38 @@ class SDAPIDiffusionModelMixin:
 
     def img2img(self, **kwargs):
         init_images = kwargs.pop("init_images", [])
-        kwargs["init_images"] =
+        kwargs["init_images"] = init_images = [
+            self._decode_b64_img(i) for i in init_images
+        ]
+        if len(init_images) == 1:
+            kwargs["init_images"] = init_images[0]
+        mask_image = kwargs.pop("mask", None)
+        if mask_image:
+            if kwargs.pop("inpainting_mask_invert"):
+                mask_image = ImageOps.invert(mask_image)
+
+            kwargs["mask"] = self._decode_b64_img(mask_image)
+
+        # process inpaint_full_res and inpaint_full_res_padding
+        if kwargs.pop("inpaint_full_res", None):
+            kwargs["inpaint_full_res_padding"] = kwargs.pop(
+                "inpaint_full_res_padding", 0
+            )
+        else:
+            # inpaint_full_res_padding is turned `into padding_mask_crop`
+            # in diffusers, if padding_mask_crop is passed, it will do inpaint_full_res
+            # so if not inpaint_full_rs, we need to pop this option
+            kwargs.pop("inpaint_full_res_padding", None)
+
         clip_skip = kwargs.get("override_settings", {}).get("clip_skip")
         converted_kwargs = self._check_kwargs("img2img", kwargs)
         if clip_skip:
             converted_kwargs["clip_skip"] = clip_skip
-
+
+        if not converted_kwargs.get("mask_image"):
+            result = self.image_to_image(response_format="b64_json", **converted_kwargs)  # type: ignore
+        else:
+            result = self.inpainting(response_format="b64_json", **converted_kwargs)  # type: ignore
 
         # convert to SD API result
         return {
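With this change, an SD-webui-style img2img request that includes a mask is routed to inpainting() instead of image_to_image(), and inpaint_full_res / inpaint_full_res_padding are translated into diffusers' padding_mask_crop. A rough request sketch, assuming the SD-webui-compatible route is mounted as in earlier releases (the URL and base64 payloads are placeholders):

# Hypothetical request sketch; endpoint path and payload values are placeholders.
import requests

payload = {
    "prompt": "a red sofa in a bright living room",
    "init_images": ["<base64 source image>"],
    "mask": "<base64 mask image>",
    "inpainting_mask_invert": 0,
    "inpaint_full_res": True,          # keeps inpaint_full_res_padding below
    "inpaint_full_res_padding": 32,    # forwarded as diffusers' padding_mask_crop
    "denoising_strength": 0.75,        # mapped to strength
    "steps": 30,                       # mapped to num_inference_steps
}
resp = requests.post("http://127.0.0.1:9997/sdapi/v1/img2img", json=payload)
print(resp.json().keys())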
xinference/model/image/stable_diffusion/core.py
CHANGED
@@ -14,7 +14,9 @@
 
 import base64
 import contextlib
+import gc
 import inspect
+import itertools
 import logging
 import os
 import re
@@ -25,7 +27,7 @@ import warnings
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
 from io import BytesIO
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
 import PIL.Image
 import torch
@@ -37,6 +39,7 @@ from ....types import Image, ImageList, LoRA
 from ..sdapi import SDAPIDiffusionModelMixin
 
 if TYPE_CHECKING:
+    from ....core.progress_tracker import Progressor
     from ..core import ImageModelFamilyV1
 
 logger = logging.getLogger(__name__)
@@ -93,16 +96,21 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         self._model_uid = model_uid
         self._model_path = model_path
         self._device = device
-        #
-        # it will be loaded as AutoPipelineForText2Image
-        # for image2image and inpainting,
-        # we convert to the corresponding model
+        # model info when loading
         self._model = None
-        self._i2i_model = None  # image to image model
-        self._inpainting_model = None  # inpainting model
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
+        # deepcache
+        self._deepcache_helper = None
+        # when a model has text2image ability,
+        # it will be loaded as AutoPipelineForText2Image
+        # for image2image and inpainting,
+        # we convert to the corresponding model
+        self._torch_dtype = None
+        self._ability_to_models: Dict[Tuple[str, Any], Any] = {}
+        self._controlnet_models: Dict[str, Any] = {}
+        # info
         self._model_spec = model_spec
         self._abilities = model_spec.model_ability or []  # type: ignore
         self._kwargs = kwargs
@@ -111,6 +119,63 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
     def model_ability(self):
         return self._abilities
 
+    @staticmethod
+    def _get_pipeline_type(ability: str) -> type:
+        if ability == "text2image":
+            from diffusers import AutoPipelineForText2Image as AutoPipelineModel
+        elif ability == "image2image":
+            from diffusers import AutoPipelineForImage2Image as AutoPipelineModel
+        elif ability == "inpainting":
+            from diffusers import AutoPipelineForInpainting as AutoPipelineModel
+        else:
+            raise ValueError(f"Unknown ability: {ability}")
+        return AutoPipelineModel
+
+    def _get_controlnet_model(self, name: str, path: str):
+        from diffusers import ControlNetModel
+
+        try:
+            return self._controlnet_models[name]
+        except KeyError:
+            logger.debug("Loading controlnet %s, from %s", name, path)
+            model = ControlNetModel.from_pretrained(path, torch_dtype=self._torch_dtype)
+            self._controlnet_models[name] = model
+            return model
+
+    def _get_model(
+        self,
+        ability: str,
+        controlnet_name: Optional[Union[str, List[str]]] = None,
+        controlnet_path: Optional[Union[str, List[str]]] = None,
+    ):
+        try:
+            return self._ability_to_models[ability, controlnet_name]
+        except KeyError:
+            model_type = self._get_pipeline_type(ability)
+
+            assert self._model is not None
+
+            if controlnet_name:
+                assert controlnet_path
+                if isinstance(controlnet_name, (list, tuple)):
+                    controlnet = []
+                    # multiple controlnet
+                    for name, path in itertools.zip_longest(
+                        controlnet_name, controlnet_path
+                    ):
+                        controlnet.append(self._get_controlnet_model(name, path))
+                else:
+                    controlnet = self._get_controlnet_model(
+                        controlnet_name, controlnet_path
+                    )
+                model = model_type.from_pipe(self._model, controlnet=controlnet)
+            else:
+                model = model_type.from_pipe(self._model)
+            self._load_to_device(model)
+
+            self._ability_to_models[ability, controlnet_name] = model
+            return model
+
     def _apply_lora(self):
         if self._lora_model is not None:
             logger.info(
@@ -132,22 +197,24 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         else:
             raise ValueError(f"Unknown ability: {self._abilities}")
 
-
-        if controlnet is not None:
-            from diffusers import ControlNetModel
-
-            logger.debug("Loading controlnet %s", controlnet)
-            self._kwargs["controlnet"] = ControlNetModel.from_pretrained(controlnet)
-
-        torch_dtype = self._kwargs.get("torch_dtype")
+        self._torch_dtype = torch_dtype = self._kwargs.get("torch_dtype")
         if sys.platform != "darwin" and torch_dtype is None:
             # The following params crashes on Mac M2
-            self._kwargs["torch_dtype"] = torch.float16
+            self._torch_dtype = self._kwargs["torch_dtype"] = torch.float16
             self._kwargs["variant"] = "fp16"
             self._kwargs["use_safetensors"] = True
         if isinstance(torch_dtype, str):
             self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)
 
+        controlnet = self._kwargs.get("controlnet")
+        if controlnet is not None:
+            if isinstance(controlnet, tuple):
+                self._kwargs["controlnet"] = self._get_controlnet_model(*controlnet)
+            else:
+                self._kwargs["controlnet"] = [
+                    self._get_controlnet_model(*cn) for cn in controlnet
+                ]
+
         quantize_text_encoder = self._kwargs.pop("quantize_text_encoder", None)
         if quantize_text_encoder:
             try:
@@ -193,27 +260,42 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             self._model_path,
             **self._kwargs,
         )
-
-
+        self._load_to_device(self._model)
+        self._apply_lora()
+
+        if self._kwargs.get("deepcache", False):
             try:
                 from DeepCache import DeepCacheSDHelper
-
-                helper = DeepCacheSDHelper(pipe=self._model)
-                helper.set_params(cache_interval=3, cache_branch_id=0)
-                helper.enable()
             except ImportError:
-
-
+                error_message = "Failed to import module 'deepcache' when you launch with deepcache=True"
+                installation_guide = [
+                    "Please make sure 'deepcache' is installed. ",
+                    "You can install it by `pip install deepcache`\n",
+                ]
 
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+            else:
+                self._deepcache_helper = helper = DeepCacheSDHelper()
+                helper.set_params(
+                    cache_interval=self._kwargs.get("deepcache_cache_interval", 3),
+                    cache_branch_id=self._kwargs.get("deepcache_cache_branch_id", 0),
+                )
+
+    def _load_to_device(self, model):
         if self._kwargs.get("cpu_offload", False):
             logger.debug("CPU offloading model")
-
+            model.enable_model_cpu_offload()
+        elif self._kwargs.get("sequential_cpu_offload", False):
+            logger.debug("CPU sequential offloading model")
+            model.enable_sequential_cpu_offload()
         elif not self._kwargs.get("device_map"):
             logger.debug("Loading model to available device")
-
+            model = move_model_to_available_device(self._model)
         # Recommended if your computer has < 64 GB of RAM
-        self.
-
+        if self._kwargs.get("attention_slicing", True):
+            model.enable_attention_slicing()
+        if self._kwargs.get("vae_tiling", False):
+            model.enable_vae_tiling()
 
     @staticmethod
     def _get_scheduler(model: Any, sampler_name: str):
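DeepCache is now instantiated once at load time and only attached per generation call via _wrap_deepcache (further down), with the interval and branch id read from launch kwargs. A hedged launch sketch, assuming these extra kwargs are forwarded into DiffusionModel's self._kwargs as before (the model name is illustrative):

# Hypothetical launch sketch; extra kwargs are assumed to reach self._kwargs.
from xinference.client import Client

client = Client("http://127.0.0.1:9997")
uid = client.launch_model(
    model_name="stable-diffusion-xl-base-1.0",
    model_type="image",
    deepcache=True,                 # requires `pip install deepcache`
    deepcache_cache_interval=3,
    deepcache_cache_branch_id=0,
)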
@@ -224,61 +306,78 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
 
         import diffusers
 
+        kwargs = {}
+        if (
+            sampler_name.startswith("DPM++")
+            and "final_sigmas_type" not in model.scheduler.config
+        ):
+            # `final_sigmas_type` will be set as `zero` by default which will cause error
+            kwargs["final_sigmas_type"] = "sigma_min"
+
         # see https://github.com/huggingface/diffusers/issues/4167
         # to get A1111 <> Diffusers Scheduler mapping
         if sampler_name == "DPM++ 2M":
             return diffusers.DPMSolverMultistepScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "DPM++ 2M Karras":
             return diffusers.DPMSolverMultistepScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "DPM++ 2M SDE":
             return diffusers.DPMSolverMultistepScheduler.from_config(
-                model.scheduler.config, algorithm_type="sde-dpmsolver++"
+                model.scheduler.config, algorithm_type="sde-dpmsolver++", **kwargs
             )
         elif sampler_name == "DPM++ 2M SDE Karras":
             return diffusers.DPMSolverMultistepScheduler.from_config(
                 model.scheduler.config,
                 algorithm_type="sde-dpmsolver++",
                 use_karras_sigmas=True,
+                **kwargs,
             )
         elif sampler_name == "DPM++ SDE":
             return diffusers.DPMSolverSinglestepScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "DPM++ SDE Karras":
             return diffusers.DPMSolverSinglestepScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "DPM2":
-            return diffusers.KDPM2DiscreteScheduler.from_config(
+            return diffusers.KDPM2DiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "DPM2 Karras":
             return diffusers.KDPM2DiscreteScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "DPM2 a":
             return diffusers.KDPM2AncestralDiscreteScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "DPM2 a Karras":
             return diffusers.KDPM2AncestralDiscreteScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "Euler":
-            return diffusers.EulerDiscreteScheduler.from_config(
+            return diffusers.EulerDiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "Euler a":
             return diffusers.EulerAncestralDiscreteScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "Heun":
-            return diffusers.HeunDiscreteScheduler.from_config(
+            return diffusers.HeunDiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "LMS":
-            return diffusers.LMSDiscreteScheduler.from_config(
+            return diffusers.LMSDiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "LMS Karras":
             return diffusers.LMSDiscreteScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         else:
             raise ValueError(f"Unknown sampler: {sampler_name}")
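The new kwargs dict exists because, per the comment in the hunk, `final_sigmas_type` otherwise defaults to `zero` and breaks the DPM++ solvers on some scheduler configs; it is forced to `sigma_min` only when the current config does not already define it. A standalone sketch of the resulting mapping for "DPM++ 2M Karras" (not the class method itself; the function name is made up):

# Standalone sketch of how "DPM++ 2M Karras" is mapped after this change.
import diffusers

def dpmpp_2m_karras_scheduler(pipe):
    extra = {}
    if "final_sigmas_type" not in pipe.scheduler.config:
        extra["final_sigmas_type"] = "sigma_min"
    return diffusers.DPMSolverMultistepScheduler.from_config(
        pipe.scheduler.config, use_karras_sigmas=True, **extra
    )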
@@ -298,27 +397,70 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         else:
             yield
 
+    @staticmethod
+    @contextlib.contextmanager
+    def _release_after():
+        from ....device_utils import empty_cache
+
+        try:
+            yield
+        finally:
+            gc.collect()
+            empty_cache()
+
+    @contextlib.contextmanager
+    def _wrap_deepcache(self, model: Any):
+        if self._deepcache_helper:
+            self._deepcache_helper.pipe = model
+            self._deepcache_helper.enable()
+        try:
+            yield
+        finally:
+            if self._deepcache_helper:
+                self._deepcache_helper.disable()
+                self._deepcache_helper.pipe = None
+
+    @staticmethod
+    def _process_progressor(kwargs: dict):
+        import diffusers
+
+        progressor: Progressor = kwargs.pop("progressor", None)
+
+        def report_status_callback(
+            pipe: diffusers.DiffusionPipeline,
+            step: int,
+            timestep: int,
+            callback_kwargs: dict,
+        ):
+            num_steps = pipe.num_timesteps
+            progressor.set_progress((step + 1) / num_steps)
+
+            return callback_kwargs
+
+        if progressor and progressor.request_id:
+            kwargs["callback_on_step_end"] = report_status_callback
+
     def _call_model(
         self,
         response_format: str,
         model=None,
         **kwargs,
     ):
-        import gc
-
-        from ....device_utils import empty_cache
-
         model = model if model is not None else self._model
         is_padded = kwargs.pop("is_padded", None)
         origin_size = kwargs.pop("origin_size", None)
         seed = kwargs.pop("seed", None)
-
+        return_images = kwargs.pop("_return_images", None)
+        if seed is not None and seed != -1:
             kwargs["generator"] = generator = torch.Generator(device=get_available_device())  # type: ignore
             if seed != -1:
                 kwargs["generator"] = generator.manual_seed(seed)
         sampler_name = kwargs.pop("sampler_name", None)
+        self._process_progressor(kwargs)
         assert callable(model)
-        with self._reset_when_done(
+        with self._reset_when_done(
+            model, sampler_name
+        ), self._release_after(), self._wrap_deepcache(model):
             logger.debug("stable diffusion args: %s, model: %s", kwargs, model)
             self._filter_kwargs(model, kwargs)
             images = model(**kwargs).images
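Progress reporting piggybacks on diffusers' callback_on_step_end: the callback receives the pipeline, the step index, and the timestep, and reports (step + 1) / pipe.num_timesteps to the Progressor. A minimal standalone illustration of that hook outside Xinference (the model id and prompt are just examples):

# Minimal standalone illustration of the callback_on_step_end progress hook.
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sd-turbo")

def on_step_end(pipeline, step, timestep, callback_kwargs):
    print(f"progress: {(step + 1) / pipeline.num_timesteps:.0%}")
    return callback_kwargs

image = pipe(
    "a lighthouse at dusk",
    num_inference_steps=4,
    callback_on_step_end=on_step_end,
).images[0]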
@@ -331,9 +473,8 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
                 new_images.append(img.crop((0, 0, x, y)))
             images = new_images
 
-
-
-        empty_cache()
+        if return_images:
+            return images
 
         if response_format == "url":
             os.makedirs(XINFERENCE_IMAGE_DIR, exist_ok=True)
@@ -378,15 +519,13 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         response_format: str = "url",
         **kwargs,
     ):
-        # References:
-        # https://huggingface.co/docs/diffusers/main/en/api/pipelines/controlnet_sdxl
         width, height = map(int, re.split(r"[^\d]+", size))
         generate_kwargs = self._model_spec.default_generate_config.copy()  # type: ignore
         generate_kwargs.update({k: v for k, v in kwargs.items() if v is not None})
+        generate_kwargs["width"], generate_kwargs["height"] = width, height
+
         return self._call_model(
             prompt=prompt,
-            height=height,
-            width=width,
             num_images_per_prompt=n,
             response_format=response_format,
             **generate_kwargs,
@@ -409,19 +548,13 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         response_format: str = "url",
         **kwargs,
     ):
-        if "controlnet"
+        if self._kwargs.get("controlnet"):
             model = self._model
         else:
-
+            ability = "image2image"
+            if ability not in self._abilities:
                 raise RuntimeError(f"{self._model_uid} does not support image2image")
-
-                model = self._i2i_model
-            else:
-                from diffusers import AutoPipelineForImage2Image
-
-                self._i2i_model = model = AutoPipelineForImage2Image.from_pipe(
-                    self._model
-                )
+            model = self._get_model(ability)
 
         if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
             # Model like SD3 image to image requires image's height and width is times of 16
@@ -462,24 +595,23 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         response_format: str = "url",
         **kwargs,
     ):
-
+        ability = "inpainting"
+        if ability not in self._abilities:
             raise RuntimeError(f"{self._model_uid} does not support inpainting")
 
         if (
             "text2image" in self._abilities or "image2image" in self._abilities
         ) and self._model is not None:
-
-
-            if self._inpainting_model is not None:
-                model = self._inpainting_model
-            else:
-                model = self._inpainting_model = AutoPipelineForInpainting.from_pipe(
-                    self._model
-                )
+            model = self._get_model(ability)
         else:
             model = self._model
 
-
+        if mask_blur := kwargs.pop("mask_blur", None):
+            logger.debug("Process mask image with mask_blur: %s", mask_blur)
+            mask_image = model.mask_processor.blur(mask_image, blur_factor=mask_blur)  # type: ignore
+
+        if "width" not in kwargs:
+            kwargs["width"], kwargs["height"] = map(int, re.split(r"[^\d]+", size))
 
         if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
             # Model like SD3 inpainting requires image's height and width is times of 16
@@ -492,14 +624,12 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
                 mask_image, multiple=int(padding_image_to_multiple)
             )
             # calculate actual image size after padding
-            width, height = image.size
+            kwargs["width"], kwargs["height"] = image.size
 
         return self._call_model(
             image=image,
             mask_image=mask_image,
             prompt=prompt,
-            height=height,
-            width=width,
             num_images_per_prompt=n,
             response_format=response_format,
             model=model,