xinference 0.15.3__py3-none-any.whl → 0.15.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of xinference might be problematic.

Files changed (43)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +29 -2
  3. xinference/client/restful/restful_client.py +10 -0
  4. xinference/constants.py +4 -0
  5. xinference/core/image_interface.py +76 -23
  6. xinference/core/model.py +80 -39
  7. xinference/core/progress_tracker.py +187 -0
  8. xinference/core/supervisor.py +11 -0
  9. xinference/core/worker.py +1 -0
  10. xinference/model/audio/chattts.py +2 -1
  11. xinference/model/audio/core.py +0 -2
  12. xinference/model/audio/model_spec.json +8 -0
  13. xinference/model/audio/model_spec_modelscope.json +9 -0
  14. xinference/model/image/core.py +6 -7
  15. xinference/model/image/sdapi.py +35 -4
  16. xinference/model/image/stable_diffusion/core.py +208 -78
  17. xinference/model/llm/llm_family.json +16 -16
  18. xinference/model/llm/llm_family_modelscope.json +16 -12
  19. xinference/model/llm/transformers/cogvlm2.py +2 -1
  20. xinference/model/llm/transformers/cogvlm2_video.py +2 -0
  21. xinference/model/llm/transformers/core.py +6 -2
  22. xinference/model/llm/transformers/deepseek_vl.py +2 -0
  23. xinference/model/llm/transformers/glm4v.py +2 -1
  24. xinference/model/llm/transformers/intern_vl.py +2 -0
  25. xinference/model/llm/transformers/minicpmv25.py +2 -0
  26. xinference/model/llm/transformers/minicpmv26.py +2 -0
  27. xinference/model/llm/transformers/omnilmm.py +2 -0
  28. xinference/model/llm/transformers/qwen2_audio.py +11 -4
  29. xinference/model/llm/transformers/qwen2_vl.py +2 -28
  30. xinference/model/llm/transformers/qwen_vl.py +2 -1
  31. xinference/model/llm/transformers/utils.py +35 -2
  32. xinference/model/llm/transformers/yi_vl.py +2 -0
  33. xinference/model/llm/utils.py +58 -14
  34. xinference/model/llm/vllm/core.py +52 -8
  35. xinference/model/llm/vllm/utils.py +0 -1
  36. xinference/model/utils.py +7 -4
  37. xinference/model/video/core.py +0 -2
  38. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/METADATA +3 -3
  39. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/RECORD +43 -42
  40. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/LICENSE +0 -0
  41. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/WHEEL +0 -0
  42. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/entry_points.txt +0 -0
  43. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/top_level.txt +0 -0
xinference/model/audio/model_spec.json
@@ -71,6 +71,14 @@
     "model_ability": "audio-to-text",
     "multilingual": true
   },
+  {
+    "model_name": "whisper-large-v3-turbo",
+    "model_family": "whisper",
+    "model_id": "openai/whisper-large-v3-turbo",
+    "model_revision": "41f01f3fe87f28c78e2fbf8b568835947dd65ed9",
+    "model_ability": "audio-to-text",
+    "multilingual": true
+  },
   {
     "model_name": "Belle-distilwhisper-large-v2-zh",
     "model_family": "whisper",
xinference/model/audio/model_spec_modelscope.json
@@ -8,6 +8,15 @@
     "model_ability": "audio-to-text",
     "multilingual": true
   },
+  {
+    "model_name": "whisper-large-v3-turbo",
+    "model_family": "whisper",
+    "model_hub": "modelscope",
+    "model_id": "AI-ModelScope/whisper-large-v3-turbo",
+    "model_revision": "master",
+    "model_ability": "audio-to-text",
+    "multilingual": true
+  },
   {
     "model_name": "SenseVoiceSmall",
     "model_family": "funasr",
xinference/model/image/core.py
@@ -23,8 +23,6 @@ from ..core import CacheableModelSpec, ModelDescription
 from ..utils import valid_model_revision
 from .stable_diffusion.core import DiffusionModel

-MAX_ATTEMPTS = 3
-
 logger = logging.getLogger(__name__)

 MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
@@ -210,18 +208,19 @@ def create_image_model_instance(
         for name in controlnet:
             for cn_model_spec in model_spec.controlnet:
                 if cn_model_spec.model_name == name:
-                    if not model_path:
-                        model_path = cache(cn_model_spec)
-                    controlnet_model_paths.append(model_path)
+                    controlnet_model_path = cache(cn_model_spec)
+                    controlnet_model_paths.append(controlnet_model_path)
                     break
             else:
                 raise ValueError(
                     f"controlnet `{name}` is not supported for model `{model_name}`."
                 )
         if len(controlnet_model_paths) == 1:
-            kwargs["controlnet"] = controlnet_model_paths[0]
+            kwargs["controlnet"] = (controlnet[0], controlnet_model_paths[0])
         else:
-            kwargs["controlnet"] = [
+            kwargs["controlnet"] = [
+                (n, path) for n, path in zip(controlnet, controlnet_model_paths)
+            ]
     if not model_path:
         model_path = cache(model_spec)
     if peft_model_config is not None:
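With this change a requested controlnet is no longer handed to the model as a bare path but as a (name, path) pair, so the model can cache loaded ControlNet weights by name. A minimal sketch of the resulting kwarg shape (paths are placeholders, not real cache locations):

```python
# Requested controlnet names and the paths resolved for them by cache()
controlnet = ["canny", "depth"]
controlnet_model_paths = ["/cache/controlnet-canny", "/cache/controlnet-depth"]

if len(controlnet_model_paths) == 1:
    controlnet_kwarg = (controlnet[0], controlnet_model_paths[0])     # single (name, path) pair
else:
    controlnet_kwarg = list(zip(controlnet, controlnet_model_paths))  # list of pairs

# DiffusionModel.load() (see the stable_diffusion/core.py hunks below) unpacks
# each pair with _get_controlnet_model(name, path).
```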
xinference/model/image/sdapi.py
@@ -11,11 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import base64
 import io
 import warnings

-from PIL import Image
+from PIL import Image, ImageOps


 class SDAPIToDiffusersConverter:
@@ -26,11 +27,12 @@ class SDAPIToDiffusersConverter:
         "width",
         "height",
         "sampler_name",
+        "progressor",
     }
     txt2img_arg_mapping = {
         "steps": "num_inference_steps",
         "cfg_scale": "guidance_scale",
-        # "denoising_strength": "strength",
+        "denoising_strength": "strength",
     }
     img2img_identical_args = {
         "prompt",
@@ -39,12 +41,15 @@ class SDAPIToDiffusersConverter:
         "width",
         "height",
         "sampler_name",
+        "progressor",
     }
     img2img_arg_mapping = {
         "init_images": "image",
+        "mask": "mask_image",
         "steps": "num_inference_steps",
         "cfg_scale": "guidance_scale",
         "denoising_strength": "strength",
+        "inpaint_full_res_padding": "padding_mask_crop",
     }

     @staticmethod
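The two mapping tables amount to a key rename from SD-webui parameter names to diffusers argument names; arguments in the "identical" sets pass through unchanged. A small illustration of the translation (values are arbitrary and base64 decoding is omitted):

```python
# SD-webui style img2img payload
payload = {
    "init_images": ["<base64 png>"],
    "mask": "<base64 mask png>",
    "steps": 30,
    "cfg_scale": 7.0,
    "denoising_strength": 0.6,
    "inpaint_full_res_padding": 32,
}

# img2img_arg_mapping from the hunk above
mapping = {
    "init_images": "image",
    "mask": "mask_image",
    "steps": "num_inference_steps",
    "cfg_scale": "guidance_scale",
    "denoising_strength": "strength",
    "inpaint_full_res_padding": "padding_mask_crop",
}

diffusers_kwargs = {mapping.get(k, k): v for k, v in payload.items()}
print(diffusers_kwargs)
# {'image': [...], 'mask_image': '...', 'num_inference_steps': 30,
#  'guidance_scale': 7.0, 'strength': 0.6, 'padding_mask_crop': 32}
```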
@@ -121,12 +126,38 @@ class SDAPIDiffusionModelMixin:

     def img2img(self, **kwargs):
         init_images = kwargs.pop("init_images", [])
-        kwargs["init_images"] = [self._decode_b64_img(i) for i in init_images]
+        kwargs["init_images"] = init_images = [
+            self._decode_b64_img(i) for i in init_images
+        ]
+        if len(init_images) == 1:
+            kwargs["init_images"] = init_images[0]
+        mask_image = kwargs.pop("mask", None)
+        if mask_image:
+            if kwargs.pop("inpainting_mask_invert"):
+                mask_image = ImageOps.invert(mask_image)
+
+            kwargs["mask"] = self._decode_b64_img(mask_image)
+
+        # process inpaint_full_res and inpaint_full_res_padding
+        if kwargs.pop("inpaint_full_res", None):
+            kwargs["inpaint_full_res_padding"] = kwargs.pop(
+                "inpaint_full_res_padding", 0
+            )
+        else:
+            # inpaint_full_res_padding is turned `into padding_mask_crop`
+            # in diffusers, if padding_mask_crop is passed, it will do inpaint_full_res
+            # so if not inpaint_full_rs, we need to pop this option
+            kwargs.pop("inpaint_full_res_padding", None)
+
         clip_skip = kwargs.get("override_settings", {}).get("clip_skip")
         converted_kwargs = self._check_kwargs("img2img", kwargs)
         if clip_skip:
             converted_kwargs["clip_skip"] = clip_skip
-        result = self.image_to_image(response_format="b64_json", **converted_kwargs)  # type: ignore
+
+        if not converted_kwargs.get("mask_image"):
+            result = self.image_to_image(response_format="b64_json", **converted_kwargs)  # type: ignore
+        else:
+            result = self.inpainting(response_format="b64_json", **converted_kwargs)  # type: ignore

         # convert to SD API result
         return {
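End to end, this means an img2img request that carries a mask is answered by the inpainting pipeline instead of plain image-to-image. A hedged sketch of such a request against the SD-webui compatible endpoint; the host, port, endpoint path, and how the target model is selected depend on your deployment and are assumptions here:

```python
import base64

import requests


def b64(path: str) -> str:
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode()


payload = {
    "prompt": "a red leather sofa",
    "init_images": [b64("room.png")],
    "mask": b64("sofa_mask.png"),       # presence of a mask routes the call to inpainting()
    "inpainting_mask_invert": False,
    "inpaint_full_res": True,
    "inpaint_full_res_padding": 32,     # becomes diffusers' padding_mask_crop
    "denoising_strength": 0.75,
    "steps": 30,
    "cfg_scale": 7.0,
    # model selection is deployment-specific and omitted here
}

resp = requests.post("http://localhost:9997/sdapi/v1/img2img", json=payload)
print(resp.json()["images"][0][:64])    # base64-encoded result image
```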
xinference/model/image/stable_diffusion/core.py
@@ -14,7 +14,9 @@

 import base64
 import contextlib
+import gc
 import inspect
+import itertools
 import logging
 import os
 import re
@@ -25,7 +27,7 @@ import warnings
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
 from io import BytesIO
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

 import PIL.Image
 import torch
@@ -37,6 +39,7 @@ from ....types import Image, ImageList, LoRA
 from ..sdapi import SDAPIDiffusionModelMixin

 if TYPE_CHECKING:
+    from ....core.progress_tracker import Progressor
     from ..core import ImageModelFamilyV1

 logger = logging.getLogger(__name__)
@@ -93,16 +96,21 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         self._model_uid = model_uid
         self._model_path = model_path
         self._device = device
-        # when a model has text2image ability,
-        # it will be loaded as AutoPipelineForText2Image
-        # for image2image and inpainting,
-        # we convert to the corresponding model
+        # model info when loading
         self._model = None
-        self._i2i_model = None  # image to image model
-        self._inpainting_model = None  # inpainting model
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
+        # deepcache
+        self._deepcache_helper = None
+        # when a model has text2image ability,
+        # it will be loaded as AutoPipelineForText2Image
+        # for image2image and inpainting,
+        # we convert to the corresponding model
+        self._torch_dtype = None
+        self._ability_to_models: Dict[Tuple[str, Any], Any] = {}
+        self._controlnet_models: Dict[str, Any] = {}
+        # info
         self._model_spec = model_spec
         self._abilities = model_spec.model_ability or []  # type: ignore
         self._kwargs = kwargs
@@ -111,6 +119,63 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
     def model_ability(self):
         return self._abilities

+    @staticmethod
+    def _get_pipeline_type(ability: str) -> type:
+        if ability == "text2image":
+            from diffusers import AutoPipelineForText2Image as AutoPipelineModel
+        elif ability == "image2image":
+            from diffusers import AutoPipelineForImage2Image as AutoPipelineModel
+        elif ability == "inpainting":
+            from diffusers import AutoPipelineForInpainting as AutoPipelineModel
+        else:
+            raise ValueError(f"Unknown ability: {ability}")
+        return AutoPipelineModel
+
+    def _get_controlnet_model(self, name: str, path: str):
+        from diffusers import ControlNetModel
+
+        try:
+            return self._controlnet_models[name]
+        except KeyError:
+            logger.debug("Loading controlnet %s, from %s", name, path)
+            model = ControlNetModel.from_pretrained(path, torch_dtype=self._torch_dtype)
+            self._controlnet_models[name] = model
+            return model
+
+    def _get_model(
+        self,
+        ability: str,
+        controlnet_name: Optional[Union[str, List[str]]] = None,
+        controlnet_path: Optional[Union[str, List[str]]] = None,
+    ):
+        try:
+            return self._ability_to_models[ability, controlnet_name]
+        except KeyError:
+            model_type = self._get_pipeline_type(ability)
+
+            assert self._model is not None
+
+            if controlnet_name:
+                assert controlnet_path
+                if isinstance(controlnet_name, (list, tuple)):
+                    controlnet = []
+                    # multiple controlnet
+                    for name, path in itertools.zip_longest(
+                        controlnet_name, controlnet_path
+                    ):
+                        controlnet.append(self._get_controlnet_model(name, path))
+                else:
+                    controlnet = self._get_controlnet_model(
+                        controlnet_name, controlnet_path
+                    )
+                model = model_type.from_pipe(self._model, controlnet=controlnet)
+            else:
+                model = model_type.from_pipe(self._model)
+            self._load_to_device(model)
+
+            self._ability_to_models[ability, controlnet_name] = model
+            return model
+
     def _apply_lora(self):
         if self._lora_model is not None:
             logger.info(
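These helpers replace the dedicated _i2i_model and _inpainting_model attributes: derived pipelines are now built on demand with diffusers' from_pipe() and cached per (ability, controlnet) key, so the heavy components are shared rather than reloaded. A standalone sketch of the pattern (the model id is a placeholder):

```python
from diffusers import (
    AutoPipelineForImage2Image,
    AutoPipelineForInpainting,
    AutoPipelineForText2Image,
)

base = AutoPipelineForText2Image.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")

_pipelines = {"text2image": base}


def get_pipeline(ability: str):
    # Build each derived pipeline once; from_pipe() shares the UNet/VAE/text
    # encoders with the base pipeline instead of loading them again.
    if ability not in _pipelines:
        cls = {
            "image2image": AutoPipelineForImage2Image,
            "inpainting": AutoPipelineForInpainting,
        }[ability]
        _pipelines[ability] = cls.from_pipe(base)
    return _pipelines[ability]


img2img_pipe = get_pipeline("image2image")
```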
@@ -132,22 +197,24 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         else:
             raise ValueError(f"Unknown ability: {self._abilities}")

-        controlnet = self._kwargs.get("controlnet")
-        if controlnet is not None:
-            from diffusers import ControlNetModel
-
-            logger.debug("Loading controlnet %s", controlnet)
-            self._kwargs["controlnet"] = ControlNetModel.from_pretrained(controlnet)
-
-        torch_dtype = self._kwargs.get("torch_dtype")
+        self._torch_dtype = torch_dtype = self._kwargs.get("torch_dtype")
         if sys.platform != "darwin" and torch_dtype is None:
             # The following params crashes on Mac M2
-            self._kwargs["torch_dtype"] = torch.float16
+            self._torch_dtype = self._kwargs["torch_dtype"] = torch.float16
             self._kwargs["variant"] = "fp16"
             self._kwargs["use_safetensors"] = True
         if isinstance(torch_dtype, str):
             self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)

+        controlnet = self._kwargs.get("controlnet")
+        if controlnet is not None:
+            if isinstance(controlnet, tuple):
+                self._kwargs["controlnet"] = self._get_controlnet_model(*controlnet)
+            else:
+                self._kwargs["controlnet"] = [
+                    self._get_controlnet_model(*cn) for cn in controlnet
+                ]
+
         quantize_text_encoder = self._kwargs.pop("quantize_text_encoder", None)
         if quantize_text_encoder:
             try:
@@ -193,27 +260,42 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             self._model_path,
             **self._kwargs,
         )
-        if self._kwargs.get("deepcache", True):
-            # NOTE: DeepCache should be loaded first before cpu_offloading
+        self._load_to_device(self._model)
+        self._apply_lora()
+
+        if self._kwargs.get("deepcache", False):
             try:
                 from DeepCache import DeepCacheSDHelper
-
-                helper = DeepCacheSDHelper(pipe=self._model)
-                helper.set_params(cache_interval=3, cache_branch_id=0)
-                helper.enable()
             except ImportError:
-                logger.debug("deepcache is not installed")
-                pass
+                error_message = "Failed to import module 'deepcache' when you launch with deepcache=True"
+                installation_guide = [
+                    "Please make sure 'deepcache' is installed. ",
+                    "You can install it by `pip install deepcache`\n",
+                ]

+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+            else:
+                self._deepcache_helper = helper = DeepCacheSDHelper()
+                helper.set_params(
+                    cache_interval=self._kwargs.get("deepcache_cache_interval", 3),
+                    cache_branch_id=self._kwargs.get("deepcache_cache_branch_id", 0),
+                )
+
+    def _load_to_device(self, model):
         if self._kwargs.get("cpu_offload", False):
             logger.debug("CPU offloading model")
-            self._model.enable_model_cpu_offload()
+            model.enable_model_cpu_offload()
+        elif self._kwargs.get("sequential_cpu_offload", False):
+            logger.debug("CPU sequential offloading model")
+            model.enable_sequential_cpu_offload()
         elif not self._kwargs.get("device_map"):
             logger.debug("Loading model to available device")
-            self._model = move_model_to_available_device(self._model)
+            model = move_model_to_available_device(self._model)
         # Recommended if your computer has < 64 GB of RAM
-        self._model.enable_attention_slicing()
-        self._apply_lora()
+        if self._kwargs.get("attention_slicing", True):
+            model.enable_attention_slicing()
+        if self._kwargs.get("vae_tiling", False):
+            model.enable_vae_tiling()

     @staticmethod
     def _get_scheduler(model: Any, sampler_name: str):
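The net effect of these hunks: DeepCache is now opt-in (the default flips from True to False) and failing to import it is a hard error rather than a silent skip, while device placement gains sequential_cpu_offload, attention_slicing, and vae_tiling switches. A hedged sketch of passing the new options at launch time; it assumes extra launch kwargs reach DiffusionModel unchanged and that the DeepCache package is installed:

```python
from xinference.client import Client

client = Client("http://localhost:9997")         # assumed local endpoint
model_uid = client.launch_model(
    model_name="stable-diffusion-xl-base-1.0",   # any image model from the built-in registry
    model_type="image",
    deepcache=True,                              # opt-in now; raises ImportError if missing
    deepcache_cache_interval=3,
    deepcache_cache_branch_id=0,
    sequential_cpu_offload=True,                 # new alternative to cpu_offload
    attention_slicing=True,                      # can now be switched off explicitly
    vae_tiling=True,                             # new, off by default
)
```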
@@ -224,61 +306,78 @@ class DiffusionModel(SDAPIDiffusionModelMixin):

         import diffusers

+        kwargs = {}
+        if (
+            sampler_name.startswith("DPM++")
+            and "final_sigmas_type" not in model.scheduler.config
+        ):
+            # `final_sigmas_type` will be set as `zero` by default which will cause error
+            kwargs["final_sigmas_type"] = "sigma_min"
+
         # see https://github.com/huggingface/diffusers/issues/4167
         # to get A1111 <> Diffusers Scheduler mapping
         if sampler_name == "DPM++ 2M":
             return diffusers.DPMSolverMultistepScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "DPM++ 2M Karras":
             return diffusers.DPMSolverMultistepScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "DPM++ 2M SDE":
             return diffusers.DPMSolverMultistepScheduler.from_config(
-                model.scheduler.config, algorithm_type="sde-dpmsolver++"
+                model.scheduler.config, algorithm_type="sde-dpmsolver++", **kwargs
             )
         elif sampler_name == "DPM++ 2M SDE Karras":
             return diffusers.DPMSolverMultistepScheduler.from_config(
                 model.scheduler.config,
                 algorithm_type="sde-dpmsolver++",
                 use_karras_sigmas=True,
+                **kwargs,
             )
         elif sampler_name == "DPM++ SDE":
             return diffusers.DPMSolverSinglestepScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "DPM++ SDE Karras":
             return diffusers.DPMSolverSinglestepScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "DPM2":
-            return diffusers.KDPM2DiscreteScheduler.from_config(model.scheduler.config)
+            return diffusers.KDPM2DiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "DPM2 Karras":
             return diffusers.KDPM2DiscreteScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "DPM2 a":
             return diffusers.KDPM2AncestralDiscreteScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "DPM2 a Karras":
             return diffusers.KDPM2AncestralDiscreteScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "Euler":
-            return diffusers.EulerDiscreteScheduler.from_config(model.scheduler.config)
+            return diffusers.EulerDiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "Euler a":
             return diffusers.EulerAncestralDiscreteScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "Heun":
-            return diffusers.HeunDiscreteScheduler.from_config(model.scheduler.config)
+            return diffusers.HeunDiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "LMS":
-            return diffusers.LMSDiscreteScheduler.from_config(model.scheduler.config)
+            return diffusers.LMSDiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "LMS Karras":
             return diffusers.LMSDiscreteScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         else:
             raise ValueError(f"Unknown sampler: {sampler_name}")
@@ -298,27 +397,70 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         else:
             yield

+    @staticmethod
+    @contextlib.contextmanager
+    def _release_after():
+        from ....device_utils import empty_cache
+
+        try:
+            yield
+        finally:
+            gc.collect()
+            empty_cache()
+
+    @contextlib.contextmanager
+    def _wrap_deepcache(self, model: Any):
+        if self._deepcache_helper:
+            self._deepcache_helper.pipe = model
+            self._deepcache_helper.enable()
+        try:
+            yield
+        finally:
+            if self._deepcache_helper:
+                self._deepcache_helper.disable()
+                self._deepcache_helper.pipe = None
+
+    @staticmethod
+    def _process_progressor(kwargs: dict):
+        import diffusers
+
+        progressor: Progressor = kwargs.pop("progressor", None)
+
+        def report_status_callback(
+            pipe: diffusers.DiffusionPipeline,
+            step: int,
+            timestep: int,
+            callback_kwargs: dict,
+        ):
+            num_steps = pipe.num_timesteps
+            progressor.set_progress((step + 1) / num_steps)
+
+            return callback_kwargs
+
+        if progressor and progressor.request_id:
+            kwargs["callback_on_step_end"] = report_status_callback
+
     def _call_model(
         self,
         response_format: str,
         model=None,
         **kwargs,
     ):
-        import gc
-
-        from ....device_utils import empty_cache
-
         model = model if model is not None else self._model
         is_padded = kwargs.pop("is_padded", None)
         origin_size = kwargs.pop("origin_size", None)
         seed = kwargs.pop("seed", None)
-        if seed is not None:
+        return_images = kwargs.pop("_return_images", None)
+        if seed is not None and seed != -1:
             kwargs["generator"] = generator = torch.Generator(device=get_available_device())  # type: ignore
             if seed != -1:
                 kwargs["generator"] = generator.manual_seed(seed)
         sampler_name = kwargs.pop("sampler_name", None)
+        self._process_progressor(kwargs)
         assert callable(model)
-        with self._reset_when_done(model, sampler_name):
+        with self._reset_when_done(
+            model, sampler_name
+        ), self._release_after(), self._wrap_deepcache(model):
             logger.debug("stable diffusion args: %s, model: %s", kwargs, model)
             self._filter_kwargs(model, kwargs)
             images = model(**kwargs).images
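The new _process_progressor hooks xinference's progress tracking (the new core/progress_tracker.py) into diffusers' per-step callback: callback_on_step_end fires once per denoising step, and pipe.num_timesteps gives the total, so progress can be reported as a 0 to 1 fraction. A standalone sketch of the same callback shape, with print standing in for Progressor.set_progress:

```python
def report_progress(pipe, step, timestep, callback_kwargs):
    # diffusers invokes this after every denoising step
    progress = (step + 1) / pipe.num_timesteps
    print(f"progress: {progress:.0%}")
    return callback_kwargs


# images = pipe(prompt="a lighthouse at dusk",
#               callback_on_step_end=report_progress).images
```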
@@ -331,9 +473,8 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
                 new_images.append(img.crop((0, 0, x, y)))
             images = new_images

-        # clean cache
-        gc.collect()
-        empty_cache()
+        if return_images:
+            return images

         if response_format == "url":
             os.makedirs(XINFERENCE_IMAGE_DIR, exist_ok=True)
@@ -378,15 +519,13 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         response_format: str = "url",
         **kwargs,
     ):
-        # References:
-        # https://huggingface.co/docs/diffusers/main/en/api/pipelines/controlnet_sdxl
         width, height = map(int, re.split(r"[^\d]+", size))
         generate_kwargs = self._model_spec.default_generate_config.copy()  # type: ignore
         generate_kwargs.update({k: v for k, v in kwargs.items() if v is not None})
+        generate_kwargs["width"], generate_kwargs["height"] = width, height
+
         return self._call_model(
             prompt=prompt,
-            height=height,
-            width=width,
             num_images_per_prompt=n,
             response_format=response_format,
             **generate_kwargs,
@@ -409,19 +548,13 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         response_format: str = "url",
         **kwargs,
     ):
-        if "controlnet" in self._kwargs:
+        if self._kwargs.get("controlnet"):
             model = self._model
         else:
-            if "image2image" not in self._abilities:
+            ability = "image2image"
+            if ability not in self._abilities:
                 raise RuntimeError(f"{self._model_uid} does not support image2image")
-            if self._i2i_model is not None:
-                model = self._i2i_model
-            else:
-                from diffusers import AutoPipelineForImage2Image
-
-                self._i2i_model = model = AutoPipelineForImage2Image.from_pipe(
-                    self._model
-                )
+            model = self._get_model(ability)

         if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
             # Model like SD3 image to image requires image's height and width is times of 16
@@ -462,24 +595,23 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         response_format: str = "url",
         **kwargs,
     ):
-        if "inpainting" not in self._abilities:
+        ability = "inpainting"
+        if ability not in self._abilities:
             raise RuntimeError(f"{self._model_uid} does not support inpainting")

         if (
             "text2image" in self._abilities or "image2image" in self._abilities
         ) and self._model is not None:
-            from diffusers import AutoPipelineForInpainting
-
-            if self._inpainting_model is not None:
-                model = self._inpainting_model
-            else:
-                model = self._inpainting_model = AutoPipelineForInpainting.from_pipe(
-                    self._model
-                )
+            model = self._get_model(ability)
         else:
             model = self._model

-        width, height = map(int, re.split(r"[^\d]+", size))
+        if mask_blur := kwargs.pop("mask_blur", None):
+            logger.debug("Process mask image with mask_blur: %s", mask_blur)
+            mask_image = model.mask_processor.blur(mask_image, blur_factor=mask_blur)  # type: ignore
+
+        if "width" not in kwargs:
+            kwargs["width"], kwargs["height"] = map(int, re.split(r"[^\d]+", size))

         if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
             # Model like SD3 inpainting requires image's height and width is times of 16
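The new mask_blur option maps onto the mask_processor that diffusers inpainting pipelines expose: its blur() softens the mask edges the way SD-webui's "Mask blur" slider does, while padding_mask_crop reproduces the "inpaint at full resolution" behaviour. A hedged sketch with placeholder model id and image paths:

```python
import PIL.Image
from diffusers import AutoPipelineForInpainting

pipe = AutoPipelineForInpainting.from_pretrained("stabilityai/stable-diffusion-2-inpainting")
init_image = PIL.Image.open("photo.png").convert("RGB")
mask_image = PIL.Image.open("mask.png").convert("RGB")

# mask_blur=4 in the hunk above corresponds to blur_factor=4 here
blurred_mask = pipe.mask_processor.blur(mask_image, blur_factor=4)

result = pipe(
    prompt="restore the damaged area",
    image=init_image,
    mask_image=blurred_mask,
    padding_mask_crop=32,  # crop to the masked region plus padding before inpainting
).images[0]
```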
@@ -492,14 +624,12 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
                 mask_image, multiple=int(padding_image_to_multiple)
             )
             # calculate actual image size after padding
-            width, height = image.size
+            kwargs["width"], kwargs["height"] = image.size

         return self._call_model(
             image=image,
             mask_image=mask_image,
             prompt=prompt,
-            height=height,
-            width=width,
             num_images_per_prompt=n,
             response_format=response_format,
             model=model,