xinference 0.15.3__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.

Potentially problematic release.
Files changed (65)
  1. xinference/__init__.py +0 -4
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +29 -2
  4. xinference/client/restful/restful_client.py +10 -0
  5. xinference/constants.py +7 -3
  6. xinference/core/image_interface.py +76 -23
  7. xinference/core/model.py +158 -46
  8. xinference/core/progress_tracker.py +187 -0
  9. xinference/core/scheduler.py +10 -7
  10. xinference/core/supervisor.py +11 -0
  11. xinference/core/utils.py +9 -0
  12. xinference/core/worker.py +1 -0
  13. xinference/deploy/supervisor.py +4 -0
  14. xinference/model/__init__.py +4 -0
  15. xinference/model/audio/chattts.py +2 -1
  16. xinference/model/audio/core.py +0 -2
  17. xinference/model/audio/model_spec.json +8 -0
  18. xinference/model/audio/model_spec_modelscope.json +9 -0
  19. xinference/model/image/core.py +6 -7
  20. xinference/model/image/scheduler/__init__.py +13 -0
  21. xinference/model/image/scheduler/flux.py +533 -0
  22. xinference/model/image/sdapi.py +35 -4
  23. xinference/model/image/stable_diffusion/core.py +215 -110
  24. xinference/model/image/utils.py +39 -3
  25. xinference/model/llm/__init__.py +2 -0
  26. xinference/model/llm/llm_family.json +185 -17
  27. xinference/model/llm/llm_family_modelscope.json +124 -12
  28. xinference/model/llm/transformers/chatglm.py +104 -0
  29. xinference/model/llm/transformers/cogvlm2.py +2 -1
  30. xinference/model/llm/transformers/cogvlm2_video.py +2 -0
  31. xinference/model/llm/transformers/core.py +43 -113
  32. xinference/model/llm/transformers/deepseek_v2.py +0 -226
  33. xinference/model/llm/transformers/deepseek_vl.py +2 -0
  34. xinference/model/llm/transformers/glm4v.py +2 -1
  35. xinference/model/llm/transformers/intern_vl.py +2 -0
  36. xinference/model/llm/transformers/internlm2.py +3 -95
  37. xinference/model/llm/transformers/minicpmv25.py +2 -0
  38. xinference/model/llm/transformers/minicpmv26.py +2 -0
  39. xinference/model/llm/transformers/omnilmm.py +2 -0
  40. xinference/model/llm/transformers/opt.py +68 -0
  41. xinference/model/llm/transformers/qwen2_audio.py +11 -4
  42. xinference/model/llm/transformers/qwen2_vl.py +2 -28
  43. xinference/model/llm/transformers/qwen_vl.py +2 -1
  44. xinference/model/llm/transformers/utils.py +36 -283
  45. xinference/model/llm/transformers/yi_vl.py +2 -0
  46. xinference/model/llm/utils.py +60 -16
  47. xinference/model/llm/vllm/core.py +68 -9
  48. xinference/model/llm/vllm/utils.py +0 -1
  49. xinference/model/utils.py +7 -4
  50. xinference/model/video/core.py +0 -2
  51. xinference/utils.py +2 -3
  52. xinference/web/ui/build/asset-manifest.json +3 -3
  53. xinference/web/ui/build/index.html +1 -1
  54. xinference/web/ui/build/static/js/{main.e51a356d.js → main.f7da0140.js} +3 -3
  55. xinference/web/ui/build/static/js/main.f7da0140.js.map +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +1 -0
  57. {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/METADATA +38 -6
  58. {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/RECORD +63 -59
  59. xinference/web/ui/build/static/js/main.e51a356d.js.map +0 -1
  60. xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +0 -1
  61. /xinference/web/ui/build/static/js/{main.e51a356d.js.LICENSE.txt → main.f7da0140.js.LICENSE.txt} +0 -0
  62. {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/LICENSE +0 -0
  63. {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/WHEEL +0 -0
  64. {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/entry_points.txt +0 -0
  65. {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/top_level.txt +0 -0
--- a/xinference/model/image/stable_diffusion/core.py
+++ b/xinference/model/image/stable_diffusion/core.py
@@ -12,31 +12,27 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import base64
 import contextlib
+import gc
 import inspect
+import itertools
 import logging
-import os
 import re
 import sys
-import time
-import uuid
 import warnings
-from concurrent.futures import ThreadPoolExecutor
-from functools import partial
-from io import BytesIO
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

 import PIL.Image
 import torch
 from PIL import ImageOps

-from ....constants import XINFERENCE_IMAGE_DIR
 from ....device_utils import get_available_device, move_model_to_available_device
-from ....types import Image, ImageList, LoRA
+from ....types import LoRA
 from ..sdapi import SDAPIDiffusionModelMixin
+from ..utils import handle_image_result

 if TYPE_CHECKING:
+    from ....core.progress_tracker import Progressor
     from ..core import ImageModelFamilyV1

 logger = logging.getLogger(__name__)
@@ -93,16 +89,21 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         self._model_uid = model_uid
         self._model_path = model_path
         self._device = device
-        # when a model has text2image ability,
-        # it will be loaded as AutoPipelineForText2Image
-        # for image2image and inpainting,
-        # we convert to the corresponding model
+        # model info when loading
         self._model = None
-        self._i2i_model = None  # image to image model
-        self._inpainting_model = None  # inpainting model
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
+        # deepcache
+        self._deepcache_helper = None
+        # when a model has text2image ability,
+        # it will be loaded as AutoPipelineForText2Image
+        # for image2image and inpainting,
+        # we convert to the corresponding model
+        self._torch_dtype = None
+        self._ability_to_models: Dict[Tuple[str, Any], Any] = {}
+        self._controlnet_models: Dict[str, Any] = {}
+        # info
         self._model_spec = model_spec
         self._abilities = model_spec.model_ability or []  # type: ignore
         self._kwargs = kwargs
@@ -111,6 +112,63 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
     def model_ability(self):
         return self._abilities

+    @staticmethod
+    def _get_pipeline_type(ability: str) -> type:
+        if ability == "text2image":
+            from diffusers import AutoPipelineForText2Image as AutoPipelineModel
+        elif ability == "image2image":
+            from diffusers import AutoPipelineForImage2Image as AutoPipelineModel
+        elif ability == "inpainting":
+            from diffusers import AutoPipelineForInpainting as AutoPipelineModel
+        else:
+            raise ValueError(f"Unknown ability: {ability}")
+        return AutoPipelineModel
+
+    def _get_controlnet_model(self, name: str, path: str):
+        from diffusers import ControlNetModel
+
+        try:
+            return self._controlnet_models[name]
+        except KeyError:
+            logger.debug("Loading controlnet %s, from %s", name, path)
+            model = ControlNetModel.from_pretrained(path, torch_dtype=self._torch_dtype)
+            self._controlnet_models[name] = model
+            return model
+
+    def _get_model(
+        self,
+        ability: str,
+        controlnet_name: Optional[Union[str, List[str]]] = None,
+        controlnet_path: Optional[Union[str, List[str]]] = None,
+    ):
+        try:
+            return self._ability_to_models[ability, controlnet_name]
+        except KeyError:
+            model_type = self._get_pipeline_type(ability)
+
+            assert self._model is not None
+
+            if controlnet_name:
+                assert controlnet_path
+                if isinstance(controlnet_name, (list, tuple)):
+                    controlnet = []
+                    # multiple controlnet
+                    for name, path in itertools.zip_longest(
+                        controlnet_name, controlnet_path
+                    ):
+                        controlnet.append(self._get_controlnet_model(name, path))
+                else:
+                    controlnet = self._get_controlnet_model(
+                        controlnet_name, controlnet_path
+                    )
+                model = model_type.from_pipe(self._model, controlnet=controlnet)
+            else:
+                model = model_type.from_pipe(self._model)
+            self._load_to_device(model)
+
+            self._ability_to_models[ability, controlnet_name] = model
+            return model
+
     def _apply_lora(self):
         if self._lora_model is not None:
             logger.info(
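
Note: the new `_get_model` above replaces the old dedicated `_i2i_model` / `_inpainting_model` fields with a single cache keyed by `(ability, controlnet)`. A minimal standalone sketch of the same pattern using diffusers' public `from_pipe` API; the model path is a placeholder:

    # Sketch: derive and cache sibling pipelines from one loaded pipe,
    # mirroring the (ability, controlnet) cache in _get_model above.
    from diffusers import (
        AutoPipelineForImage2Image,
        AutoPipelineForInpainting,
        AutoPipelineForText2Image,
    )

    PIPELINE_TYPES = {
        "image2image": AutoPipelineForImage2Image,
        "inpainting": AutoPipelineForInpainting,
    }
    cache: dict = {}

    def get_pipeline(base_pipe, ability: str, controlnet_name=None):
        key = (ability, controlnet_name)
        if key not in cache:
            # from_pipe reuses the already-loaded components instead of
            # reloading weights from disk
            cache[key] = PIPELINE_TYPES[ability].from_pipe(base_pipe)
        return cache[key]

    base = AutoPipelineForText2Image.from_pretrained("some/sd-model")  # placeholder
    i2i = get_pipeline(base, "image2image")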
@@ -132,22 +190,24 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         else:
             raise ValueError(f"Unknown ability: {self._abilities}")

-        controlnet = self._kwargs.get("controlnet")
-        if controlnet is not None:
-            from diffusers import ControlNetModel
-
-            logger.debug("Loading controlnet %s", controlnet)
-            self._kwargs["controlnet"] = ControlNetModel.from_pretrained(controlnet)
-
-        torch_dtype = self._kwargs.get("torch_dtype")
+        self._torch_dtype = torch_dtype = self._kwargs.get("torch_dtype")
         if sys.platform != "darwin" and torch_dtype is None:
             # The following params crashes on Mac M2
-            self._kwargs["torch_dtype"] = torch.float16
+            self._torch_dtype = self._kwargs["torch_dtype"] = torch.float16
             self._kwargs["variant"] = "fp16"
             self._kwargs["use_safetensors"] = True
         if isinstance(torch_dtype, str):
             self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)

+        controlnet = self._kwargs.get("controlnet")
+        if controlnet is not None:
+            if isinstance(controlnet, tuple):
+                self._kwargs["controlnet"] = self._get_controlnet_model(*controlnet)
+            else:
+                self._kwargs["controlnet"] = [
+                    self._get_controlnet_model(*cn) for cn in controlnet
+                ]
+
         quantize_text_encoder = self._kwargs.pop("quantize_text_encoder", None)
         if quantize_text_encoder:
             try:
@@ -193,27 +253,45 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             self._model_path,
             **self._kwargs,
         )
-        if self._kwargs.get("deepcache", True):
-            # NOTE: DeepCache should be loaded first before cpu_offloading
+        self._load_to_device(self._model)
+        self._apply_lora()
+
+        if self._kwargs.get("deepcache", False):
             try:
                 from DeepCache import DeepCacheSDHelper
-
-                helper = DeepCacheSDHelper(pipe=self._model)
-                helper.set_params(cache_interval=3, cache_branch_id=0)
-                helper.enable()
             except ImportError:
-                logger.debug("deepcache is not installed")
-                pass
+                error_message = "Failed to import module 'deepcache' when you launch with deepcache=True"
+                installation_guide = [
+                    "Please make sure 'deepcache' is installed. ",
+                    "You can install it by `pip install deepcache`\n",
+                ]
+
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+            else:
+                self._deepcache_helper = helper = DeepCacheSDHelper()
+                helper.set_params(
+                    cache_interval=self._kwargs.get("deepcache_cache_interval", 3),
+                    cache_branch_id=self._kwargs.get("deepcache_cache_branch_id", 0),
+                )

+    def _load_to_device(self, model):
         if self._kwargs.get("cpu_offload", False):
             logger.debug("CPU offloading model")
-            self._model.enable_model_cpu_offload()
+            model.enable_model_cpu_offload()
+        elif self._kwargs.get("sequential_cpu_offload", False):
+            logger.debug("CPU sequential offloading model")
+            model.enable_sequential_cpu_offload()
         elif not self._kwargs.get("device_map"):
             logger.debug("Loading model to available device")
-            self._model = move_model_to_available_device(self._model)
+            model = move_model_to_available_device(self._model)
         # Recommended if your computer has < 64 GB of RAM
-        self._model.enable_attention_slicing()
-        self._apply_lora()
+        if self._kwargs.get("attention_slicing", True):
+            model.enable_attention_slicing()
+        if self._kwargs.get("vae_tiling", False):
+            model.enable_vae_tiling()
+
+    def get_max_num_images_for_batching(self):
+        return self._kwargs.get("max_num_images", 16)

     @staticmethod
     def _get_scheduler(model: Any, sampler_name: str):
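
Note: `deepcache` now defaults to off and raises instead of silently skipping when the dependency is missing, and `_load_to_device` gains `sequential_cpu_offload`, `attention_slicing`, and `vae_tiling` switches. A hedged sketch of passing these kwargs at launch time via the RESTful client; the endpoint and model name are assumptions:

    from xinference.client import Client

    client = Client("http://localhost:9997")  # assumed local endpoint
    # Extra kwargs are forwarded to DiffusionModel; names match the diff above.
    model_uid = client.launch_model(
        model_name="sd3-medium",  # assumed model name
        model_type="image",
        deepcache=True,  # requires `pip install deepcache`
        deepcache_cache_interval=3,
        sequential_cpu_offload=True,
        vae_tiling=True,
    )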
@@ -224,61 +302,78 @@ class DiffusionModel(SDAPIDiffusionModelMixin):

         import diffusers

+        kwargs = {}
+        if (
+            sampler_name.startswith("DPM++")
+            and "final_sigmas_type" not in model.scheduler.config
+        ):
+            # `final_sigmas_type` will be set as `zero` by default which will cause error
+            kwargs["final_sigmas_type"] = "sigma_min"
+
         # see https://github.com/huggingface/diffusers/issues/4167
         # to get A1111 <> Diffusers Scheduler mapping
         if sampler_name == "DPM++ 2M":
             return diffusers.DPMSolverMultistepScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "DPM++ 2M Karras":
             return diffusers.DPMSolverMultistepScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "DPM++ 2M SDE":
             return diffusers.DPMSolverMultistepScheduler.from_config(
-                model.scheduler.config, algorithm_type="sde-dpmsolver++"
+                model.scheduler.config, algorithm_type="sde-dpmsolver++", **kwargs
             )
         elif sampler_name == "DPM++ 2M SDE Karras":
             return diffusers.DPMSolverMultistepScheduler.from_config(
                 model.scheduler.config,
                 algorithm_type="sde-dpmsolver++",
                 use_karras_sigmas=True,
+                **kwargs,
             )
         elif sampler_name == "DPM++ SDE":
             return diffusers.DPMSolverSinglestepScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "DPM++ SDE Karras":
             return diffusers.DPMSolverSinglestepScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "DPM2":
-            return diffusers.KDPM2DiscreteScheduler.from_config(model.scheduler.config)
+            return diffusers.KDPM2DiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "DPM2 Karras":
             return diffusers.KDPM2DiscreteScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "DPM2 a":
             return diffusers.KDPM2AncestralDiscreteScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "DPM2 a Karras":
             return diffusers.KDPM2AncestralDiscreteScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "Euler":
-            return diffusers.EulerDiscreteScheduler.from_config(model.scheduler.config)
+            return diffusers.EulerDiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "Euler a":
             return diffusers.EulerAncestralDiscreteScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "Heun":
-            return diffusers.HeunDiscreteScheduler.from_config(model.scheduler.config)
+            return diffusers.HeunDiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "LMS":
-            return diffusers.LMSDiscreteScheduler.from_config(model.scheduler.config)
+            return diffusers.LMSDiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "LMS Karras":
             return diffusers.LMSDiscreteScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         else:
             raise ValueError(f"Unknown sampler: {sampler_name}")
@@ -298,27 +393,70 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         else:
             yield

+    @staticmethod
+    @contextlib.contextmanager
+    def _release_after():
+        from ....device_utils import empty_cache
+
+        try:
+            yield
+        finally:
+            gc.collect()
+            empty_cache()
+
+    @contextlib.contextmanager
+    def _wrap_deepcache(self, model: Any):
+        if self._deepcache_helper:
+            self._deepcache_helper.pipe = model
+            self._deepcache_helper.enable()
+        try:
+            yield
+        finally:
+            if self._deepcache_helper:
+                self._deepcache_helper.disable()
+                self._deepcache_helper.pipe = None
+
+    @staticmethod
+    def _process_progressor(kwargs: dict):
+        import diffusers
+
+        progressor: Progressor = kwargs.pop("progressor", None)
+
+        def report_status_callback(
+            pipe: diffusers.DiffusionPipeline,
+            step: int,
+            timestep: int,
+            callback_kwargs: dict,
+        ):
+            num_steps = pipe.num_timesteps
+            progressor.set_progress((step + 1) / num_steps)
+
+            return callback_kwargs
+
+        if progressor and progressor.request_id:
+            kwargs["callback_on_step_end"] = report_status_callback
+
     def _call_model(
         self,
         response_format: str,
         model=None,
         **kwargs,
     ):
-        import gc
-
-        from ....device_utils import empty_cache
-
         model = model if model is not None else self._model
         is_padded = kwargs.pop("is_padded", None)
         origin_size = kwargs.pop("origin_size", None)
         seed = kwargs.pop("seed", None)
-        if seed is not None:
+        return_images = kwargs.pop("_return_images", None)
+        if seed is not None and seed != -1:
             kwargs["generator"] = generator = torch.Generator(device=get_available_device())  # type: ignore
             if seed != -1:
                 kwargs["generator"] = generator.manual_seed(seed)
         sampler_name = kwargs.pop("sampler_name", None)
+        self._process_progressor(kwargs)
         assert callable(model)
-        with self._reset_when_done(model, sampler_name):
+        with self._reset_when_done(
+            model, sampler_name
+        ), self._release_after(), self._wrap_deepcache(model):
             logger.debug("stable diffusion args: %s, model: %s", kwargs, model)
             self._filter_kwargs(model, kwargs)
             images = model(**kwargs).images
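
Note: `_process_progressor` reports per-step progress through diffusers' `callback_on_step_end` hook. The hook can be exercised standalone; this sketch swaps the `Progressor` for a print statement and uses a placeholder model path:

    import diffusers

    def progress_callback(pipe, step, timestep, callback_kwargs):
        # num_timesteps is set on the pipeline once the scheduler is prepared
        print(f"progress: {(step + 1) / pipe.num_timesteps:.0%}")
        return callback_kwargs  # the hook must return the kwargs dict

    pipe = diffusers.AutoPipelineForText2Image.from_pretrained("some/sd-model")
    image = pipe("a cat", callback_on_step_end=progress_callback).images[0]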
@@ -331,32 +469,10 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
                 new_images.append(img.crop((0, 0, x, y)))
             images = new_images

-        # clean cache
-        gc.collect()
-        empty_cache()
-
-        if response_format == "url":
-            os.makedirs(XINFERENCE_IMAGE_DIR, exist_ok=True)
-            image_list = []
-            with ThreadPoolExecutor() as executor:
-                for img in images:
-                    path = os.path.join(XINFERENCE_IMAGE_DIR, uuid.uuid4().hex + ".jpg")
-                    image_list.append(Image(url=path, b64_json=None))
-                    executor.submit(img.save, path, "jpeg")
-            return ImageList(created=int(time.time()), data=image_list)
-        elif response_format == "b64_json":
-
-            def _gen_base64_image(_img):
-                buffered = BytesIO()
-                _img.save(buffered, format="jpeg")
-                return base64.b64encode(buffered.getvalue()).decode()
-
-            with ThreadPoolExecutor() as executor:
-                results = list(map(partial(executor.submit, _gen_base64_image), images))  # type: ignore
-                image_list = [Image(url=None, b64_json=s.result()) for s in results]  # type: ignore
-            return ImageList(created=int(time.time()), data=image_list)
-        else:
-            raise ValueError(f"Unsupported response format: {response_format}")
+        if return_images:
+            return images
+
+        return handle_image_result(response_format, images)

     @classmethod
     def _filter_kwargs(cls, model, kwargs: dict):
@@ -378,15 +494,13 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         response_format: str = "url",
         **kwargs,
     ):
-        # References:
-        # https://huggingface.co/docs/diffusers/main/en/api/pipelines/controlnet_sdxl
         width, height = map(int, re.split(r"[^\d]+", size))
         generate_kwargs = self._model_spec.default_generate_config.copy()  # type: ignore
         generate_kwargs.update({k: v for k, v in kwargs.items() if v is not None})
+        generate_kwargs["width"], generate_kwargs["height"] = width, height
+
         return self._call_model(
             prompt=prompt,
-            height=height,
-            width=width,
             num_images_per_prompt=n,
             response_format=response_format,
             **generate_kwargs,
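
Note: `text_to_image` now folds the parsed `size` into `generate_kwargs` instead of passing `width`/`height` as separate arguments. The size parsing itself accepts any non-digit separator:

    import re

    # "1024*768", "1024x768", and "1024,768" all parse the same way
    width, height = map(int, re.split(r"[^\d]+", "1024x768"))
    assert (width, height) == (1024, 768)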
@@ -409,19 +523,13 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         response_format: str = "url",
         **kwargs,
     ):
-        if "controlnet" in self._kwargs:
+        if self._kwargs.get("controlnet"):
             model = self._model
         else:
-            if "image2image" not in self._abilities:
+            ability = "image2image"
+            if ability not in self._abilities:
                 raise RuntimeError(f"{self._model_uid} does not support image2image")
-            if self._i2i_model is not None:
-                model = self._i2i_model
-            else:
-                from diffusers import AutoPipelineForImage2Image
-
-                self._i2i_model = model = AutoPipelineForImage2Image.from_pipe(
-                    self._model
-                )
+            model = self._get_model(ability)

         if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
             # Model like SD3 image to image requires image's height and width is times of 16
@@ -462,24 +570,23 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         response_format: str = "url",
         **kwargs,
     ):
-        if "inpainting" not in self._abilities:
+        ability = "inpainting"
+        if ability not in self._abilities:
             raise RuntimeError(f"{self._model_uid} does not support inpainting")

         if (
             "text2image" in self._abilities or "image2image" in self._abilities
         ) and self._model is not None:
-            from diffusers import AutoPipelineForInpainting
-
-            if self._inpainting_model is not None:
-                model = self._inpainting_model
-            else:
-                model = self._inpainting_model = AutoPipelineForInpainting.from_pipe(
-                    self._model
-                )
+            model = self._get_model(ability)
         else:
             model = self._model

-        width, height = map(int, re.split(r"[^\d]+", size))
+        if mask_blur := kwargs.pop("mask_blur", None):
+            logger.debug("Process mask image with mask_blur: %s", mask_blur)
+            mask_image = model.mask_processor.blur(mask_image, blur_factor=mask_blur)  # type: ignore
+
+        if "width" not in kwargs:
+            kwargs["width"], kwargs["height"] = map(int, re.split(r"[^\d]+", size))

         if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
             # Model like SD3 inpainting requires image's height and width is times of 16
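
Note: the new `mask_blur` option softens mask edges via the pipeline's mask processor before inpainting. A standalone sketch of the underlying diffusers call; the paths and blur factor are illustrative:

    from PIL import Image
    from diffusers import AutoPipelineForInpainting

    pipe = AutoPipelineForInpainting.from_pretrained("some/inpaint-model")  # placeholder
    mask = Image.open("mask.png")  # placeholder path
    # VaeImageProcessor.blur applies a Gaussian blur with the given factor
    blurred_mask = pipe.mask_processor.blur(mask, blur_factor=8)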
@@ -492,14 +599,12 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
                 mask_image, multiple=int(padding_image_to_multiple)
             )
             # calculate actual image size after padding
-            width, height = image.size
+            kwargs["width"], kwargs["height"] = image.size

         return self._call_model(
             image=image,
             mask_image=mask_image,
             prompt=prompt,
-            height=height,
-            width=width,
             num_images_per_prompt=n,
             response_format=response_format,
             model=model,
--- a/xinference/model/image/utils.py
+++ b/xinference/model/image/utils.py
@@ -11,16 +11,52 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Optional
+import base64
+import os
+import time
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from functools import partial
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional

-from .core import ImageModelFamilyV1
+from ...constants import XINFERENCE_IMAGE_DIR
+from ...types import Image, ImageList
+
+if TYPE_CHECKING:
+    from .core import ImageModelFamilyV1


 def get_model_version(
-    image_model: ImageModelFamilyV1, controlnet: Optional[ImageModelFamilyV1]
+    image_model: "ImageModelFamilyV1", controlnet: Optional["ImageModelFamilyV1"]
 ) -> str:
     return (
         image_model.model_name
         if controlnet is None
         else f"{image_model.model_name}--{controlnet.model_name}"
     )
+
+
+def handle_image_result(response_format: str, images) -> ImageList:
+    if response_format == "url":
+        os.makedirs(XINFERENCE_IMAGE_DIR, exist_ok=True)
+        image_list = []
+        with ThreadPoolExecutor() as executor:
+            for img in images:
+                path = os.path.join(XINFERENCE_IMAGE_DIR, uuid.uuid4().hex + ".jpg")
+                image_list.append(Image(url=path, b64_json=None))
+                executor.submit(img.save, path, "jpeg")
+        return ImageList(created=int(time.time()), data=image_list)
+    elif response_format == "b64_json":
+
+        def _gen_base64_image(_img):
+            buffered = BytesIO()
+            _img.save(buffered, format="jpeg")
+            return base64.b64encode(buffered.getvalue()).decode()
+
+        with ThreadPoolExecutor() as executor:
+            results = list(map(partial(executor.submit, _gen_base64_image), images))  # type: ignore
+            image_list = [Image(url=None, b64_json=s.result()) for s in results]  # type: ignore
+        return ImageList(created=int(time.time()), data=image_list)
+    else:
+        raise ValueError(f"Unsupported response format: {response_format}")
--- a/xinference/model/llm/__init__.py
+++ b/xinference/model/llm/__init__.py
@@ -146,6 +146,7 @@ def _install():
     from .transformers.internlm2 import Internlm2PytorchChatModel
     from .transformers.minicpmv25 import MiniCPMV25Model
     from .transformers.minicpmv26 import MiniCPMV26Model
+    from .transformers.opt import OptPytorchModel
     from .transformers.qwen2_audio import Qwen2AudioChatModel
     from .transformers.qwen2_vl import Qwen2VLChatModel
     from .transformers.qwen_vl import QwenVLChatModel
@@ -190,6 +191,7 @@ def _install():
             Glm4VModel,
             DeepSeekV2PytorchModel,
             DeepSeekV2PytorchChatModel,
+            OptPytorchModel,
         ]
     )
     if OmniLMMModel:  # type: ignore
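
Note: with `OptPytorchModel` registered in `_install()`, OPT models load through the generic transformers path. A hedged launch sketch; the endpoint and the size value are assumptions:

    from xinference.client import Client

    client = Client("http://localhost:9997")  # assumed local endpoint
    model_uid = client.launch_model(
        model_name="opt",
        model_type="LLM",
        model_engine="transformers",
        model_size_in_billions=1,  # assumed size entry in the family spec
    )
    model = client.get_model(model_uid)
    print(model.generate("The weather today is", generate_config={"max_tokens": 16}))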