xinference 0.15.3__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/__init__.py +0 -4
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +29 -2
- xinference/client/restful/restful_client.py +10 -0
- xinference/constants.py +7 -3
- xinference/core/image_interface.py +76 -23
- xinference/core/model.py +158 -46
- xinference/core/progress_tracker.py +187 -0
- xinference/core/scheduler.py +10 -7
- xinference/core/supervisor.py +11 -0
- xinference/core/utils.py +9 -0
- xinference/core/worker.py +1 -0
- xinference/deploy/supervisor.py +4 -0
- xinference/model/__init__.py +4 -0
- xinference/model/audio/chattts.py +2 -1
- xinference/model/audio/core.py +0 -2
- xinference/model/audio/model_spec.json +8 -0
- xinference/model/audio/model_spec_modelscope.json +9 -0
- xinference/model/image/core.py +6 -7
- xinference/model/image/scheduler/__init__.py +13 -0
- xinference/model/image/scheduler/flux.py +533 -0
- xinference/model/image/sdapi.py +35 -4
- xinference/model/image/stable_diffusion/core.py +215 -110
- xinference/model/image/utils.py +39 -3
- xinference/model/llm/__init__.py +2 -0
- xinference/model/llm/llm_family.json +185 -17
- xinference/model/llm/llm_family_modelscope.json +124 -12
- xinference/model/llm/transformers/chatglm.py +104 -0
- xinference/model/llm/transformers/cogvlm2.py +2 -1
- xinference/model/llm/transformers/cogvlm2_video.py +2 -0
- xinference/model/llm/transformers/core.py +43 -113
- xinference/model/llm/transformers/deepseek_v2.py +0 -226
- xinference/model/llm/transformers/deepseek_vl.py +2 -0
- xinference/model/llm/transformers/glm4v.py +2 -1
- xinference/model/llm/transformers/intern_vl.py +2 -0
- xinference/model/llm/transformers/internlm2.py +3 -95
- xinference/model/llm/transformers/minicpmv25.py +2 -0
- xinference/model/llm/transformers/minicpmv26.py +2 -0
- xinference/model/llm/transformers/omnilmm.py +2 -0
- xinference/model/llm/transformers/opt.py +68 -0
- xinference/model/llm/transformers/qwen2_audio.py +11 -4
- xinference/model/llm/transformers/qwen2_vl.py +2 -28
- xinference/model/llm/transformers/qwen_vl.py +2 -1
- xinference/model/llm/transformers/utils.py +36 -283
- xinference/model/llm/transformers/yi_vl.py +2 -0
- xinference/model/llm/utils.py +60 -16
- xinference/model/llm/vllm/core.py +68 -9
- xinference/model/llm/vllm/utils.py +0 -1
- xinference/model/utils.py +7 -4
- xinference/model/video/core.py +0 -2
- xinference/utils.py +2 -3
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.e51a356d.js → main.f7da0140.js} +3 -3
- xinference/web/ui/build/static/js/main.f7da0140.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +1 -0
- {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/METADATA +38 -6
- {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/RECORD +63 -59
- xinference/web/ui/build/static/js/main.e51a356d.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +0 -1
- /xinference/web/ui/build/static/js/{main.e51a356d.js.LICENSE.txt → main.f7da0140.js.LICENSE.txt} +0 -0
- {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/LICENSE +0 -0
- {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/WHEEL +0 -0
- {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/top_level.txt +0 -0
xinference/model/image/stable_diffusion/core.py
CHANGED
@@ -12,31 +12,27 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import base64
 import contextlib
+import gc
 import inspect
+import itertools
 import logging
-import os
 import re
 import sys
-import time
-import uuid
 import warnings
-from concurrent.futures import ThreadPoolExecutor
-from functools import partial
-from io import BytesIO
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
 import PIL.Image
 import torch
 from PIL import ImageOps
 
-from ....constants import XINFERENCE_IMAGE_DIR
 from ....device_utils import get_available_device, move_model_to_available_device
-from ....types import Image, ImageList, LoRA
+from ....types import LoRA
 from ..sdapi import SDAPIDiffusionModelMixin
+from ..utils import handle_image_result
 
 if TYPE_CHECKING:
+    from ....core.progress_tracker import Progressor
     from ..core import ImageModelFamilyV1
 
 logger = logging.getLogger(__name__)
@@ -93,16 +89,21 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         self._model_uid = model_uid
         self._model_path = model_path
         self._device = device
-        # when a model has text2image ability,
-        # it will be loaded as AutoPipelineForText2Image
-        # for image2image and inpainting,
-        # we convert to the corresponding model
+        # model info when loading
         self._model = None
-        self._i2i_model = None  # image to image model
-        self._inpainting_model = None  # inpainting model
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
+        # deepcache
+        self._deepcache_helper = None
+        # when a model has text2image ability,
+        # it will be loaded as AutoPipelineForText2Image
+        # for image2image and inpainting,
+        # we convert to the corresponding model
+        self._torch_dtype = None
+        self._ability_to_models: Dict[Tuple[str, Any], Any] = {}
+        self._controlnet_models: Dict[str, Any] = {}
+        # info
         self._model_spec = model_spec
         self._abilities = model_spec.model_ability or []  # type: ignore
         self._kwargs = kwargs
@@ -111,6 +112,63 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
     def model_ability(self):
         return self._abilities
 
+    @staticmethod
+    def _get_pipeline_type(ability: str) -> type:
+        if ability == "text2image":
+            from diffusers import AutoPipelineForText2Image as AutoPipelineModel
+        elif ability == "image2image":
+            from diffusers import AutoPipelineForImage2Image as AutoPipelineModel
+        elif ability == "inpainting":
+            from diffusers import AutoPipelineForInpainting as AutoPipelineModel
+        else:
+            raise ValueError(f"Unknown ability: {ability}")
+        return AutoPipelineModel
+
+    def _get_controlnet_model(self, name: str, path: str):
+        from diffusers import ControlNetModel
+
+        try:
+            return self._controlnet_models[name]
+        except KeyError:
+            logger.debug("Loading controlnet %s, from %s", name, path)
+            model = ControlNetModel.from_pretrained(path, torch_dtype=self._torch_dtype)
+            self._controlnet_models[name] = model
+            return model
+
+    def _get_model(
+        self,
+        ability: str,
+        controlnet_name: Optional[Union[str, List[str]]] = None,
+        controlnet_path: Optional[Union[str, List[str]]] = None,
+    ):
+        try:
+            return self._ability_to_models[ability, controlnet_name]
+        except KeyError:
+            model_type = self._get_pipeline_type(ability)
+
+            assert self._model is not None
+
+            if controlnet_name:
+                assert controlnet_path
+                if isinstance(controlnet_name, (list, tuple)):
+                    controlnet = []
+                    # multiple controlnet
+                    for name, path in itertools.zip_longest(
+                        controlnet_name, controlnet_path
+                    ):
+                        controlnet.append(self._get_controlnet_model(name, path))
+                else:
+                    controlnet = self._get_controlnet_model(
+                        controlnet_name, controlnet_path
+                    )
+                model = model_type.from_pipe(self._model, controlnet=controlnet)
+            else:
+                model = model_type.from_pipe(self._model)
+            self._load_to_device(model)
+
+            self._ability_to_models[ability, controlnet_name] = model
+            return model
+
     def _apply_lora(self):
         if self._lora_model is not None:
             logger.info(
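The new `_get_model` replaces the old `_i2i_model`/`_inpainting_model` fields with a cache keyed by `(ability, controlnet_name)`, deriving secondary pipelines from the already loaded one via `from_pipe` so the weights are shared rather than reloaded. A minimal sketch of that idea outside the class, assuming diffusers is installed; the checkpoint path and the simplified cache key are illustrative:

# Minimal sketch of the caching idea behind _get_model(): derive an
# image2image pipeline from an already loaded text2image pipeline with
# from_pipe() so components are shared, and keep it in a dict keyed by
# ability (the real cache also keys on controlnet_name).
from diffusers import AutoPipelineForImage2Image, AutoPipelineForText2Image

_pipes = {}  # mirrors self._ability_to_models


def get_pipe(base, ability: str):
    if ability not in _pipes:
        if ability == "image2image":
            _pipes[ability] = AutoPipelineForImage2Image.from_pipe(base)
        else:
            raise ValueError(f"Unknown ability: {ability}")
    return _pipes[ability]


base_pipe = AutoPipelineForText2Image.from_pretrained("path/to/sd-checkpoint")
i2i_pipe = get_pipe(base_pipe, "image2image")  # shares components with base_pipe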
@@ -132,22 +190,24 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         else:
             raise ValueError(f"Unknown ability: {self._abilities}")
 
-
-        if controlnet is not None:
-            from diffusers import ControlNetModel
-
-            logger.debug("Loading controlnet %s", controlnet)
-            self._kwargs["controlnet"] = ControlNetModel.from_pretrained(controlnet)
-
-        torch_dtype = self._kwargs.get("torch_dtype")
+        self._torch_dtype = torch_dtype = self._kwargs.get("torch_dtype")
         if sys.platform != "darwin" and torch_dtype is None:
             # The following params crashes on Mac M2
-            self._kwargs["torch_dtype"] = torch.float16
+            self._torch_dtype = self._kwargs["torch_dtype"] = torch.float16
             self._kwargs["variant"] = "fp16"
             self._kwargs["use_safetensors"] = True
         if isinstance(torch_dtype, str):
             self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)
 
+        controlnet = self._kwargs.get("controlnet")
+        if controlnet is not None:
+            if isinstance(controlnet, tuple):
+                self._kwargs["controlnet"] = self._get_controlnet_model(*controlnet)
+            else:
+                self._kwargs["controlnet"] = [
+                    self._get_controlnet_model(*cn) for cn in controlnet
+                ]
+
         quantize_text_encoder = self._kwargs.pop("quantize_text_encoder", None)
         if quantize_text_encoder:
             try:
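`load()` now resolves the `controlnet` launch kwarg through `_get_controlnet_model`, so each ControlNet is loaded once with the pipeline's dtype and then handed to the pipeline constructor. A rough, hedged equivalent of what ends up happening, assuming an SD-style checkpoint plus a matching ControlNet; both paths are placeholders:

# Sketch only: load a ControlNet once and pass it to the text2image pipeline,
# mirroring how load() populates self._kwargs["controlnet"].
import torch
from diffusers import AutoPipelineForText2Image, ControlNetModel

controlnet = ControlNetModel.from_pretrained(
    "path/to/controlnet", torch_dtype=torch.float16
)
pipe = AutoPipelineForText2Image.from_pretrained(
    "path/to/sd-checkpoint", controlnet=controlnet, torch_dtype=torch.float16
)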
@@ -193,27 +253,45 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             self._model_path,
             **self._kwargs,
         )
-
-
+        self._load_to_device(self._model)
+        self._apply_lora()
+
+        if self._kwargs.get("deepcache", False):
             try:
                 from DeepCache import DeepCacheSDHelper
-
-                helper = DeepCacheSDHelper(pipe=self._model)
-                helper.set_params(cache_interval=3, cache_branch_id=0)
-                helper.enable()
             except ImportError:
-
-
+                error_message = "Failed to import module 'deepcache' when you launch with deepcache=True"
+                installation_guide = [
+                    "Please make sure 'deepcache' is installed. ",
+                    "You can install it by `pip install deepcache`\n",
+                ]
+
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+            else:
+                self._deepcache_helper = helper = DeepCacheSDHelper()
+                helper.set_params(
+                    cache_interval=self._kwargs.get("deepcache_cache_interval", 3),
+                    cache_branch_id=self._kwargs.get("deepcache_cache_branch_id", 0),
+                )
 
+    def _load_to_device(self, model):
         if self._kwargs.get("cpu_offload", False):
             logger.debug("CPU offloading model")
-            self._model.enable_model_cpu_offload()
+            model.enable_model_cpu_offload()
+        elif self._kwargs.get("sequential_cpu_offload", False):
+            logger.debug("CPU sequential offloading model")
+            model.enable_sequential_cpu_offload()
         elif not self._kwargs.get("device_map"):
             logger.debug("Loading model to available device")
-            self._model = move_model_to_available_device(self._model)
+            model = move_model_to_available_device(self._model)
         # Recommended if your computer has < 64 GB of RAM
-        self._model.enable_attention_slicing()
-        self._apply_lora()
+        if self._kwargs.get("attention_slicing", True):
+            model.enable_attention_slicing()
+        if self._kwargs.get("vae_tiling", False):
+            model.enable_vae_tiling()
+
+    def get_max_num_images_for_batching(self):
+        return self._kwargs.get("max_num_images", 16)
 
     @staticmethod
     def _get_scheduler(model: Any, sampler_name: str):
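DeepCache is now configured once at load time but enabled per request: `load()` only constructs the helper and sets its parameters, while `_wrap_deepcache` (added further below) attaches it to whichever pipeline serves the call and always detaches it afterwards. A minimal sketch of that enable/disable cycle, with a placeholder checkpoint path and prompt:

# Sketch of the new DeepCache flow around a single generation call.
from DeepCache import DeepCacheSDHelper
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained("path/to/sd-checkpoint")

helper = DeepCacheSDHelper()
helper.set_params(cache_interval=3, cache_branch_id=0)  # the defaults used above

helper.pipe = pipe   # attach to the pipeline chosen for this request
helper.enable()      # cache UNet features across denoising steps
try:
    images = pipe(prompt="a cat reading a diff").images
finally:
    helper.disable()  # always detach so other requests are unaffected
    helper.pipe = None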
@@ -224,61 +302,78 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
 
         import diffusers
 
+        kwargs = {}
+        if (
+            sampler_name.startswith("DPM++")
+            and "final_sigmas_type" not in model.scheduler.config
+        ):
+            # `final_sigmas_type` will be set as `zero` by default which will cause error
+            kwargs["final_sigmas_type"] = "sigma_min"
+
         # see https://github.com/huggingface/diffusers/issues/4167
         # to get A1111 <> Diffusers Scheduler mapping
         if sampler_name == "DPM++ 2M":
             return diffusers.DPMSolverMultistepScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "DPM++ 2M Karras":
             return diffusers.DPMSolverMultistepScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "DPM++ 2M SDE":
             return diffusers.DPMSolverMultistepScheduler.from_config(
-                model.scheduler.config, algorithm_type="sde-dpmsolver++"
+                model.scheduler.config, algorithm_type="sde-dpmsolver++", **kwargs
             )
         elif sampler_name == "DPM++ 2M SDE Karras":
             return diffusers.DPMSolverMultistepScheduler.from_config(
                 model.scheduler.config,
                 algorithm_type="sde-dpmsolver++",
                 use_karras_sigmas=True,
+                **kwargs,
             )
         elif sampler_name == "DPM++ SDE":
             return diffusers.DPMSolverSinglestepScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "DPM++ SDE Karras":
             return diffusers.DPMSolverSinglestepScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "DPM2":
-            return diffusers.KDPM2DiscreteScheduler.from_config(model.scheduler.config)
+            return diffusers.KDPM2DiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "DPM2 Karras":
             return diffusers.KDPM2DiscreteScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "DPM2 a":
             return diffusers.KDPM2AncestralDiscreteScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "DPM2 a Karras":
             return diffusers.KDPM2AncestralDiscreteScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         elif sampler_name == "Euler":
-            return diffusers.EulerDiscreteScheduler.from_config(model.scheduler.config)
+            return diffusers.EulerDiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "Euler a":
             return diffusers.EulerAncestralDiscreteScheduler.from_config(
-                model.scheduler.config
+                model.scheduler.config, **kwargs
             )
         elif sampler_name == "Heun":
-            return diffusers.HeunDiscreteScheduler.from_config(model.scheduler.config)
+            return diffusers.HeunDiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "LMS":
-            return diffusers.LMSDiscreteScheduler.from_config(model.scheduler.config)
+            return diffusers.LMSDiscreteScheduler.from_config(
+                model.scheduler.config, **kwargs
+            )
         elif sampler_name == "LMS Karras":
             return diffusers.LMSDiscreteScheduler.from_config(
-                model.scheduler.config, use_karras_sigmas=True
+                model.scheduler.config, use_karras_sigmas=True, **kwargs
             )
         else:
             raise ValueError(f"Unknown sampler: {sampler_name}")
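`_get_scheduler` still maps A1111 sampler names onto diffusers scheduler classes; the change threads an extra `final_sigmas_type="sigma_min"` into every `from_config` call for DPM++ samplers whose existing config lacks the field, because the default of `"zero"` would otherwise error. Outside the class that amounts to roughly the following, with an illustrative checkpoint path:

# Sketch of selecting "DPM++ 2M Karras" with the final_sigmas_type workaround.
import diffusers
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained("path/to/sd-checkpoint")

extra = {}
if "final_sigmas_type" not in pipe.scheduler.config:
    # older scheduler configs would otherwise default to "zero" and fail
    extra["final_sigmas_type"] = "sigma_min"

pipe.scheduler = diffusers.DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, use_karras_sigmas=True, **extra
)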
@@ -298,27 +393,70 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         else:
             yield
 
+    @staticmethod
+    @contextlib.contextmanager
+    def _release_after():
+        from ....device_utils import empty_cache
+
+        try:
+            yield
+        finally:
+            gc.collect()
+            empty_cache()
+
+    @contextlib.contextmanager
+    def _wrap_deepcache(self, model: Any):
+        if self._deepcache_helper:
+            self._deepcache_helper.pipe = model
+            self._deepcache_helper.enable()
+        try:
+            yield
+        finally:
+            if self._deepcache_helper:
+                self._deepcache_helper.disable()
+                self._deepcache_helper.pipe = None
+
+    @staticmethod
+    def _process_progressor(kwargs: dict):
+        import diffusers
+
+        progressor: Progressor = kwargs.pop("progressor", None)
+
+        def report_status_callback(
+            pipe: diffusers.DiffusionPipeline,
+            step: int,
+            timestep: int,
+            callback_kwargs: dict,
+        ):
+            num_steps = pipe.num_timesteps
+            progressor.set_progress((step + 1) / num_steps)
+
+            return callback_kwargs
+
+        if progressor and progressor.request_id:
+            kwargs["callback_on_step_end"] = report_status_callback
+
     def _call_model(
         self,
         response_format: str,
         model=None,
         **kwargs,
     ):
-        import gc
-
-        from ....device_utils import empty_cache
-
         model = model if model is not None else self._model
         is_padded = kwargs.pop("is_padded", None)
         origin_size = kwargs.pop("origin_size", None)
         seed = kwargs.pop("seed", None)
-        if seed is not None:
+        return_images = kwargs.pop("_return_images", None)
+        if seed is not None and seed != -1:
             kwargs["generator"] = generator = torch.Generator(device=get_available_device())  # type: ignore
             if seed != -1:
                 kwargs["generator"] = generator.manual_seed(seed)
         sampler_name = kwargs.pop("sampler_name", None)
+        self._process_progressor(kwargs)
         assert callable(model)
-        with self._reset_when_done(model, sampler_name):
+        with self._reset_when_done(
+            model, sampler_name
+        ), self._release_after(), self._wrap_deepcache(model):
             logger.debug("stable diffusion args: %s, model: %s", kwargs, model)
             self._filter_kwargs(model, kwargs)
             images = model(**kwargs).images
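`_process_progressor` wires the new progress tracker into diffusers' `callback_on_step_end` hook: after every denoising step the callback converts `(step, pipe.num_timesteps)` into a 0..1 fraction and reports it. A standalone sketch, with `print` standing in for `Progressor.set_progress` and an illustrative checkpoint path and prompt:

# Sketch of per-step progress reporting via callback_on_step_end.
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained("path/to/sd-checkpoint")


def report_status_callback(pipe, step, timestep, callback_kwargs):
    # the real callback forwards this fraction to the progress tracker
    print(f"progress: {(step + 1) / pipe.num_timesteps:.0%}")
    return callback_kwargs


images = pipe(
    prompt="a lighthouse at dusk",
    callback_on_step_end=report_status_callback,
).images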
@@ -331,32 +469,10 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
                 new_images.append(img.crop((0, 0, x, y)))
             images = new_images
 
-
-
-
-
-        if response_format == "url":
-            os.makedirs(XINFERENCE_IMAGE_DIR, exist_ok=True)
-            image_list = []
-            with ThreadPoolExecutor() as executor:
-                for img in images:
-                    path = os.path.join(XINFERENCE_IMAGE_DIR, uuid.uuid4().hex + ".jpg")
-                    image_list.append(Image(url=path, b64_json=None))
-                    executor.submit(img.save, path, "jpeg")
-            return ImageList(created=int(time.time()), data=image_list)
-        elif response_format == "b64_json":
-
-            def _gen_base64_image(_img):
-                buffered = BytesIO()
-                _img.save(buffered, format="jpeg")
-                return base64.b64encode(buffered.getvalue()).decode()
-
-            with ThreadPoolExecutor() as executor:
-                results = list(map(partial(executor.submit, _gen_base64_image), images))  # type: ignore
-                image_list = [Image(url=None, b64_json=s.result()) for s in results]  # type: ignore
-            return ImageList(created=int(time.time()), data=image_list)
-        else:
-            raise ValueError(f"Unsupported response format: {response_format}")
+        if return_images:
+            return images
+
+        return handle_image_result(response_format, images)
 
     @classmethod
     def _filter_kwargs(cls, model, kwargs: dict):
@@ -378,15 +494,13 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         response_format: str = "url",
         **kwargs,
     ):
-        # References:
-        # https://huggingface.co/docs/diffusers/main/en/api/pipelines/controlnet_sdxl
         width, height = map(int, re.split(r"[^\d]+", size))
         generate_kwargs = self._model_spec.default_generate_config.copy()  # type: ignore
         generate_kwargs.update({k: v for k, v in kwargs.items() if v is not None})
+        generate_kwargs["width"], generate_kwargs["height"] = width, height
+
         return self._call_model(
             prompt=prompt,
-            height=height,
-            width=width,
             num_images_per_prompt=n,
             response_format=response_format,
             **generate_kwargs,
@@ -409,19 +523,13 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         response_format: str = "url",
         **kwargs,
     ):
-        if "controlnet" in self._kwargs:
+        if self._kwargs.get("controlnet"):
             model = self._model
         else:
-            if "image2image" not in self._abilities:
+            ability = "image2image"
+            if ability not in self._abilities:
                 raise RuntimeError(f"{self._model_uid} does not support image2image")
-            if self._i2i_model is not None:
-                model = self._i2i_model
-            else:
-                from diffusers import AutoPipelineForImage2Image
-
-                self._i2i_model = model = AutoPipelineForImage2Image.from_pipe(
-                    self._model
-                )
+            model = self._get_model(ability)
 
         if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
             # Model like SD3 image to image requires image's height and width is times of 16
@@ -462,24 +570,23 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         response_format: str = "url",
         **kwargs,
     ):
-        if "inpainting" not in self._abilities:
+        ability = "inpainting"
+        if ability not in self._abilities:
             raise RuntimeError(f"{self._model_uid} does not support inpainting")
 
         if (
             "text2image" in self._abilities or "image2image" in self._abilities
         ) and self._model is not None:
-            from diffusers import AutoPipelineForInpainting
-
-            if self._inpainting_model is not None:
-                model = self._inpainting_model
-            else:
-                model = self._inpainting_model = AutoPipelineForInpainting.from_pipe(
-                    self._model
-                )
+            model = self._get_model(ability)
         else:
             model = self._model
 
-        width, height = map(int, re.split(r"[^\d]+", size))
+        if mask_blur := kwargs.pop("mask_blur", None):
+            logger.debug("Process mask image with mask_blur: %s", mask_blur)
+            mask_image = model.mask_processor.blur(mask_image, blur_factor=mask_blur)  # type: ignore
+
+        if "width" not in kwargs:
+            kwargs["width"], kwargs["height"] = map(int, re.split(r"[^\d]+", size))
 
         if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
             # Model like SD3 inpainting requires image's height and width is times of 16
@@ -492,14 +599,12 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
                 mask_image, multiple=int(padding_image_to_multiple)
             )
             # calculate actual image size after padding
-            width, height = image.size
+            kwargs["width"], kwargs["height"] = image.size
 
         return self._call_model(
             image=image,
             mask_image=mask_image,
             prompt=prompt,
-            height=height,
-            width=width,
             num_images_per_prompt=n,
             response_format=response_format,
             model=model,
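Inpainting now accepts a `mask_blur` kwarg and feathers the mask through the pipeline's `mask_processor` before running the model, instead of expecting a pre-blurred mask. Roughly, under the hood; paths, prompt and the blur value are illustrative:

# Sketch of the new mask_blur preprocessing for inpainting.
import PIL.Image
from diffusers import AutoPipelineForInpainting

pipe = AutoPipelineForInpainting.from_pretrained("path/to/inpainting-checkpoint")
image = PIL.Image.open("scene.png")
mask = PIL.Image.open("mask.png")

mask = pipe.mask_processor.blur(mask, blur_factor=8)  # feathered mask edges
result = pipe(prompt="restore the sky", image=image, mask_image=mask).images[0]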
xinference/model/image/utils.py
CHANGED
@@ -11,16 +11,52 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Optional
+import base64
+import os
+import time
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from functools import partial
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional
 
-from .core import ImageModelFamilyV1
+from ...constants import XINFERENCE_IMAGE_DIR
+from ...types import Image, ImageList
+
+if TYPE_CHECKING:
+    from .core import ImageModelFamilyV1
 
 
 def get_model_version(
-    image_model: ImageModelFamilyV1, controlnet: Optional[ImageModelFamilyV1]
+    image_model: "ImageModelFamilyV1", controlnet: Optional["ImageModelFamilyV1"]
 ) -> str:
     return (
         image_model.model_name
         if controlnet is None
         else f"{image_model.model_name}--{controlnet.model_name}"
     )
+
+
+def handle_image_result(response_format: str, images) -> ImageList:
+    if response_format == "url":
+        os.makedirs(XINFERENCE_IMAGE_DIR, exist_ok=True)
+        image_list = []
+        with ThreadPoolExecutor() as executor:
+            for img in images:
+                path = os.path.join(XINFERENCE_IMAGE_DIR, uuid.uuid4().hex + ".jpg")
+                image_list.append(Image(url=path, b64_json=None))
+                executor.submit(img.save, path, "jpeg")
+        return ImageList(created=int(time.time()), data=image_list)
+    elif response_format == "b64_json":
+
+        def _gen_base64_image(_img):
+            buffered = BytesIO()
+            _img.save(buffered, format="jpeg")
+            return base64.b64encode(buffered.getvalue()).decode()
+
+        with ThreadPoolExecutor() as executor:
+            results = list(map(partial(executor.submit, _gen_base64_image), images))  # type: ignore
+            image_list = [Image(url=None, b64_json=s.result()) for s in results]  # type: ignore
+        return ImageList(created=int(time.time()), data=image_list)
+    else:
+        raise ValueError(f"Unsupported response format: {response_format}")
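The URL/base64 response handling that previously lived inside `DiffusionModel._call_model` is now the shared `handle_image_result` helper, so other image code paths can reuse it. A small usage sketch, assuming xinference is importable; the blank test image stands in for real pipeline output:

# Sketch: convert a list of PIL images into an OpenAI-style ImageList.
import PIL.Image

from xinference.model.image.utils import handle_image_result

images = [PIL.Image.new("RGB", (64, 64), "white")]  # stand-in for pipeline output
result = handle_image_result("b64_json", images)
print(result.data[0].b64_json[:32])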
xinference/model/llm/__init__.py
CHANGED
@@ -146,6 +146,7 @@ def _install():
     from .transformers.internlm2 import Internlm2PytorchChatModel
     from .transformers.minicpmv25 import MiniCPMV25Model
     from .transformers.minicpmv26 import MiniCPMV26Model
+    from .transformers.opt import OptPytorchModel
     from .transformers.qwen2_audio import Qwen2AudioChatModel
     from .transformers.qwen2_vl import Qwen2VLChatModel
     from .transformers.qwen_vl import QwenVLChatModel
@@ -190,6 +191,7 @@ def _install():
             Glm4VModel,
             DeepSeekV2PytorchModel,
             DeepSeekV2PytorchChatModel,
+            OptPytorchModel,
         ]
     )
     if OmniLMMModel:  # type: ignore