xinference-1.9.0-py3-none-any.whl → xinference-1.10.0-py3-none-any.whl
This diff compares the contents of the two publicly released package versions as they appear in their public registry; it is provided for informational purposes only.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +415 -1
- xinference/constants.py +2 -0
- xinference/core/model.py +3 -4
- xinference/core/supervisor.py +29 -1
- xinference/core/worker.py +4 -1
- xinference/deploy/cmdline.py +2 -0
- xinference/deploy/test/test_cmdline.py +1 -1
- xinference/model/audio/core.py +5 -0
- xinference/model/audio/cosyvoice.py +0 -1
- xinference/model/audio/kokoro.py +1 -1
- xinference/model/audio/kokoro_zh.py +124 -0
- xinference/model/audio/model_spec.json +64 -20
- xinference/model/embedding/flag/core.py +5 -0
- xinference/model/embedding/llama_cpp/core.py +22 -19
- xinference/model/embedding/sentence_transformers/core.py +19 -4
- xinference/model/embedding/vllm/core.py +40 -8
- xinference/model/image/cache_manager.py +56 -0
- xinference/model/image/core.py +9 -0
- xinference/model/image/model_spec.json +116 -9
- xinference/model/image/stable_diffusion/core.py +141 -31
- xinference/model/llm/core.py +10 -0
- xinference/model/llm/llama_cpp/core.py +42 -40
- xinference/model/llm/llm_family.json +435 -23
- xinference/model/llm/llm_family.py +1 -0
- xinference/model/llm/mlx/core.py +52 -33
- xinference/model/llm/sglang/core.py +2 -44
- xinference/model/llm/tool_parsers/__init__.py +58 -0
- xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
- xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +128 -0
- xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
- xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
- xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
- xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
- xinference/model/llm/transformers/core.py +6 -12
- xinference/model/llm/utils.py +128 -46
- xinference/model/llm/vllm/core.py +8 -61
- xinference/model/rerank/core.py +3 -0
- xinference/model/rerank/sentence_transformers/core.py +1 -1
- xinference/model/rerank/vllm/core.py +56 -6
- xinference/model/utils.py +1 -2
- xinference/model/video/model_spec.json +95 -1
- xinference/thirdparty/cosyvoice/bin/export_jit.py +3 -4
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +49 -126
- xinference/thirdparty/cosyvoice/bin/{inference.py → inference_deprecated.py} +1 -0
- xinference/thirdparty/cosyvoice/bin/train.py +23 -3
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +8 -4
- xinference/thirdparty/cosyvoice/cli/frontend.py +4 -4
- xinference/thirdparty/cosyvoice/cli/model.py +53 -75
- xinference/thirdparty/cosyvoice/dataset/dataset.py +5 -18
- xinference/thirdparty/cosyvoice/dataset/processor.py +24 -25
- xinference/thirdparty/cosyvoice/flow/decoder.py +24 -433
- xinference/thirdparty/cosyvoice/flow/flow.py +6 -14
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +33 -145
- xinference/thirdparty/cosyvoice/hifigan/generator.py +169 -1
- xinference/thirdparty/cosyvoice/llm/llm.py +108 -17
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +14 -115
- xinference/thirdparty/cosyvoice/utils/common.py +20 -0
- xinference/thirdparty/cosyvoice/utils/executor.py +8 -4
- xinference/thirdparty/cosyvoice/utils/file_utils.py +45 -1
- xinference/thirdparty/cosyvoice/utils/losses.py +37 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +35 -1
- xinference/thirdparty/cosyvoice/utils/train_utils.py +24 -6
- xinference/thirdparty/cosyvoice/vllm/cosyvoice2.py +103 -0
- xinference/types.py +105 -2
- xinference/ui/gradio/chat_interface.py +2 -0
- xinference/ui/gradio/media_interface.py +353 -7
- xinference/ui/web/ui/build/asset-manifest.json +3 -3
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/js/main.1086c759.js +3 -0
- xinference/ui/web/ui/build/static/js/main.1086c759.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/3c5758bd12fa334294b1de0ff6b1a4bac8d963c45472eab9dc3e530d82aa6b3f.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +1 -0
- xinference/ui/web/ui/src/locales/en.json +2 -0
- xinference/ui/web/ui/src/locales/ja.json +2 -0
- xinference/ui/web/ui/src/locales/ko.json +2 -0
- xinference/ui/web/ui/src/locales/zh.json +2 -0
- {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/METADATA +16 -12
- {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/RECORD +86 -77
- xinference/ui/web/ui/build/static/js/main.4918643a.js +0 -3
- xinference/ui/web/ui/build/static/js/main.4918643a.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/3d2a89f0eccc1f90fc5036c9a1d587c2120e6a6b128aae31d1db7d6bad52722b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/89179f8f51887b9167721860a12412549ff04f78162e921a7b6aa6532646deb2.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/8e5cb82c2ff3299c6a44563fe6b1c5515c9750613c51bb63abee0b1d70fc5019.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/9dc5cfc67dd0617b0272aeef8651f1589b2155a4ff1fd72ad3166b217089b619.json +0 -1
- /xinference/ui/web/ui/build/static/js/{main.4918643a.js.LICENSE.txt → main.1086c759.js.LICENSE.txt} +0 -0
- {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/WHEEL +0 -0
- {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/top_level.txt +0 -0
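A large share of the new code lands in the xinference/model/llm/tool_parsers/ package listed above, which factors model-specific tool-call extraction (Qwen, GLM4, Llama 3, DeepSeek R1/V3) out of the shared chat utilities. Purely as an illustration of the kind of work such a parser does, and not the package's actual implementation, the sketch below pulls JSON tool calls out of the <tool_call>...</tool_call> tags that Qwen-style chat models emit; the function name and regex are hypothetical.

import json
import re
from typing import Any, Dict, List

# Hypothetical, simplified illustration; the real parsers live in
# xinference/model/llm/tool_parsers/ and handle more formats and edge cases.
TOOL_CALL_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)

def parse_qwen_style_tool_calls(text: str) -> List[Dict[str, Any]]:
    """Extract JSON bodies wrapped in <tool_call> tags from generated text."""
    calls: List[Dict[str, Any]] = []
    for match in TOOL_CALL_RE.finditer(text):
        try:
            calls.append(json.loads(match.group(1)))
        except json.JSONDecodeError:
            continue  # skip malformed tool calls instead of failing the whole reply
    return calls

print(parse_qwen_style_tool_calls(
    'Sure. <tool_call>{"name": "get_weather", "arguments": {"city": "Paris"}}</tool_call>'
))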
xinference/types.py
CHANGED
@@ -351,6 +351,11 @@ class ModelAndPrompt(BaseModel):
     prompt: str
 
 
+class ModelAndMessages(BaseModel):
+    model: str
+    messages: List[Dict[str, Any]]
+
+
 class CreateCompletionTorch(BaseModel):
     echo: bool = echo_field
     max_tokens: Optional[int] = max_tokens_field
@@ -371,7 +376,6 @@ class CreateCompletionTorch(BaseModel):
 # This type is for openai API compatibility
 CreateCompletionOpenAI: BaseModel
 
-
 from openai.types.completion_create_params import CompletionCreateParamsNonStreaming
 
 CreateCompletionOpenAI = create_model_from_typeddict(
@@ -395,7 +399,6 @@ class CreateChatModel(BaseModel):
 # Currently, chat calls generates, so the params share the same one.
 CreateChatCompletionTorch = CreateCompletionTorch
 
-
 from ._compat import CreateChatCompletionOpenAI
 
 
@@ -462,3 +465,103 @@ class PeftModelConfig:
             image_lora_load_kwargs=data.get("image_lora_load_kwargs"),
             image_lora_fuse_kwargs=data.get("image_lora_fuse_kwargs"),
         )
+
+
+# This type is for Anthropic API compatibility
+ANTHROPIC_AVAILABLE = False
+
+try:
+    from anthropic.types import ContentBlock, Usage
+
+    ANTHROPIC_AVAILABLE = True
+except ImportError:
+    ContentBlock = None
+    Usage = None
+
+# Use TYPE_CHECKING to avoid runtime issues with mypy
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    # For type checking, define the types as if Anthropic is available
+    from anthropic.types import ContentBlock as ContentBlock_
+    from anthropic.types import Usage as Usage_
+
+    class AnthropicMessage(TypedDict):
+        id: str
+        type: str
+        role: str
+        content: List[ContentBlock_]
+        model: str
+        stop_reason: str
+        stop_sequence: str
+        usage: Usage_
+        container: Dict[str, Any]
+
+    class MessageCreateParams(TypedDict):
+        model: str
+        messages: List[Dict[str, Any]]
+        max_tokens: int
+        stream: NotRequired[bool]
+        temperature: NotRequired[float]
+        top_p: NotRequired[float]
+        top_k: NotRequired[int]
+        stop_sequences: NotRequired[List[str]]
+        metadata: NotRequired[Dict[str, Any]]
+        tools: NotRequired[List[Dict[str, Any]]]
+        tool_choice: NotRequired[Union[str, Dict[str, Any]]]
+
+    CreateMessageAnthropic: BaseModel
+
+    class CreateMessage(
+        ModelAndMessages,
+    ):
+        pass
+
+else:
+    # Runtime definitions
+    if ANTHROPIC_AVAILABLE:
+
+        class AnthropicMessage(TypedDict):
+            id: str
+            type: str
+            role: str
+            content: List[ContentBlock]
+            model: str
+            stop_reason: str
+            stop_sequence: str
+            usage: Usage
+            container: Dict[str, Any]
+
+        class MessageCreateParams(TypedDict):
+            model: str
+            messages: List[Dict[str, Any]]
+            max_tokens: int
+            stream: NotRequired[bool]
+            temperature: NotRequired[float]
+            top_p: NotRequired[float]
+            top_k: NotRequired[int]
+            stop_sequences: NotRequired[List[str]]
+            metadata: NotRequired[Dict[str, Any]]
+            tools: NotRequired[List[Dict[str, Any]]]
+            tool_choice: NotRequired[Union[str, Dict[str, Any]]]
+
+        CreateMessageAnthropic: BaseModel = create_model_from_typeddict(
+            MessageCreateParams,
+        )
+        CreateMessageAnthropic = fix_forward_ref(CreateMessageAnthropic)
+
+        class CreateMessage(CreateMessageAnthropic):
+            pass
+
+    else:
+        # Define dummy types when Anthropic is not available
+        class AnthropicMessage:
+            pass
+
+        class MessageCreateParams:
+            pass
+
+        CreateMessageAnthropic = None
+
+        class CreateMessage:
+            pass
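The Anthropic-compatibility block added above mirrors the existing OpenAI-compatibility pattern in this file: when the anthropic package is importable, MessageCreateParams is turned into a pydantic model via create_model_from_typeddict and exposed as CreateMessageAnthropic; otherwise the names degrade to placeholders. Below is a minimal validation sketch under the assumption that anthropic is installed; the payload values are placeholders.

from xinference.types import ANTHROPIC_AVAILABLE, CreateMessageAnthropic

payload = {
    "model": "my-model",  # placeholder model name
    "messages": [{"role": "user", "content": "Hello"}],
    "max_tokens": 128,
    "temperature": 0.7,  # optional (NotRequired) field
}

if ANTHROPIC_AVAILABLE and CreateMessageAnthropic is not None:
    # create_model_from_typeddict yields a pydantic model, so this validates the fields
    request = CreateMessageAnthropic(**payload)
    # Support both pydantic v1 and v2 dump methods
    dump = request.model_dump() if hasattr(request, "model_dump") else request.dict()
    print(dump)
else:
    print("anthropic is not installed; CreateMessageAnthropic is None")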
xinference/ui/gradio/media_interface.py
CHANGED
@@ -224,6 +224,7 @@ class MediaInterface:
             guidance_scale: int,
             num_inference_steps: int,
             padding_image_to_multiple: int,
+            strength: float,
             sampler_name: Optional[str] = None,
             progress=gr.Progress(),
         ) -> PIL.Image.Image:
@@ -243,6 +244,10 @@ class MediaInterface:
                 None if num_inference_steps == -1 else num_inference_steps  # type: ignore
             )
             padding_image_to_multiple = None if padding_image_to_multiple == -1 else padding_image_to_multiple  # type: ignore
+            # Initialize kwargs and handle strength parameter
+            kwargs = {}
+            if strength is not None:
+                kwargs["strength"] = strength
             sampler_name = None if sampler_name == "default" else sampler_name
 
             bio = io.BytesIO()
@@ -267,6 +272,7 @@ class MediaInterface:
                         guidance_scale=guidance_scale,
                         padding_image_to_multiple=padding_image_to_multiple,
                         sampler_name=sampler_name,
+                        **kwargs,
                     )
                 except Exception as e:
                     exc = e
@@ -324,6 +330,9 @@ class MediaInterface:
                     padding_image_to_multiple = gr.Number(
                         label="Padding image to multiple", value=-1
                     )
+                    strength = gr.Slider(
+                        label="Strength", value=0.6, step=0.1, minimum=0.0, maximum=1.0
+                    )
                     sampler_name = gr.Dropdown(
                         choices=SAMPLING_METHODS,
                         value="default",
@@ -348,12 +357,311 @@ class MediaInterface:
                     guidance_scale,
                     num_inference_steps,
                     padding_image_to_multiple,
+                    strength,
                     sampler_name,
                 ],
                 outputs=output_gallery,
             )
         return image2image_inteface
 
+    def inpainting_interface(self) -> "gr.Blocks":
+        from ...model.image.stable_diffusion.core import SAMPLING_METHODS
+
+        def preview_mask(
+            image_editor_output: Dict[str, Any],
+        ) -> PIL.Image.Image:
+            """Preview the generated mask without submitting inpainting task"""
+            # Extract original image and mask from ImageEditor output
+            if not image_editor_output or "background" not in image_editor_output:
+                return PIL.Image.new(
+                    "L", (512, 512), 0
+                )  # Return black image if no input
+
+            # Get the original image (background)
+            original_image = image_editor_output["background"]
+
+            # Get the composite image which contains the edits
+            composite_image = image_editor_output.get("composite", original_image)
+
+            # Create mask from the differences between original and composite
+            # White areas in composite indicate regions to inpaint
+            if original_image.mode != "RGB":
+                original_image = original_image.convert("RGB")
+            if composite_image.mode != "RGB":
+                composite_image = composite_image.convert("RGB")
+
+            # Create mask by finding differences (white drawn areas)
+            mask_image = PIL.Image.new("L", original_image.size, 0)
+            orig_data = original_image.load()
+            comp_data = composite_image.load()
+            mask_data = mask_image.load()
+
+            for y in range(original_image.size[1]):
+                for x in range(original_image.size[0]):
+                    orig_pixel = orig_data[x, y]
+                    comp_pixel = comp_data[x, y]
+                    # If pixels are different, assume it's a drawn area (white for inpainting)
+                    if orig_pixel != comp_pixel:
+                        mask_data[x, y] = 255  # White for inpainting
+
+            return mask_image
+
+        def process_inpainting(
+            prompt: str,
+            negative_prompt: str,
+            image_editor_output: Dict[str, Any],
+            uploaded_mask: Optional[PIL.Image.Image],
+            n: int,
+            size_width: int,
+            size_height: int,
+            guidance_scale: int,
+            num_inference_steps: int,
+            padding_image_to_multiple: int,
+            strength: float,
+            sampler_name: Optional[str] = None,
+            progress=gr.Progress(),
+        ) -> List[PIL.Image.Image]:
+            from ...client import RESTfulClient
+
+            client = RESTfulClient(self.endpoint)
+            client._set_token(self.access_token)
+            model = client.get_model(self.model_uid)
+            assert isinstance(model, RESTfulImageModelHandle)
+
+            if size_width > 0 and size_height > 0:
+                size = f"{int(size_width)}*{int(size_height)}"
+            else:
+                size = None
+            guidance_scale = None if guidance_scale == -1 else guidance_scale  # type: ignore
+            num_inference_steps = (
+                None if num_inference_steps == -1 else num_inference_steps  # type: ignore
+            )
+            padding_image_to_multiple = None if padding_image_to_multiple == -1 else padding_image_to_multiple  # type: ignore
+            # Initialize kwargs and handle strength parameter
+            kwargs = {}
+            if strength is not None:
+                kwargs["strength"] = strength
+            sampler_name = None if sampler_name == "default" else sampler_name
+
+            # Get the original image for inpainting
+            if not image_editor_output or "background" not in image_editor_output:
+                raise ValueError("Please upload and edit an image first")
+            original_image = image_editor_output["background"]
+
+            # Convert original image to RGB if needed
+            if original_image.mode == "RGBA":
+                # Create a white background and paste the RGBA image onto it
+                rgb_image = PIL.Image.new("RGB", original_image.size, (255, 255, 255))
+                rgb_image.paste(
+                    original_image, mask=original_image.split()[3]
+                )  # Use alpha channel as mask
+                original_image = rgb_image
+            elif original_image.mode != "RGB":
+                original_image = original_image.convert("RGB")
+
+            # Assert that original image is RGB format
+            assert (
+                original_image.mode == "RGB"
+            ), f"Expected RGB image, got {original_image.mode}"
+
+            # Use uploaded mask if provided, otherwise generate from editor
+            if uploaded_mask is not None:
+                mask_image = uploaded_mask
+
+                # Convert RGBA to RGB if needed
+                if mask_image.mode == "RGBA":
+                    # Create a white background and paste the RGBA image onto it
+                    rgb_mask = PIL.Image.new("RGB", mask_image.size, (255, 255, 255))
+                    rgb_mask.paste(
+                        mask_image, mask=(mask_image.split()[3])
+                    )  # Use alpha channel as mask
+                    mask_image = rgb_mask
+                elif mask_image.mode != "RGB":
+                    mask_image = mask_image.convert("RGB")
+
+                # Ensure mask is the same size as original image
+                if mask_image.size != original_image.size:
+                    mask_image = mask_image.resize(original_image.size)
+
+                # Assert that mask image is RGB format
+                assert (
+                    mask_image.mode == "RGB"
+                ), f"Expected RGB mask, got {mask_image.mode}"
+            else:
+                # Generate mask using the preview function
+                mask_image = preview_mask(image_editor_output)
+                # Assert that generated mask is L format (grayscale)
+                assert mask_image.mode == "L", f"Expected L mask, got {mask_image.mode}"
+
+            bio = io.BytesIO()
+            original_image.save(bio, format="png")
+
+            mask_bio = io.BytesIO()
+            mask_image.save(mask_bio, format="png")
+
+            response = None
+            exc = None
+            request_id = str(uuid.uuid4())
+
+            def run_in_thread():
+                nonlocal exc, response
+                try:
+                    response = model.inpainting(
+                        request_id=request_id,
+                        prompt=prompt,
+                        negative_prompt=negative_prompt,
+                        n=n,
+                        image=bio.getvalue(),
+                        mask_image=mask_bio.getvalue(),
+                        size=size,
+                        response_format="b64_json",
+                        num_inference_steps=num_inference_steps,
+                        guidance_scale=guidance_scale,
+                        padding_image_to_multiple=padding_image_to_multiple,
+                        sampler_name=sampler_name,
+                        **kwargs,
+                    )
+                except Exception as e:
+                    exc = e
+
+            t = threading.Thread(target=run_in_thread)
+            t.start()
+            while t.is_alive():
+                try:
+                    cur_progress = client.get_progress(request_id)["progress"]
+                except (KeyError, RuntimeError):
+                    cur_progress = 0.0
+
+                progress(cur_progress, desc="Inpainting images")
+                time.sleep(1)
+
+            if exc:
+                raise exc
+
+            images = []
+            for image_dict in response["data"]:  # type: ignore
+                assert image_dict["b64_json"] is not None
+                image_data = base64.b64decode(image_dict["b64_json"])
+                image = PIL.Image.open(io.BytesIO(image_data))
+                images.append(image)
+
+            return images
+
+        with gr.Blocks() as inpainting_interface:
+            with gr.Column():
+                with gr.Row():
+                    with gr.Column(scale=10):
+                        prompt = gr.Textbox(
+                            label="Prompt",
+                            show_label=True,
+                            placeholder="Enter prompt here...",
+                        )
+                        negative_prompt = gr.Textbox(
+                            label="Negative Prompt",
+                            show_label=True,
+                            placeholder="Enter negative prompt here...",
+                        )
+                    with gr.Column(scale=1):
+                        generate_button = gr.Button("Generate")
+
+                with gr.Row():
+                    n = gr.Number(label="Number of image", value=1)
+                    size_width = gr.Number(label="Width", value=-1)
+                    size_height = gr.Number(label="Height", value=-1)
+
+                with gr.Row():
+                    guidance_scale = gr.Number(label="Guidance scale", value=-1)
+                    num_inference_steps = gr.Number(
+                        label="Inference Step Number", value=-1
+                    )
+                    padding_image_to_multiple = gr.Number(
+                        label="Padding image to multiple", value=-1
+                    )
+                    strength = gr.Slider(
+                        label="Strength", value=0.6, step=0.1, minimum=0.0, maximum=1.0
+                    )
+                    sampler_name = gr.Dropdown(
+                        choices=SAMPLING_METHODS,
+                        value="default",
+                        label="Sampling method",
+                    )
+
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        image_editor = gr.ImageEditor(
+                            type="pil",
+                            label="Edit Image and Create Mask (Draw white areas to inpaint)",
+                            interactive=True,
+                            height=400,
+                        )
+
+                        # Mask controls below the editor
+                        with gr.Row():
+                            preview_button = gr.Button("Preview Mask", size="sm")
+                            upload_mask = gr.Image(
+                                type="pil",
+                                label="Or upload mask image directly",
+                                interactive=True,
+                            )
+                        with gr.Row():
+                            mask_output = gr.Image(
+                                label="Current Mask Preview",
+                                interactive=False,
+                                height=200,
+                            )
+
+                    with gr.Column(scale=1):
+                        gr.Markdown("### Inpainting Results")
+                        output_gallery = gr.Gallery()
+
+            preview_button.click(
+                preview_mask,
+                inputs=[image_editor],
+                outputs=[mask_output],
+            )
+
+            # When user uploads a mask, display it
+            def process_uploaded_mask(
+                mask: Optional[PIL.Image.Image],
+            ) -> PIL.Image.Image:
+                if mask is None:
+                    return PIL.Image.new("L", (512, 512), 0)
+
+                # Convert RGBA to grayscale for preview
+                if mask.mode == "RGBA":
+                    # Use alpha channel for mask preview
+                    alpha = mask.split()[3]
+                    mask = alpha.convert("L")
+                elif mask.mode != "L":
+                    # Convert to grayscale
+                    mask = mask.convert("L")
+
+                return mask
+
+            upload_mask.change(
+                process_uploaded_mask, inputs=[upload_mask], outputs=[mask_output]
+            )
+
+            generate_button.click(
+                process_inpainting,
+                inputs=[
+                    prompt,
+                    negative_prompt,
+                    image_editor,
+                    upload_mask,
+                    n,
+                    size_width,
+                    size_height,
+                    guidance_scale,
+                    num_inference_steps,
+                    padding_image_to_multiple,
+                    strength,
+                    sampler_name,
+                ],
+                outputs=[output_gallery],
+            )
+        return inpainting_interface
+
     def text2video_interface(self) -> "gr.Blocks":
         def text_generate_video(
             prompt: str,
@@ -797,8 +1105,24 @@ class MediaInterface:
 
             return audio_path
 
+        # Determine model abilities
+        supports_basic_tts = "text2audio" in self.model_ability
+        supports_zero_shot = "text2audio_zero_shot" in self.model_ability
+        supports_voice_cloning = "text2audio_voice_cloning" in self.model_ability
+
+        # Show ability info
+        ability_info = []
+        if supports_basic_tts:
+            ability_info.append("✅ Basic TTS (text-to-speech)")
+        if supports_zero_shot:
+            ability_info.append("✅ Zero-shot TTS (voice selection)")
+        if supports_voice_cloning:
+            ability_info.append("✅ Voice Cloning (requires reference audio)")
+
         # Gradio UI
         with gr.Blocks() as tts_ui:
+            gr.Markdown(f"**Model Abilities:**\n{chr(10).join(ability_info)}")
+
             with gr.Row():
                 with gr.Column():
                     input_text = gr.Textbox(
@@ -811,13 +1135,32 @@ class MediaInterface:
                        label="Speed", minimum=0.5, maximum=2.0, value=1.0, step=0.1
                    )
 
-
-
-
-
-
-
-
+                    # Show voice cloning controls if supported
+                    if supports_voice_cloning:
+                        gr.Markdown("---\n**Voice Cloning Options**")
+                        # Make voice cloning required if model doesn't support zero-shot
+                        if supports_zero_shot:
+                            prompt_speech = gr.Audio(
+                                label="Prompt Speech (for cloning, optional)",
+                                type="filepath",
+                            )
+                            prompt_text = gr.Textbox(
+                                label="Prompt Text (for cloning, optional)",
+                                placeholder="Text of the prompt speech",
+                            )
+                        else:
+                            prompt_speech = gr.Audio(
+                                label="Prompt Speech (for cloning, required)",
+                                type="filepath",
+                            )
+                            prompt_text = gr.Textbox(
+                                label="Prompt Text (for cloning, optional)",
+                                placeholder="Text of the prompt speech (optional)",
+                            )
+                    else:
+                        # Hidden components for API compatibility
+                        prompt_speech = gr.Audio(visible=False)
+                        prompt_text = gr.Textbox(visible=False)
 
                     generate = gr.Button("Generate")
 
@@ -871,6 +1214,9 @@ class MediaInterface:
         if "image2image" in self.model_ability:
             with gr.Tab("Image to Image"):
                 self.image2image_interface()
+        if "inpainting" in self.model_ability:
+            with gr.Tab("Inpainting"):
+                self.inpainting_interface()
         if "text2video" in self.model_ability:
             with gr.Tab("Text to Video"):
                 self.text2video_interface()
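The new Inpainting tab above is a thin Gradio front end over RESTfulImageModelHandle.inpainting: it PNG-encodes the edited image and the generated (or uploaded) mask, then forwards them together with the new strength value, which travels through **kwargs. Below is a condensed sketch of the same call made directly against a running server; the endpoint URL, model UID and file names are placeholders, and parameter defaults may differ from what is shown.

import io
import PIL.Image
from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")  # placeholder endpoint
model = client.get_model("my-image-model")       # placeholder model UID

# Encode source image (RGB) and mask (grayscale) as PNG bytes, as the Gradio handler does
image_bio, mask_bio = io.BytesIO(), io.BytesIO()
PIL.Image.open("photo.png").convert("RGB").save(image_bio, format="png")
PIL.Image.open("mask.png").convert("L").save(mask_bio, format="png")

result = model.inpainting(
    prompt="a red scarf",
    negative_prompt="",
    n=1,
    image=image_bio.getvalue(),
    mask_image=mask_bio.getvalue(),
    size=None,
    response_format="b64_json",
    strength=0.6,  # forwarded through **kwargs by the new UI code
)
print(len(result["data"]))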
xinference/ui/web/ui/build/asset-manifest.json
CHANGED
@@ -1,14 +1,14 @@
 {
   "files": {
     "main.css": "./static/css/main.013f296b.css",
-    "main.js": "./static/js/main.4918643a.js",
+    "main.js": "./static/js/main.1086c759.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
     "main.013f296b.css.map": "./static/css/main.013f296b.css.map",
-    "main.4918643a.js.map": "./static/js/main.4918643a.js.map"
+    "main.1086c759.js.map": "./static/js/main.1086c759.js.map"
   },
   "entrypoints": [
     "static/css/main.013f296b.css",
-    "static/js/main.4918643a.js"
+    "static/js/main.1086c759.js"
   ]
 }
xinference/ui/web/ui/build/index.html
CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.4918643a.js"></script><link href="./static/css/main.013f296b.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.1086c759.js"></script><link href="./static/css/main.013f296b.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>