xinference 1.9.0__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (92)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +415 -1
  3. xinference/constants.py +2 -0
  4. xinference/core/model.py +3 -4
  5. xinference/core/supervisor.py +29 -1
  6. xinference/core/worker.py +4 -1
  7. xinference/deploy/cmdline.py +2 -0
  8. xinference/deploy/test/test_cmdline.py +1 -1
  9. xinference/model/audio/core.py +5 -0
  10. xinference/model/audio/cosyvoice.py +0 -1
  11. xinference/model/audio/kokoro.py +1 -1
  12. xinference/model/audio/kokoro_zh.py +124 -0
  13. xinference/model/audio/model_spec.json +64 -20
  14. xinference/model/embedding/flag/core.py +5 -0
  15. xinference/model/embedding/llama_cpp/core.py +22 -19
  16. xinference/model/embedding/sentence_transformers/core.py +19 -4
  17. xinference/model/embedding/vllm/core.py +40 -8
  18. xinference/model/image/cache_manager.py +56 -0
  19. xinference/model/image/core.py +9 -0
  20. xinference/model/image/model_spec.json +116 -9
  21. xinference/model/image/stable_diffusion/core.py +141 -31
  22. xinference/model/llm/core.py +10 -0
  23. xinference/model/llm/llama_cpp/core.py +42 -40
  24. xinference/model/llm/llm_family.json +435 -23
  25. xinference/model/llm/llm_family.py +1 -0
  26. xinference/model/llm/mlx/core.py +52 -33
  27. xinference/model/llm/sglang/core.py +2 -44
  28. xinference/model/llm/tool_parsers/__init__.py +58 -0
  29. xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
  30. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +128 -0
  31. xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
  32. xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
  33. xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
  34. xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
  35. xinference/model/llm/transformers/core.py +6 -12
  36. xinference/model/llm/utils.py +128 -46
  37. xinference/model/llm/vllm/core.py +8 -61
  38. xinference/model/rerank/core.py +3 -0
  39. xinference/model/rerank/sentence_transformers/core.py +1 -1
  40. xinference/model/rerank/vllm/core.py +56 -6
  41. xinference/model/utils.py +1 -2
  42. xinference/model/video/model_spec.json +95 -1
  43. xinference/thirdparty/cosyvoice/bin/export_jit.py +3 -4
  44. xinference/thirdparty/cosyvoice/bin/export_onnx.py +49 -126
  45. xinference/thirdparty/cosyvoice/bin/{inference.py → inference_deprecated.py} +1 -0
  46. xinference/thirdparty/cosyvoice/bin/train.py +23 -3
  47. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +8 -4
  48. xinference/thirdparty/cosyvoice/cli/frontend.py +4 -4
  49. xinference/thirdparty/cosyvoice/cli/model.py +53 -75
  50. xinference/thirdparty/cosyvoice/dataset/dataset.py +5 -18
  51. xinference/thirdparty/cosyvoice/dataset/processor.py +24 -25
  52. xinference/thirdparty/cosyvoice/flow/decoder.py +24 -433
  53. xinference/thirdparty/cosyvoice/flow/flow.py +6 -14
  54. xinference/thirdparty/cosyvoice/flow/flow_matching.py +33 -145
  55. xinference/thirdparty/cosyvoice/hifigan/generator.py +169 -1
  56. xinference/thirdparty/cosyvoice/llm/llm.py +108 -17
  57. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +14 -115
  58. xinference/thirdparty/cosyvoice/utils/common.py +20 -0
  59. xinference/thirdparty/cosyvoice/utils/executor.py +8 -4
  60. xinference/thirdparty/cosyvoice/utils/file_utils.py +45 -1
  61. xinference/thirdparty/cosyvoice/utils/losses.py +37 -0
  62. xinference/thirdparty/cosyvoice/utils/mask.py +35 -1
  63. xinference/thirdparty/cosyvoice/utils/train_utils.py +24 -6
  64. xinference/thirdparty/cosyvoice/vllm/cosyvoice2.py +103 -0
  65. xinference/types.py +105 -2
  66. xinference/ui/gradio/chat_interface.py +2 -0
  67. xinference/ui/gradio/media_interface.py +353 -7
  68. xinference/ui/web/ui/build/asset-manifest.json +3 -3
  69. xinference/ui/web/ui/build/index.html +1 -1
  70. xinference/ui/web/ui/build/static/js/main.1086c759.js +3 -0
  71. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +1 -0
  72. xinference/ui/web/ui/node_modules/.cache/babel-loader/3c5758bd12fa334294b1de0ff6b1a4bac8d963c45472eab9dc3e530d82aa6b3f.json +1 -0
  73. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +1 -0
  74. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +1 -0
  75. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +1 -0
  76. xinference/ui/web/ui/src/locales/en.json +2 -0
  77. xinference/ui/web/ui/src/locales/ja.json +2 -0
  78. xinference/ui/web/ui/src/locales/ko.json +2 -0
  79. xinference/ui/web/ui/src/locales/zh.json +2 -0
  80. {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/METADATA +16 -12
  81. {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/RECORD +86 -77
  82. xinference/ui/web/ui/build/static/js/main.4918643a.js +0 -3
  83. xinference/ui/web/ui/build/static/js/main.4918643a.js.map +0 -1
  84. xinference/ui/web/ui/node_modules/.cache/babel-loader/3d2a89f0eccc1f90fc5036c9a1d587c2120e6a6b128aae31d1db7d6bad52722b.json +0 -1
  85. xinference/ui/web/ui/node_modules/.cache/babel-loader/89179f8f51887b9167721860a12412549ff04f78162e921a7b6aa6532646deb2.json +0 -1
  86. xinference/ui/web/ui/node_modules/.cache/babel-loader/8e5cb82c2ff3299c6a44563fe6b1c5515c9750613c51bb63abee0b1d70fc5019.json +0 -1
  87. xinference/ui/web/ui/node_modules/.cache/babel-loader/9dc5cfc67dd0617b0272aeef8651f1589b2155a4ff1fd72ad3166b217089b619.json +0 -1
  88. /xinference/ui/web/ui/build/static/js/{main.4918643a.js.LICENSE.txt → main.1086c759.js.LICENSE.txt} +0 -0
  89. {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/WHEEL +0 -0
  90. {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/entry_points.txt +0 -0
  91. {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/licenses/LICENSE +0 -0
  92. {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/top_level.txt +0 -0
xinference/types.py CHANGED
@@ -351,6 +351,11 @@ class ModelAndPrompt(BaseModel):
     prompt: str
 
 
+class ModelAndMessages(BaseModel):
+    model: str
+    messages: List[Dict[str, Any]]
+
+
 class CreateCompletionTorch(BaseModel):
     echo: bool = echo_field
     max_tokens: Optional[int] = max_tokens_field
@@ -371,7 +376,6 @@ class CreateCompletionTorch(BaseModel):
 # This type is for openai API compatibility
 CreateCompletionOpenAI: BaseModel
 
-
 from openai.types.completion_create_params import CompletionCreateParamsNonStreaming
 
 CreateCompletionOpenAI = create_model_from_typeddict(
@@ -395,7 +399,6 @@ class CreateChatModel(BaseModel):
 # Currently, chat calls generates, so the params share the same one.
 CreateChatCompletionTorch = CreateCompletionTorch
 
-
 from ._compat import CreateChatCompletionOpenAI
 
 
@@ -462,3 +465,103 @@ class PeftModelConfig:
             image_lora_load_kwargs=data.get("image_lora_load_kwargs"),
             image_lora_fuse_kwargs=data.get("image_lora_fuse_kwargs"),
         )
+
+
+# This type is for Anthropic API compatibility
+ANTHROPIC_AVAILABLE = False
+
+try:
+    from anthropic.types import ContentBlock, Usage
+
+    ANTHROPIC_AVAILABLE = True
+except ImportError:
+    ContentBlock = None
+    Usage = None
+
+# Use TYPE_CHECKING to avoid runtime issues with mypy
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    # For type checking, define the types as if Anthropic is available
+    from anthropic.types import ContentBlock as ContentBlock_
+    from anthropic.types import Usage as Usage_
+
+    class AnthropicMessage(TypedDict):
+        id: str
+        type: str
+        role: str
+        content: List[ContentBlock_]
+        model: str
+        stop_reason: str
+        stop_sequence: str
+        usage: Usage_
+        container: Dict[str, Any]
+
+    class MessageCreateParams(TypedDict):
+        model: str
+        messages: List[Dict[str, Any]]
+        max_tokens: int
+        stream: NotRequired[bool]
+        temperature: NotRequired[float]
+        top_p: NotRequired[float]
+        top_k: NotRequired[int]
+        stop_sequences: NotRequired[List[str]]
+        metadata: NotRequired[Dict[str, Any]]
+        tools: NotRequired[List[Dict[str, Any]]]
+        tool_choice: NotRequired[Union[str, Dict[str, Any]]]
+
+    CreateMessageAnthropic: BaseModel
+
+    class CreateMessage(
+        ModelAndMessages,
+    ):
+        pass
+
+else:
+    # Runtime definitions
+    if ANTHROPIC_AVAILABLE:
+
+        class AnthropicMessage(TypedDict):
+            id: str
+            type: str
+            role: str
+            content: List[ContentBlock]
+            model: str
+            stop_reason: str
+            stop_sequence: str
+            usage: Usage
+            container: Dict[str, Any]
+
+        class MessageCreateParams(TypedDict):
+            model: str
+            messages: List[Dict[str, Any]]
+            max_tokens: int
+            stream: NotRequired[bool]
+            temperature: NotRequired[float]
+            top_p: NotRequired[float]
+            top_k: NotRequired[int]
+            stop_sequences: NotRequired[List[str]]
+            metadata: NotRequired[Dict[str, Any]]
+            tools: NotRequired[List[Dict[str, Any]]]
+            tool_choice: NotRequired[Union[str, Dict[str, Any]]]
+
+        CreateMessageAnthropic: BaseModel = create_model_from_typeddict(
+            MessageCreateParams,
+        )
+        CreateMessageAnthropic = fix_forward_ref(CreateMessageAnthropic)
+
+        class CreateMessage(CreateMessageAnthropic):
+            pass
+
+    else:
+        # Define dummy types when Anthropic is not available
+        class AnthropicMessage:
+            pass
+
+        class MessageCreateParams:
+            pass
+
+        CreateMessageAnthropic = None
+
+        class CreateMessage:
+            pass
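
The new `MessageCreateParams` TypedDict above describes the body of an Anthropic-style message request: `model`, `messages`, and `max_tokens` are required, the rest are optional. The sketch below shows a request of that shape; the endpoint path, port, and model uid are illustrative assumptions, not values taken from this diff.

```python
# Sketch only: a request body matching the MessageCreateParams fields above.
# URL, port, and model uid are placeholders, not taken from this diff.
import requests

payload = {
    "model": "my-chat-model",                             # required
    "messages": [{"role": "user", "content": "Hello!"}],  # required
    "max_tokens": 256,                                    # required
    "temperature": 0.7,                                   # optional (NotRequired) fields below
    "top_p": 0.9,
    "stop_sequences": ["\n\nHuman:"],
    "stream": False,
}

resp = requests.post("http://127.0.0.1:9997/v1/messages", json=payload)
print(resp.json())
```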
xinference/ui/gradio/chat_interface.py CHANGED
@@ -135,6 +135,8 @@ class GradioInterface:
                 generate_config=generate_config,  # type: ignore
             ):
                 assert isinstance(chunk, dict)
+                if not chunk["choices"]:
+                    continue
                 delta = chunk["choices"][0]["delta"]
 
                 if (
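
The two added lines above make the Gradio chat stream tolerate chunks whose `choices` list is empty, which OpenAI-compatible backends may emit (for example a trailing usage-only chunk). A minimal client-side sketch of the same guard, with a placeholder endpoint and model uid:

```python
# Sketch: consume an OpenAI-compatible chat stream and skip chunks without choices.
# Endpoint and model uid are placeholders, not values from this diff.
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:9997/v1", api_key="not-needed")
stream = client.chat.completions.create(
    model="my-chat-model",
    messages=[{"role": "user", "content": "Hi"}],
    stream=True,
)
for chunk in stream:
    if not chunk.choices:  # same guard as the fix above
        continue
    delta = chunk.choices[0].delta
    if delta.content:
        print(delta.content, end="", flush=True)
```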
xinference/ui/gradio/media_interface.py CHANGED
@@ -224,6 +224,7 @@ class MediaInterface:
             guidance_scale: int,
             num_inference_steps: int,
             padding_image_to_multiple: int,
+            strength: float,
             sampler_name: Optional[str] = None,
             progress=gr.Progress(),
         ) -> PIL.Image.Image:
@@ -243,6 +244,10 @@ class MediaInterface:
                 None if num_inference_steps == -1 else num_inference_steps  # type: ignore
             )
             padding_image_to_multiple = None if padding_image_to_multiple == -1 else padding_image_to_multiple  # type: ignore
+            # Initialize kwargs and handle strength parameter
+            kwargs = {}
+            if strength is not None:
+                kwargs["strength"] = strength
             sampler_name = None if sampler_name == "default" else sampler_name
 
             bio = io.BytesIO()
@@ -267,6 +272,7 @@ class MediaInterface:
                         guidance_scale=guidance_scale,
                         padding_image_to_multiple=padding_image_to_multiple,
                         sampler_name=sampler_name,
+                        **kwargs,
                     )
                 except Exception as e:
                     exc = e
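
The hunks above thread the new `strength` value through to the image-to-image call as an extra keyword argument. A hedged sketch of the equivalent call from a script follows; the endpoint, model uid, and file name are placeholders, and the `image_to_image` method on the image model handle is assumed to behave as in earlier releases.

```python
# Sketch: image-to-image request passing the new strength kwarg.
# Endpoint, model uid, and input file are placeholders.
from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
model = client.get_model("my-image-model")  # an image model uid

with open("input.png", "rb") as f:
    result = model.image_to_image(
        image=f.read(),
        prompt="a watercolor rendering of the input",
        strength=0.6,  # how strongly to transform the source image
        response_format="b64_json",
    )
print(len(result["data"]))
```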
@@ -324,6 +330,9 @@ class MediaInterface:
                     padding_image_to_multiple = gr.Number(
                         label="Padding image to multiple", value=-1
                     )
+                    strength = gr.Slider(
+                        label="Strength", value=0.6, step=0.1, minimum=0.0, maximum=1.0
+                    )
                     sampler_name = gr.Dropdown(
                         choices=SAMPLING_METHODS,
                         value="default",
@@ -348,12 +357,311 @@ class MediaInterface:
                     guidance_scale,
                     num_inference_steps,
                     padding_image_to_multiple,
+                    strength,
                     sampler_name,
                 ],
                 outputs=output_gallery,
             )
         return image2image_inteface
 
+    def inpainting_interface(self) -> "gr.Blocks":
+        from ...model.image.stable_diffusion.core import SAMPLING_METHODS
+
+        def preview_mask(
+            image_editor_output: Dict[str, Any],
+        ) -> PIL.Image.Image:
+            """Preview the generated mask without submitting inpainting task"""
+            # Extract original image and mask from ImageEditor output
+            if not image_editor_output or "background" not in image_editor_output:
+                return PIL.Image.new(
+                    "L", (512, 512), 0
+                )  # Return black image if no input
+
+            # Get the original image (background)
+            original_image = image_editor_output["background"]
+
+            # Get the composite image which contains the edits
+            composite_image = image_editor_output.get("composite", original_image)
+
+            # Create mask from the differences between original and composite
+            # White areas in composite indicate regions to inpaint
+            if original_image.mode != "RGB":
+                original_image = original_image.convert("RGB")
+            if composite_image.mode != "RGB":
+                composite_image = composite_image.convert("RGB")
+
+            # Create mask by finding differences (white drawn areas)
+            mask_image = PIL.Image.new("L", original_image.size, 0)
+            orig_data = original_image.load()
+            comp_data = composite_image.load()
+            mask_data = mask_image.load()
+
+            for y in range(original_image.size[1]):
+                for x in range(original_image.size[0]):
+                    orig_pixel = orig_data[x, y]
+                    comp_pixel = comp_data[x, y]
+                    # If pixels are different, assume it's a drawn area (white for inpainting)
+                    if orig_pixel != comp_pixel:
+                        mask_data[x, y] = 255  # White for inpainting
+
+            return mask_image
+
+        def process_inpainting(
+            prompt: str,
+            negative_prompt: str,
+            image_editor_output: Dict[str, Any],
+            uploaded_mask: Optional[PIL.Image.Image],
+            n: int,
+            size_width: int,
+            size_height: int,
+            guidance_scale: int,
+            num_inference_steps: int,
+            padding_image_to_multiple: int,
+            strength: float,
+            sampler_name: Optional[str] = None,
+            progress=gr.Progress(),
+        ) -> List[PIL.Image.Image]:
+            from ...client import RESTfulClient
+
+            client = RESTfulClient(self.endpoint)
+            client._set_token(self.access_token)
+            model = client.get_model(self.model_uid)
+            assert isinstance(model, RESTfulImageModelHandle)
+
+            if size_width > 0 and size_height > 0:
+                size = f"{int(size_width)}*{int(size_height)}"
+            else:
+                size = None
+            guidance_scale = None if guidance_scale == -1 else guidance_scale  # type: ignore
+            num_inference_steps = (
+                None if num_inference_steps == -1 else num_inference_steps  # type: ignore
+            )
+            padding_image_to_multiple = None if padding_image_to_multiple == -1 else padding_image_to_multiple  # type: ignore
+            # Initialize kwargs and handle strength parameter
+            kwargs = {}
+            if strength is not None:
+                kwargs["strength"] = strength
+            sampler_name = None if sampler_name == "default" else sampler_name
+
+            # Get the original image for inpainting
+            if not image_editor_output or "background" not in image_editor_output:
+                raise ValueError("Please upload and edit an image first")
+            original_image = image_editor_output["background"]
+
+            # Convert original image to RGB if needed
+            if original_image.mode == "RGBA":
+                # Create a white background and paste the RGBA image onto it
+                rgb_image = PIL.Image.new("RGB", original_image.size, (255, 255, 255))
+                rgb_image.paste(
+                    original_image, mask=original_image.split()[3]
+                )  # Use alpha channel as mask
+                original_image = rgb_image
+            elif original_image.mode != "RGB":
+                original_image = original_image.convert("RGB")
+
+            # Assert that original image is RGB format
+            assert (
+                original_image.mode == "RGB"
+            ), f"Expected RGB image, got {original_image.mode}"
+
+            # Use uploaded mask if provided, otherwise generate from editor
+            if uploaded_mask is not None:
+                mask_image = uploaded_mask
+
+                # Convert RGBA to RGB if needed
+                if mask_image.mode == "RGBA":
+                    # Create a white background and paste the RGBA image onto it
+                    rgb_mask = PIL.Image.new("RGB", mask_image.size, (255, 255, 255))
+                    rgb_mask.paste(
+                        mask_image, mask=(mask_image.split()[3])
+                    )  # Use alpha channel as mask
+                    mask_image = rgb_mask
+                elif mask_image.mode != "RGB":
+                    mask_image = mask_image.convert("RGB")
+
+                # Ensure mask is the same size as original image
+                if mask_image.size != original_image.size:
+                    mask_image = mask_image.resize(original_image.size)
+
+                # Assert that mask image is RGB format
+                assert (
+                    mask_image.mode == "RGB"
+                ), f"Expected RGB mask, got {mask_image.mode}"
+            else:
+                # Generate mask using the preview function
+                mask_image = preview_mask(image_editor_output)
+                # Assert that generated mask is L format (grayscale)
+                assert mask_image.mode == "L", f"Expected L mask, got {mask_image.mode}"
+
+            bio = io.BytesIO()
+            original_image.save(bio, format="png")
+
+            mask_bio = io.BytesIO()
+            mask_image.save(mask_bio, format="png")
+
+            response = None
+            exc = None
+            request_id = str(uuid.uuid4())
+
+            def run_in_thread():
+                nonlocal exc, response
+                try:
+                    response = model.inpainting(
+                        request_id=request_id,
+                        prompt=prompt,
+                        negative_prompt=negative_prompt,
+                        n=n,
+                        image=bio.getvalue(),
+                        mask_image=mask_bio.getvalue(),
+                        size=size,
+                        response_format="b64_json",
+                        num_inference_steps=num_inference_steps,
+                        guidance_scale=guidance_scale,
+                        padding_image_to_multiple=padding_image_to_multiple,
+                        sampler_name=sampler_name,
+                        **kwargs,
+                    )
+                except Exception as e:
+                    exc = e
+
+            t = threading.Thread(target=run_in_thread)
+            t.start()
+            while t.is_alive():
+                try:
+                    cur_progress = client.get_progress(request_id)["progress"]
+                except (KeyError, RuntimeError):
+                    cur_progress = 0.0
+
+                progress(cur_progress, desc="Inpainting images")
+                time.sleep(1)
+
+            if exc:
+                raise exc
+
+            images = []
+            for image_dict in response["data"]:  # type: ignore
+                assert image_dict["b64_json"] is not None
+                image_data = base64.b64decode(image_dict["b64_json"])
+                image = PIL.Image.open(io.BytesIO(image_data))
+                images.append(image)
+
+            return images
+
+        with gr.Blocks() as inpainting_interface:
+            with gr.Column():
+                with gr.Row():
+                    with gr.Column(scale=10):
+                        prompt = gr.Textbox(
+                            label="Prompt",
+                            show_label=True,
+                            placeholder="Enter prompt here...",
+                        )
+                        negative_prompt = gr.Textbox(
+                            label="Negative Prompt",
+                            show_label=True,
+                            placeholder="Enter negative prompt here...",
+                        )
+                    with gr.Column(scale=1):
+                        generate_button = gr.Button("Generate")
+
+                with gr.Row():
+                    n = gr.Number(label="Number of image", value=1)
+                    size_width = gr.Number(label="Width", value=-1)
+                    size_height = gr.Number(label="Height", value=-1)
+
+                with gr.Row():
+                    guidance_scale = gr.Number(label="Guidance scale", value=-1)
+                    num_inference_steps = gr.Number(
+                        label="Inference Step Number", value=-1
+                    )
+                    padding_image_to_multiple = gr.Number(
+                        label="Padding image to multiple", value=-1
+                    )
+                    strength = gr.Slider(
+                        label="Strength", value=0.6, step=0.1, minimum=0.0, maximum=1.0
+                    )
+                    sampler_name = gr.Dropdown(
+                        choices=SAMPLING_METHODS,
+                        value="default",
+                        label="Sampling method",
+                    )
+
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        image_editor = gr.ImageEditor(
+                            type="pil",
+                            label="Edit Image and Create Mask (Draw white areas to inpaint)",
+                            interactive=True,
+                            height=400,
+                        )
+
+                        # Mask controls below the editor
+                        with gr.Row():
+                            preview_button = gr.Button("Preview Mask", size="sm")
+                            upload_mask = gr.Image(
+                                type="pil",
+                                label="Or upload mask image directly",
+                                interactive=True,
+                            )
+                        with gr.Row():
+                            mask_output = gr.Image(
+                                label="Current Mask Preview",
+                                interactive=False,
+                                height=200,
+                            )
+
+                    with gr.Column(scale=1):
+                        gr.Markdown("### Inpainting Results")
+                        output_gallery = gr.Gallery()
+
+                preview_button.click(
+                    preview_mask,
+                    inputs=[image_editor],
+                    outputs=[mask_output],
+                )
+
+                # When user uploads a mask, display it
+                def process_uploaded_mask(
+                    mask: Optional[PIL.Image.Image],
+                ) -> PIL.Image.Image:
+                    if mask is None:
+                        return PIL.Image.new("L", (512, 512), 0)
+
+                    # Convert RGBA to grayscale for preview
+                    if mask.mode == "RGBA":
+                        # Use alpha channel for mask preview
+                        alpha = mask.split()[3]
+                        mask = alpha.convert("L")
+                    elif mask.mode != "L":
+                        # Convert to grayscale
+                        mask = mask.convert("L")
+
+                    return mask
+
+                upload_mask.change(
+                    process_uploaded_mask, inputs=[upload_mask], outputs=[mask_output]
+                )
+
+                generate_button.click(
+                    process_inpainting,
+                    inputs=[
+                        prompt,
+                        negative_prompt,
+                        image_editor,
+                        upload_mask,
+                        n,
+                        size_width,
+                        size_height,
+                        guidance_scale,
+                        num_inference_steps,
+                        padding_image_to_multiple,
+                        strength,
+                        sampler_name,
+                    ],
+                    outputs=[output_gallery],
+                )
+        return inpainting_interface
+
     def text2video_interface(self) -> "gr.Blocks":
         def text_generate_video(
             prompt: str,
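
`preview_mask` above builds the mask by comparing the background and composite images pixel by pixel in Python, which is slow for large images. A vectorized equivalent, not part of this release and assuming numpy is available, produces the same white-where-different mask:

```python
# Sketch: numpy-vectorized version of the pixel-difference mask in preview_mask.
# Not part of this release; assumes numpy and Pillow are installed.
import numpy as np
from PIL import Image

def diff_mask(original: Image.Image, composite: Image.Image) -> Image.Image:
    orig = np.asarray(original.convert("RGB"), dtype=np.int16)
    comp = np.asarray(composite.convert("RGB"), dtype=np.int16)
    changed = np.any(orig != comp, axis=-1)  # True where any channel differs
    return Image.fromarray((changed * 255).astype(np.uint8), mode="L")
```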
@@ -797,8 +1105,24 @@ class MediaInterface:
 
             return audio_path
 
+        # Determine model abilities
+        supports_basic_tts = "text2audio" in self.model_ability
+        supports_zero_shot = "text2audio_zero_shot" in self.model_ability
+        supports_voice_cloning = "text2audio_voice_cloning" in self.model_ability
+
+        # Show ability info
+        ability_info = []
+        if supports_basic_tts:
+            ability_info.append("✅ Basic TTS (text-to-speech)")
+        if supports_zero_shot:
+            ability_info.append("✅ Zero-shot TTS (voice selection)")
+        if supports_voice_cloning:
+            ability_info.append("✅ Voice Cloning (requires reference audio)")
+
         # Gradio UI
         with gr.Blocks() as tts_ui:
+            gr.Markdown(f"**Model Abilities:**\n{chr(10).join(ability_info)}")
+
             with gr.Row():
                 with gr.Column():
                     input_text = gr.Textbox(
@@ -811,13 +1135,32 @@ class MediaInterface:
                         label="Speed", minimum=0.5, maximum=2.0, value=1.0, step=0.1
                     )
 
-                    prompt_speech = gr.Audio(
-                        label="Prompt Speech (for cloning)", type="filepath"
-                    )
-                    prompt_text = gr.Textbox(
-                        label="Prompt Text (for cloning)",
-                        placeholder="Text of the prompt speech",
-                    )
+                    # Show voice cloning controls if supported
+                    if supports_voice_cloning:
+                        gr.Markdown("---\n**Voice Cloning Options**")
+                        # Make voice cloning required if model doesn't support zero-shot
+                        if supports_zero_shot:
+                            prompt_speech = gr.Audio(
+                                label="Prompt Speech (for cloning, optional)",
+                                type="filepath",
+                            )
+                            prompt_text = gr.Textbox(
+                                label="Prompt Text (for cloning, optional)",
+                                placeholder="Text of the prompt speech",
+                            )
+                        else:
+                            prompt_speech = gr.Audio(
+                                label="Prompt Speech (for cloning, required)",
+                                type="filepath",
+                            )
+                            prompt_text = gr.Textbox(
+                                label="Prompt Text (for cloning, optional)",
+                                placeholder="Text of the prompt speech (optional)",
+                            )
+                    else:
+                        # Hidden components for API compatibility
+                        prompt_speech = gr.Audio(visible=False)
+                        prompt_text = gr.Textbox(visible=False)
 
                     generate = gr.Button("Generate")
 
@@ -871,6 +1214,9 @@ class MediaInterface:
             if "image2image" in self.model_ability:
                 with gr.Tab("Image to Image"):
                     self.image2image_interface()
+            if "inpainting" in self.model_ability:
+                with gr.Tab("Inpainting"):
+                    self.inpainting_interface()
             if "text2video" in self.model_ability:
                 with gr.Tab("Text to Video"):
                     self.text2video_interface()
xinference/ui/web/ui/build/asset-manifest.json CHANGED
@@ -1,14 +1,14 @@
 {
   "files": {
     "main.css": "./static/css/main.013f296b.css",
-    "main.js": "./static/js/main.4918643a.js",
+    "main.js": "./static/js/main.1086c759.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
     "main.013f296b.css.map": "./static/css/main.013f296b.css.map",
-    "main.4918643a.js.map": "./static/js/main.4918643a.js.map"
+    "main.1086c759.js.map": "./static/js/main.1086c759.js.map"
   },
   "entrypoints": [
     "static/css/main.013f296b.css",
-    "static/js/main.4918643a.js"
+    "static/js/main.1086c759.js"
   ]
 }
xinference/ui/web/ui/build/index.html CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.4918643a.js"></script><link href="./static/css/main.013f296b.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.1086c759.js"></script><link href="./static/css/main.013f296b.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>