xinference 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (137)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +107 -11
  3. xinference/client/restful/restful_client.py +51 -11
  4. xinference/constants.py +5 -1
  5. xinference/core/media_interface.py +758 -0
  6. xinference/core/model.py +49 -9
  7. xinference/core/supervisor.py +1 -1
  8. xinference/core/utils.py +1 -1
  9. xinference/core/worker.py +33 -39
  10. xinference/deploy/cmdline.py +17 -0
  11. xinference/deploy/utils.py +0 -3
  12. xinference/model/audio/__init__.py +16 -27
  13. xinference/model/audio/core.py +2 -1
  14. xinference/model/audio/cosyvoice.py +4 -2
  15. xinference/model/audio/model_spec.json +63 -46
  16. xinference/model/audio/model_spec_modelscope.json +31 -14
  17. xinference/model/embedding/__init__.py +16 -24
  18. xinference/model/image/__init__.py +15 -25
  19. xinference/model/llm/__init__.py +40 -115
  20. xinference/model/llm/core.py +29 -6
  21. xinference/model/llm/llama_cpp/core.py +30 -347
  22. xinference/model/llm/llm_family.json +1674 -2203
  23. xinference/model/llm/llm_family.py +71 -7
  24. xinference/model/llm/llm_family_csghub.json +0 -32
  25. xinference/model/llm/llm_family_modelscope.json +1838 -2016
  26. xinference/model/llm/llm_family_openmind_hub.json +19 -325
  27. xinference/model/llm/lmdeploy/core.py +7 -2
  28. xinference/model/llm/mlx/core.py +23 -7
  29. xinference/model/llm/reasoning_parser.py +281 -5
  30. xinference/model/llm/sglang/core.py +39 -11
  31. xinference/model/llm/transformers/chatglm.py +9 -2
  32. xinference/model/llm/transformers/cogagent.py +10 -12
  33. xinference/model/llm/transformers/cogvlm2.py +6 -3
  34. xinference/model/llm/transformers/cogvlm2_video.py +3 -6
  35. xinference/model/llm/transformers/core.py +58 -60
  36. xinference/model/llm/transformers/deepseek_v2.py +4 -2
  37. xinference/model/llm/transformers/deepseek_vl.py +10 -4
  38. xinference/model/llm/transformers/deepseek_vl2.py +9 -4
  39. xinference/model/llm/transformers/gemma3.py +4 -5
  40. xinference/model/llm/transformers/glm4v.py +3 -21
  41. xinference/model/llm/transformers/glm_edge_v.py +3 -20
  42. xinference/model/llm/transformers/intern_vl.py +3 -6
  43. xinference/model/llm/transformers/internlm2.py +1 -1
  44. xinference/model/llm/transformers/minicpmv25.py +4 -2
  45. xinference/model/llm/transformers/minicpmv26.py +5 -3
  46. xinference/model/llm/transformers/omnilmm.py +1 -1
  47. xinference/model/llm/transformers/opt.py +1 -1
  48. xinference/model/llm/transformers/ovis2.py +302 -0
  49. xinference/model/llm/transformers/qwen-omni.py +8 -1
  50. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  51. xinference/model/llm/transformers/qwen2_vl.py +5 -1
  52. xinference/model/llm/transformers/qwen_vl.py +5 -2
  53. xinference/model/llm/utils.py +96 -45
  54. xinference/model/llm/vllm/core.py +108 -24
  55. xinference/model/llm/vllm/distributed_executor.py +8 -7
  56. xinference/model/llm/vllm/xavier/allocator.py +1 -1
  57. xinference/model/llm/vllm/xavier/block_manager.py +1 -1
  58. xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
  59. xinference/model/llm/vllm/xavier/executor.py +1 -1
  60. xinference/model/llm/vllm/xavier/test/test_xavier.py +2 -11
  61. xinference/model/rerank/__init__.py +13 -24
  62. xinference/model/video/__init__.py +15 -25
  63. xinference/model/video/core.py +3 -3
  64. xinference/model/video/diffusers.py +157 -13
  65. xinference/model/video/model_spec.json +100 -0
  66. xinference/model/video/model_spec_modelscope.json +104 -0
  67. xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
  68. xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
  69. xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
  70. xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
  71. xinference/thirdparty/cosyvoice/bin/train.py +7 -2
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
  74. xinference/thirdparty/cosyvoice/cli/model.py +140 -155
  75. xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
  76. xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
  77. xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
  78. xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
  79. xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
  80. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
  81. xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
  84. xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
  85. xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
  86. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
  87. xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
  88. xinference/thirdparty/cosyvoice/utils/common.py +1 -1
  89. xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
  90. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
  91. xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
  92. xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
  93. xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
  94. xinference/types.py +2 -71
  95. xinference/web/ui/build/asset-manifest.json +6 -6
  96. xinference/web/ui/build/index.html +1 -1
  97. xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
  98. xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
  99. xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
  100. xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
  101. xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
  102. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
  103. xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
  104. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
  105. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
  106. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
  107. xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
  108. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
  109. xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
  112. xinference/web/ui/src/locales/en.json +7 -4
  113. xinference/web/ui/src/locales/zh.json +7 -4
  114. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
  115. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/RECORD +120 -121
  116. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
  117. xinference/core/image_interface.py +0 -377
  118. xinference/model/llm/transformers/compression.py +0 -258
  119. xinference/model/llm/transformers/yi_vl.py +0 -239
  120. xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
  121. xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
  122. xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
  123. xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
  124. xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
  125. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
  126. xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
  129. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
  130. xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
  131. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
  132. xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
  133. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
  134. /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
  135. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
  136. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
  137. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
xinference/core/image_interface.py (deleted)
@@ -1,377 +0,0 @@
- # Copyright 2022-2023 XProbe Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import base64
- import io
- import logging
- import os
- import threading
- import time
- import uuid
- from typing import Dict, List, Optional, Union
-
- import gradio as gr
- import PIL.Image
- from gradio import Markdown
-
- from ..client.restful.restful_client import RESTfulImageModelHandle
-
- logger = logging.getLogger(__name__)
-
-
- class ImageInterface:
-     def __init__(
-         self,
-         endpoint: str,
-         model_uid: str,
-         model_family: str,
-         model_name: str,
-         model_id: str,
-         model_revision: str,
-         model_ability: List[str],
-         controlnet: Union[None, List[Dict[str, Union[str, None]]]],
-         access_token: Optional[str],
-     ):
-         self.endpoint = endpoint
-         self.model_uid = model_uid
-         self.model_family = model_family
-         self.model_name = model_name
-         self.model_id = model_id
-         self.model_revision = model_revision
-         self.model_ability = model_ability
-         self.controlnet = controlnet
-         self.access_token = (
-             access_token.replace("Bearer ", "") if access_token is not None else None
-         )
-
-     def build(self) -> gr.Blocks:
-         assert "stable_diffusion" in self.model_family
-
-         interface = self.build_main_interface()
-         interface.queue()
-         # Gradio initiates the queue during a startup event, but since the app has already been
-         # started, that event will not run, so manually invoke the startup events.
-         # See: https://github.com/gradio-app/gradio/issues/5228
-         try:
-             interface.run_startup_events()
-         except AttributeError:
-             # compatibility
-             interface.startup_events()
-         favicon_path = os.path.join(
-             os.path.dirname(os.path.abspath(__file__)),
-             os.path.pardir,
-             "web",
-             "ui",
-             "public",
-             "favicon.svg",
-         )
-         interface.favicon_path = favicon_path
-         return interface
-
-     def text2image_interface(self) -> "gr.Blocks":
-         from ..model.image.stable_diffusion.core import SAMPLING_METHODS
-
-         def text_generate_image(
-             prompt: str,
-             n: int,
-             size_width: int,
-             size_height: int,
-             guidance_scale: int,
-             num_inference_steps: int,
-             negative_prompt: Optional[str] = None,
-             sampler_name: Optional[str] = None,
-             progress=gr.Progress(),
-         ) -> PIL.Image.Image:
-             from ..client import RESTfulClient
-
-             client = RESTfulClient(self.endpoint)
-             client._set_token(self.access_token)
-             model = client.get_model(self.model_uid)
-             assert isinstance(model, RESTfulImageModelHandle)
-
-             size = f"{int(size_width)}*{int(size_height)}"
-             guidance_scale = None if guidance_scale == -1 else guidance_scale  # type: ignore
-             num_inference_steps = (
-                 None if num_inference_steps == -1 else num_inference_steps  # type: ignore
-             )
-             sampler_name = None if sampler_name == "default" else sampler_name
-
-             response = None
-             exc = None
-             request_id = str(uuid.uuid4())
-
-             def run_in_thread():
-                 nonlocal exc, response
-                 try:
-                     response = model.text_to_image(
-                         request_id=request_id,
-                         prompt=prompt,
-                         n=n,
-                         size=size,
-                         num_inference_steps=num_inference_steps,
-                         guidance_scale=guidance_scale,
-                         negative_prompt=negative_prompt,
-                         sampler_name=sampler_name,
-                         response_format="b64_json",
-                     )
-                 except Exception as e:
-                     exc = e
-
-             t = threading.Thread(target=run_in_thread)
-             t.start()
-             while t.is_alive():
-                 try:
-                     cur_progress = client.get_progress(request_id)["progress"]
-                 except (KeyError, RuntimeError):
-                     cur_progress = 0.0
-
-                 progress(cur_progress, desc="Generating images")
-                 time.sleep(1)
-
-             if exc:
-                 raise exc
-
-             images = []
-             for image_dict in response["data"]:  # type: ignore
-                 assert image_dict["b64_json"] is not None
-                 image_data = base64.b64decode(image_dict["b64_json"])
-                 image = PIL.Image.open(io.BytesIO(image_data))
-                 images.append(image)
-
-             return images
-
-         with gr.Blocks() as text2image_vl_interface:
-             with gr.Column():
-                 with gr.Row():
-                     with gr.Column(scale=10):
-                         prompt = gr.Textbox(
-                             label="Prompt",
-                             show_label=True,
-                             placeholder="Enter prompt here...",
-                         )
-                         negative_prompt = gr.Textbox(
-                             label="Negative prompt",
-                             show_label=True,
-                             placeholder="Enter negative prompt here...",
-                         )
-                     with gr.Column(scale=1):
-                         generate_button = gr.Button("Generate")
-
-                 with gr.Row():
-                     n = gr.Number(label="Number of Images", value=1)
-                     size_width = gr.Number(label="Width", value=1024)
-                     size_height = gr.Number(label="Height", value=1024)
-                 with gr.Row():
-                     guidance_scale = gr.Number(label="Guidance scale", value=-1)
-                     num_inference_steps = gr.Number(
-                         label="Inference Step Number", value=-1
-                     )
-                     sampler_name = gr.Dropdown(
-                         choices=SAMPLING_METHODS,
-                         value="default",
-                         label="Sampling method",
-                     )
-
-                 with gr.Column():
-                     image_output = gr.Gallery()
-
-             generate_button.click(
-                 text_generate_image,
-                 inputs=[
-                     prompt,
-                     n,
-                     size_width,
-                     size_height,
-                     guidance_scale,
-                     num_inference_steps,
-                     negative_prompt,
-                     sampler_name,
-                 ],
-                 outputs=image_output,
-             )
-
-         return text2image_vl_interface
-
-     def image2image_interface(self) -> "gr.Blocks":
-         from ..model.image.stable_diffusion.core import SAMPLING_METHODS
-
-         def image_generate_image(
-             prompt: str,
-             negative_prompt: str,
-             image: PIL.Image.Image,
-             n: int,
-             size_width: int,
-             size_height: int,
-             num_inference_steps: int,
-             padding_image_to_multiple: int,
-             sampler_name: Optional[str] = None,
-             progress=gr.Progress(),
-         ) -> PIL.Image.Image:
-             from ..client import RESTfulClient
-
-             client = RESTfulClient(self.endpoint)
-             client._set_token(self.access_token)
-             model = client.get_model(self.model_uid)
-             assert isinstance(model, RESTfulImageModelHandle)
-
-             if size_width > 0 and size_height > 0:
-                 size = f"{int(size_width)}*{int(size_height)}"
-             else:
-                 size = None
-             num_inference_steps = (
-                 None if num_inference_steps == -1 else num_inference_steps  # type: ignore
-             )
-             padding_image_to_multiple = None if padding_image_to_multiple == -1 else padding_image_to_multiple  # type: ignore
-             sampler_name = None if sampler_name == "default" else sampler_name
-
-             bio = io.BytesIO()
-             image.save(bio, format="png")
-
-             response = None
-             exc = None
-             request_id = str(uuid.uuid4())
-
-             def run_in_thread():
-                 nonlocal exc, response
-                 try:
-                     response = model.image_to_image(
-                         request_id=request_id,
-                         prompt=prompt,
-                         negative_prompt=negative_prompt,
-                         n=n,
-                         image=bio.getvalue(),
-                         size=size,
-                         response_format="b64_json",
-                         num_inference_steps=num_inference_steps,
-                         padding_image_to_multiple=padding_image_to_multiple,
-                         sampler_name=sampler_name,
-                     )
-                 except Exception as e:
-                     exc = e
-
-             t = threading.Thread(target=run_in_thread)
-             t.start()
-             while t.is_alive():
-                 try:
-                     cur_progress = client.get_progress(request_id)["progress"]
-                 except (KeyError, RuntimeError):
-                     cur_progress = 0.0
-
-                 progress(cur_progress, desc="Generating images")
-                 time.sleep(1)
-
-             if exc:
-                 raise exc
-
-             images = []
-             for image_dict in response["data"]:  # type: ignore
-                 assert image_dict["b64_json"] is not None
-                 image_data = base64.b64decode(image_dict["b64_json"])
-                 image = PIL.Image.open(io.BytesIO(image_data))
-                 images.append(image)
-
-             return images
-
-         with gr.Blocks() as image2image_inteface:
-             with gr.Column():
-                 with gr.Row():
-                     with gr.Column(scale=10):
-                         prompt = gr.Textbox(
-                             label="Prompt",
-                             show_label=True,
-                             placeholder="Enter prompt here...",
-                         )
-                         negative_prompt = gr.Textbox(
-                             label="Negative Prompt",
-                             show_label=True,
-                             placeholder="Enter negative prompt here...",
-                         )
-                     with gr.Column(scale=1):
-                         generate_button = gr.Button("Generate")
-
-                 with gr.Row():
-                     n = gr.Number(label="Number of image", value=1)
-                     size_width = gr.Number(label="Width", value=-1)
-                     size_height = gr.Number(label="Height", value=-1)
-
-                 with gr.Row():
-                     num_inference_steps = gr.Number(
-                         label="Inference Step Number", value=-1
-                     )
-                     padding_image_to_multiple = gr.Number(
-                         label="Padding image to multiple", value=-1
-                     )
-                     sampler_name = gr.Dropdown(
-                         choices=SAMPLING_METHODS,
-                         value="default",
-                         label="Sampling method",
-                     )
-
-                 with gr.Row():
-                     with gr.Column(scale=1):
-                         uploaded_image = gr.Image(type="pil", label="Upload Image")
-                     with gr.Column(scale=1):
-                         output_gallery = gr.Gallery()
-
-             generate_button.click(
-                 image_generate_image,
-                 inputs=[
-                     prompt,
-                     negative_prompt,
-                     uploaded_image,
-                     n,
-                     size_width,
-                     size_height,
-                     num_inference_steps,
-                     padding_image_to_multiple,
-                     sampler_name,
-                 ],
-                 outputs=output_gallery,
-             )
-         return image2image_inteface
-
-     def build_main_interface(self) -> "gr.Blocks":
-         with gr.Blocks(
-             title=f"🎨 Xinference Stable Diffusion: {self.model_name} 🎨",
-             css="""
-             .center{
-                 display: flex;
-                 justify-content: center;
-                 align-items: center;
-                 padding: 0px;
-                 color: #9ea4b0 !important;
-             }
-             """,
-             analytics_enabled=False,
-         ) as app:
-             Markdown(
-                 f"""
-                 <h1 class="center" style='text-align: center; margin-bottom: 1rem'>🎨 Xinference Stable Diffusion: {self.model_name} 🎨</h1>
-                 """
-             )
-             Markdown(
-                 f"""
-                 <div class="center">
-                 Model ID: {self.model_uid}
-                 </div>
-                 """
-             )
-             if "text2image" in self.model_ability:
-                 with gr.Tab("Text to Image"):
-                     self.text2image_interface()
-             if "image2image" in self.model_ability:
-                 with gr.Tab("Image to Image"):
-                     self.image2image_interface()
-
-         return app
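
Note: the removed `xinference/core/image_interface.py` (apparently superseded in this release by the new `xinference/core/media_interface.py`) issued each REST request on a worker thread while polling `client.get_progress(request_id)` to drive the Gradio progress bar. The sketch below isolates that pattern; `do_request`, `poll_progress`, and `on_progress` are hypothetical placeholders, not part of the xinference client API.

```python
# Minimal sketch of the background-thread + progress-polling pattern used by the
# removed ImageInterface. All three callables are hypothetical placeholders.
import threading
import time


def run_with_progress(do_request, poll_progress, on_progress, interval=1.0):
    """Run `do_request` on a worker thread and report progress until it finishes."""
    result = None
    exc = None

    def worker():
        nonlocal result, exc
        try:
            result = do_request()
        except Exception as e:
            exc = e

    t = threading.Thread(target=worker)
    t.start()
    while t.is_alive():
        try:
            on_progress(poll_progress())  # e.g. a value in [0.0, 1.0]
        except Exception:
            on_progress(0.0)  # progress may not be registered yet
        time.sleep(interval)
    t.join()
    if exc:
        raise exc  # surface the request failure to the caller
    return result
```

In the removed file, `do_request` corresponds to `model.text_to_image(...)` or `model.image_to_image(...)`, `poll_progress` to `client.get_progress(request_id)["progress"]`, and `on_progress` to the `gr.Progress()` callback.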
xinference/model/llm/transformers/compression.py (deleted)
@@ -1,258 +0,0 @@
- # Copyright 2022-2023 XProbe Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import dataclasses
- import gc
- import glob
- import os
-
- import torch
- import torch.nn as nn
- from huggingface_hub import snapshot_download
- from torch import Tensor
- from torch.nn import functional as F
- from tqdm import tqdm
- from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
-
- from ....device_utils import empty_cache
-
-
- @dataclasses.dataclass
- class CompressionConfig:
-     """Group-wise quantization."""
-
-     num_bits: int
-     group_size: int
-     group_dim: int
-     symmetric: bool
-     enabled: bool = True
-
-
- default_compression_config = CompressionConfig(
-     num_bits=8, group_size=256, group_dim=1, symmetric=True, enabled=True
- )
-
-
- class CLinear(nn.Module):
-     """Compressed Linear Layer."""
-
-     def __init__(self, weight=None, bias=None, device=None):
-         super().__init__()
-         if weight is None:
-             self.weight = None
-         elif isinstance(weight, Tensor):
-             self.weight = compress(weight.data.to(device), default_compression_config)
-         else:
-             self.weight = weight
-         self.bias = bias
-
-     def forward(self, input: Tensor) -> Tensor:
-         weight = decompress(self.weight, default_compression_config)
-         if self.bias is None:
-             return F.linear(input.to(weight.dtype), weight)
-         return F.linear(input.to(weight.dtype), weight, self.bias.to(weight.dtype))
-
-
- def get_compressed_list(module, prefix=""):
-     compressed_list = []
-     for attr_str in dir(module):
-         target_attr = getattr(module, attr_str)
-         if type(target_attr) == torch.nn.Linear:
-             full_name = (
-                 f"{prefix}.{attr_str}.weight" if prefix else f"{attr_str}.weight"
-             )
-             compressed_list.append(full_name)
-     for name, child in module.named_children():
-         child_prefix = f"{prefix}.{name}" if prefix else name
-         for each in get_compressed_list(child, child_prefix):
-             compressed_list.append(each)
-     return compressed_list
-
-
- def apply_compressed_weight(module, compressed_state_dict, target_device, prefix=""):
-     for attr_str in dir(module):
-         target_attr = getattr(module, attr_str)
-         if type(target_attr) == torch.nn.Linear:
-             full_name = (
-                 f"{prefix}.{attr_str}.weight" if prefix else f"{attr_str}.weight"
-             )
-             setattr(
-                 module,
-                 attr_str,
-                 CLinear(
-                     compressed_state_dict[full_name], target_attr.bias, target_device
-                 ),
-             )
-     for name, child in module.named_children():
-         child_prefix = f"{prefix}.{name}" if prefix else name
-         apply_compressed_weight(
-             child, compressed_state_dict, target_device, child_prefix
-         )
-
-
- def load_compress_model(
-     model_path: str,
-     device: str,
-     torch_dtype: torch.dtype,
-     use_fast: bool,
-     revision: str = "main",
- ):
-     from accelerate import init_empty_weights
-     from accelerate.utils import set_module_tensor_to_device
-
-     # partially load model
-     tokenizer = AutoTokenizer.from_pretrained(
-         model_path,
-         use_fast=use_fast,
-         trust_remote_code=True,
-         revision=revision,
-     )
-
-     with init_empty_weights():
-         config = AutoConfig.from_pretrained(
-             model_path,
-             low_cpu_mem_usage=True,
-             torch_dtype=torch_dtype,
-             trust_remote_code=True,
-             revision=revision,
-         )
-         model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
-         linear_weights = get_compressed_list(model)
-
-     if os.path.exists(model_path):
-         # `model_path` is a local folder
-         base_pattern = os.path.join(model_path, "pytorch_model*.bin")
-     else:
-         # `model_path` is a cached Hugging Face repo
-         model_path = snapshot_download(model_path, revision=revision)
-         base_pattern = os.path.join(model_path, "pytorch_model*.bin")
-
-     files = glob.glob(base_pattern)
-
-     compressed_state_dict = {}
-
-     for filename in tqdm(files):
-         tmp_state_dict = torch.load(filename, map_location=torch.device(device))
-         for name in tmp_state_dict:
-             if name in linear_weights:
-                 tensor = tmp_state_dict[name].to(device).data.to(torch_dtype)
-                 compressed_state_dict[name] = compress(
-                     tensor, default_compression_config
-                 )
-             else:
-                 compressed_state_dict[name] = tmp_state_dict[name].to(device)
-             tmp_state_dict[name] = None
-             tensor = None
-             gc.collect()
-             empty_cache()
-
-     for name in model.state_dict():
-         if name not in linear_weights:
-             set_module_tensor_to_device(
-                 model, name, device, value=compressed_state_dict[name]
-             )
-     apply_compressed_weight(model, compressed_state_dict, device)
-
-     model.to(device)
-
-     return model, tokenizer
-
-
- def compress(tensor, config):
-     """Simulate group-wise quantization."""
-     if not config.enabled:
-         return tensor
-
-     group_size, num_bits, group_dim, symmetric = (
-         config.group_size,
-         config.num_bits,
-         config.group_dim,
-         config.symmetric,
-     )
-     assert num_bits <= 8
-
-     original_shape = tensor.shape
-     num_groups = (original_shape[group_dim] + group_size - 1) // group_size
-     new_shape = (
-         original_shape[:group_dim]
-         + (num_groups, group_size)
-         + original_shape[group_dim + 1 :]
-     )
-
-     # Pad
-     pad_len = (group_size - original_shape[group_dim] % group_size) % group_size
-     if pad_len != 0:
-         pad_shape = (
-             original_shape[:group_dim] + (pad_len,) + original_shape[group_dim + 1 :]
-         )
-         tensor = torch.cat(
-             [tensor, torch.zeros(pad_shape, dtype=tensor.dtype, device=tensor.device)],
-             dim=group_dim,
-         )
-     data = tensor.view(new_shape)
-
-     # Quantize
-     if symmetric:
-         B = 2 ** (num_bits - 1) - 1
-         scale = B / torch.max(data.abs(), dim=group_dim + 1, keepdim=True)[0]
-         data = data * scale
-         data = data.clamp_(-B, B).round_().to(torch.int8)
-         return data, scale, original_shape
-     else:
-         B = 2**num_bits - 1
-         mn = torch.min(data, dim=group_dim + 1, keepdim=True)[0]
-         mx = torch.max(data, dim=group_dim + 1, keepdim=True)[0]
-
-         scale = B / (mx - mn)
-         data = data - mn
-         data.mul_(scale)
-
-         data = data.clamp_(0, B).round_().to(torch.uint8)
-         return data, mn, scale, original_shape
-
-
- def decompress(packed_data, config):
-     """Simulate group-wise dequantization."""
-     if not config.enabled:
-         return packed_data
-
-     group_size, _, group_dim, symmetric = (
-         config.group_size,
-         config.num_bits,
-         config.group_dim,
-         config.symmetric,
-     )
-
-     # Dequantize
-     if symmetric:
-         data, scale, original_shape = packed_data
-         data = data / scale
-     else:
-         data, mn, scale, original_shape = packed_data
-         data = data / scale
-         data.add_(mn)
-
-     # Unpad
-     pad_len = (group_size - original_shape[group_dim] % group_size) % group_size
-     if pad_len:
-         padded_original_shape = (
-             original_shape[:group_dim]
-             + (original_shape[group_dim] + pad_len,)
-             + original_shape[group_dim + 1 :]
-         )
-         data = data.reshape(padded_original_shape)
-         indices = [slice(0, x) for x in original_shape]
-         return data[indices].contiguous()
-     else:
-         return data.view(original_shape)
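
Note: the removed `compression.py` simulated group-wise 8-bit quantization of linear-layer weights via `compress`/`decompress` above (pad the grouped dimension, scale each group to the integer range, round, and invert on the fly in `CLinear.forward`). Below is a simplified, self-contained sketch of the symmetric round trip, with groups taken along the last dimension only; it is an illustration under those assumptions, not the removed module's API.

```python
# Simplified sketch of symmetric group-wise quantization in the spirit of the
# removed compress()/decompress(); groups are formed along the last dimension.
import torch
import torch.nn.functional as F


def quantize_groupwise(t: torch.Tensor, group_size: int = 256, num_bits: int = 8):
    orig_len = t.shape[-1]
    pad = (-orig_len) % group_size            # pad so the last dim splits into whole groups
    if pad:
        t = F.pad(t, (0, pad))
    groups = t.reshape(*t.shape[:-1], -1, group_size)
    max_q = 2 ** (num_bits - 1) - 1           # e.g. 127 for 8-bit symmetric
    scale = max_q / groups.abs().amax(dim=-1, keepdim=True).clamp_min(1e-8)
    q = (groups * scale).round_().clamp_(-max_q, max_q).to(torch.int8)
    return q, scale, orig_len


def dequantize_groupwise(q: torch.Tensor, scale: torch.Tensor, orig_len: int):
    groups = q.to(scale.dtype) / scale        # back to float, undo the per-group scale
    flat = groups.reshape(*groups.shape[:-2], -1)
    return flat[..., :orig_len]               # drop the padding


w = torch.randn(4, 300)
q, scale, n = quantize_groupwise(w)
w_hat = dequantize_groupwise(q, scale, n)
assert w_hat.shape == w.shape                 # values are approximate, shape is exact
```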