xinference 0.13.1__py3-none-any.whl → 0.13.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of xinference was flagged as a potentially problematic release.

xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2024-07-12T17:56:13+0800",
11
+ "date": "2024-07-19T19:15:54+0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "5e3f254d48383f37d849dd16db564ad9449e5163",
15
- "version": "0.13.1"
14
+ "full-revisionid": "880929cbbc73e5206ca069591b03d9d16dd858bf",
15
+ "version": "0.13.2"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
xinference/api/restful_api.py CHANGED
@@ -129,6 +129,7 @@ class SpeechRequest(BaseModel):
129
129
  voice: Optional[str]
130
130
  response_format: Optional[str] = "mp3"
131
131
  speed: Optional[float] = 1.0
132
+ stream: Optional[bool] = False
132
133
 
133
134
 
134
135
  class RegisterModelRequest(BaseModel):
@@ -491,6 +492,17 @@ class RESTfulAPI:
491
492
  else None
492
493
  ),
493
494
  )
495
+ self._router.add_api_route(
496
+ "/v1/images/inpainting",
497
+ self.create_inpainting,
498
+ methods=["POST"],
499
+ response_model=ImageList,
500
+ dependencies=(
501
+ [Security(self._auth_service, scopes=["models:read"])]
502
+ if self.is_authenticated()
503
+ else None
504
+ ),
505
+ )
494
506
  self._router.add_api_route(
495
507
  "/v1/chat/completions",
496
508
  self.create_chat_completion,
@@ -1317,8 +1329,14 @@ class RESTfulAPI:
1317
1329
  voice=body.voice,
1318
1330
  response_format=body.response_format,
1319
1331
  speed=body.speed,
1332
+ stream=body.stream,
1320
1333
  )
1321
- return Response(media_type="application/octet-stream", content=out)
1334
+ if body.stream:
1335
+ return EventSourceResponse(
1336
+ media_type="application/octet-stream", content=out
1337
+ )
1338
+ else:
1339
+ return Response(media_type="application/octet-stream", content=out)
1322
1340
  except RuntimeError as re:
1323
1341
  logger.error(re, exc_info=True)
1324
1342
  await self._report_error_event(model_uid, str(re))
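Editor's note: a minimal sketch of exercising the new streaming branch over plain HTTP. The endpoint, field names, and chunked behaviour come from the hunk above; the host, port, and model UID are placeholders for whatever your deployment uses.

    import requests

    # Placeholder endpoint and model UID; 9997 is only the usual default port.
    url = "http://127.0.0.1:9997/v1/audio/speech"
    payload = {
        "model": "my-audio-model-uid",
        "input": "Hello from xinference.",
        "voice": "",
        "response_format": "mp3",
        "speed": 1.0,
        "stream": True,  # new field added in this release
    }

    with requests.post(url, json=payload, stream=True) as resp:
        resp.raise_for_status()
        with open("speech.mp3", "wb") as f:
            for chunk in resp.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)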
@@ -1410,6 +1428,60 @@ class RESTfulAPI:
1410
1428
  await self._report_error_event(model_uid, str(e))
1411
1429
  raise HTTPException(status_code=500, detail=str(e))
1412
1430
 
1431
+ async def create_inpainting(
1432
+ self,
1433
+ model: str = Form(...),
1434
+ image: UploadFile = File(media_type="application/octet-stream"),
1435
+ mask_image: UploadFile = File(media_type="application/octet-stream"),
1436
+ prompt: Optional[Union[str, List[str]]] = Form(None),
1437
+ negative_prompt: Optional[Union[str, List[str]]] = Form(None),
1438
+ n: Optional[int] = Form(1),
1439
+ response_format: Optional[str] = Form("url"),
1440
+ size: Optional[str] = Form(None),
1441
+ kwargs: Optional[str] = Form(None),
1442
+ ) -> Response:
1443
+ model_uid = model
1444
+ try:
1445
+ model_ref = await (await self._get_supervisor_ref()).get_model(model_uid)
1446
+ except ValueError as ve:
1447
+ logger.error(str(ve), exc_info=True)
1448
+ await self._report_error_event(model_uid, str(ve))
1449
+ raise HTTPException(status_code=400, detail=str(ve))
1450
+ except Exception as e:
1451
+ logger.error(e, exc_info=True)
1452
+ await self._report_error_event(model_uid, str(e))
1453
+ raise HTTPException(status_code=500, detail=str(e))
1454
+
1455
+ try:
1456
+ if kwargs is not None:
1457
+ parsed_kwargs = json.loads(kwargs)
1458
+ else:
1459
+ parsed_kwargs = {}
1460
+ im = Image.open(image.file)
1461
+ mask_im = Image.open(mask_image.file)
1462
+ if not size:
1463
+ w, h = im.size
1464
+ size = f"{w}*{h}"
1465
+ image_list = await model_ref.inpainting(
1466
+ image=im,
1467
+ mask_image=mask_im,
1468
+ prompt=prompt,
1469
+ negative_prompt=negative_prompt,
1470
+ n=n,
1471
+ size=size,
1472
+ response_format=response_format,
1473
+ **parsed_kwargs,
1474
+ )
1475
+ return Response(content=image_list, media_type="application/json")
1476
+ except RuntimeError as re:
1477
+ logger.error(re, exc_info=True)
1478
+ await self._report_error_event(model_uid, str(re))
1479
+ raise HTTPException(status_code=400, detail=str(re))
1480
+ except Exception as e:
1481
+ logger.error(e, exc_info=True)
1482
+ await self._report_error_event(model_uid, str(e))
1483
+ raise HTTPException(status_code=500, detail=str(e))
1484
+
1413
1485
  async def create_flexible_infer(self, request: Request) -> Response:
1414
1486
  payload = await request.json()
1415
1487
 
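Editor's note: a sketch of what a request to the new /v1/images/inpainting route looks like, mirroring the Form/File parameters of the handler above. The model UID, file names, and prompt are placeholders, and the printed response shape is an assumption based on the ImageList response model.

    import json

    import requests

    url = "http://127.0.0.1:9997/v1/images/inpainting"
    files = [
        ("model", (None, "my-inpainting-model-uid")),
        ("prompt", (None, "a small wooden door")),
        ("negative_prompt", (None, "blurry, low quality")),
        ("n", (None, "1")),
        ("response_format", (None, "b64_json")),
        ("size", (None, "512*512")),
        ("kwargs", (None, json.dumps({}))),
        ("image", ("image.png", open("image.png", "rb"), "application/octet-stream")),
        ("mask_image", ("mask.png", open("mask.png", "rb"), "application/octet-stream")),
    ]
    resp = requests.post(url, files=files)
    resp.raise_for_status()
    print(resp.json())  # roughly {"created": ..., "data": [{"b64_json": ...}, ...]}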
xinference/client/restful/restful_client.py CHANGED
@@ -294,6 +294,81 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
294
294
  response_data = response.json()
295
295
  return response_data
296
296
 
297
+ def inpainting(
298
+ self,
299
+ image: Union[str, bytes],
300
+ mask_image: Union[str, bytes],
301
+ prompt: str,
302
+ negative_prompt: Optional[str] = None,
303
+ n: int = 1,
304
+ size: Optional[str] = None,
305
+ response_format: str = "url",
306
+ **kwargs,
307
+ ) -> "ImageList":
308
+ """
309
+ Inpaint an image by the input text.
310
+
311
+ Parameters
312
+ ----------
313
+ image: `Union[str, bytes]`
314
+ an image batch to be inpainted (which parts of the image to
315
+ be masked out with `mask_image` and repainted according to `prompt`). For both numpy array and pytorch
316
+ tensor, the expected value range is between `[0, 1]` If it's a tensor or a list or tensors, the
317
+ expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a list of arrays, the
318
+ expected shape should be `(B, H, W, C)` or `(H, W, C)` It can also accept image latents as `image`, but
319
+ if passing latents directly it is not encoded again.
320
+ mask_image: `Union[str, bytes]`
321
+ representing an image batch to mask `image`. White pixels in the mask
322
+ are repainted while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a
323
+ single channel (luminance) before use. If it's a numpy array or pytorch tensor, it should contain one
324
+ color channel (L) instead of 3, so the expected shape for pytorch tensor would be `(B, 1, H, W)`, `(B,
325
+ H, W)`, `(1, H, W)`, `(H, W)`. And for numpy array would be for `(B, H, W, 1)`, `(B, H, W)`, `(H, W,
326
+ 1)`, or `(H, W)`.
327
+ prompt: `str` or `List[str]`
328
+ The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
329
+ negative_prompt (`str` or `List[str]`, *optional*):
330
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
331
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
332
+ less than `1`).
333
+ n: `int`, defaults to 1
334
+ The number of images to generate per prompt. Must be between 1 and 10.
335
+ size: `str`, defaults to None
336
+ The width*height in pixels of the generated image.
337
+ response_format: `str`, defaults to `url`
338
+ The format in which the generated images are returned. Must be one of url or b64_json.
339
+ Returns
340
+ -------
341
+ ImageList
342
+ A list of image objects.
343
+ :param prompt:
344
+ :param image:
345
+ """
346
+ url = f"{self._base_url}/v1/images/inpainting"
347
+ params = {
348
+ "model": self._model_uid,
349
+ "prompt": prompt,
350
+ "negative_prompt": negative_prompt,
351
+ "n": n,
352
+ "size": size,
353
+ "response_format": response_format,
354
+ "kwargs": json.dumps(kwargs),
355
+ }
356
+ files: List[Any] = []
357
+ for key, value in params.items():
358
+ files.append((key, (None, value)))
359
+ files.append(("image", ("image", image, "application/octet-stream")))
360
+ files.append(
361
+ ("mask_image", ("mask_image", mask_image, "application/octet-stream"))
362
+ )
363
+ response = requests.post(url, files=files, headers=self.auth_headers)
364
+ if response.status_code != 200:
365
+ raise RuntimeError(
366
+ f"Failed to inpaint the images, detail: {_get_error_string(response)}"
367
+ )
368
+
369
+ response_data = response.json()
370
+ return response_data
371
+
297
372
 
298
373
  class RESTfulGenerateModelHandle(RESTfulModelHandle):
299
374
  def generate(
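Editor's note: a usage sketch for the new client method, assuming an inpainting-capable image model has already been launched (a launch sketch follows the model_spec.json hunk further down). The UID and file names are placeholders.

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    model = client.get_model("my-inpainting-model-uid")

    with open("photo.png", "rb") as img, open("mask.png", "rb") as mask:
        result = model.inpainting(
            image=img.read(),
            mask_image=mask.read(),
            prompt="replace the masked region with a stone fireplace",
            n=1,
            size="512*512",
            response_format="b64_json",
        )

    # result follows the ImageList shape: {"created": ..., "data": [{"b64_json": ...}]}
    print(len(result["data"]))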
@@ -692,6 +767,7 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
692
767
  voice: str = "",
693
768
  response_format: str = "mp3",
694
769
  speed: float = 1.0,
770
+ stream: bool = False,
695
771
  ):
696
772
  """
697
773
  Generates audio from the input text.
@@ -707,6 +783,8 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
707
783
  The format to audio in.
708
784
  speed: str
709
785
  The speed of the generated audio.
786
+ stream: bool
787
+ Use stream or not.
710
788
 
711
789
  Returns
712
790
  -------
@@ -720,6 +798,7 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
720
798
  "voice": voice,
721
799
  "response_format": response_format,
722
800
  "speed": speed,
801
+ "stream": stream,
723
802
  }
724
803
  response = requests.post(url, json=params, headers=self.auth_headers)
725
804
  if response.status_code != 200:
@@ -727,6 +806,9 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
727
806
  f"Failed to speech the text, detail: {_get_error_string(response)}"
728
807
  )
729
808
 
809
+ if stream:
810
+ return response.iter_content(chunk_size=1024)
811
+
730
812
  return response.content
731
813
 
732
814
 
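Editor's note: the corresponding client-side usage, as a sketch. With stream=False (the default) the method still returns the complete audio as bytes; with stream=True it returns the 1024-byte chunk iterator shown above. The UID is a placeholder.

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    model = client.get_model("my-chattts-model-uid")

    chunks = model.speech("Hello there", response_format="mp3", stream=True)
    with open("hello.mp3", "wb") as f:
        for chunk in chunks:
            f.write(chunk)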
xinference/core/model.py CHANGED
@@ -310,7 +310,7 @@ class ModelActor(xo.StatelessActor):
310
310
  )
311
311
  )
312
312
 
313
- def _to_json_generator(self, gen: types.GeneratorType):
313
+ def _to_generator(self, output_type: str, gen: types.GeneratorType):
314
314
  start_time = time.time()
315
315
  time_to_first_token = None
316
316
  final_usage = None
@@ -318,8 +318,13 @@ class ModelActor(xo.StatelessActor):
318
318
  for v in gen:
319
319
  if time_to_first_token is None:
320
320
  time_to_first_token = (time.time() - start_time) * 1000
321
- final_usage = v.get("usage", None)
322
- v = dict(data=json.dumps(v, ensure_ascii=False))
321
+ if output_type == "json":
322
+ final_usage = v.get("usage", None)
323
+ v = dict(data=json.dumps(v, ensure_ascii=False))
324
+ else:
325
+ assert (
326
+ output_type == "binary"
327
+ ), f"Unknown output type '{output_type}'"
323
328
  yield sse_starlette.sse.ensure_bytes(v, None)
324
329
  except OutOfMemoryError:
325
330
  logger.exception(
@@ -342,7 +347,7 @@ class ModelActor(xo.StatelessActor):
342
347
  )
343
348
  asyncio.run_coroutine_threadsafe(coro, loop=self._loop)
344
349
 
345
- async def _to_json_async_gen(self, gen: types.AsyncGeneratorType):
350
+ async def _to_async_gen(self, output_type: str, gen: types.AsyncGeneratorType):
346
351
  start_time = time.time()
347
352
  time_to_first_token = None
348
353
  final_usage = None
@@ -351,8 +356,13 @@ class ModelActor(xo.StatelessActor):
351
356
  if time_to_first_token is None:
352
357
  time_to_first_token = (time.time() - start_time) * 1000
353
358
  final_usage = v.get("usage", None)
354
- v = await asyncio.to_thread(json.dumps, v)
355
- v = dict(data=v) # noqa: F821
359
+ if output_type == "json":
360
+ v = await asyncio.to_thread(json.dumps, v, ensure_ascii=False)
361
+ v = dict(data=v) # noqa: F821
362
+ else:
363
+ assert (
364
+ output_type == "binary"
365
+ ), f"Unknown output type '{output_type}'"
356
366
  yield await asyncio.to_thread(sse_starlette.sse.ensure_bytes, v, None)
357
367
  except OutOfMemoryError:
358
368
  logger.exception(
@@ -379,8 +389,14 @@ class ModelActor(xo.StatelessActor):
379
389
  )
380
390
  await asyncio.gather(*coros)
381
391
 
392
+ async def _call_wrapper_json(self, fn: Callable, *args, **kwargs):
393
+ return await self._call_wrapper("json", fn, *args, **kwargs)
394
+
395
+ async def _call_wrapper_binary(self, fn: Callable, *args, **kwargs):
396
+ return await self._call_wrapper("binary", fn, *args, **kwargs)
397
+
382
398
  @oom_check
383
- async def _call_wrapper(self, fn: Callable, *args, **kwargs):
399
+ async def _call_wrapper(self, output_type: str, fn: Callable, *args, **kwargs):
384
400
  if self._lock is None:
385
401
  if inspect.iscoroutinefunction(fn):
386
402
  ret = await fn(*args, **kwargs)
@@ -397,16 +413,18 @@ class ModelActor(xo.StatelessActor):
397
413
  raise Exception("Parallel generation is not supported by ggml.")
398
414
 
399
415
  if inspect.isgenerator(ret):
400
- gen = self._to_json_generator(ret)
416
+ gen = self._to_generator(output_type, ret)
401
417
  self._current_generator = weakref.ref(gen)
402
418
  return gen
403
419
  if inspect.isasyncgen(ret):
404
- gen = self._to_json_async_gen(ret)
420
+ gen = self._to_async_gen(output_type, ret)
405
421
  self._current_generator = weakref.ref(gen)
406
422
  return gen
407
- if isinstance(ret, bytes):
423
+ if output_type == "json":
424
+ return await asyncio.to_thread(json_dumps, ret)
425
+ else:
426
+ assert output_type == "binary", f"Unknown output type '{output_type}'"
408
427
  return ret
409
- return await asyncio.to_thread(json_dumps, ret)
410
428
 
411
429
  @log_async(logger=logger)
412
430
  @request_limit
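Editor's note: a stripped-down sketch of the dispatch pattern these wrappers introduce, not the actual ModelActor code. JSON-producing calls (chat, embeddings, images) are serialized before leaving the actor, while binary-producing calls such as speech pass their bytes through unchanged.

    import json
    from typing import Any, Callable


    def call_wrapper(output_type: str, fn: Callable[..., Any], *args, **kwargs) -> Any:
        ret = fn(*args, **kwargs)
        if output_type == "json":
            # e.g. a chat completion dict -> UTF-8 encoded JSON bytes
            return json.dumps(ret, ensure_ascii=False).encode()
        assert output_type == "binary", f"Unknown output type '{output_type}'"
        return ret  # e.g. speech() already returning encoded audio bytes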
@@ -419,11 +437,11 @@ class ModelActor(xo.StatelessActor):
419
437
  else:
420
438
  kwargs.pop("raw_params", None)
421
439
  if hasattr(self._model, "generate"):
422
- return await self._call_wrapper(
440
+ return await self._call_wrapper_json(
423
441
  self._model.generate, prompt, *args, **kwargs
424
442
  )
425
443
  if hasattr(self._model, "async_generate"):
426
- return await self._call_wrapper(
444
+ return await self._call_wrapper_json(
427
445
  self._model.async_generate, prompt, *args, **kwargs
428
446
  )
429
447
  raise AttributeError(f"Model {self._model.model_spec} is not for generate.")
@@ -471,7 +489,7 @@ class ModelActor(xo.StatelessActor):
471
489
  queue: Queue[Any] = Queue()
472
490
  ret = self._queue_consumer(queue)
473
491
  await self._scheduler_ref.add_request(prompt, queue, *args, **kwargs)
474
- gen = self._to_json_async_gen(ret)
492
+ gen = self._to_async_gen("json", ret)
475
493
  self._current_generator = weakref.ref(gen)
476
494
  return gen
477
495
  else:
@@ -502,12 +520,12 @@ class ModelActor(xo.StatelessActor):
502
520
  else:
503
521
  kwargs.pop("raw_params", None)
504
522
  if hasattr(self._model, "chat"):
505
- response = await self._call_wrapper(
523
+ response = await self._call_wrapper_json(
506
524
  self._model.chat, prompt, *args, **kwargs
507
525
  )
508
526
  return response
509
527
  if hasattr(self._model, "async_chat"):
510
- response = await self._call_wrapper(
528
+ response = await self._call_wrapper_json(
511
529
  self._model.async_chat, prompt, *args, **kwargs
512
530
  )
513
531
  return response
@@ -543,7 +561,7 @@ class ModelActor(xo.StatelessActor):
543
561
  @request_limit
544
562
  async def create_embedding(self, input: Union[str, List[str]], *args, **kwargs):
545
563
  if hasattr(self._model, "create_embedding"):
546
- return await self._call_wrapper(
564
+ return await self._call_wrapper_json(
547
565
  self._model.create_embedding, input, *args, **kwargs
548
566
  )
549
567
 
@@ -565,7 +583,7 @@ class ModelActor(xo.StatelessActor):
565
583
  **kwargs,
566
584
  ):
567
585
  if hasattr(self._model, "rerank"):
568
- return await self._call_wrapper(
586
+ return await self._call_wrapper_json(
569
587
  self._model.rerank,
570
588
  documents,
571
589
  query,
@@ -590,7 +608,7 @@ class ModelActor(xo.StatelessActor):
590
608
  timestamp_granularities: Optional[List[str]] = None,
591
609
  ):
592
610
  if hasattr(self._model, "transcriptions"):
593
- return await self._call_wrapper(
611
+ return await self._call_wrapper_json(
594
612
  self._model.transcriptions,
595
613
  audio,
596
614
  language,
@@ -615,7 +633,7 @@ class ModelActor(xo.StatelessActor):
615
633
  timestamp_granularities: Optional[List[str]] = None,
616
634
  ):
617
635
  if hasattr(self._model, "translations"):
618
- return await self._call_wrapper(
636
+ return await self._call_wrapper_json(
619
637
  self._model.translations,
620
638
  audio,
621
639
  language,
@@ -630,16 +648,23 @@ class ModelActor(xo.StatelessActor):
630
648
 
631
649
  @log_async(logger=logger)
632
650
  @request_limit
651
+ @xo.generator
633
652
  async def speech(
634
- self, input: str, voice: str, response_format: str = "mp3", speed: float = 1.0
653
+ self,
654
+ input: str,
655
+ voice: str,
656
+ response_format: str = "mp3",
657
+ speed: float = 1.0,
658
+ stream: bool = False,
635
659
  ):
636
660
  if hasattr(self._model, "speech"):
637
- return await self._call_wrapper(
661
+ return await self._call_wrapper_binary(
638
662
  self._model.speech,
639
663
  input,
640
664
  voice,
641
665
  response_format,
642
666
  speed,
667
+ stream,
643
668
  )
644
669
  raise AttributeError(
645
670
  f"Model {self._model.model_spec} is not for creating speech."
@@ -657,7 +682,7 @@ class ModelActor(xo.StatelessActor):
657
682
  **kwargs,
658
683
  ):
659
684
  if hasattr(self._model, "text_to_image"):
660
- return await self._call_wrapper(
685
+ return await self._call_wrapper_json(
661
686
  self._model.text_to_image,
662
687
  prompt,
663
688
  n,
@@ -682,7 +707,7 @@ class ModelActor(xo.StatelessActor):
682
707
  **kwargs,
683
708
  ):
684
709
  if hasattr(self._model, "image_to_image"):
685
- return await self._call_wrapper(
710
+ return await self._call_wrapper_json(
686
711
  self._model.image_to_image,
687
712
  image,
688
713
  prompt,
@@ -697,6 +722,35 @@ class ModelActor(xo.StatelessActor):
697
722
  f"Model {self._model.model_spec} is not for creating image."
698
723
  )
699
724
 
725
+ async def inpainting(
726
+ self,
727
+ image: "PIL.Image",
728
+ mask_image: "PIL.Image",
729
+ prompt: str,
730
+ negative_prompt: str,
731
+ n: int = 1,
732
+ size: str = "1024*1024",
733
+ response_format: str = "url",
734
+ *args,
735
+ **kwargs,
736
+ ):
737
+ if hasattr(self._model, "inpainting"):
738
+ return await self._call_wrapper(
739
+ self._model.inpainting,
740
+ image,
741
+ mask_image,
742
+ prompt,
743
+ negative_prompt,
744
+ n,
745
+ size,
746
+ response_format,
747
+ *args,
748
+ **kwargs,
749
+ )
750
+ raise AttributeError(
751
+ f"Model {self._model.model_spec} is not for creating image."
752
+ )
753
+
700
754
  @log_async(logger=logger)
701
755
  @request_limit
702
756
  async def infer(
xinference/model/audio/chattts.py CHANGED
@@ -48,7 +48,12 @@ class ChatTTSModel:
48
48
  self._model.load(source="custom", custom_path=self._model_path, compile=True)
49
49
 
50
50
  def speech(
51
- self, input: str, voice: str, response_format: str = "mp3", speed: float = 1.0
51
+ self,
52
+ input: str,
53
+ voice: str,
54
+ response_format: str = "mp3",
55
+ speed: float = 1.0,
56
+ stream: bool = False,
52
57
  ):
53
58
  import ChatTTS
54
59
  import numpy as np
@@ -74,11 +79,38 @@ class ChatTTSModel:
74
79
  )
75
80
 
76
81
  assert self._model is not None
77
- wavs = self._model.infer([input], params_infer_code=params_infer_code)
78
-
79
- # Save the generated audio
80
- with BytesIO() as out:
81
- torchaudio.save(
82
- out, torch.from_numpy(wavs[0]), 24000, format=response_format
82
+ if stream:
83
+ iter = self._model.infer(
84
+ [input], params_infer_code=params_infer_code, stream=True
83
85
  )
84
- return out.getvalue()
86
+
87
+ def _generator():
88
+ with BytesIO() as out:
89
+ writer = torchaudio.io.StreamWriter(out, format=response_format)
90
+ writer.add_audio_stream(sample_rate=24000, num_channels=1)
91
+ i = 0
92
+ last_pos = 0
93
+ with writer.open():
94
+ for it in iter:
95
+ for itt in it:
96
+ for chunk in itt:
97
+ chunk = np.array([chunk]).transpose()
98
+ writer.write_audio_chunk(i, torch.from_numpy(chunk))
99
+ new_last_pos = out.tell()
100
+ if new_last_pos != last_pos:
101
+ out.seek(last_pos)
102
+ encoded_bytes = out.read()
103
+ print(len(encoded_bytes))
104
+ yield encoded_bytes
105
+ last_pos = new_last_pos
106
+
107
+ return _generator()
108
+ else:
109
+ wavs = self._model.infer([input], params_infer_code=params_infer_code)
110
+
111
+ # Save the generated audio
112
+ with BytesIO() as out:
113
+ torchaudio.save(
114
+ out, torch.from_numpy(wavs[0]), 24000, format=response_format
115
+ )
116
+ return out.getvalue()
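Editor's note: an isolated sketch of the incremental-encoding trick used above, with a synthetic waveform instead of ChatTTS output. It assumes torchaudio's StreamWriter (which needs an FFmpeg-backed torchaudio build); each iteration reads back only the bytes encoded since the previous one, which is what the generator above yields to the caller.

    from io import BytesIO

    import numpy as np
    import torch
    import torchaudio

    buf = BytesIO()
    writer = torchaudio.io.StreamWriter(buf, format="mp3")
    writer.add_audio_stream(sample_rate=24000, num_channels=1)

    # One second of a 440 Hz tone, split into chunks to mimic streaming inference.
    wave = np.sin(2 * np.pi * 440 * np.arange(24000) / 24000).astype(np.float32)

    last_pos = 0
    with writer.open():
        for chunk in np.array_split(wave, 10):
            # StreamWriter expects (frames, channels)
            writer.write_audio_chunk(0, torch.from_numpy(chunk).reshape(-1, 1))
            new_pos = buf.tell()
            if new_pos != last_pos:
                buf.seek(last_pos)
                encoded = buf.read()  # yield these bytes downstream
                last_pos = new_pos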
xinference/model/image/core.py CHANGED
@@ -45,6 +45,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
45
45
  model_id: str
46
46
  model_revision: str
47
47
  model_hub: str = "huggingface"
48
+ ability: Optional[str]
48
49
  controlnet: Optional[List["ImageModelFamilyV1"]]
49
50
 
50
51
 
@@ -71,6 +72,7 @@ class ImageModelDescription(ModelDescription):
71
72
  "model_name": self._model_spec.model_name,
72
73
  "model_family": self._model_spec.model_family,
73
74
  "model_revision": self._model_spec.model_revision,
75
+ "ability": self._model_spec.ability,
74
76
  "controlnet": controlnet,
75
77
  }
76
78
 
@@ -234,6 +236,7 @@ def create_image_model_instance(
234
236
  lora_model_paths=lora_model,
235
237
  lora_load_kwargs=lora_load_kwargs,
236
238
  lora_fuse_kwargs=lora_fuse_kwargs,
239
+ ability=model_spec.ability,
237
240
  **kwargs,
238
241
  )
239
242
  model_description = ImageModelDescription(
xinference/model/image/model_spec.json CHANGED
@@ -92,5 +92,19 @@
92
92
  "model_revision": "62134b9d8e703b5d6f74f1534457287a8bba77ef"
93
93
  }
94
94
  ]
95
+ },
96
+ {
97
+ "model_name": "stable-diffusion-inpainting",
98
+ "model_family": "stable_diffusion",
99
+ "model_id": "runwayml/stable-diffusion-inpainting",
100
+ "model_revision": "51388a731f57604945fddd703ecb5c50e8e7b49d",
101
+ "ability": "inpainting"
102
+ },
103
+ {
104
+ "model_name": "stable-diffusion-2-inpainting",
105
+ "model_family": "stable_diffusion",
106
+ "model_id": "stabilityai/stable-diffusion-2-inpainting",
107
+ "model_revision": "81a84f49b15956b60b4272a405ad3daef3da4590",
108
+ "ability": "inpainting"
95
109
  }
96
110
  ]
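Editor's note: a sketch of launching one of the new built-in specs through the client; the model name comes from the JSON above, while the host and any extra launch kwargs are deployment-specific placeholders.

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    model_uid = client.launch_model(
        model_name="stable-diffusion-inpainting",
        model_type="image",
    )
    print(model_uid)  # use this UID with the inpainting example shown earlier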
xinference/model/image/stable_diffusion/core.py CHANGED
@@ -16,6 +16,7 @@ import base64
16
16
  import logging
17
17
  import os
18
18
  import re
19
+ import sys
19
20
  import time
20
21
  import uuid
21
22
  from concurrent.futures import ThreadPoolExecutor
@@ -39,6 +40,7 @@ class DiffusionModel:
39
40
  lora_model: Optional[List[LoRA]] = None,
40
41
  lora_load_kwargs: Optional[Dict] = None,
41
42
  lora_fuse_kwargs: Optional[Dict] = None,
43
+ ability: Optional[str] = None,
42
44
  **kwargs,
43
45
  ):
44
46
  self._model_uid = model_uid
@@ -48,6 +50,7 @@ class DiffusionModel:
48
50
  self._lora_model = lora_model
49
51
  self._lora_load_kwargs = lora_load_kwargs or {}
50
52
  self._lora_fuse_kwargs = lora_fuse_kwargs or {}
53
+ self._ability = ability
51
54
  self._kwargs = kwargs
52
55
 
53
56
  def _apply_lora(self):
@@ -64,8 +67,14 @@ class DiffusionModel:
64
67
  logger.info(f"Successfully loaded the LoRA for model {self._model_uid}.")
65
68
 
66
69
  def load(self):
67
- # import torch
68
- from diffusers import AutoPipelineForText2Image
70
+ import torch
71
+
72
+ if self._ability in [None, "text2image", "image2image"]:
73
+ from diffusers import AutoPipelineForText2Image as AutoPipelineModel
74
+ elif self._ability == "inpainting":
75
+ from diffusers import AutoPipelineForInpainting as AutoPipelineModel
76
+ else:
77
+ raise ValueError(f"Unknown ability: {self._ability}")
69
78
 
70
79
  controlnet = self._kwargs.get("controlnet")
71
80
  if controlnet is not None:
@@ -74,12 +83,16 @@ class DiffusionModel:
74
83
  logger.debug("Loading controlnet %s", controlnet)
75
84
  self._kwargs["controlnet"] = ControlNetModel.from_pretrained(controlnet)
76
85
 
77
- self._model = AutoPipelineForText2Image.from_pretrained(
86
+ torch_dtype = self._kwargs.get("torch_dtype")
87
+ if sys.platform != "darwin" and torch_dtype is None:
88
+ # The following params crashes on Mac M2
89
+ self._kwargs["torch_dtype"] = torch.float16
90
+ self._kwargs["use_safetensors"] = True
91
+
92
+ logger.debug("Loading model %s", AutoPipelineModel)
93
+ self._model = AutoPipelineModel.from_pretrained(
78
94
  self._model_path,
79
95
  **self._kwargs,
80
- # The following params crashes on Mac M2
81
- # torch_dtype=torch.float16,
82
- # use_safetensors=True,
83
96
  )
84
97
  self._model = move_model_to_available_device(self._model)
85
98
  # Recommended if your computer has < 64 GB of RAM
@@ -174,3 +187,27 @@ class DiffusionModel:
174
187
  response_format=response_format,
175
188
  **kwargs,
176
189
  )
190
+
191
+ def inpainting(
192
+ self,
193
+ image: bytes,
194
+ mask_image: bytes,
195
+ prompt: Optional[Union[str, List[str]]] = None,
196
+ negative_prompt: Optional[Union[str, List[str]]] = None,
197
+ n: int = 1,
198
+ size: str = "1024*1024",
199
+ response_format: str = "url",
200
+ **kwargs,
201
+ ):
202
+ width, height = map(int, re.split(r"[^\d]+", size))
203
+ return self._call_model(
204
+ image=image,
205
+ mask_image=mask_image,
206
+ prompt=prompt,
207
+ negative_prompt=negative_prompt,
208
+ height=height,
209
+ width=width,
210
+ num_images_per_prompt=n,
211
+ response_format=response_format,
212
+ **kwargs,
213
+ )
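Editor's note: for reference, a minimal standalone sketch of what the wrapped pipeline call boils down to when ability is "inpainting", using diffusers directly. The model id, prompt, and file names are placeholders, and fp16 is assumed on a CUDA machine, matching the non-macOS defaults set above.

    import torch
    from diffusers import AutoPipelineForInpainting
    from PIL import Image

    pipe = AutoPipelineForInpainting.from_pretrained(
        "runwayml/stable-diffusion-inpainting",  # or a local cache path
        torch_dtype=torch.float16,
        use_safetensors=True,
    ).to("cuda")

    image = Image.open("photo.png").convert("RGB")
    mask = Image.open("mask.png").convert("L")  # white = repaint, black = keep

    result = pipe(
        prompt="a stone fireplace",
        image=image,
        mask_image=mask,
        height=512,
        width=512,
        num_images_per_prompt=1,
    )
    result.images[0].save("inpainted.png")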
xinference/model/llm/llm_family.json CHANGED
@@ -983,6 +983,65 @@
983
983
  ]
984
984
  }
985
985
  },
986
+ {
987
+ "version": 1,
988
+ "context_length": 131072,
989
+ "model_name": "codegeex4",
990
+ "model_lang": [
991
+ "en",
992
+ "zh"
993
+ ],
994
+ "model_ability": [
995
+ "chat"
996
+ ],
997
+ "model_description": "the open-source version of the latest CodeGeeX4 model series",
998
+ "model_specs": [
999
+ {
1000
+ "model_format": "pytorch",
1001
+ "model_size_in_billions": 9,
1002
+ "quantizations": [
1003
+ "4-bit",
1004
+ "8-bit",
1005
+ "none"
1006
+ ],
1007
+ "model_id": "THUDM/codegeex4-all-9b",
1008
+ "model_revision": "8c4ec1d2f2888412640825a7aa23355939a8f4c6"
1009
+ },
1010
+ {
1011
+ "model_format": "ggufv2",
1012
+ "model_size_in_billions": 9,
1013
+ "quantizations": [
1014
+ "IQ2_M",
1015
+ "IQ3_M",
1016
+ "Q4_K_M",
1017
+ "Q5_K_M",
1018
+ "Q6_K_L",
1019
+ "Q8_0"
1020
+ ],
1021
+ "model_file_name_template": "codegeex4-all-9b-{quantization}.gguf",
1022
+ "model_id": "THUDM/codegeex4-all-9b-GGUF",
1023
+ "model_revision": "6a04071c54c943949826d4815ee00717ed8cf153"
1024
+ }
1025
+ ],
1026
+ "prompt_style": {
1027
+ "style_name": "CHATGLM3",
1028
+ "system_prompt": "",
1029
+ "roles": [
1030
+ "user",
1031
+ "assistant"
1032
+ ],
1033
+ "stop_token_ids": [
1034
+ 151329,
1035
+ 151336,
1036
+ 151338
1037
+ ],
1038
+ "stop": [
1039
+ "<|endoftext|>",
1040
+ "<|user|>",
1041
+ "<|observation|>"
1042
+ ]
1043
+ }
1044
+ },
986
1045
  {
987
1046
  "version": 1,
988
1047
  "context_length": 2048,
@@ -5791,7 +5850,7 @@
5791
5850
  },
5792
5851
  {
5793
5852
  "version": 1,
5794
- "context_length": 204800,
5853
+ "context_length": 32768,
5795
5854
  "model_name": "internlm2-chat",
5796
5855
  "model_lang": [
5797
5856
  "en",
@@ -5839,6 +5898,140 @@
5839
5898
  ]
5840
5899
  }
5841
5900
  },
5901
+ {
5902
+ "version": 1,
5903
+ "context_length": 32768,
5904
+ "model_name": "internlm2.5-chat",
5905
+ "model_lang": [
5906
+ "en",
5907
+ "zh"
5908
+ ],
5909
+ "model_ability": [
5910
+ "chat"
5911
+ ],
5912
+ "model_description": "InternLM2.5 series of the InternLM model.",
5913
+ "model_specs": [
5914
+ {
5915
+ "model_format": "pytorch",
5916
+ "model_size_in_billions": 7,
5917
+ "quantizations": [
5918
+ "none"
5919
+ ],
5920
+ "model_id": "internlm/internlm2_5-7b-chat",
5921
+ "model_revision": "9dc8536a922ab4954726aad1b37fa199004a291a"
5922
+ },
5923
+ {
5924
+ "model_format": "gptq",
5925
+ "model_size_in_billions": 7,
5926
+ "quantizations": [
5927
+ "Int4"
5928
+ ],
5929
+ "model_id": "ModelCloud/internlm-2.5-7b-chat-gptq-4bit",
5930
+ "model_revision": "2e2dda735c326544921a4035bbeb6c6e316a8254"
5931
+ },
5932
+ {
5933
+ "model_format": "ggufv2",
5934
+ "model_size_in_billions": 7,
5935
+ "quantizations": [
5936
+ "q2_k",
5937
+ "q3_k_m",
5938
+ "q4_0",
5939
+ "q4_k_m",
5940
+ "q5_0",
5941
+ "q5_k_m",
5942
+ "q6_k",
5943
+ "q8_0",
5944
+ "fp16"
5945
+ ],
5946
+ "model_id": "internlm/internlm2_5-7b-chat-gguf",
5947
+ "model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf"
5948
+ }
5949
+ ],
5950
+ "prompt_style": {
5951
+ "style_name": "INTERNLM2",
5952
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
5953
+ "roles": [
5954
+ "<|im_start|>user",
5955
+ "<|im_start|>assistant"
5956
+ ],
5957
+ "intra_message_sep": "<|im_end|>",
5958
+ "stop_token_ids": [
5959
+ 2,
5960
+ 92542
5961
+ ],
5962
+ "stop": [
5963
+ "</s>",
5964
+ "<|im_end|>"
5965
+ ]
5966
+ }
5967
+ },
5968
+ {
5969
+ "version": 1,
5970
+ "context_length": 262144,
5971
+ "model_name": "internlm2.5-chat-1m",
5972
+ "model_lang": [
5973
+ "en",
5974
+ "zh"
5975
+ ],
5976
+ "model_ability": [
5977
+ "chat"
5978
+ ],
5979
+ "model_description": "InternLM2.5 series of the InternLM model supports 1M long-context",
5980
+ "model_specs": [
5981
+ {
5982
+ "model_format": "pytorch",
5983
+ "model_size_in_billions": 7,
5984
+ "quantizations": [
5985
+ "none"
5986
+ ],
5987
+ "model_id": "internlm/internlm2_5-7b-chat-1m",
5988
+ "model_revision": "8d1a709a04d71440ef3df6ebbe204672f411c8b6"
5989
+ },
5990
+ {
5991
+ "model_format": "gptq",
5992
+ "model_size_in_billions": 7,
5993
+ "quantizations": [
5994
+ "Int4"
5995
+ ],
5996
+ "model_id": "ModelCloud/internlm-2.5-7b-chat-1m-gptq-4bit",
5997
+ "model_revision": "022e59cb30f03b271d56178478acb038b2b9b58c"
5998
+ },
5999
+ {
6000
+ "model_format": "ggufv2",
6001
+ "model_size_in_billions": 7,
6002
+ "quantizations": [
6003
+ "q2_k",
6004
+ "q3_k_m",
6005
+ "q4_0",
6006
+ "q4_k_m",
6007
+ "q5_0",
6008
+ "q5_k_m",
6009
+ "q6_k",
6010
+ "q8_0",
6011
+ "fp16"
6012
+ ],
6013
+ "model_id": "internlm/internlm2_5-7b-chat-1m-gguf",
6014
+ "model_file_name_template": "internlm2_5-7b-chat-1m-{quantization}.gguf"
6015
+ }
6016
+ ],
6017
+ "prompt_style": {
6018
+ "style_name": "INTERNLM2",
6019
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
6020
+ "roles": [
6021
+ "<|im_start|>user",
6022
+ "<|im_start|>assistant"
6023
+ ],
6024
+ "intra_message_sep": "<|im_end|>",
6025
+ "stop_token_ids": [
6026
+ 2,
6027
+ 92542
6028
+ ],
6029
+ "stop": [
6030
+ "</s>",
6031
+ "<|im_end|>"
6032
+ ]
6033
+ }
6034
+ },
5842
6035
  {
5843
6036
  "version":1,
5844
6037
  "context_length":2048,
@@ -6192,6 +6385,52 @@
6192
6385
  ],
6193
6386
  "model_id": "google/gemma-2-27b-it"
6194
6387
  },
6388
+ {
6389
+ "model_format": "ggufv2",
6390
+ "model_size_in_billions": 9,
6391
+ "quantizations": [
6392
+ "Q2_K",
6393
+ "Q2_K_L",
6394
+ "Q3_K_L",
6395
+ "Q3_K_M",
6396
+ "Q3_K_S",
6397
+ "Q4_K_L",
6398
+ "Q4_K_M",
6399
+ "Q4_K_S",
6400
+ "Q5_K_L",
6401
+ "Q5_K_M",
6402
+ "Q5_K_S",
6403
+ "Q6_K",
6404
+ "Q6_K_L",
6405
+ "Q8_0",
6406
+ "f32"
6407
+ ],
6408
+ "model_id": "bartowski/gemma-2-9b-it-GGUF",
6409
+ "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf"
6410
+ },
6411
+ {
6412
+ "model_format": "ggufv2",
6413
+ "model_size_in_billions": 27,
6414
+ "quantizations": [
6415
+ "Q2_K",
6416
+ "Q2_K_L",
6417
+ "Q3_K_L",
6418
+ "Q3_K_M",
6419
+ "Q3_K_S",
6420
+ "Q4_K_L",
6421
+ "Q4_K_M",
6422
+ "Q4_K_S",
6423
+ "Q5_K_L",
6424
+ "Q5_K_M",
6425
+ "Q5_K_S",
6426
+ "Q6_K",
6427
+ "Q6_K_L",
6428
+ "Q8_0",
6429
+ "f32"
6430
+ ],
6431
+ "model_id": "bartowski/gemma-2-27b-it-GGUF",
6432
+ "model_file_name_template": "gemma-2-27b-it-{quantization}.gguf"
6433
+ },
6195
6434
  {
6196
6435
  "model_format": "mlx",
6197
6436
  "model_size_in_billions": 9,
xinference/model/llm/llm_family.py CHANGED
@@ -554,16 +554,36 @@ def _get_cache_dir(
554
554
  quant_suffix = q
555
555
  break
556
556
 
557
- cache_dir_name = (
557
+ # some model name includes ".", e.g. qwen1.5-chat
558
+ # if the model does not require trust_remote_code, it's OK
559
+ # because no need to import modeling_xxx.py from the path
560
+ # but when the model need to trust_remote_code,
561
+ # e.g. internlm2.5-chat, the import will fail,
562
+ # but before the model may have been downloaded,
563
+ # thus we check it first, if exist, return it,
564
+ # otherwise, we replace the "." with "_" in model name
565
+ old_cache_dir_name = (
558
566
  f"{llm_family.model_name}-{llm_spec.model_format}"
559
567
  f"-{llm_spec.model_size_in_billions}b"
560
568
  )
561
569
  if quant_suffix:
562
- cache_dir_name += f"-{quant_suffix}"
563
- cache_dir = os.path.realpath(os.path.join(XINFERENCE_CACHE_DIR, cache_dir_name))
564
- if create_if_not_exist and not os.path.exists(cache_dir):
565
- os.makedirs(cache_dir, exist_ok=True)
566
- return cache_dir
570
+ old_cache_dir_name += f"-{quant_suffix}"
571
+ old_cache_dir = os.path.realpath(
572
+ os.path.join(XINFERENCE_CACHE_DIR, old_cache_dir_name)
573
+ )
574
+ if os.path.exists(old_cache_dir):
575
+ return old_cache_dir
576
+ else:
577
+ cache_dir_name = (
578
+ f"{llm_family.model_name.replace('.', '_')}-{llm_spec.model_format}"
579
+ f"-{llm_spec.model_size_in_billions}b"
580
+ )
581
+ if quant_suffix:
582
+ cache_dir_name += f"-{quant_suffix}"
583
+ cache_dir = os.path.realpath(os.path.join(XINFERENCE_CACHE_DIR, cache_dir_name))
584
+ if create_if_not_exist and not os.path.exists(cache_dir):
585
+ os.makedirs(cache_dir, exist_ok=True)
586
+ return cache_dir
567
587
 
568
588
 
569
589
  def _get_meta_path(
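Editor's note: a worked illustration of the naming logic above, not a verbatim excerpt. For internlm2.5-chat in pytorch format at 7B, the legacy dotted directory is reused if it already exists on disk; fresh downloads get the sanitized name so trust_remote_code imports of modeling_*.py are not broken by the dot in the path.

    model_name, model_format, size_b = "internlm2.5-chat", "pytorch", 7

    old_name = f"{model_name}-{model_format}-{size_b}b"
    # -> "internlm2.5-chat-pytorch-7b"   (checked first, reused if present)

    new_name = f"{model_name.replace('.', '_')}-{model_format}-{size_b}b"
    # -> "internlm2_5-chat-pytorch-7b"   (used for new downloads)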
xinference/model/llm/llm_family_modelscope.json CHANGED
@@ -688,6 +688,66 @@
688
688
  ]
689
689
  }
690
690
  },
691
+ {
692
+ "version": 1,
693
+ "context_length": 131072,
694
+ "model_name": "codegeex4",
695
+ "model_lang": [
696
+ "en",
697
+ "zh"
698
+ ],
699
+ "model_ability": [
700
+ "chat"
701
+ ],
702
+ "model_description": "the open-source version of the latest CodeGeeX4 model series",
703
+ "model_specs": [
704
+ {
705
+ "model_format": "pytorch",
706
+ "model_size_in_billions": 9,
707
+ "quantizations": [
708
+ "4-bit",
709
+ "8-bit",
710
+ "none"
711
+ ],
712
+ "model_id": "ZhipuAI/codegeex4-all-9b",
713
+ "model_hub": "modelscope",
714
+ "model_revision": "master"
715
+ },
716
+ {
717
+ "model_format": "ggufv2",
718
+ "model_size_in_billions": 9,
719
+ "quantizations": [
720
+ "IQ2_M",
721
+ "IQ3_M",
722
+ "Q4_K_M",
723
+ "Q5_K_M",
724
+ "Q6_K_L",
725
+ "Q8_0"
726
+ ],
727
+ "model_file_name_template": "codegeex4-all-9b-{quantization}.gguf",
728
+ "model_id": "ZhipuAI/codegeex4-all-9b-GGUF",
729
+ "model_hub": "modelscope"
730
+ }
731
+ ],
732
+ "prompt_style": {
733
+ "style_name": "CHATGLM3",
734
+ "system_prompt": "",
735
+ "roles": [
736
+ "user",
737
+ "assistant"
738
+ ],
739
+ "stop_token_ids": [
740
+ 151329,
741
+ 151336,
742
+ 151338
743
+ ],
744
+ "stop": [
745
+ "<|endoftext|>",
746
+ "<|user|>",
747
+ "<|observation|>"
748
+ ]
749
+ }
750
+ },
691
751
  {
692
752
  "version": 1,
693
753
  "context_length": 2048,
@@ -928,6 +988,88 @@
928
988
  ]
929
989
  }
930
990
  },
991
+ {
992
+ "version": 1,
993
+ "context_length": 32768,
994
+ "model_name": "internlm2.5-chat",
995
+ "model_lang": [
996
+ "en",
997
+ "zh"
998
+ ],
999
+ "model_ability": [
1000
+ "chat"
1001
+ ],
1002
+ "model_description": "InternLM2.5 series of the InternLM model.",
1003
+ "model_specs": [
1004
+ {
1005
+ "model_format": "pytorch",
1006
+ "model_size_in_billions": 7,
1007
+ "quantizations": [
1008
+ "none"
1009
+ ],
1010
+ "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat",
1011
+ "model_hub": "modelscope"
1012
+ }
1013
+ ],
1014
+ "prompt_style": {
1015
+ "style_name": "INTERNLM2",
1016
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
1017
+ "roles": [
1018
+ "<|im_start|>user",
1019
+ "<|im_start|>assistant"
1020
+ ],
1021
+ "intra_message_sep": "<|im_end|>",
1022
+ "stop_token_ids": [
1023
+ 2,
1024
+ 92542
1025
+ ],
1026
+ "stop": [
1027
+ "</s>",
1028
+ "<|im_end|>"
1029
+ ]
1030
+ }
1031
+ },
1032
+ {
1033
+ "version": 1,
1034
+ "context_length": 262144,
1035
+ "model_name": "internlm2.5-chat-1m",
1036
+ "model_lang": [
1037
+ "en",
1038
+ "zh"
1039
+ ],
1040
+ "model_ability": [
1041
+ "chat"
1042
+ ],
1043
+ "model_description": "InternLM2.5 series of the InternLM model supports 1M long-context",
1044
+ "model_specs": [
1045
+ {
1046
+ "model_format": "pytorch",
1047
+ "model_size_in_billions": 7,
1048
+ "quantizations": [
1049
+ "none"
1050
+ ],
1051
+ "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat-1m",
1052
+ "model_hub": "modelscope"
1053
+ }
1054
+ ],
1055
+ "prompt_style": {
1056
+ "style_name": "INTERNLM2",
1057
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
1058
+ "roles": [
1059
+ "<|im_start|>user",
1060
+ "<|im_start|>assistant"
1061
+ ],
1062
+ "intra_message_sep": "<|im_end|>",
1063
+ "stop_token_ids": [
1064
+ 2,
1065
+ 92542
1066
+ ],
1067
+ "stop": [
1068
+ "</s>",
1069
+ "<|im_end|>"
1070
+ ]
1071
+ }
1072
+ },
931
1073
  {
932
1074
  "version": 1,
933
1075
  "context_length": 100000,
@@ -3799,6 +3941,29 @@
3799
3941
  ],
3800
3942
  "model_id": "AI-ModelScope/gemma-2-27b-it",
3801
3943
  "model_hub": "modelscope"
3944
+ },
3945
+ {
3946
+ "model_format": "ggufv2",
3947
+ "model_size_in_billions": 9,
3948
+ "quantizations": [
3949
+ "Q2_K",
3950
+ "Q3_K_L",
3951
+ "Q3_K_M",
3952
+ "Q3_K_S",
3953
+ "Q4_K_L",
3954
+ "Q4_K_M",
3955
+ "Q4_K_S",
3956
+ "Q5_K_L",
3957
+ "Q5_K_M",
3958
+ "Q5_K_S",
3959
+ "Q6_K",
3960
+ "Q6_K_L",
3961
+ "Q8_0",
3962
+ "f32"
3963
+ ],
3964
+ "model_id": "LLM-Research/gemma-2-9b-it-GGUF",
3965
+ "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf",
3966
+ "model_hub": "modelscope"
3802
3967
  }
3803
3968
  ],
3804
3969
  "prompt_style": {
xinference/model/llm/sglang/core.py CHANGED
@@ -269,8 +269,13 @@ class SGLANGModel(LLM):
269
269
  )
270
270
  stream = sanitized_generate_config.pop("stream")
271
271
  stream_options = sanitized_generate_config.pop("stream_options")
272
- if isinstance(stream_options, dict):
273
- include_usage = stream_options.pop("include_usage", False)
272
+
273
+ include_usage = (
274
+ stream_options.pop("include_usage")
275
+ if isinstance(stream_options, dict)
276
+ else False
277
+ )
278
+
274
279
  request_id = str(uuid.uuid1())
275
280
  state = pipeline.run(
276
281
  question=prompt,
xinference/model/llm/vllm/core.py CHANGED
@@ -112,6 +112,8 @@ VLLM_SUPPORTED_CHAT_MODELS = [
112
112
  "internlm-chat-8k",
113
113
  "internlm-chat-20b",
114
114
  "internlm2-chat",
115
+ "internlm2.5-chat",
116
+ "internlm2.5-chat-1m",
115
117
  "qwen-chat",
116
118
  "Yi-chat",
117
119
  "Yi-1.5-chat",
@@ -127,6 +129,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
127
129
  "chatglm3-128k",
128
130
  "glm4-chat",
129
131
  "glm4-chat-1m",
132
+ "codegeex4",
130
133
  "deepseek-chat",
131
134
  "deepseek-coder-instruct",
132
135
  ]
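Editor's note: with internlm2.5-chat and codegeex4 added to the supported lists, they can be launched like any other built-in chat model. A sketch assuming the 0.13-era client API; the host, engine selection, and generate config are deployment-specific placeholders.

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    uid = client.launch_model(
        model_name="internlm2.5-chat",
        model_format="pytorch",
        model_size_in_billions=7,
        quantization="none",
        # model_engine="vLLM",  # pass an engine here if your client version requires one
    )
    model = client.get_model(uid)
    reply = model.chat("Summarize the InternLM2.5 release in one sentence.")
    print(reply["choices"][0]["message"]["content"])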
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xinference
3
- Version: 0.13.1
3
+ Version: 0.13.2
4
4
  Summary: Model Serving Made Easy
5
5
  Home-page: https://github.com/xorbitsai/inference
6
6
  Author: Qin Xuye
@@ -72,6 +72,7 @@ Requires-Dist: nemo-text-processing ; extra == 'all'
72
72
  Requires-Dist: WeTextProcessing ; extra == 'all'
73
73
  Requires-Dist: librosa ; extra == 'all'
74
74
  Requires-Dist: xxhash ; extra == 'all'
75
+ Requires-Dist: torchaudio ; extra == 'all'
75
76
  Requires-Dist: ChatTTS >0.1 ; extra == 'all'
76
77
  Requires-Dist: boto3 <1.28.65,>=1.28.55 ; extra == 'all'
77
78
  Requires-Dist: tensorizer ~=2.9.0 ; extra == 'all'
@@ -86,6 +87,7 @@ Requires-Dist: nemo-text-processing ; extra == 'audio'
86
87
  Requires-Dist: WeTextProcessing ; extra == 'audio'
87
88
  Requires-Dist: librosa ; extra == 'audio'
88
89
  Requires-Dist: xxhash ; extra == 'audio'
90
+ Requires-Dist: torchaudio ; extra == 'audio'
89
91
  Requires-Dist: ChatTTS >0.1 ; extra == 'audio'
90
92
  Provides-Extra: benchmark
91
93
  Requires-Dist: psutil ; extra == 'benchmark'
@@ -1,6 +1,6 @@
1
1
  xinference/__init__.py,sha256=0LgIveLP6CXxoIaSrxhlFyOh0lOqPgJBVcBe0tkWJjc,987
2
2
  xinference/_compat.py,sha256=SQAjZMGxtBIce45qtW7ob7RWzA0zhv2yB3AxT0rb0uU,1778
3
- xinference/_version.py,sha256=uHlOZ7Byu6M1gveNIzmgZapOLatEU9Yocfj3tEKXElM,498
3
+ xinference/_version.py,sha256=npzZYwlsc_rih9EcKTaoGkkL3KLm52-9xfgm8jq_R4A,498
4
4
  xinference/conftest.py,sha256=FF-ZkqkfOxQw4hz_8G7p5aB7gFdsJlr6u2ZdFuuauAA,9744
5
5
  xinference/constants.py,sha256=_uyBB84fgZM64J3mw8_RELVJfm_dgeNRUZF9t9ZuFcM,3541
6
6
  xinference/device_utils.py,sha256=zswJiws3VyTIaNO8z-MOcsJH_UiPoePPiKK5zoNrjTA,3285
@@ -9,7 +9,7 @@ xinference/isolation.py,sha256=uhkzVyL3fSYZSuFexkG6Jm-tRTC5I607uNg000BXAnE,1949
9
9
  xinference/types.py,sha256=mN6lTFGqwFCycCMCwNELtRm2lmvuynvzD7Wwq_NEINY,14255
10
10
  xinference/utils.py,sha256=VSOJMFd9H7kce98OtJZbcDjjpfzRpHAFs8WU0xXPBM8,717
11
11
  xinference/api/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
12
- xinference/api/restful_api.py,sha256=RRU4EItwywwOLvYwEOvbAB9p6eBF_ZX0eDPIDBJE0ag,71697
12
+ xinference/api/restful_api.py,sha256=7n77U-5t0SDzpOOad4SqbFbZx-fSIQJJdM_bLwdozus,74572
13
13
  xinference/api/oauth2/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
14
14
  xinference/api/oauth2/auth_service.py,sha256=74JzB42fbbmBu4Q1dW3A9Fp_N7167KgRGB42Z0NHjAM,6119
15
15
  xinference/api/oauth2/types.py,sha256=K923sv_XySIUtM2Eozl9IG082IJcDOS5SFLrPZ5ELBg,996
@@ -18,14 +18,14 @@ xinference/client/__init__.py,sha256=Gc4HOzAy_1cic5kXlso7hahYgw89CKvZSJDicEU461k
18
18
  xinference/client/common.py,sha256=iciZRs5YjM2gYsXnwACPMaiBZp4_XpawWwfym0Iyu40,1617
19
19
  xinference/client/handlers.py,sha256=3gd9C7u4URbcVdR6Eyv8cpEZ175Ll4q_jGL07CnEIpg,648
20
20
  xinference/client/restful/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
21
- xinference/client/restful/restful_client.py,sha256=3-OxNq9hoxbUrKbpDq0kCi3GssV_BZeenzVDlCeRrvE,50536
21
+ xinference/client/restful/restful_client.py,sha256=fMgazFQRSMefUx0_40Q_9c3o7mf0G39D8HmlkQ9KFhs,54304
22
22
  xinference/core/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
23
23
  xinference/core/cache_tracker.py,sha256=2hk8ANOYruhxAt4MPz482tYEQcvYBh_B7sq0eYd0rTU,6963
24
24
  xinference/core/chat_interface.py,sha256=7SOm6Qi-iFh1otycHpn6CpISq2wTLlJzEUngJtOwMIk,19558
25
25
  xinference/core/event.py,sha256=Lkx_-Ohwyzyt-MBbkrZy9N-7aeYs-wux0fDtZpa2SJY,1632
26
26
  xinference/core/image_interface.py,sha256=G2iK24auEN4MrLkPlu1CAA_gf-BQrGQTjazi_FYqIxE,8825
27
27
  xinference/core/metrics.py,sha256=ScmTG15Uq3h_ob72ybZSMWdnk8P4sUZFcm60f4ikSXc,2631
28
- xinference/core/model.py,sha256=2f0eaKxW0Vk0_or4hnEPrZDKxvBk36tsEcqbDAnw500,24951
28
+ xinference/core/model.py,sha256=QWz9LeUyWwwlgxzE0JQmsNqaMc1uwtU_Q3wm6H430rw,26778
29
29
  xinference/core/resource.py,sha256=FQ0aRt3T4ZQo0P6CZZf5QUKHiCsr5llBvKb1f7wfnxg,1611
30
30
  xinference/core/scheduler.py,sha256=e-fhhMeWmVdx_37sNDf2BOkvHt_17wclNcby7DcUNso,15627
31
31
  xinference/core/status_guard.py,sha256=fF5hisvfn6es9DV6Z6RRD6V_S_uLcb8lHM6PArGgb04,2820
@@ -47,7 +47,7 @@ xinference/model/__init__.py,sha256=IRC3ojiqYkVLIK_xsIxYeKypEeeTTdrovnVzK_4L4eg,
47
47
  xinference/model/core.py,sha256=5dr7y2cq2OS3aFgqIIR3uQbT1ln3xiolUsbXgu2dHGw,3999
48
48
  xinference/model/utils.py,sha256=NGIXgpkUY0dXGxnh-FsfeNq6OS9SPwBzNfASLXWCqUo,15146
49
49
  xinference/model/audio/__init__.py,sha256=QyQwELIYk7DuD5Hen2q45pLMJ4K8iAnto8zlOA9QUSY,2839
50
- xinference/model/audio/chattts.py,sha256=EISJj6mgppAZwjiPmStvUuM2H9ogXP0FXCD5rrL2AwA,2618
50
+ xinference/model/audio/chattts.py,sha256=JZA_0TR4nMGqJ-2WYqwb8DcjhsTC57D0QlkPBl4v788,3973
51
51
  xinference/model/audio/core.py,sha256=uMkZpd5IIs9WK8K0t2FWiGKagcicSjK20w4USKGSCEw,5708
52
52
  xinference/model/audio/custom.py,sha256=01NTD927pairIBWOo9At6Bjqpo1kdcIn3AVijbOdp7Y,5056
53
53
  xinference/model/audio/model_spec.json,sha256=ueOHO14d8lIzuiExJyPUgC3swYA3CfgOgMiDu5L1cOA,3205
@@ -66,19 +66,19 @@ xinference/model/flexible/utils.py,sha256=_GlEarRHKPAxT7o6N39VOd9sB580zKzdSktqjb
66
66
  xinference/model/flexible/launchers/__init__.py,sha256=x_5s73qABN_94hnf5UyrfyxUObayntD6Gh1UOtctCe8,642
67
67
  xinference/model/flexible/launchers/transformers_launcher.py,sha256=OZeeogDfopRUGhulP4PRJ4fZEJ2D9cfv7lcC2qJBoDE,2012
68
68
  xinference/model/image/__init__.py,sha256=lDtP961bpu6h5TK57kJ531Zoch2xU5DM-Eco_YQne-Y,2780
69
- xinference/model/image/core.py,sha256=U70IcFXEFrow0HyUoc5401z5H01l0dQD5aCdq8s11so,8697
69
+ xinference/model/image/core.py,sha256=zpaiym5t5cWrBOOscvFFBBUD4-YWBU_NZLsyuqeeamA,8809
70
70
  xinference/model/image/custom.py,sha256=nn1iZDTYNz68A2gWFXvUuv__Gx8EGdkz_sHvHnPnSoA,3841
71
- xinference/model/image/model_spec.json,sha256=xSEmKnzi4n2hzu9FspusFjeX6pAd8w05ZgvkMV_9HzE,3178
71
+ xinference/model/image/model_spec.json,sha256=kQMWtQo-Z4tawKdgckYFJz1fvbGnXVSZGQsGwjOxa3M,3681
72
72
  xinference/model/image/model_spec_modelscope.json,sha256=vWAoR1gsexay6jn8vnObslYF3YE5SAfqMcJPkYQ-Wc4,3176
73
73
  xinference/model/image/utils.py,sha256=gxg8jJ2nYaDknzCcSC53WCy1slbB5aWU14AbJbfm6Z4,906
74
74
  xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
75
- xinference/model/image/stable_diffusion/core.py,sha256=ib_ZeSg7hzynmRqSnhjtrVuhoLOgZPrR1ZH2LjBmH2E,6063
75
+ xinference/model/image/stable_diffusion/core.py,sha256=e2kpID5iTUTWuMO01aF7z4uGMpAttF1g1nSnwdDNIz4,7349
76
76
  xinference/model/llm/__init__.py,sha256=D9zXjltqlzKahDiOFYyn_EcLoiw_6tO8bhj3u8wnT0A,11462
77
77
  xinference/model/llm/core.py,sha256=ZAzRGphjRZ2KAdTPADIuqSbVU9dTQrHgLvCEgNP9pOk,8088
78
- xinference/model/llm/llm_family.json,sha256=DzRDfpkBqX4VePEMxdCuExuSnMvqgf7sdMfQs8hIhbQ,178453
79
- xinference/model/llm/llm_family.py,sha256=l1utaKl_XXXNIhFbMHzIn0nLI_8JhMZ2005nZ8u-auM,42038
78
+ xinference/model/llm/llm_family.json,sha256=6ZXDEqlZddhrR9A3lnOtzmEdOMd7rfc4DexRzeJVodw,184400
79
+ xinference/model/llm/llm_family.py,sha256=2XykGoXMIffDIOCI1hefprgPJTOvE80r7Rh6Zosb6dY,42934
80
80
  xinference/model/llm/llm_family_csghub.json,sha256=zWiMlX0mbCvuaR7gZh0qDPRPaswFJ-zKssuN6XuAQ6s,1417
81
- xinference/model/llm/llm_family_modelscope.json,sha256=jqTg0YAdN5Px7v0XTnze6BS2gu-v8iga2Y9DozUG0BI,113046
81
+ xinference/model/llm/llm_family_modelscope.json,sha256=BQR99BYPXxXxq0CnFiVlAEUUeuOLXezCTBVPhdZs1Jg,116982
82
82
  xinference/model/llm/memory.py,sha256=PTD8m6TCZVU1zrwc9wepX9cUjCqAXBENj6X7tjua0to,10207
83
83
  xinference/model/llm/utils.py,sha256=3KkpM-HaI97jAFj5Pb1-Kau3BL8-8d-SypDkKCWFqPs,32655
84
84
  xinference/model/llm/ggml/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
@@ -108,9 +108,9 @@ xinference/model/llm/pytorch/utils.py,sha256=HZhJKQG1O1P1qTpxvVzIjBp-2J8aTRxUmS9
108
108
  xinference/model/llm/pytorch/vicuna.py,sha256=avNOgt9fBjwYzahL-j6-EcQS-7km167h8ttJolnNWnE,2334
109
109
  xinference/model/llm/pytorch/yi_vl.py,sha256=MljT7tpgFIhL6n5rdoS3hmq_u0rtHRE6cxXCseujklQ,10911
110
110
  xinference/model/llm/sglang/__init__.py,sha256=-sjSIQ4K6w-TEzx49kVaWeWC443fnZqODU91GCQ_JNo,581
111
- xinference/model/llm/sglang/core.py,sha256=RGHy6t9n0c4zL6Uha8P7t-qPvisPyulFVHw-8Aq8CJ0,14046
111
+ xinference/model/llm/sglang/core.py,sha256=9c4KgEFswu1Fx3qI4VFszv26902FwIifq9AVzMijDa4,14087
112
112
  xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
113
- xinference/model/llm/vllm/core.py,sha256=j0vqOp295TS1l0O2CNkTfjgwmQ-t5KtX-bFfbgSKlYs,22097
113
+ xinference/model/llm/vllm/core.py,sha256=beZeuCR_wCbIjtU-WWM8q0rVPPPiPLO2VINnvNQfq8w,22165
114
114
  xinference/model/rerank/__init__.py,sha256=BXIL1uu3ZpZHX9bODhW9lxKUXudZE7-OkXFmmM5rpMU,2817
115
115
  xinference/model/rerank/core.py,sha256=qAUwOdRHomn0uCzCw6klDxJSZyIDQ4tvgz9pOPm-0GY,12150
116
116
  xinference/model/rerank/custom.py,sha256=NKk7jA7p4xkuwS5WoOs2SY2wdnoAVpyCjBTvv317bBw,3917
@@ -15428,9 +15428,9 @@ xinference/web/ui/node_modules/yargs-parser/package.json,sha256=BSwbOzgetKXMK4u0
15428
15428
  xinference/web/ui/node_modules/yocto-queue/package.json,sha256=6U1XHQPGXJTqsiFvT953ORihUtXTblZy4fXBWP9qxC0,725
15429
15429
  xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9Ll074uo8OS1zEw0qhA,1206
15430
15430
  xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
15431
- xinference-0.13.1.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
15432
- xinference-0.13.1.dist-info/METADATA,sha256=GmkfFt_HXs3gvuBke75uA__8UL-jAwj41F47KEBUj4E,16633
15433
- xinference-0.13.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15434
- xinference-0.13.1.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
15435
- xinference-0.13.1.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
15436
- xinference-0.13.1.dist-info/RECORD,,
15431
+ xinference-0.13.2.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
15432
+ xinference-0.13.2.dist-info/METADATA,sha256=EmYaz9n8oJHqQSU8Er7kqRuuN01VWaRBLZ8lgQMCMgc,16721
15433
+ xinference-0.13.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15434
+ xinference-0.13.2.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
15435
+ xinference-0.13.2.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
15436
+ xinference-0.13.2.dist-info/RECORD,,