xinference 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +400 -3
- xinference/client/restful/async_restful_client.py +20 -3
- xinference/client/restful/restful_client.py +20 -3
- xinference/constants.py +2 -0
- xinference/core/supervisor.py +111 -49
- xinference/core/worker.py +10 -0
- xinference/deploy/cmdline.py +15 -0
- xinference/model/audio/core.py +26 -6
- xinference/model/audio/indextts2.py +166 -0
- xinference/model/audio/kokoro.py +1 -1
- xinference/model/audio/kokoro_zh.py +124 -0
- xinference/model/audio/model_spec.json +58 -1
- xinference/model/embedding/sentence_transformers/core.py +4 -4
- xinference/model/embedding/vllm/core.py +7 -1
- xinference/model/image/model_spec.json +71 -3
- xinference/model/image/stable_diffusion/core.py +13 -4
- xinference/model/llm/__init__.py +4 -0
- xinference/model/llm/core.py +10 -0
- xinference/model/llm/llama_cpp/core.py +1 -0
- xinference/model/llm/llm_family.json +503 -21
- xinference/model/llm/llm_family.py +1 -0
- xinference/model/llm/mlx/core.py +52 -33
- xinference/model/llm/sglang/core.py +32 -55
- xinference/model/llm/tool_parsers/__init__.py +58 -0
- xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
- xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +190 -0
- xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
- xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
- xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
- xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
- xinference/model/llm/transformers/core.py +1 -1
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
- xinference/model/llm/utils.py +138 -53
- xinference/model/llm/vllm/core.py +95 -78
- xinference/thirdparty/audiotools/__init__.py +10 -0
- xinference/thirdparty/audiotools/core/__init__.py +4 -0
- xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
- xinference/thirdparty/audiotools/core/display.py +194 -0
- xinference/thirdparty/audiotools/core/dsp.py +390 -0
- xinference/thirdparty/audiotools/core/effects.py +647 -0
- xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
- xinference/thirdparty/audiotools/core/loudness.py +320 -0
- xinference/thirdparty/audiotools/core/playback.py +252 -0
- xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
- xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
- xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
- xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
- xinference/thirdparty/audiotools/core/util.py +671 -0
- xinference/thirdparty/audiotools/core/whisper.py +97 -0
- xinference/thirdparty/audiotools/data/__init__.py +3 -0
- xinference/thirdparty/audiotools/data/datasets.py +517 -0
- xinference/thirdparty/audiotools/data/preprocess.py +81 -0
- xinference/thirdparty/audiotools/data/transforms.py +1592 -0
- xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
- xinference/thirdparty/audiotools/metrics/distance.py +131 -0
- xinference/thirdparty/audiotools/metrics/quality.py +159 -0
- xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
- xinference/thirdparty/audiotools/ml/__init__.py +5 -0
- xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
- xinference/thirdparty/audiotools/ml/decorators.py +440 -0
- xinference/thirdparty/audiotools/ml/experiment.py +90 -0
- xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
- xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
- xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
- xinference/thirdparty/audiotools/post.py +140 -0
- xinference/thirdparty/audiotools/preference.py +600 -0
- xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
- xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
- xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
- xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
- xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
- xinference/thirdparty/indextts/__init__.py +0 -0
- xinference/thirdparty/indextts/cli.py +65 -0
- xinference/thirdparty/indextts/gpt/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
- xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
- xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
- xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
- xinference/thirdparty/indextts/gpt/model.py +713 -0
- xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
- xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
- xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
- xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
- xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
- xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
- xinference/thirdparty/indextts/infer.py +690 -0
- xinference/thirdparty/indextts/infer_v2.py +739 -0
- xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
- xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
- xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
- xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
- xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
- xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
- xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
- xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
- xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
- xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
- xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
- xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
- xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
- xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
- xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
- xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
- xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
- xinference/thirdparty/indextts/utils/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/arch_util.py +120 -0
- xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
- xinference/thirdparty/indextts/utils/common.py +121 -0
- xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
- xinference/thirdparty/indextts/utils/front.py +536 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
- xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
- xinference/thirdparty/indextts/utils/text_utils.py +41 -0
- xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
- xinference/thirdparty/indextts/utils/utils.py +93 -0
- xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
- xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
- xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
- xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
- xinference/types.py +105 -2
- xinference/ui/gradio/media_interface.py +66 -8
- xinference/ui/web/ui/build/asset-manifest.json +6 -6
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
- xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
- xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
- xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
- xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
- xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
- xinference/ui/web/ui/package-lock.json +0 -34
- xinference/ui/web/ui/package.json +0 -1
- xinference/ui/web/ui/src/locales/en.json +9 -3
- xinference/ui/web/ui/src/locales/ja.json +9 -3
- xinference/ui/web/ui/src/locales/ko.json +9 -3
- xinference/ui/web/ui/src/locales/zh.json +9 -3
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/METADATA +24 -4
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/RECORD +302 -76
- xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
- xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
- xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
- xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
- xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
- xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
- xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
- xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
- xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
- xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
- xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
- xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
- xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
- xinference/ui/web/ui/node_modules/select/bower.json +0 -13
- xinference/ui/web/ui/node_modules/select/package.json +0 -29
- xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED

@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-
+ "date": "2025-09-30T23:23:16+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "
- "version": "1.
+ "full-revisionid": "98a3c8ad4a6debd97ef29cc05aad3514f4ba488c",
+ "version": "1.10.1"
 }
 '''  # END VERSION_JSON
 
xinference/api/restful_api.py
CHANGED

@@ -14,6 +14,7 @@
 
 import asyncio
 import inspect
+import ipaddress
 import json
 import logging
 import multiprocessing
@@ -21,6 +22,7 @@ import os
 import pprint
 import sys
 import time
+import uuid
 import warnings
 from typing import Any, Dict, List, Optional, Union
 
@@ -53,6 +55,7 @@ from xoscar.utils import get_next_port
 from .._compat import BaseModel, Field
 from .._version import get_versions
 from ..constants import (
+    XINFERENCE_ALLOWED_IPS,
     XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
     XINFERENCE_DEFAULT_ENDPOINT_PORT,
     XINFERENCE_DISABLE_METRICS,
@@ -61,11 +64,16 @@ from ..constants import (
 from ..core.event import Event, EventCollectorActor, EventType
 from ..core.supervisor import SupervisorActor
 from ..core.utils import CancelMixin, json_dumps
+
+# Import Anthropic-related types and availability flag
 from ..types import (
+    ANTHROPIC_AVAILABLE,
+    AnthropicMessage,
     ChatCompletion,
     Completion,
     CreateChatCompletion,
     CreateCompletion,
+    CreateMessage,
     ImageList,
     PeftModelConfig,
     SDAPIResult,
@@ -213,6 +221,9 @@ class BuildGradioMediaInterfaceRequest(BaseModel):
 
 
 class RESTfulAPI(CancelMixin):
+    # Add new class attributes
+    _allowed_ip_list: Optional[List[ipaddress.IPv4Network]] = None
+
     def __init__(
         self,
         supervisor_address: str,
@@ -229,6 +240,45 @@ class RESTfulAPI(CancelMixin):
         self._auth_service = AuthService(auth_config_file)
         self._router = APIRouter()
         self._app = FastAPI()
+        # Initialize allowed IP list once
+        self._init_allowed_ip_list()
+
+    def _init_allowed_ip_list(self):
+        """Initialize the allowed IP list from environment variable."""
+        if RESTfulAPI._allowed_ip_list is None:
+            # ie: export XINFERENCE_ALLOWED_IPS=192.168.1.0/24
+            allowed_ips = XINFERENCE_ALLOWED_IPS
+            if allowed_ips:
+                RESTfulAPI._allowed_ip_list = []
+                for ip in allowed_ips.split(","):
+                    ip = ip.strip()
+                    try:
+                        # Try parsing as network/CIDR
+                        if "/" in ip:
+                            RESTfulAPI._allowed_ip_list.append(ipaddress.ip_network(ip))
+                        else:
+                            # Parse as single IP
+                            RESTfulAPI._allowed_ip_list.append(
+                                ipaddress.ip_network(f"{ip}/32")
+                            )
+                    except ValueError:
+                        logger.error(
+                            f"Invalid IP address or network: {ip}", exc_info=True
+                        )
+                        continue
+
+    def _is_ip_allowed(self, ip: str) -> bool:
+        """Check if an IP is allowed based on configured rules."""
+        if not RESTfulAPI._allowed_ip_list:
+            return True
+
+        try:
+            client_ip = ipaddress.ip_address(ip)
+            return any(
+                client_ip in allowed_net for allowed_net in RESTfulAPI._allowed_ip_list
+            )
+        except ValueError:
+            return False
 
     def is_authenticated(self):
         return False if self._auth_service.config is None else True
@@ -287,6 +337,16 @@ class RESTfulAPI(CancelMixin):
             allow_headers=["*"],
         )
 
+        @self._app.middleware("http")
+        async def ip_restriction_middleware(request: Request, call_next):
+            client_ip = request.client.host
+            if not self._is_ip_allowed(client_ip):
+                return PlainTextResponse(
+                    status_code=403, content=f"Access denied for IP: {client_ip}\n"
+                )
+            response = await call_next(request)
+            return response
+
         @self._app.exception_handler(500)
         async def internal_exception_handler(request: Request, exc: Exception):
             logger.exception("Handling request %s failed: %s", request.url, exc)
@@ -532,6 +592,40 @@ class RESTfulAPI(CancelMixin):
                 else None
             ),
         )
+        # Register messages endpoint only if Anthropic is available
+        if ANTHROPIC_AVAILABLE:
+            self._router.add_api_route(
+                "/anthropic/v1/messages",
+                self.create_message,
+                methods=["POST"],
+                response_model=AnthropicMessage,
+                dependencies=(
+                    [Security(self._auth_service, scopes=["models:read"])]
+                    if self.is_authenticated()
+                    else None
+                ),
+            )
+            # Register Anthropic models endpoints
+            self._router.add_api_route(
+                "/anthropic/v1/models",
+                self.anthropic_list_models,
+                methods=["GET"],
+                dependencies=(
+                    [Security(self._auth_service, scopes=["models:list"])]
+                    if self.is_authenticated()
+                    else None
+                ),
+            )
+            self._router.add_api_route(
+                "/anthropic/v1/models/{model_id}",
+                self.anthropic_get_model,
+                methods=["GET"],
+                dependencies=(
+                    [Security(self._auth_service, scopes=["models:list"])]
+                    if self.is_authenticated()
+                    else None
+                ),
+            )
         self._router.add_api_route(
             "/v1/embeddings",
             self.create_embedding,
@@ -994,6 +1088,58 @@ class RESTfulAPI(CancelMixin):
             logger.error(e, exc_info=True)
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def anthropic_list_models(self) -> JSONResponse:
+        """Anthropic-compatible models endpoint"""
+        try:
+
+            # Get running models from xinference
+            running_models = await (await self._get_supervisor_ref()).list_models()
+
+            # For backward compatibility with tests, only return running models by default
+            model_list = []
+
+            # Add running models to the list
+            for model_id, model_info in running_models.items():
+                anthropic_model = {
+                    "id": model_id,
+                    "object": "model",
+                    "created": 0,
+                    "display_name": model_info.get("model_name", model_id),
+                    "type": model_info.get("model_type", "model"),
+                    "max_tokens": model_info.get("context_length", 4096),
+                }
+                model_list.append(anthropic_model)
+
+            return JSONResponse(content=model_list)
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+
+    async def anthropic_get_model(self, model_id: str) -> JSONResponse:
+        """Anthropic-compatible model retrieval endpoint"""
+        try:
+            models = await (await self._get_supervisor_ref()).list_models()
+
+            model_info = models[model_id]
+
+            # Convert to Anthropic format
+            anthropic_model = {
+                "id": model_id,  # Return the original requested ID
+                "object": "model",
+                "created": 0,
+                "display_name": model_info.get("model_name", model_id),
+                "type": model_info.get("model_type", "model"),
+                "max_tokens": model_info.get("context_length", 4096),
+                **model_info,
+            }
+
+            return JSONResponse(content=anthropic_model)
+        except HTTPException:
+            raise
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def describe_model(self, model_uid: str) -> JSONResponse:
         try:
             data = await (await self._get_supervisor_ref()).describe_model(model_uid)
@@ -1417,6 +1563,151 @@ class RESTfulAPI(CancelMixin):
             self.handle_request_limit_error(e)
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def create_message(self, request: Request) -> Response:
+        raw_body = await request.json()
+        body = CreateMessage.parse_obj(raw_body)
+
+        exclude = {
+            "model",
+            "messages",
+            "stream",
+            "stop_sequences",
+            "metadata",
+            "tool_choice",
+            "tools",
+        }
+        raw_kwargs = {k: v for k, v in raw_body.items() if k not in exclude}
+        kwargs = body.dict(exclude_unset=True, exclude=exclude)
+
+        # guided_decoding params
+        kwargs.update(self.extract_guided_params(raw_body=raw_body))
+
+        # TODO: Decide if this default value override is necessary #1061
+        if body.max_tokens is None:
+            kwargs["max_tokens"] = max_tokens_field.default
+
+        messages = body.messages and list(body.messages)
+
+        if not messages or messages[-1].get("role") not in ["user", "assistant"]:
+            raise HTTPException(
+                status_code=400, detail="Invalid input. Please specify the prompt."
+            )
+
+        # Handle tools parameter
+        if hasattr(body, "tools") and body.tools:
+            kwargs["tools"] = body.tools
+
+        # Handle tool_choice parameter
+        if hasattr(body, "tool_choice") and body.tool_choice:
+            kwargs["tool_choice"] = body.tool_choice
+
+        # Get model mapping
+        try:
+            running_models = await (await self._get_supervisor_ref()).list_models()
+        except Exception as e:
+            logger.error(f"Failed to get model mapping: {e}", exc_info=True)
+            raise HTTPException(status_code=500, detail="Failed to get model mapping")
+
+        if not running_models:
+            raise HTTPException(
+                status_code=400,
+                detail=f"No running models available. Please start a model in xinference first.",
+            )
+
+        requested_model_id = body.model
+        if "claude" in requested_model_id:
+            requested_model_id = list(running_models.keys())[0]
+
+        if requested_model_id not in running_models:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Model '{requested_model_id}' is not available. Available models: {list(running_models.keys())}",
+            )
+        else:
+            model_uid = requested_model_id
+
+        try:
+            model = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        if body.stream:
+
+            async def stream_results():
+                iterator = None
+                try:
+                    try:
+                        iterator = await model.chat(
+                            messages, kwargs, raw_params=raw_kwargs
+                        )
+                    except RuntimeError as re:
+                        self.handle_request_limit_error(re)
+
+                    # Check if iterator is actually an async iterator
+                    if hasattr(iterator, "__aiter__"):
+                        async for item in iterator:
+                            yield item
+                    elif isinstance(iterator, (str, bytes)):
+                        # Handle case where chat returns bytes/string instead of iterator
+                        if isinstance(iterator, bytes):
+                            try:
+                                content = iterator.decode("utf-8")
+                            except UnicodeDecodeError:
+                                content = str(iterator)
+                        else:
+                            content = iterator
+                        yield dict(data=json.dumps({"content": content}))
+                    else:
+                        # Fallback: try to iterate normally
+                        try:
+                            for item in iterator:
+                                yield item
+                        except TypeError:
+                            # If not iterable, yield as single result
+                            yield dict(data=json.dumps({"content": str(iterator)}))
+
+                    yield "[DONE]"
+                except asyncio.CancelledError:
+                    logger.info(
+                        f"Disconnected from client (via refresh/close) {request.client} during chat."
+                    )
+                    return
+                except Exception as ex:
+                    ex = await self._get_model_last_error(model.uid, ex)
+                    logger.exception("Message stream got an error: %s", ex)
+                    await self._report_error_event(model_uid, str(ex))
+                    yield dict(data=json.dumps({"error": str(ex)}))
+                    return
+                finally:
+                    await model.decrease_serve_count()
+
+            return EventSourceResponse(
+                stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
+            )
+        else:
+            try:
+                data = await model.chat(messages, kwargs, raw_params=raw_kwargs)
+                # Convert OpenAI format to Anthropic format
+                openai_response = json.loads(data)
+                anthropic_response = self._convert_openai_to_anthropic(
+                    openai_response, body.model
+                )
+                return Response(
+                    json.dumps(anthropic_response), media_type="application/json"
+                )
+            except Exception as e:
+                e = await self._get_model_last_error(model.uid, e)
+                logger.error(e, exc_info=True)
+                await self._report_error_event(model_uid, str(e))
+                self.handle_request_limit_error(e)
+                raise HTTPException(status_code=500, detail=str(e))
+
     async def create_embedding(self, request: Request) -> Response:
         payload = await request.json()
         body = CreateEmbeddingRequest.parse_obj(payload)
@@ -1845,7 +2136,7 @@ class RESTfulAPI(CancelMixin):
     async def create_variations(
         self,
         model: str = Form(...),
-        image: UploadFile = File(media_type="application/octet-stream"),
+        image: List[UploadFile] = File(media_type="application/octet-stream"),
         prompt: Optional[Union[str, List[str]]] = Form(None),
         negative_prompt: Optional[Union[str, List[str]]] = Form(None),
         n: Optional[int] = Form(1),
@@ -1873,8 +2164,17 @@ class RESTfulAPI(CancelMixin):
             parsed_kwargs = {}
         request_id = parsed_kwargs.get("request_id")
         self._add_running_task(request_id)
+
+        # Handle single image or multiple images
+        if len(image) == 1:
+            # Single image
+            image_data = Image.open(image[0].file)
+        else:
+            # Multiple images - convert to list of PIL Images
+            image_data = [Image.open(img.file) for img in image]
+
         image_list = await model_ref.image_to_image(
-            image=
+            image=image_data,
             prompt=prompt,
             negative_prompt=negative_prompt,
             n=n,
@@ -2371,7 +2671,14 @@ class RESTfulAPI(CancelMixin):
             data = await (await self._get_supervisor_ref()).list_model_registrations(
                 model_type, detailed=detailed
             )
-
+            # Remove duplicate model names.
+            model_names = set()
+            final_data = []
+            for item in data:
+                if item["model_name"] not in model_names:
+                    model_names.add(item["model_name"])
+                    final_data.append(item)
+            return JSONResponse(content=final_data)
         except ValueError as re:
             logger.error(re, exc_info=True)
             raise HTTPException(status_code=400, detail=str(re))
@@ -2603,6 +2910,96 @@ class RESTfulAPI(CancelMixin):
 
         return kwargs
 
+    def _convert_openai_to_anthropic(self, openai_response: dict, model: str) -> dict:
+        """
+        Convert OpenAI response format to Anthropic response format.
+
+        Args:
+            openai_response: OpenAI format response
+            model: Model name
+
+        Returns:
+            Anthropic format response
+        """
+
+        # Extract content and tool calls from OpenAI response
+        content_blocks = []
+        stop_reason = "stop"
+
+        if "choices" in openai_response and len(openai_response["choices"]) > 0:
+            choice = openai_response["choices"][0]
+            message = choice.get("message", {})
+
+            # Handle content text
+            content = message.get("content", "")
+            if content:
+                if isinstance(content, str):
+                    # If content is a string, use it directly
+                    content_blocks.append({"type": "text", "text": content})
+                elif isinstance(content, list):
+                    # If content is a list, extract text from each content block
+                    for content_block in content:
+                        if isinstance(content_block, dict):
+                            if content_block.get("type") == "text":
+                                text = content_block.get("text", "")
+                                if text:
+                                    content_blocks.append(
+                                        {"type": "text", "text": text}
+                                    )
+                            elif "text" in content_block:
+                                # Handle different content block format
+                                text = content_block.get("text", "")
+                                if text:
+                                    content_blocks.append(
+                                        {"type": "text", "text": text}
+                                    )
+
+            # Handle tool calls
+            tool_calls = message.get("tool_calls", [])
+            for tool_call in tool_calls:
+                function = tool_call.get("function", {})
+                arguments = function.get("arguments", "{}")
+                try:
+                    input_data = json.loads(arguments)
+                except json.JSONDecodeError:
+                    input_data = {}
+                tool_use_block = {
+                    "type": "tool_use",
+                    "cache_control": {"type": "ephemeral"},
+                    "id": tool_call.get("id", str(uuid.uuid4())),
+                    "name": function.get("name", ""),
+                    "input": input_data,
+                }
+                content_blocks.append(tool_use_block)
+
+            # Set stop reason based on finish reason
+            finish_reason = choice.get("finish_reason", "stop")
+            if finish_reason == "tool_calls":
+                stop_reason = "tool_use"
+
+        # Build Anthropic response
+        anthropic_response = {
+            "id": str(uuid.uuid4()),
+            "type": "message",
+            "role": "assistant",
+            "content": content_blocks,
+            "model": model,
+            "stop_reason": stop_reason,
+            "stop_sequence": None,
+            "usage": {
+                "input_tokens": openai_response.get("usage", {}).get(
+                    "prompt_tokens", 0
+                ),
+                "output_tokens": openai_response.get("usage", {}).get(
+                    "completion_tokens", 0
+                ),
+                "cache_creation_input_tokens": 0,
+                "cache_read_input_tokens": 0,
+            },
+        }
+
+        return anthropic_response
+
 
 def run(
     supervisor_address: str,
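
The hunks above register Anthropic-compatible routes (/anthropic/v1/messages, /anthropic/v1/models, /anthropic/v1/models/{model_id}) when ANTHROPIC_AVAILABLE is true, and translate OpenAI-style chat completions into Anthropic-style message responses. A minimal sketch of calling the new messages route with plain requests; the server address (default port 9997 assumed) and the model UID "my-chat-model" are placeholders, not names from this diff:

# Hedged sketch, not part of the package: exercise the new Anthropic-compatible route.
import requests

resp = requests.post(
    "http://127.0.0.1:9997/anthropic/v1/messages",
    json={
        "model": "my-chat-model",  # per the handler, names containing "claude" are remapped to the first running model
        "max_tokens": 256,
        "messages": [{"role": "user", "content": "Hello!"}],
    },
    timeout=60,
)
resp.raise_for_status()
msg = resp.json()
# The non-streaming handler returns Anthropic-style fields:
# id, type, role, content, model, stop_reason, stop_sequence, usage.
print(msg["stop_reason"], msg["content"][0]["text"])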
xinference/client/restful/async_restful_client.py
CHANGED

@@ -285,7 +285,7 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
 
     async def image_to_image(
         self,
-        image: Union[str, bytes],
+        image: Union[str, bytes, List[Union[str, bytes]]],
         prompt: str,
         negative_prompt: Optional[str] = None,
         n: int = 1,
@@ -298,7 +298,7 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
 
         Parameters
         ----------
-        image: `Union[str, bytes]`
+        image: `Union[str, bytes, List[Union[str, bytes]]]`
            The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
            specified as `torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be
            accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If height
@@ -338,7 +338,24 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
         files: List[Any] = []
         for key, value in params.items():
             files.append((key, (None, value)))
-
+
+        # Handle both single image and list of images
+        if isinstance(image, list):
+            if len(image) == 0:
+                raise ValueError("Image list cannot be empty")
+            elif len(image) == 1:
+                # Single image in list, use it directly
+                files.append(("image", ("image", image[0], "application/octet-stream")))
+            else:
+                # Multiple images - send all images with same field name
+                # FastAPI will collect them into a list
+                for img_data in image:
+                    files.append(
+                        ("image", ("image", img_data, "application/octet-stream"))
+                    )
+        else:
+            # Single image
+            files.append(("image", ("image", image, "application/octet-stream")))
         response = await self.session.post(url, files=files, headers=self.auth_headers)
         if response.status != 200:
             raise RuntimeError(
xinference/client/restful/restful_client.py
CHANGED

@@ -250,7 +250,7 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
 
     def image_to_image(
         self,
-        image: Union[str, bytes],
+        image: Union[str, bytes, List[Union[str, bytes]]],
         prompt: str,
         negative_prompt: Optional[str] = None,
         n: int = 1,
@@ -263,7 +263,7 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
 
         Parameters
         ----------
-        image: `Union[str, bytes]`
+        image: `Union[str, bytes, List[Union[str, bytes]]]`
            The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
            specified as `torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be
            accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If height
@@ -302,7 +302,24 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
         files: List[Any] = []
         for key, value in params.items():
             files.append((key, (None, value)))
-
+
+        # Handle both single image and list of images
+        if isinstance(image, list):
+            if len(image) == 0:
+                raise ValueError("Image list cannot be empty")
+            elif len(image) == 1:
+                # Single image in list, use it directly
+                files.append(("image", ("image", image[0], "application/octet-stream")))
+            else:
+                # Multiple images - send all images with same field name
+                # FastAPI will collect them into a list
+                for img_data in image:
+                    files.append(
+                        ("image", ("image", img_data, "application/octet-stream"))
+                    )
+        else:
+            # Single image
+            files.append(("image", ("image", image, "application/octet-stream")))
        response = self.session.post(url, files=files, headers=self.auth_headers)
        if response.status_code != 200:
            raise RuntimeError(
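
Both image handles (the async one in the previous section and the sync one above) now accept a list of images for image_to_image in addition to a single str/bytes value; each image is sent under the same "image" form field so the server side (see the create_variations change in restful_api.py) collects them into a List[UploadFile]. A minimal usage sketch against the sync client; the server address, file names, and model UID are placeholders:

# Hedged sketch, not part of the package: pass several reference images at once.
from xinference.client import Client

client = Client("http://127.0.0.1:9997")
model = client.get_model("my-image-model")  # an image model launched beforehand

with open("ref_a.png", "rb") as fa, open("ref_b.png", "rb") as fb:
    result = model.image_to_image(
        image=[fa.read(), fb.read()],  # multiple images in one request
        prompt="blend the two references into a single scene",
        n=1,
    )
print(result)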
xinference/constants.py
CHANGED

@@ -33,6 +33,7 @@ XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_ENABLE_VIRTUAL_ENV"
 XINFERENCE_ENV_VIRTUAL_ENV_SKIP_INSTALLED = "XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED"
 XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS = "XINFERENCE_SSE_PING_ATTEMPTS_SECONDS"
 XINFERENCE_ENV_MAX_TOKENS = "XINFERENCE_MAX_TOKENS"
+XINFERENCE_ENV_ALLOWED_IPS = "XINFERENCE_ALLOWED_IPS"
 
 
 def get_xinference_home() -> str:
@@ -110,3 +111,4 @@ XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED = (
     if os.getenv(XINFERENCE_ENV_VIRTUAL_ENV_SKIP_INSTALLED)
     else None
 )
+XINFERENCE_ALLOWED_IPS = os.getenv(XINFERENCE_ENV_ALLOWED_IPS)
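
The new XINFERENCE_ALLOWED_IPS variable feeds the IP allow-list middleware added in restful_api.py: a comma-separated mix of CIDR networks and bare addresses, where bare addresses are treated as /32 hosts and non-matching clients receive HTTP 403. A small sketch mirroring that parsing and matching logic with the standard ipaddress module; the example value and client addresses are illustrative only:

# Hedged sketch, not part of the package: how allow-list entries are interpreted.
import ipaddress

allowed_ips = "192.168.1.0/24, 10.0.0.5"  # e.g. export XINFERENCE_ALLOWED_IPS=192.168.1.0/24,10.0.0.5

allowed = [
    ipaddress.ip_network(e.strip() if "/" in e else f"{e.strip()}/32")
    for e in allowed_ips.split(",")
]

for client in ("192.168.1.42", "10.0.0.5", "203.0.113.7"):
    ok = any(ipaddress.ip_address(client) in net for net in allowed)
    print(client, "allowed" if ok else "rejected with HTTP 403")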