xinference 1.9.1__py3-none-any.whl → 1.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference has been flagged as possibly problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +415 -1
- xinference/constants.py +2 -0
- xinference/core/supervisor.py +29 -1
- xinference/model/audio/core.py +5 -0
- xinference/model/audio/kokoro.py +1 -1
- xinference/model/audio/kokoro_zh.py +124 -0
- xinference/model/audio/model_spec.json +20 -0
- xinference/model/embedding/sentence_transformers/core.py +4 -4
- xinference/model/embedding/vllm/core.py +7 -1
- xinference/model/image/model_spec.json +2 -3
- xinference/model/llm/core.py +10 -0
- xinference/model/llm/llama_cpp/core.py +1 -0
- xinference/model/llm/llm_family.json +40 -20
- xinference/model/llm/llm_family.py +1 -0
- xinference/model/llm/mlx/core.py +52 -33
- xinference/model/llm/sglang/core.py +2 -44
- xinference/model/llm/tool_parsers/__init__.py +58 -0
- xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
- xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +128 -0
- xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
- xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
- xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
- xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
- xinference/model/llm/transformers/core.py +1 -1
- xinference/model/llm/utils.py +127 -45
- xinference/model/llm/vllm/core.py +2 -61
- xinference/types.py +105 -2
- {xinference-1.9.1.dist-info → xinference-1.10.0.dist-info}/METADATA +7 -3
- {xinference-1.9.1.dist-info → xinference-1.10.0.dist-info}/RECORD +34 -26
- {xinference-1.9.1.dist-info → xinference-1.10.0.dist-info}/WHEEL +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.0.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-
+ "date": "2025-09-12T21:20:52+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "
- "version": "1.
+ "full-revisionid": "b018733c97029fb59e8ffe55fadc6473232fbf23",
+ "version": "1.10.0"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py
CHANGED
@@ -14,6 +14,7 @@
 
 import asyncio
 import inspect
+import ipaddress
 import json
 import logging
 import multiprocessing
@@ -21,6 +22,7 @@ import os
 import pprint
 import sys
 import time
+import uuid
 import warnings
 from typing import Any, Dict, List, Optional, Union
 
@@ -53,6 +55,7 @@ from xoscar.utils import get_next_port
 from .._compat import BaseModel, Field
 from .._version import get_versions
 from ..constants import (
+    XINFERENCE_ALLOWED_IPS,
     XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
     XINFERENCE_DEFAULT_ENDPOINT_PORT,
     XINFERENCE_DISABLE_METRICS,
@@ -61,11 +64,16 @@ from ..constants import (
 from ..core.event import Event, EventCollectorActor, EventType
 from ..core.supervisor import SupervisorActor
 from ..core.utils import CancelMixin, json_dumps
+
+# Import Anthropic-related types and availability flag
 from ..types import (
+    ANTHROPIC_AVAILABLE,
+    AnthropicMessage,
     ChatCompletion,
     Completion,
     CreateChatCompletion,
     CreateCompletion,
+    CreateMessage,
     ImageList,
     PeftModelConfig,
     SDAPIResult,
@@ -213,6 +221,9 @@ class BuildGradioMediaInterfaceRequest(BaseModel):
 
 
 class RESTfulAPI(CancelMixin):
+    # Add new class attributes
+    _allowed_ip_list: Optional[List[ipaddress.IPv4Network]] = None
+
     def __init__(
         self,
         supervisor_address: str,
@@ -229,6 +240,45 @@ class RESTfulAPI(CancelMixin):
         self._auth_service = AuthService(auth_config_file)
         self._router = APIRouter()
         self._app = FastAPI()
+        # Initialize allowed IP list once
+        self._init_allowed_ip_list()
+
+    def _init_allowed_ip_list(self):
+        """Initialize the allowed IP list from environment variable."""
+        if RESTfulAPI._allowed_ip_list is None:
+            # ie: export XINFERENCE_ALLOWED_IPS=192.168.1.0/24
+            allowed_ips = XINFERENCE_ALLOWED_IPS
+            if allowed_ips:
+                RESTfulAPI._allowed_ip_list = []
+                for ip in allowed_ips.split(","):
+                    ip = ip.strip()
+                    try:
+                        # Try parsing as network/CIDR
+                        if "/" in ip:
+                            RESTfulAPI._allowed_ip_list.append(ipaddress.ip_network(ip))
+                        else:
+                            # Parse as single IP
+                            RESTfulAPI._allowed_ip_list.append(
+                                ipaddress.ip_network(f"{ip}/32")
+                            )
+                    except ValueError:
+                        logger.error(
+                            f"Invalid IP address or network: {ip}", exc_info=True
+                        )
+                        continue
+
+    def _is_ip_allowed(self, ip: str) -> bool:
+        """Check if an IP is allowed based on configured rules."""
+        if not RESTfulAPI._allowed_ip_list:
+            return True
+
+        try:
+            client_ip = ipaddress.ip_address(ip)
+            return any(
+                client_ip in allowed_net for allowed_net in RESTfulAPI._allowed_ip_list
+            )
+        except ValueError:
+            return False
 
     def is_authenticated(self):
         return False if self._auth_service.config is None else True
@@ -287,6 +337,16 @@ class RESTfulAPI(CancelMixin):
             allow_headers=["*"],
         )
 
+        @self._app.middleware("http")
+        async def ip_restriction_middleware(request: Request, call_next):
+            client_ip = request.client.host
+            if not self._is_ip_allowed(client_ip):
+                return PlainTextResponse(
+                    status_code=403, content=f"Access denied for IP: {client_ip}\n"
+                )
+            response = await call_next(request)
+            return response
+
         @self._app.exception_handler(500)
         async def internal_exception_handler(request: Request, exc: Exception):
             logger.exception("Handling request %s failed: %s", request.url, exc)
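A note on the two hunks above: every request now passes through ip_restriction_middleware, which rejects clients outside the configured networks with HTTP 403; with no configuration, everything is allowed. The matching is plain stdlib ipaddress. A minimal standalone sketch of the same logic (function names here are illustrative, not xinference API):

import ipaddress

def parse_allowed(spec):
    # Accept a comma-separated list of addresses and CIDR blocks,
    # e.g. "127.0.0.1,192.168.1.0/24"; bare addresses become /32 hosts.
    nets = []
    for part in spec.split(","):
        part = part.strip()
        try:
            nets.append(ipaddress.ip_network(part if "/" in part else f"{part}/32"))
        except ValueError:
            continue  # skip malformed entries, as the patch does
    return nets

def is_allowed(client_ip, nets):
    if not nets:
        return True  # empty/unset config means no restriction
    try:
        addr = ipaddress.ip_address(client_ip)
    except ValueError:
        return False
    return any(addr in net for net in nets)

nets = parse_allowed("127.0.0.1, 192.168.1.0/24")
assert is_allowed("192.168.1.42", nets)
assert not is_allowed("10.0.0.1", nets)

One caveat carried over from the patch itself: the hard-coded /32 suffix assumes IPv4, so a bare IPv6 address in the list fails to parse and is skipped.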
@@ -532,6 +592,40 @@ class RESTfulAPI(CancelMixin):
                 else None
             ),
         )
+        # Register messages endpoint only if Anthropic is available
+        if ANTHROPIC_AVAILABLE:
+            self._router.add_api_route(
+                "/anthropic/v1/messages",
+                self.create_message,
+                methods=["POST"],
+                response_model=AnthropicMessage,
+                dependencies=(
+                    [Security(self._auth_service, scopes=["models:read"])]
+                    if self.is_authenticated()
+                    else None
+                ),
+            )
+            # Register Anthropic models endpoints
+            self._router.add_api_route(
+                "/anthropic/v1/models",
+                self.anthropic_list_models,
+                methods=["GET"],
+                dependencies=(
+                    [Security(self._auth_service, scopes=["models:list"])]
+                    if self.is_authenticated()
+                    else None
+                ),
+            )
+            self._router.add_api_route(
+                "/anthropic/v1/models/{model_id}",
+                self.anthropic_get_model,
+                methods=["GET"],
+                dependencies=(
+                    [Security(self._auth_service, scopes=["models:list"])]
+                    if self.is_authenticated()
+                    else None
+                ),
+            )
         self._router.add_api_route(
             "/v1/embeddings",
             self.create_embedding,
@@ -994,6 +1088,58 @@ class RESTfulAPI(CancelMixin):
             logger.error(e, exc_info=True)
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def anthropic_list_models(self) -> JSONResponse:
+        """Anthropic-compatible models endpoint"""
+        try:
+
+            # Get running models from xinference
+            running_models = await (await self._get_supervisor_ref()).list_models()
+
+            # For backward compatibility with tests, only return running models by default
+            model_list = []
+
+            # Add running models to the list
+            for model_id, model_info in running_models.items():
+                anthropic_model = {
+                    "id": model_id,
+                    "object": "model",
+                    "created": 0,
+                    "display_name": model_info.get("model_name", model_id),
+                    "type": model_info.get("model_type", "model"),
+                    "max_tokens": model_info.get("context_length", 4096),
+                }
+                model_list.append(anthropic_model)
+
+            return JSONResponse(content=model_list)
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+
+    async def anthropic_get_model(self, model_id: str) -> JSONResponse:
+        """Anthropic-compatible model retrieval endpoint"""
+        try:
+            models = await (await self._get_supervisor_ref()).list_models()
+
+            model_info = models[model_id]
+
+            # Convert to Anthropic format
+            anthropic_model = {
+                "id": model_id,  # Return the original requested ID
+                "object": "model",
+                "created": 0,
+                "display_name": model_info.get("model_name", model_id),
+                "type": model_info.get("model_type", "model"),
+                "max_tokens": model_info.get("context_length", 4096),
+                **model_info,
+            }
+
+            return JSONResponse(content=anthropic_model)
+        except HTTPException:
+            raise
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def describe_model(self, model_uid: str) -> JSONResponse:
         try:
             data = await (await self._get_supervisor_ref()).describe_model(model_uid)
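Once a server is up, the two read-only endpoints are easy to smoke-test; a sketch assuming the default local endpoint and the requests library:

import requests

base = "http://127.0.0.1:9997"  # assumed xinference endpoint

# GET /anthropic/v1/models returns a bare JSON list of running models.
models = requests.get(f"{base}/anthropic/v1/models").json()
for m in models:
    print(m["id"], m["type"], m["max_tokens"])

# GET /anthropic/v1/models/{model_id} returns a single entry, merged
# with the raw model_info via **model_info as shown above.
if models:
    detail = requests.get(f"{base}/anthropic/v1/models/{models[0]['id']}").json()
    print(detail["display_name"])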
@@ -1417,6 +1563,151 @@
             self.handle_request_limit_error(e)
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def create_message(self, request: Request) -> Response:
+        raw_body = await request.json()
+        body = CreateMessage.parse_obj(raw_body)
+
+        exclude = {
+            "model",
+            "messages",
+            "stream",
+            "stop_sequences",
+            "metadata",
+            "tool_choice",
+            "tools",
+        }
+        raw_kwargs = {k: v for k, v in raw_body.items() if k not in exclude}
+        kwargs = body.dict(exclude_unset=True, exclude=exclude)
+
+        # guided_decoding params
+        kwargs.update(self.extract_guided_params(raw_body=raw_body))
+
+        # TODO: Decide if this default value override is necessary #1061
+        if body.max_tokens is None:
+            kwargs["max_tokens"] = max_tokens_field.default
+
+        messages = body.messages and list(body.messages)
+
+        if not messages or messages[-1].get("role") not in ["user", "assistant"]:
+            raise HTTPException(
+                status_code=400, detail="Invalid input. Please specify the prompt."
+            )
+
+        # Handle tools parameter
+        if hasattr(body, "tools") and body.tools:
+            kwargs["tools"] = body.tools
+
+        # Handle tool_choice parameter
+        if hasattr(body, "tool_choice") and body.tool_choice:
+            kwargs["tool_choice"] = body.tool_choice
+
+        # Get model mapping
+        try:
+            running_models = await (await self._get_supervisor_ref()).list_models()
+        except Exception as e:
+            logger.error(f"Failed to get model mapping: {e}", exc_info=True)
+            raise HTTPException(status_code=500, detail="Failed to get model mapping")
+
+        if not running_models:
+            raise HTTPException(
+                status_code=400,
+                detail=f"No running models available. Please start a model in xinference first.",
+            )
+
+        requested_model_id = body.model
+        if "claude" in requested_model_id:
+            requested_model_id = list(running_models.keys())[0]
+
+        if requested_model_id not in running_models:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Model '{requested_model_id}' is not available. Available models: {list(running_models.keys())}",
+            )
+        else:
+            model_uid = requested_model_id
+
+        try:
+            model = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        if body.stream:
+
+            async def stream_results():
+                iterator = None
+                try:
+                    try:
+                        iterator = await model.chat(
+                            messages, kwargs, raw_params=raw_kwargs
+                        )
+                    except RuntimeError as re:
+                        self.handle_request_limit_error(re)
+
+                    # Check if iterator is actually an async iterator
+                    if hasattr(iterator, "__aiter__"):
+                        async for item in iterator:
+                            yield item
+                    elif isinstance(iterator, (str, bytes)):
+                        # Handle case where chat returns bytes/string instead of iterator
+                        if isinstance(iterator, bytes):
+                            try:
+                                content = iterator.decode("utf-8")
+                            except UnicodeDecodeError:
+                                content = str(iterator)
+                        else:
+                            content = iterator
+                        yield dict(data=json.dumps({"content": content}))
+                    else:
+                        # Fallback: try to iterate normally
+                        try:
+                            for item in iterator:
+                                yield item
+                        except TypeError:
+                            # If not iterable, yield as single result
+                            yield dict(data=json.dumps({"content": str(iterator)}))
+
+                    yield "[DONE]"
+                except asyncio.CancelledError:
+                    logger.info(
+                        f"Disconnected from client (via refresh/close) {request.client} during chat."
+                    )
+                    return
+                except Exception as ex:
+                    ex = await self._get_model_last_error(model.uid, ex)
+                    logger.exception("Message stream got an error: %s", ex)
+                    await self._report_error_event(model_uid, str(ex))
+                    yield dict(data=json.dumps({"error": str(ex)}))
+                    return
+                finally:
+                    await model.decrease_serve_count()
+
+            return EventSourceResponse(
+                stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
+            )
+        else:
+            try:
+                data = await model.chat(messages, kwargs, raw_params=raw_kwargs)
+                # Convert OpenAI format to Anthropic format
+                openai_response = json.loads(data)
+                anthropic_response = self._convert_openai_to_anthropic(
+                    openai_response, body.model
+                )
+                return Response(
+                    json.dumps(anthropic_response), media_type="application/json"
+                )
+            except Exception as e:
+                e = await self._get_model_last_error(model.uid, e)
+                logger.error(e, exc_info=True)
+                await self._report_error_event(model_uid, str(e))
+                self.handle_request_limit_error(e)
+                raise HTTPException(status_code=500, detail=str(e))
+
     async def create_embedding(self, request: Request) -> Response:
         payload = await request.json()
         body = CreateEmbeddingRequest.parse_obj(payload)
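A non-streaming request against the new endpoint might look like the sketch below (host, port, and model uid are assumptions). Note the routing quirk visible above: a model name containing "claude" is silently remapped to the first running model, so Anthropic clients that default to a claude-* name still get a response.

import requests

resp = requests.post(
    "http://127.0.0.1:9997/anthropic/v1/messages",  # assumed endpoint
    json={
        "model": "my-chat-model",  # a running model uid (hypothetical)
        "max_tokens": 256,
        "messages": [{"role": "user", "content": "Hello!"}],
    },
)
reply = resp.json()
print(reply["stop_reason"])
for block in reply["content"]:
    if block["type"] == "text":
        print(block["text"])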
@@ -2371,7 +2662,14 @@ class RESTfulAPI(CancelMixin):
             data = await (await self._get_supervisor_ref()).list_model_registrations(
                 model_type, detailed=detailed
             )
-            return JSONResponse(content=data)
+            # Remove duplicate model names.
+            model_names = set()
+            final_data = []
+            for item in data:
+                if item["model_name"] not in model_names:
+                    model_names.add(item["model_name"])
+                    final_data.append(item)
+            return JSONResponse(content=final_data)
         except ValueError as re:
             logger.error(re, exc_info=True)
             raise HTTPException(status_code=400, detail=str(re))
@@ -2560,6 +2858,19 @@ class RESTfulAPI(CancelMixin):
     def extract_guided_params(raw_body: dict) -> dict:
         kwargs = {}
         raw_extra_body: dict = raw_body.get("extra_body")  # type: ignore
+        # Convert OpenAI response_format to vLLM guided decoding
+        response_format = raw_body.get("response_format")
+        if response_format is not None:
+            if isinstance(response_format, dict):
+                format_type = response_format.get("type")
+                if format_type == "json_schema":
+                    json_schema = response_format.get("json_schema")
+                    if isinstance(json_schema, dict):
+                        schema = json_schema.get("schema")
+                        if schema is not None:
+                            kwargs["guided_json"] = schema
+                elif format_type == "json_object":
+                    kwargs["guided_json_object"] = True
         if raw_body.get("guided_json"):
             kwargs["guided_json"] = raw_body.get("guided_json")
         if raw_body.get("guided_regex") is not None:
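The new branch is a small dict-to-dict translation from OpenAI structured-output syntax to vLLM-style guided decoding. For example, a request body containing

raw_body = {
    "response_format": {
        "type": "json_schema",
        "json_schema": {
            "schema": {"type": "object", "properties": {"name": {"type": "string"}}}
        },
    },
}

now yields kwargs["guided_json"] set to that inner schema, while {"type": "json_object"} instead yields kwargs["guided_json_object"] = True.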
@@ -2578,6 +2889,19 @@ class RESTfulAPI(CancelMixin):
         )
         # Parse OpenAI extra_body
         if raw_extra_body is not None:
+            # Convert OpenAI response_format to vLLM guided decoding
+            extra_response_format = raw_extra_body.get("response_format")
+            if extra_response_format is not None:
+                if isinstance(extra_response_format, dict):
+                    format_type = extra_response_format.get("type")
+                    if format_type == "json_schema":
+                        json_schema = extra_response_format.get("json_schema")
+                        if isinstance(json_schema, dict):
+                            schema = json_schema.get("schema")
+                            if schema is not None:
+                                kwargs["guided_json"] = schema
+                    elif format_type == "json_object":
+                        kwargs["guided_json_object"] = True
             if raw_extra_body.get("guided_json"):
                 kwargs["guided_json"] = raw_extra_body.get("guided_json")
             if raw_extra_body.get("guided_regex") is not None:
@@ -2603,6 +2927,96 @@ class RESTfulAPI(CancelMixin):
 
         return kwargs
 
+    def _convert_openai_to_anthropic(self, openai_response: dict, model: str) -> dict:
+        """
+        Convert OpenAI response format to Anthropic response format.
+
+        Args:
+            openai_response: OpenAI format response
+            model: Model name
+
+        Returns:
+            Anthropic format response
+        """
+
+        # Extract content and tool calls from OpenAI response
+        content_blocks = []
+        stop_reason = "stop"
+
+        if "choices" in openai_response and len(openai_response["choices"]) > 0:
+            choice = openai_response["choices"][0]
+            message = choice.get("message", {})
+
+            # Handle content text
+            content = message.get("content", "")
+            if content:
+                if isinstance(content, str):
+                    # If content is a string, use it directly
+                    content_blocks.append({"type": "text", "text": content})
+                elif isinstance(content, list):
+                    # If content is a list, extract text from each content block
+                    for content_block in content:
+                        if isinstance(content_block, dict):
+                            if content_block.get("type") == "text":
+                                text = content_block.get("text", "")
+                                if text:
+                                    content_blocks.append(
+                                        {"type": "text", "text": text}
+                                    )
+                            elif "text" in content_block:
+                                # Handle different content block format
+                                text = content_block.get("text", "")
+                                if text:
+                                    content_blocks.append(
+                                        {"type": "text", "text": text}
+                                    )
+
+            # Handle tool calls
+            tool_calls = message.get("tool_calls", [])
+            for tool_call in tool_calls:
+                function = tool_call.get("function", {})
+                arguments = function.get("arguments", "{}")
+                try:
+                    input_data = json.loads(arguments)
+                except json.JSONDecodeError:
+                    input_data = {}
+                tool_use_block = {
+                    "type": "tool_use",
+                    "cache_control": {"type": "ephemeral"},
+                    "id": tool_call.get("id", str(uuid.uuid4())),
+                    "name": function.get("name", ""),
+                    "input": input_data,
+                }
+                content_blocks.append(tool_use_block)
+
+            # Set stop reason based on finish reason
+            finish_reason = choice.get("finish_reason", "stop")
+            if finish_reason == "tool_calls":
+                stop_reason = "tool_use"
+
+        # Build Anthropic response
+        anthropic_response = {
+            "id": str(uuid.uuid4()),
+            "type": "message",
+            "role": "assistant",
+            "content": content_blocks,
+            "model": model,
+            "stop_reason": stop_reason,
+            "stop_sequence": None,
+            "usage": {
+                "input_tokens": openai_response.get("usage", {}).get(
+                    "prompt_tokens", 0
+                ),
+                "output_tokens": openai_response.get("usage", {}).get(
+                    "completion_tokens", 0
+                ),
+                "cache_creation_input_tokens": 0,
+                "cache_read_input_tokens": 0,
+            },
+        }
+
+        return anthropic_response
+
 
 def run(
     supervisor_address: str,
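A worked example of the conversion (ids are fresh UUIDs in the real code):

openai_response = {
    "choices": [{
        "message": {"content": "Hi there!", "tool_calls": []},
        "finish_reason": "stop",
    }],
    "usage": {"prompt_tokens": 5, "completion_tokens": 3},
}

# _convert_openai_to_anthropic(openai_response, "my-model") returns roughly:
# {
#   "id": "<uuid4>", "type": "message", "role": "assistant",
#   "content": [{"type": "text", "text": "Hi there!"}],
#   "model": "my-model", "stop_reason": "stop", "stop_sequence": None,
#   "usage": {"input_tokens": 5, "output_tokens": 3,
#             "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0},
# }

A finish_reason of "tool_calls" maps to stop_reason "tool_use", with each tool call emitted as a tool_use content block.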
xinference/constants.py
CHANGED
@@ -33,6 +33,7 @@ XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_ENABLE_VIRTUAL_ENV"
 XINFERENCE_ENV_VIRTUAL_ENV_SKIP_INSTALLED = "XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED"
 XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS = "XINFERENCE_SSE_PING_ATTEMPTS_SECONDS"
 XINFERENCE_ENV_MAX_TOKENS = "XINFERENCE_MAX_TOKENS"
+XINFERENCE_ENV_ALLOWED_IPS = "XINFERENCE_ALLOWED_IPS"
 
 
 def get_xinference_home() -> str:
@@ -110,3 +111,4 @@ XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED = (
     if os.getenv(XINFERENCE_ENV_VIRTUAL_ENV_SKIP_INSTALLED)
     else None
 )
+XINFERENCE_ALLOWED_IPS = os.getenv(XINFERENCE_ENV_ALLOWED_IPS)
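Because the value is captured with os.getenv at import time, XINFERENCE_ALLOWED_IPS must be set in the environment before the server process starts; leaving it unset disables the restriction entirely. For example (illustrative value):

import os

# Set before xinference is imported/launched; read once at import time.
os.environ["XINFERENCE_ALLOWED_IPS"] = "127.0.0.1,192.168.1.0/24"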
xinference/core/supervisor.py
CHANGED
@@ -886,6 +886,10 @@ class SupervisorActor(xo.StatelessActor):
                 await self._cache_tracker_ref.record_model_version(
                     generate_fn(model_spec), self.address
                 )
+                await self._sync_register_model(
+                    model_type, model, persist, model_spec.model_name
+                )
+
             except ValueError as e:
                 raise e
             except Exception as e:
@@ -894,6 +898,30 @@ class SupervisorActor(xo.StatelessActor):
         else:
             raise ValueError(f"Unsupported model type: {model_type}")
 
+    async def _sync_register_model(
+        self, model_type: str, model: str, persist: bool, model_name: str
+    ):
+        logger.info(f"begin sync model:{model_name} to worker")
+        try:
+            # Sync model to all workers.
+            for name, worker in self._worker_address_to_worker.items():
+                logger.info(f"sync model:{model_name} to {name}")
+                if name == self.address:
+                    # Ignore: when worker and supervisor at the same node.
+                    logger.info(
+                        f"ignore sync model:{model_name} to {name} for same node"
+                    )
+                else:
+                    await worker.register_model(model_type, model, persist)
+                    logger.info(f"success sync model:{model_name} to {name}")
+        except Exception as e:
+            # If sync fails, unregister the model in all workers.
+            for name, worker in self._worker_address_to_worker.items():
+                logger.warning(f"ready to unregister model for {name}")
+                await worker.unregister_model(model_type, model_name)
+                logger.warning(f"finish unregister model:{model} for {name}")
+            raise e
+
     @log_async(logger=logger)
     async def unregister_model(self, model_type: str, model_name: str):
         if model_type in self._custom_register_type_to_cls:
@@ -1014,7 +1042,7 @@ class SupervisorActor(xo.StatelessActor):
             )
 
         # search in worker first
-        if not self.is_local_deployment():
+        if not self.is_local_deployment() and worker_ip is None:
             workers = list(self._worker_address_to_worker.values())
             for worker in workers:
                 res = await worker.get_model_registration(model_type, model_name)
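The net effect: registering a custom model on the supervisor now fans the registration out to every worker (skipping the supervisor's own node), and any failure rolls the registration back on all workers before re-raising. From the client side this stays a single call; a sketch assuming the standard client and a prepared custom-model JSON (file name hypothetical):

from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # assumed endpoint
with open("my_custom_llm.json") as f:     # hypothetical model spec
    client.register_model(model_type="LLM", model=f.read(), persist=True)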
xinference/model/audio/core.py
CHANGED
@@ -25,6 +25,7 @@ from .fish_speech import FishSpeechModel
 from .funasr import FunASRModel
 from .kokoro import KokoroModel
 from .kokoro_mlx import KokoroMLXModel
+from .kokoro_zh import KokoroZHModel
 from .megatts import MegaTTSModel
 from .melotts import MeloTTSModel
 from .whisper import WhisperModel
@@ -140,6 +141,7 @@ def create_audio_model_instance(
         MeloTTSModel,
         KokoroModel,
         KokoroMLXModel,
+        KokoroZHModel,
         MegaTTSModel,
     ]:
         from ..cache_manager import CacheManager
@@ -160,6 +162,7 @@ def create_audio_model_instance(
         MeloTTSModel,
         KokoroModel,
         KokoroMLXModel,
+        KokoroZHModel,
         MegaTTSModel,
     ]
     if model_spec.model_family == "whisper":
@@ -183,6 +186,8 @@ def create_audio_model_instance(
         model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "Kokoro":
         model = KokoroModel(model_uid, model_path, model_spec, **kwargs)
+    elif model_spec.model_family == "Kokoro-zh":
+        model = KokoroZHModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "Kokoro-MLX":
         model = KokoroMLXModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "MegaTTS":
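Launching the new family should follow the usual audio-model flow; a sketch assuming the model_spec.json entries (+20 lines, not expanded in this diff) register it under the name "Kokoro-zh":

from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # assumed endpoint
uid = client.launch_model(model_name="Kokoro-zh", model_type="audio")
model = client.get_model(uid)
audio = model.speech("你好，世界")  # returns synthesized audio bytes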
xinference/model/audio/kokoro.py
CHANGED
|
@@ -81,7 +81,7 @@ class KokoroModel:
|
|
|
81
81
|
logger.info("Launching Kokoro model with language code: %s", lang_code)
|
|
82
82
|
self._model = KPipeline(
|
|
83
83
|
lang_code=lang_code,
|
|
84
|
-
model=KModel(config=config_path, model=model_path),
|
|
84
|
+
model=KModel(config=config_path, model=model_path).to(self._device),
|
|
85
85
|
device=self._device,
|
|
86
86
|
)
|
|
87
87
|
|