PyPI - xinference - Versions diffs - 0.7.5__py3-none-any.whl → 0.8.1__py3-none-any.whl - Mend

xinference 0.7.5__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (120)

xinference/_version.py CHANGED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2024-01-05T15:29:43+0800",
+ "date": "2024-01-19T17:14:28+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "56b28b3e4149b0a9ab6f5322401b1c3f1fc95c1a",
- "version": "0.7.5"
+ "full-revisionid": "fb3985e95fbb3e6cb51a321d6d6a9a10661128fe",
+ "version": "0.8.1"
 }
 '''  # END VERSION_JSON

xinference/api/oauth2/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

xinference/api/oauth2/common.py ADDED Viewed

@@ -0,0 +1,14 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+XINFERENCE_OAUTH2_CONFIG = None

xinference/api/oauth2/core.py ADDED Viewed

@@ -0,0 +1,93 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import List, Optional, Union
+from fastapi import Depends, HTTPException, status
+from fastapi.security import OAuth2PasswordBearer, SecurityScopes
+from jose import JWTError, jwt
+from pydantic import BaseModel, ValidationError
+from typing_extensions import Annotated
+from .types import AuthStartupConfig, User
+logger = logging.getLogger(__name__)
+oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
+def get_db():
+    from .common import XINFERENCE_OAUTH2_CONFIG
+    # In a real enterprise-level environment, this should be the database
+    yield XINFERENCE_OAUTH2_CONFIG
+def get_user(db_users: List[User], username: str) -> Optional[User]:
+    for user in db_users:
+        if user.username == username:
+            return user
+    return None
+class TokenData(BaseModel):
+    username: Union[str, None] = None
+    scopes: List[str] = []
+def verify_token(
+    security_scopes: SecurityScopes,
+    token: Annotated[str, Depends(oauth2_scheme)],
+    config: Optional[AuthStartupConfig] = Depends(get_db),
+):
+    if security_scopes.scopes:
+        authenticate_value = f'Bearer scope="{security_scopes.scope_str}"'
+    else:
+        authenticate_value = "Bearer"
+    credentials_exception = HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Could not validate credentials",
+        headers={"WWW-Authenticate": authenticate_value},
+    )
+    try:
+        assert config is not None
+        payload = jwt.decode(
+            token,
+            config.auth_config.secret_key,
+            algorithms=[config.auth_config.algorithm],
+            options={"verify_exp": False},  # TODO: supports token expiration
+        )
+        username: str = payload.get("sub")
+        if username is None:
+            raise credentials_exception
+        token_scopes = payload.get("scopes", [])
+        # TODO: check expire
+        token_data = TokenData(scopes=token_scopes, username=username)
+    except (JWTError, ValidationError):
+        raise credentials_exception
+    user = get_user(config.user_config, username=token_data.username)  # type: ignore
+    if user is None:
+        raise credentials_exception
+    if "admin" in token_data.scopes:
+        return user
+    for scope in security_scopes.scopes:
+        if scope not in token_data.scopes:
+            raise HTTPException(
+                status_code=status.HTTP_403_FORBIDDEN,
+                detail="Not enough permissions",
+                headers={"WWW-Authenticate": authenticate_value},
+            )
+    return user

xinference/api/oauth2/types.py ADDED Viewed

@@ -0,0 +1,36 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import List
+from pydantic import BaseModel
+class LoginUserForm(BaseModel):
+    username: str
+    password: str
+class User(LoginUserForm):
+    permissions: List[str]
+class AuthConfig(BaseModel):
+    algorithm: str = "HS256"
+    secret_key: str
+    token_expire_in_minutes: int
+class AuthStartupConfig(BaseModel):
+    auth_config: AuthConfig
+    user_config: List[User]

xinference/api/oauth2/utils.py ADDED Viewed

@@ -0,0 +1,44 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from datetime import datetime, timedelta
+from typing import Union
+from jose import jwt
+from passlib.context import CryptContext
+pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
+def create_access_token(
+    data: dict,
+    secret_key: str,
+    algorithm: str,
+    expires_delta: Union[timedelta, None] = None,
+):
+    to_encode = data.copy()
+    if expires_delta:
+        expire = datetime.utcnow() + expires_delta
+    else:
+        expire = datetime.utcnow() + timedelta(minutes=15)
+    to_encode.update({"exp": expire})
+    encoded_jwt = jwt.encode(to_encode, secret_key, algorithm=algorithm)
+    return encoded_jwt
+def verify_password(plain_password, hashed_password):
+    return pwd_context.verify(plain_password, hashed_password)
+def get_password_hash(password):
+    return pwd_context.hash(password)

xinference/api/restful_api.py CHANGED Viewed

@@ -21,10 +21,14 @@ import os
 import pprint
 import sys
 import warnings
+from datetime import timedelta
 from typing import Any, List, Optional, Union
 import gradio as gr
+import pydantic
 import xoscar as xo
+from aioprometheus import REGISTRY, MetricsMiddleware
+from aioprometheus.asgi.starlette import metrics
 from fastapi import (
     APIRouter,
     FastAPI,
@@ -34,9 +38,12 @@ from fastapi import (
     Query,
     Request,
     Response,
+    Security,
     UploadFile,
+    status,
 )
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
 from fastapi.staticfiles import StaticFiles
 from PIL import Image
 from pydantic import BaseModel, Field
@@ -57,11 +64,14 @@ from ..types import (
     CreateCompletion,
     ImageList,
 )
+from .oauth2.core import get_user, verify_token
+from .oauth2.types import AuthStartupConfig, LoginUserForm, User
+from .oauth2.utils import create_access_token, get_password_hash, verify_password
 logger = logging.getLogger(__name__)
-class JSONResponse(StarletteJSONResponse):
+class JSONResponse(StarletteJSONResponse):  # type: ignore # noqa: F811
     def render(self, content: Any) -> bytes:
         return json_dumps(content)
@@ -125,16 +135,48 @@ class BuildGradioInterfaceRequest(BaseModel):
     model_lang: List[str]
+def authenticate_user(db_users: List[User], username: str, password: str):
+    user = get_user(db_users, username)
+    if not user:
+        return False
+    if not verify_password(password, user.password):
+        return False
+    return user
 class RESTfulAPI:
-    def __init__(self, supervisor_address: str, host: str, port: int):
+    def __init__(
+        self,
+        supervisor_address: str,
+        host: str,
+        port: int,
+        auth_config_file: Optional[str] = None,
+    ):
         super().__init__()
         self._supervisor_address = supervisor_address
         self._host = host
         self._port = port
         self._supervisor_ref = None
+        self._auth_config: AuthStartupConfig = self.init_auth_config(auth_config_file)
         self._router = APIRouter()
         self._app = FastAPI()
+    @staticmethod
+    def init_auth_config(auth_config_file: Optional[str]):
+        from .oauth2 import common
+        if auth_config_file:
+            config: AuthStartupConfig = pydantic.parse_file_as(
+                path=auth_config_file, type_=AuthStartupConfig
+            )
+            for user in config.user_config:
+                user.password = get_password_hash(user.password)
+            common.XINFERENCE_OAUTH2_CONFIG = config  # type: ignore
+            return config
+    def is_authenticated(self):
+        return False if self._auth_config is None else True
     @staticmethod
     def handle_request_limit_error(e: Exception):
         if "Rate limit reached" in str(e):
@@ -147,6 +189,33 @@ class RESTfulAPI:
             )
         return self._supervisor_ref
+    async def login_for_access_token(self, form_data: LoginUserForm) -> JSONResponse:
+        user = authenticate_user(
+            self._auth_config.user_config, form_data.username, form_data.password
+        )
+        if not user:
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                detail="Incorrect username or password",
+                headers={"WWW-Authenticate": "Bearer"},
+            )
+        assert user is not None and isinstance(user, User)
+        access_token_expires = timedelta(
+            minutes=self._auth_config.auth_config.token_expire_in_minutes
+        )
+        access_token = create_access_token(
+            data={"sub": user.username, "scopes": user.permissions},
+            secret_key=self._auth_config.auth_config.secret_key,
+            algorithm=self._auth_config.auth_config.algorithm,
+            expires_delta=access_token_expires,
+        )
+        return JSONResponse(
+            content={"access_token": access_token, "token_type": "bearer"}
+        )
+    async def is_cluster_authenticated(self) -> JSONResponse:
+        return JSONResponse(content={"auth": self.is_authenticated()})
     def serve(self, logging_conf: Optional[dict] = None):
         self._app.add_middleware(
             CORSMiddleware,
@@ -155,8 +224,10 @@ class RESTfulAPI:
             allow_methods=["*"],
             allow_headers=["*"],
         )
+        # internal interface
         self._router.add_api_route("/status", self.get_status, methods=["GET"])
-        self._router.add_api_route("/v1/models", self.list_models, methods=["GET"])
+        # conflict with /v1/models/{model_uid} below, so register this first
         self._router.add_api_route(
             "/v1/models/prompts", self._get_builtin_prompts, methods=["GET"]
         )
@@ -166,52 +237,124 @@ class RESTfulAPI:
         self._router.add_api_route(
             "/v1/cluster/devices", self._get_devices_count, methods=["GET"]
         )
+        self._router.add_api_route("/v1/address", self.get_address, methods=["GET"])
+        # user interface
+        self._router.add_api_route(
+            "/v1/ui/{model_uid}",
+            self.build_gradio_interface,
+            methods=["POST"],
+            dependencies=[Security(verify_token, scopes=["models:read"])]
+            if self.is_authenticated()
+            else None,
+        )
+        self._router.add_api_route(
+            "/token", self.login_for_access_token, methods=["POST"]
+        )
+        self._router.add_api_route(
+            "/v1/cluster/auth", self.is_cluster_authenticated, methods=["GET"]
+        )
+        # running instances
         self._router.add_api_route(
-            "/v1/models/{model_uid}", self.describe_model, methods=["GET"]
+            "/v1/models/instances",
+            self.get_instance_info,
+            methods=["GET"],
+            dependencies=[Security(verify_token, scopes=["models:list"])]
+            if self.is_authenticated()
+            else None,
+        )
+        self._router.add_api_route(
+            "/v1/models",
+            self.list_models,
+            methods=["GET"],
+            dependencies=[Security(verify_token, scopes=["models:list"])]
+            if self.is_authenticated()
+            else None,
+        )
+        self._router.add_api_route(
+            "/v1/models/{model_uid}",
+            self.describe_model,
+            methods=["GET"],
+            dependencies=[Security(verify_token, scopes=["models:list"])]
+            if self.is_authenticated()
+            else None,
+        )
+        self._router.add_api_route(
+            "/v1/models",
+            self.launch_model,
+            methods=["POST"],
+            dependencies=[Security(verify_token, scopes=["models:start"])]
+            if self.is_authenticated()
+            else None,
         )
-        self._router.add_api_route("/v1/models", self.launch_model, methods=["POST"])
         self._router.add_api_route(
             "/experimental/speculative_llms",
             self.launch_speculative_llm,
             methods=["POST"],
+            dependencies=[Security(verify_token, scopes=["models:start"])]
+            if self.is_authenticated()
+            else None,
         )
         self._router.add_api_route(
-            "/v1/models/{model_uid}", self.terminate_model, methods=["DELETE"]
+            "/v1/models/{model_uid}",
+            self.terminate_model,
+            methods=["DELETE"],
+            dependencies=[Security(verify_token, scopes=["models:stop"])]
+            if self.is_authenticated()
+            else None,
         )
-        self._router.add_api_route("/v1/address", self.get_address, methods=["GET"])
         self._router.add_api_route(
             "/v1/completions",
             self.create_completion,
             methods=["POST"],
             response_model=Completion,
+            dependencies=[Security(verify_token, scopes=["models:read"])]
+            if self.is_authenticated()
+            else None,
         )
         self._router.add_api_route(
             "/v1/embeddings",
             self.create_embedding,
             methods=["POST"],
+            dependencies=[Security(verify_token, scopes=["models:read"])]
+            if self.is_authenticated()
+            else None,
         )
         self._router.add_api_route(
             "/v1/rerank",
             self.rerank,
             methods=["POST"],
+            dependencies=[Security(verify_token, scopes=["models:read"])]
+            if self.is_authenticated()
+            else None,
         )
         self._router.add_api_route(
             "/v1/images/generations",
             self.create_images,
             methods=["POST"],
             response_model=ImageList,
+            dependencies=[Security(verify_token, scopes=["models:read"])]
+            if self.is_authenticated()
+            else None,
         )
         self._router.add_api_route(
             "/v1/images/variations",
             self.create_variations,
             methods=["POST"],
             response_model=ImageList,
+            dependencies=[Security(verify_token, scopes=["models:read"])]
+            if self.is_authenticated()
+            else None,
         )
         self._router.add_api_route(
             "/v1/chat/completions",
             self.create_chat_completion,
             methods=["POST"],
             response_model=ChatCompletion,
+            dependencies=[Security(verify_token, scopes=["models:read"])]
+            if self.is_authenticated()
+            else None,
         )
         # for custom models
@@ -219,28 +362,42 @@ class RESTfulAPI:
             "/v1/model_registrations/{model_type}",
             self.register_model,
             methods=["POST"],
+            dependencies=[Security(verify_token, scopes=["models:register"])]
+            if self.is_authenticated()
+            else None,
         )
         self._router.add_api_route(
             "/v1/model_registrations/{model_type}/{model_name}",
             self.unregister_model,
             methods=["DELETE"],
+            dependencies=[Security(verify_token, scopes=["models:unregister"])]
+            if self.is_authenticated()
+            else None,
         )
         self._router.add_api_route(
             "/v1/model_registrations/{model_type}",
             self.list_model_registrations,
             methods=["GET"],
+            dependencies=[Security(verify_token, scopes=["models:list"])]
+            if self.is_authenticated()
+            else None,
         )
         self._router.add_api_route(
             "/v1/model_registrations/{model_type}/{model_name}",
             self.get_model_registrations,
             methods=["GET"],
+            dependencies=[Security(verify_token, scopes=["models:list"])]
+            if self.is_authenticated()
+            else None,
         )
-        self._router.add_api_route(
-            "/v1/ui/{model_uid}", self.build_gradio_interface, methods=["POST"]
-        )
+        # Clear the global Registry for the MetricsMiddleware, or
+        # the MetricsMiddleware will register duplicated metrics if the port
+        # conflict (This serve method run more than once).
+        REGISTRY.clear()
+        self._app.add_middleware(MetricsMiddleware)
         self._app.include_router(self._router)
+        self._app.add_route("/metrics", metrics)
         # Check all the routes returns Response.
         # This is to avoid `jsonable_encoder` performance issue:
@@ -406,7 +563,9 @@ class RESTfulAPI:
         return JSONResponse(content={"model_uid": model_uid})
-    async def launch_model(self, request: Request) -> JSONResponse:
+    async def launch_model(
+        self, request: Request, wait_ready: bool = Query(True)
+    ) -> JSONResponse:
         payload = await request.json()
         model_uid = payload.get("model_uid")
         model_name = payload.get("model_name")
@@ -451,6 +610,7 @@ class RESTfulAPI:
                 replica=replica,
                 n_gpu=n_gpu,
                 request_limits=request_limits,
+                wait_ready=wait_ready,
                 **kwargs,
             )
@@ -466,8 +626,22 @@ class RESTfulAPI:
         return JSONResponse(content={"model_uid": model_uid})
+    async def get_instance_info(
+        self,
+        model_name: Optional[str] = Query(None),
+        model_uid: Optional[str] = Query(None),
+    ) -> JSONResponse:
+        try:
+            infos = await (await self._get_supervisor_ref()).get_instance_info(
+                model_name, model_uid
+            )
+        except Exception as e:
+            logger.error(str(e), exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+        return JSONResponse(content=infos)
     async def build_gradio_interface(
-        self, model_uid: str, body: BuildGradioInterfaceRequest
+        self, model_uid: str, body: BuildGradioInterfaceRequest, request: Request
     ) -> JSONResponse:
         """
         Separate build_interface with launch_model
@@ -475,7 +649,7 @@ class RESTfulAPI:
         but calling API in async function does not return
         """
         assert self._app is not None
-        assert body.model_type == "LLM"
+        assert body.model_type in ["LLM", "multimodal"]
         # asyncio.Lock() behaves differently in 3.9 than 3.10+
         # A event loop is required in 3.9 but not 3.10+
@@ -489,21 +663,24 @@ class RESTfulAPI:
                 )
                 asyncio.set_event_loop(asyncio.new_event_loop())
-        from ..core.chat_interface import LLMInterface
+        from ..core.chat_interface import GradioInterface
         try:
+            access_token = request.headers.get("Authorization")
             internal_host = "localhost" if self._host == "0.0.0.0" else self._host
-            interface = LLMInterface(
+            interface = GradioInterface(
                 endpoint=f"http://{internal_host}:{self._port}",
                 model_uid=model_uid,
                 model_name=body.model_name,
                 model_size_in_billions=body.model_size_in_billions,
+                model_type=body.model_type,
                 model_format=body.model_format,
                 quantization=body.quantization,
                 context_length=body.context_length,
                 model_ability=body.model_ability,
                 model_description=body.model_description,
                 model_lang=body.model_lang,
+                access_token=access_token,
             ).build()
             gr.mount_gradio_app(self._app, interface, f"/{model_uid}")
         except ValueError as ve:
@@ -581,8 +758,6 @@ class RESTfulAPI:
                     async for item in iterator:
                         yield item
                 except Exception as ex:
-                    if iterator is not None:
-                        await iterator.destroy()
                     logger.exception("Completion stream got an error: %s", ex)
                     # https://github.com/openai/openai-python/blob/e0aafc6c1a45334ac889fe3e54957d309c3af93f/src/openai/_streaming.py#L107
                     yield dict(data=json.dumps({"error": str(ex)}))
@@ -660,8 +835,7 @@ class RESTfulAPI:
             raise HTTPException(status_code=500, detail=str(e))
         try:
-            if request.kwargs:
-                kwargs = json.loads(request.kwargs)
+            kwargs = json.loads(request.kwargs) if request.kwargs else {}
             image_list = await model.text_to_image(
                 prompt=request.prompt,
                 n=request.n,
@@ -844,8 +1018,6 @@ class RESTfulAPI:
                     async for item in iterator:
                         yield item
                 except Exception as ex:
-                    if iterator is not None:
-                        await iterator.destroy()
                     logger.exception("Chat completion stream got an error: %s", ex)
                     # https://github.com/openai/openai-python/blob/e0aafc6c1a45334ac889fe3e54957d309c3af93f/src/openai/_streaming.py#L107
                     yield dict(data=json.dumps({"error": str(ex)}))
@@ -926,11 +1098,20 @@ class RESTfulAPI:
 def run(
-    supervisor_address: str, host: str, port: int, logging_conf: Optional[dict] = None
+    supervisor_address: str,
+    host: str,
+    port: int,
+    logging_conf: Optional[dict] = None,
+    auth_config_file: Optional[str] = None,
 ):
     logger.info(f"Starting Xinference at endpoint: http://{host}:{port}")
     try:
-        api = RESTfulAPI(supervisor_address=supervisor_address, host=host, port=port)
+        api = RESTfulAPI(
+            supervisor_address=supervisor_address,
+            host=host,
+            port=port,
+            auth_config_file=auth_config_file,
+        )
         api.serve(logging_conf=logging_conf)
     except SystemExit:
         logger.warning("Failed to create socket with port %d", port)
@@ -941,7 +1122,10 @@ def run(
             logger.info(f"Found available port: {port}")
             logger.info(f"Starting Xinference at endpoint: http://{host}:{port}")
             api = RESTfulAPI(
-                supervisor_address=supervisor_address, host=host, port=port
+                supervisor_address=supervisor_address,
+                host=host,
+                port=port,
+                auth_config_file=auth_config_file,
             )
             api.serve(logging_conf=logging_conf)
         else:
@@ -949,10 +1133,15 @@ def run(
 def run_in_subprocess(
-    supervisor_address: str, host: str, port: int, logging_conf: Optional[dict] = None
+    supervisor_address: str,
+    host: str,
+    port: int,
+    logging_conf: Optional[dict] = None,
+    auth_config_file: Optional[str] = None,
 ) -> multiprocessing.Process:
     p = multiprocessing.Process(
-        target=run, args=(supervisor_address, host, port, logging_conf)
+        target=run,
+        args=(supervisor_address, host, port, logging_conf, auth_config_file),
     )
     p.daemon = True
     p.start()

xinference 0.7.5__py3-none-any.whl → 0.8.1__py3-none-any.whl

Potentially problematic release.

xinference 0.7.5__py3-none-any.whl → 0.8.1__py3-none-any.whl