PyPI - xinference - Versions diffs - 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl - Mend - Supply Chain Defender

xinference 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (95)

xinference/api/restful_api.py CHANGED Viewed

@@ -20,12 +20,11 @@ import multiprocessing
 import os
 import pprint
 import sys
+import time
 import warnings
-from datetime import timedelta
 from typing import Any, List, Optional, Union
 import gradio as gr
-import pydantic
 import xoscar as xo
 from aioprometheus import REGISTRY, MetricsMiddleware
 from aioprometheus.asgi.starlette import metrics
@@ -40,7 +39,6 @@ from fastapi import (
     Response,
     Security,
     UploadFile,
-    status,
 )
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
@@ -54,6 +52,7 @@ from uvicorn import Config, Server
 from xoscar.utils import get_next_port
 from ..constants import XINFERENCE_DEFAULT_ENDPOINT_PORT
+from ..core.event import Event, EventCollectorActor, EventType
 from ..core.supervisor import SupervisorActor
 from ..core.utils import json_dumps
 from ..types import (
@@ -63,10 +62,10 @@ from ..types import (
     CreateChatCompletion,
     CreateCompletion,
     ImageList,
+    max_tokens_field,
 )
-from .oauth2.core import get_user, verify_token
-from .oauth2.types import AuthStartupConfig, LoginUserForm, User
-from .oauth2.utils import create_access_token, get_password_hash, verify_password
+from .oauth2.auth_service import AuthService
+from .oauth2.types import LoginUserForm
 logger = logging.getLogger(__name__)
@@ -135,15 +134,6 @@ class BuildGradioInterfaceRequest(BaseModel):
     model_lang: List[str]
-def authenticate_user(db_users: List[User], username: str, password: str):
-    user = get_user(db_users, username)
-    if not user:
-        return False
-    if not verify_password(password, user.password):
-        return False
-    return user
 class RESTfulAPI:
     def __init__(
         self,
@@ -157,25 +147,13 @@ class RESTfulAPI:
         self._host = host
         self._port = port
         self._supervisor_ref = None
-        self._auth_config: AuthStartupConfig = self.init_auth_config(auth_config_file)
+        self._event_collector_ref = None
+        self._auth_service = AuthService(auth_config_file)
         self._router = APIRouter()
         self._app = FastAPI()
-    @staticmethod
-    def init_auth_config(auth_config_file: Optional[str]):
-        from .oauth2 import common
-        if auth_config_file:
-            config: AuthStartupConfig = pydantic.parse_file_as(
-                path=auth_config_file, type_=AuthStartupConfig
-            )
-            for user in config.user_config:
-                user.password = get_password_hash(user.password)
-            common.XINFERENCE_OAUTH2_CONFIG = config  # type: ignore
-            return config
     def is_authenticated(self):
-        return False if self._auth_config is None else True
+        return False if self._auth_service.config is None else True
     @staticmethod
     def handle_request_limit_error(e: Exception):
@@ -189,29 +167,34 @@ class RESTfulAPI:
             )
         return self._supervisor_ref
-    async def login_for_access_token(self, form_data: LoginUserForm) -> JSONResponse:
-        user = authenticate_user(
-            self._auth_config.user_config, form_data.username, form_data.password
-        )
-        if not user:
-            raise HTTPException(
-                status_code=status.HTTP_401_UNAUTHORIZED,
-                detail="Incorrect username or password",
-                headers={"WWW-Authenticate": "Bearer"},
+    async def _get_event_collector_ref(self) -> xo.ActorRefType[EventCollectorActor]:
+        if self._event_collector_ref is None:
+            self._event_collector_ref = await xo.actor_ref(
+                address=self._supervisor_address, uid=EventCollectorActor.uid()
             )
-        assert user is not None and isinstance(user, User)
-        access_token_expires = timedelta(
-            minutes=self._auth_config.auth_config.token_expire_in_minutes
-        )
-        access_token = create_access_token(
-            data={"sub": user.username, "scopes": user.permissions},
-            secret_key=self._auth_config.auth_config.secret_key,
-            algorithm=self._auth_config.auth_config.algorithm,
-            expires_delta=access_token_expires,
-        )
-        return JSONResponse(
-            content={"access_token": access_token, "token_type": "bearer"}
+        return self._event_collector_ref
+    async def _report_error_event(self, model_uid: str, content: str):
+        try:
+            event_collector_ref = await self._get_event_collector_ref()
+            await event_collector_ref.report_event(
+                model_uid,
+                Event(
+                    event_type=EventType.ERROR,
+                    event_ts=int(time.time()),
+                    event_content=content,
+                ),
+            )
+        except Exception:
+            logger.exception(
+                "Report error event failed, model: %s, content: %s", model_uid, content
+            )
+    async def login_for_access_token(self, form_data: LoginUserForm) -> JSONResponse:
+        result = self._auth_service.generate_token_for_user(
+            form_data.username, form_data.password
         )
+        return JSONResponse(content=result)
     async def is_cluster_authenticated(self) -> JSONResponse:
         return JSONResponse(content={"auth": self.is_authenticated()})
@@ -234,6 +217,9 @@ class RESTfulAPI:
         self._router.add_api_route(
             "/v1/models/families", self._get_builtin_families, methods=["GET"]
         )
+        self._router.add_api_route(
+            "/v1/cluster/info", self.get_cluster_device_info, methods=["GET"]
+        )
         self._router.add_api_route(
             "/v1/cluster/devices", self._get_devices_count, methods=["GET"]
         )
@@ -244,7 +230,7 @@ class RESTfulAPI:
             "/v1/ui/{model_uid}",
             self.build_gradio_interface,
             methods=["POST"],
-            dependencies=[Security(verify_token, scopes=["models:read"])]
+            dependencies=[Security(self._auth_service, scopes=["models:read"])]
             if self.is_authenticated()
             else None,
         )
@@ -259,7 +245,15 @@ class RESTfulAPI:
             "/v1/models/instances",
             self.get_instance_info,
             methods=["GET"],
-            dependencies=[Security(verify_token, scopes=["models:list"])]
+            dependencies=[Security(self._auth_service, scopes=["models:list"])]
+            if self.is_authenticated()
+            else None,
+        )
+        self._router.add_api_route(
+            "/v1/models/{model_type}/{model_name}/versions",
+            self.get_model_versions,
+            methods=["GET"],
+            dependencies=[Security(self._auth_service, scopes=["models:list"])]
             if self.is_authenticated()
             else None,
         )
@@ -267,7 +261,7 @@ class RESTfulAPI:
             "/v1/models",
             self.list_models,
             methods=["GET"],
-            dependencies=[Security(verify_token, scopes=["models:list"])]
+            dependencies=[Security(self._auth_service, scopes=["models:list"])]
             if self.is_authenticated()
             else None,
         )
@@ -276,7 +270,23 @@ class RESTfulAPI:
             "/v1/models/{model_uid}",
             self.describe_model,
             methods=["GET"],
-            dependencies=[Security(verify_token, scopes=["models:list"])]
+            dependencies=[Security(self._auth_service, scopes=["models:list"])]
+            if self.is_authenticated()
+            else None,
+        )
+        self._router.add_api_route(
+            "/v1/models/{model_uid}/events",
+            self.get_model_events,
+            methods=["GET"],
+            dependencies=[Security(self._auth_service, scopes=["models:read"])]
+            if self.is_authenticated()
+            else None,
+        )
+        self._router.add_api_route(
+            "/v1/models/instance",
+            self.launch_model_by_version,
+            methods=["POST"],
+            dependencies=[Security(self._auth_service, scopes=["models:start"])]
             if self.is_authenticated()
             else None,
         )
@@ -284,7 +294,7 @@ class RESTfulAPI:
             "/v1/models",
             self.launch_model,
             methods=["POST"],
-            dependencies=[Security(verify_token, scopes=["models:start"])]
+            dependencies=[Security(self._auth_service, scopes=["models:start"])]
             if self.is_authenticated()
             else None,
         )
@@ -292,7 +302,7 @@ class RESTfulAPI:
             "/experimental/speculative_llms",
             self.launch_speculative_llm,
             methods=["POST"],
-            dependencies=[Security(verify_token, scopes=["models:start"])]
+            dependencies=[Security(self._auth_service, scopes=["models:start"])]
             if self.is_authenticated()
             else None,
         )
@@ -300,7 +310,7 @@ class RESTfulAPI:
             "/v1/models/{model_uid}",
             self.terminate_model,
             methods=["DELETE"],
-            dependencies=[Security(verify_token, scopes=["models:stop"])]
+            dependencies=[Security(self._auth_service, scopes=["models:stop"])]
             if self.is_authenticated()
             else None,
         )
@@ -309,7 +319,7 @@ class RESTfulAPI:
             self.create_completion,
             methods=["POST"],
             response_model=Completion,
-            dependencies=[Security(verify_token, scopes=["models:read"])]
+            dependencies=[Security(self._auth_service, scopes=["models:read"])]
             if self.is_authenticated()
             else None,
         )
@@ -317,7 +327,7 @@ class RESTfulAPI:
             "/v1/embeddings",
             self.create_embedding,
             methods=["POST"],
-            dependencies=[Security(verify_token, scopes=["models:read"])]
+            dependencies=[Security(self._auth_service, scopes=["models:read"])]
             if self.is_authenticated()
             else None,
         )
@@ -325,7 +335,23 @@ class RESTfulAPI:
             "/v1/rerank",
             self.rerank,
             methods=["POST"],
-            dependencies=[Security(verify_token, scopes=["models:read"])]
+            dependencies=[Security(self._auth_service, scopes=["models:read"])]
+            if self.is_authenticated()
+            else None,
+        )
+        self._router.add_api_route(
+            "/v1/audio/transcriptions",
+            self.create_transcriptions,
+            methods=["POST"],
+            dependencies=[Security(self._auth_service, scopes=["models:read"])]
+            if self.is_authenticated()
+            else None,
+        )
+        self._router.add_api_route(
+            "/v1/audio/translations",
+            self.create_translations,
+            methods=["POST"],
+            dependencies=[Security(self._auth_service, scopes=["models:read"])]
             if self.is_authenticated()
             else None,
         )
@@ -334,7 +360,7 @@ class RESTfulAPI:
             self.create_images,
             methods=["POST"],
             response_model=ImageList,
-            dependencies=[Security(verify_token, scopes=["models:read"])]
+            dependencies=[Security(self._auth_service, scopes=["models:read"])]
             if self.is_authenticated()
             else None,
         )
@@ -343,7 +369,7 @@ class RESTfulAPI:
             self.create_variations,
             methods=["POST"],
             response_model=ImageList,
-            dependencies=[Security(verify_token, scopes=["models:read"])]
+            dependencies=[Security(self._auth_service, scopes=["models:read"])]
             if self.is_authenticated()
             else None,
         )
@@ -352,7 +378,7 @@ class RESTfulAPI:
             self.create_chat_completion,
             methods=["POST"],
             response_model=ChatCompletion,
-            dependencies=[Security(verify_token, scopes=["models:read"])]
+            dependencies=[Security(self._auth_service, scopes=["models:read"])]
             if self.is_authenticated()
             else None,
         )
@@ -362,7 +388,7 @@ class RESTfulAPI:
             "/v1/model_registrations/{model_type}",
             self.register_model,
             methods=["POST"],
-            dependencies=[Security(verify_token, scopes=["models:register"])]
+            dependencies=[Security(self._auth_service, scopes=["models:register"])]
             if self.is_authenticated()
             else None,
         )
@@ -370,7 +396,7 @@ class RESTfulAPI:
             "/v1/model_registrations/{model_type}/{model_name}",
             self.unregister_model,
             methods=["DELETE"],
-            dependencies=[Security(verify_token, scopes=["models:unregister"])]
+            dependencies=[Security(self._auth_service, scopes=["models:unregister"])]
             if self.is_authenticated()
             else None,
         )
@@ -378,7 +404,7 @@ class RESTfulAPI:
             "/v1/model_registrations/{model_type}",
             self.list_model_registrations,
             methods=["GET"],
-            dependencies=[Security(verify_token, scopes=["models:list"])]
+            dependencies=[Security(self._auth_service, scopes=["models:list"])]
             if self.is_authenticated()
             else None,
         )
@@ -386,7 +412,7 @@ class RESTfulAPI:
             "/v1/model_registrations/{model_type}/{model_name}",
             self.get_model_registrations,
             methods=["GET"],
-            dependencies=[Security(verify_token, scopes=["models:list"])]
+            dependencies=[Security(self._auth_service, scopes=["models:list"])]
             if self.is_authenticated()
             else None,
         )
@@ -640,6 +666,44 @@ class RESTfulAPI:
             raise HTTPException(status_code=500, detail=str(e))
         return JSONResponse(content=infos)
+    async def launch_model_by_version(
+        self, request: Request, wait_ready: bool = Query(True)
+    ) -> JSONResponse:
+        payload = await request.json()
+        model_uid = payload.get("model_uid")
+        model_type = payload.get("model_type")
+        model_version = payload.get("model_version")
+        replica = payload.get("replica", 1)
+        n_gpu = payload.get("n_gpu", "auto")
+        try:
+            model_uid = await (
+                await self._get_supervisor_ref()
+            ).launch_model_by_version(
+                model_uid=model_uid,
+                model_type=model_type,
+                model_version=model_version,
+                replica=replica,
+                n_gpu=n_gpu,
+                wait_ready=wait_ready,
+            )
+        except Exception as e:
+            logger.error(str(e), exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+        return JSONResponse(content={"model_uid": model_uid})
+    async def get_model_versions(
+        self, model_type: str, model_name: str
+    ) -> JSONResponse:
+        try:
+            content = await (await self._get_supervisor_ref()).get_model_versions(
+                model_type, model_name
+            )
+            return JSONResponse(content=content)
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
     async def build_gradio_interface(
         self, model_uid: str, body: BuildGradioInterfaceRequest, request: Request
     ) -> JSONResponse:
@@ -649,7 +713,7 @@ class RESTfulAPI:
         but calling API in async function does not return
         """
         assert self._app is not None
-        assert body.model_type in ["LLM", "multimodal"]
+        assert body.model_type == "LLM"
         # asyncio.Lock() behaves differently in 3.9 than 3.10+
         # A event loop is required in 3.9 but not 3.10+
@@ -731,6 +795,9 @@ class RESTfulAPI:
         }
         kwargs = body.dict(exclude_unset=True, exclude=exclude)
+        if body.max_tokens is None:
+            kwargs["max_tokens"] = max_tokens_field.default
         if body.logit_bias is not None:
             raise HTTPException(status_code=501, detail="Not implemented")
@@ -740,10 +807,12 @@ class RESTfulAPI:
             model = await (await self._get_supervisor_ref()).get_model(model_uid)
         except ValueError as ve:
             logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
             raise HTTPException(status_code=400, detail=str(ve))
         except Exception as e:
             logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
         if body.stream:
@@ -759,6 +828,7 @@ class RESTfulAPI:
                         yield item
                 except Exception as ex:
                     logger.exception("Completion stream got an error: %s", ex)
+                    await self._report_error_event(model_uid, str(ex))
                     # https://github.com/openai/openai-python/blob/e0aafc6c1a45334ac889fe3e54957d309c3af93f/src/openai/_streaming.py#L107
                     yield dict(data=json.dumps({"error": str(ex)}))
@@ -769,6 +839,7 @@ class RESTfulAPI:
                 return Response(data, media_type="application/json")
             except Exception as e:
                 logger.error(e, exc_info=True)
+                await self._report_error_event(model_uid, str(e))
                 self.handle_request_limit_error(e)
                 raise HTTPException(status_code=500, detail=str(e))
@@ -779,9 +850,11 @@ class RESTfulAPI:
             model = await (await self._get_supervisor_ref()).get_model(model_uid)
         except ValueError as ve:
             logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
             raise HTTPException(status_code=400, detail=str(ve))
         except Exception as e:
             logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
         try:
@@ -789,10 +862,12 @@ class RESTfulAPI:
             return Response(embedding, media_type="application/json")
         except RuntimeError as re:
             logger.error(re, exc_info=True)
+            await self._report_error_event(model_uid, str(re))
             self.handle_request_limit_error(re)
             raise HTTPException(status_code=400, detail=str(re))
         except Exception as e:
             logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
     async def rerank(self, request: RerankRequest) -> Response:
@@ -801,9 +876,11 @@ class RESTfulAPI:
             model = await (await self._get_supervisor_ref()).get_model(model_uid)
         except ValueError as ve:
             logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
             raise HTTPException(status_code=400, detail=str(ve))
         except Exception as e:
             logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
         try:
@@ -817,10 +894,100 @@ class RESTfulAPI:
             return Response(scores, media_type="application/json")
         except RuntimeError as re:
             logger.error(re, exc_info=True)
+            await self._report_error_event(model_uid, str(re))
             self.handle_request_limit_error(re)
             raise HTTPException(status_code=400, detail=str(re))
         except Exception as e:
             logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+    async def create_transcriptions(
+        self,
+        model: str = Form(...),
+        file: UploadFile = File(media_type="application/octet-stream"),
+        language: Optional[str] = Form(None),
+        prompt: Optional[str] = Form(None),
+        response_format: Optional[str] = Form("json"),
+        temperature: Optional[float] = Form(0),
+        kwargs: Optional[str] = Form(None),
+    ) -> Response:
+        model_uid = model
+        try:
+            model_ref = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+        try:
+            if kwargs is not None:
+                parsed_kwargs = json.loads(kwargs)
+            else:
+                parsed_kwargs = {}
+            transcription = await model_ref.transcriptions(
+                audio=await file.read(),
+                language=language,
+                prompt=prompt,
+                response_format=response_format,
+                temperature=temperature,
+                **parsed_kwargs,
+            )
+            return Response(content=transcription, media_type="application/json")
+        except RuntimeError as re:
+            logger.error(re, exc_info=True)
+            await self._report_error_event(model_uid, str(re))
+            raise HTTPException(status_code=400, detail=str(re))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+    async def create_translations(
+        self,
+        model: str = Form(...),
+        file: UploadFile = File(media_type="application/octet-stream"),
+        prompt: Optional[str] = Form(None),
+        response_format: Optional[str] = Form("json"),
+        temperature: Optional[float] = Form(0),
+        kwargs: Optional[str] = Form(None),
+    ) -> Response:
+        model_uid = model
+        try:
+            model_ref = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+        try:
+            if kwargs is not None:
+                parsed_kwargs = json.loads(kwargs)
+            else:
+                parsed_kwargs = {}
+            translation = await model_ref.translations(
+                audio=await file.read(),
+                prompt=prompt,
+                response_format=response_format,
+                temperature=temperature,
+                **parsed_kwargs,
+            )
+            return Response(content=translation, media_type="application/json")
+        except RuntimeError as re:
+            logger.error(re, exc_info=True)
+            await self._report_error_event(model_uid, str(re))
+            raise HTTPException(status_code=400, detail=str(re))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
     async def create_images(self, request: TextToImageRequest) -> Response:
@@ -829,9 +996,11 @@ class RESTfulAPI:
             model = await (await self._get_supervisor_ref()).get_model(model_uid)
         except ValueError as ve:
             logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
             raise HTTPException(status_code=400, detail=str(ve))
         except Exception as e:
             logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
         try:
@@ -846,10 +1015,12 @@ class RESTfulAPI:
             return Response(content=image_list, media_type="application/json")
         except RuntimeError as re:
             logger.error(re, exc_info=True)
+            await self._report_error_event(model_uid, str(re))
             self.handle_request_limit_error(re)
             raise HTTPException(status_code=400, detail=str(re))
         except Exception as e:
             logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
     async def create_variations(
@@ -868,14 +1039,18 @@ class RESTfulAPI:
             model_ref = await (await self._get_supervisor_ref()).get_model(model_uid)
         except ValueError as ve:
             logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
             raise HTTPException(status_code=400, detail=str(ve))
         except Exception as e:
             logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
         try:
             if kwargs is not None:
-                kwargs = json.loads(kwargs)
+                parsed_kwargs = json.loads(kwargs)
+            else:
+                parsed_kwargs = {}
             image_list = await model_ref.image_to_image(
                 image=Image.open(image.file),
                 prompt=prompt,
@@ -883,14 +1058,16 @@ class RESTfulAPI:
                 n=n,
                 size=size,
                 response_format=response_format,
-                **kwargs,
+                **parsed_kwargs,
             )
             return Response(content=image_list, media_type="application/json")
         except RuntimeError as re:
             logger.error(re, exc_info=True)
+            await self._report_error_event(model_uid, str(re))
             raise HTTPException(status_code=400, detail=str(re))
         except Exception as e:
             logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
     async def create_chat_completion(
@@ -909,6 +1086,9 @@ class RESTfulAPI:
         }
         kwargs = body.dict(exclude_unset=True, exclude=exclude)
+        if body.max_tokens is None:
+            kwargs["max_tokens"] = max_tokens_field.default
         if body.logit_bias is not None:
             raise HTTPException(status_code=501, detail="Not implemented")
@@ -958,31 +1138,32 @@ class RESTfulAPI:
             model = await (await self._get_supervisor_ref()).get_model(model_uid)
         except ValueError as ve:
             logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
             raise HTTPException(status_code=400, detail=str(ve))
         except Exception as e:
             logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
         try:
             desc = await (await self._get_supervisor_ref()).describe_model(model_uid)
         except ValueError as ve:
             logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
             raise HTTPException(status_code=400, detail=str(ve))
         except Exception as e:
             logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
         model_name = desc.get("model_name", "")
-        is_chatglm_ggml = (
-            desc.get("model_format") == "ggmlv3" and "chatglm" in model_name
-        )
         function_call_models = ["chatglm3", "gorilla-openfunctions-v1", "qwen-chat"]
         is_qwen = desc.get("model_format") == "ggmlv3" and "qwen" in model_name
-        if (is_chatglm_ggml or is_qwen) and system_prompt is not None:
+        if is_qwen and system_prompt is not None:
             raise HTTPException(
-                status_code=400, detail="ChatGLM ggml does not have system prompt"
+                status_code=400, detail="Qwen ggml does not have system prompt"
             )
         if not any(name in model_name for name in function_call_models):
@@ -1007,31 +1188,34 @@ class RESTfulAPI:
                 iterator = None
                 try:
                     try:
-                        if is_chatglm_ggml or is_qwen:
+                        if is_qwen:
                             iterator = await model.chat(prompt, chat_history, kwargs)
                         else:
                             iterator = await model.chat(
                                 prompt, system_prompt, chat_history, kwargs
                             )
                     except RuntimeError as re:
+                        await self._report_error_event(model_uid, str(re))
                         self.handle_request_limit_error(re)
                     async for item in iterator:
                         yield item
                 except Exception as ex:
                     logger.exception("Chat completion stream got an error: %s", ex)
+                    await self._report_error_event(model_uid, str(ex))
                     # https://github.com/openai/openai-python/blob/e0aafc6c1a45334ac889fe3e54957d309c3af93f/src/openai/_streaming.py#L107
                     yield dict(data=json.dumps({"error": str(ex)}))
             return EventSourceResponse(stream_results())
         else:
             try:
-                if is_chatglm_ggml or is_qwen:
+                if is_qwen:
                     data = await model.chat(prompt, chat_history, kwargs)
                 else:
                     data = await model.chat(prompt, system_prompt, chat_history, kwargs)
                 return Response(content=data, media_type="application/json")
             except Exception as e:
                 logger.error(e, exc_info=True)
+                await self._report_error_event(model_uid, str(e))
                 self.handle_request_limit_error(e)
                 raise HTTPException(status_code=500, detail=str(e))
@@ -1096,6 +1280,26 @@ class RESTfulAPI:
             logger.error(e, exc_info=True)
             raise HTTPException(status_code=500, detail=str(e))
+    async def get_model_events(self, model_uid: str) -> JSONResponse:
+        try:
+            event_collector_ref = await self._get_event_collector_ref()
+            events = await event_collector_ref.get_model_events(model_uid)
+            return JSONResponse(content=events)
+        except ValueError as re:
+            logger.error(re, exc_info=True)
+            raise HTTPException(status_code=400, detail=str(re))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+    async def get_cluster_device_info(self) -> JSONResponse:
+        try:
+            data = await (await self._get_supervisor_ref()).get_cluster_device_info()
+            return JSONResponse(content=data)
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
 def run(
     supervisor_address: str,