xinference 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +107 -11
- xinference/client/restful/restful_client.py +51 -11
- xinference/constants.py +5 -1
- xinference/core/media_interface.py +758 -0
- xinference/core/model.py +49 -9
- xinference/core/supervisor.py +1 -1
- xinference/core/utils.py +1 -1
- xinference/core/worker.py +33 -39
- xinference/deploy/cmdline.py +17 -0
- xinference/deploy/utils.py +0 -3
- xinference/model/audio/__init__.py +16 -27
- xinference/model/audio/core.py +2 -1
- xinference/model/audio/cosyvoice.py +4 -2
- xinference/model/audio/model_spec.json +63 -46
- xinference/model/audio/model_spec_modelscope.json +31 -14
- xinference/model/embedding/__init__.py +16 -24
- xinference/model/image/__init__.py +15 -25
- xinference/model/llm/__init__.py +40 -115
- xinference/model/llm/core.py +29 -6
- xinference/model/llm/llama_cpp/core.py +30 -347
- xinference/model/llm/llm_family.json +1674 -2203
- xinference/model/llm/llm_family.py +71 -7
- xinference/model/llm/llm_family_csghub.json +0 -32
- xinference/model/llm/llm_family_modelscope.json +1838 -2016
- xinference/model/llm/llm_family_openmind_hub.json +19 -325
- xinference/model/llm/lmdeploy/core.py +7 -2
- xinference/model/llm/mlx/core.py +23 -7
- xinference/model/llm/reasoning_parser.py +281 -5
- xinference/model/llm/sglang/core.py +39 -11
- xinference/model/llm/transformers/chatglm.py +9 -2
- xinference/model/llm/transformers/cogagent.py +10 -12
- xinference/model/llm/transformers/cogvlm2.py +6 -3
- xinference/model/llm/transformers/cogvlm2_video.py +3 -6
- xinference/model/llm/transformers/core.py +58 -60
- xinference/model/llm/transformers/deepseek_v2.py +4 -2
- xinference/model/llm/transformers/deepseek_vl.py +10 -4
- xinference/model/llm/transformers/deepseek_vl2.py +9 -4
- xinference/model/llm/transformers/gemma3.py +4 -5
- xinference/model/llm/transformers/glm4v.py +3 -21
- xinference/model/llm/transformers/glm_edge_v.py +3 -20
- xinference/model/llm/transformers/intern_vl.py +3 -6
- xinference/model/llm/transformers/internlm2.py +1 -1
- xinference/model/llm/transformers/minicpmv25.py +4 -2
- xinference/model/llm/transformers/minicpmv26.py +5 -3
- xinference/model/llm/transformers/omnilmm.py +1 -1
- xinference/model/llm/transformers/opt.py +1 -1
- xinference/model/llm/transformers/ovis2.py +302 -0
- xinference/model/llm/transformers/qwen-omni.py +8 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +5 -1
- xinference/model/llm/transformers/qwen_vl.py +5 -2
- xinference/model/llm/utils.py +96 -45
- xinference/model/llm/vllm/core.py +108 -24
- xinference/model/llm/vllm/distributed_executor.py +8 -7
- xinference/model/llm/vllm/xavier/allocator.py +1 -1
- xinference/model/llm/vllm/xavier/block_manager.py +1 -1
- xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
- xinference/model/llm/vllm/xavier/executor.py +1 -1
- xinference/model/llm/vllm/xavier/test/test_xavier.py +2 -11
- xinference/model/rerank/__init__.py +13 -24
- xinference/model/video/__init__.py +15 -25
- xinference/model/video/core.py +3 -3
- xinference/model/video/diffusers.py +157 -13
- xinference/model/video/model_spec.json +100 -0
- xinference/model/video/model_spec_modelscope.json +104 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
- xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
- xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
- xinference/thirdparty/cosyvoice/bin/train.py +7 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
- xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
- xinference/thirdparty/cosyvoice/cli/model.py +140 -155
- xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
- xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
- xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
- xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
- xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
- xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
- xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
- xinference/thirdparty/cosyvoice/utils/common.py +1 -1
- xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
- xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
- xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
- xinference/types.py +2 -71
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
- xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
- xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
- xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
- xinference/web/ui/src/locales/en.json +7 -4
- xinference/web/ui/src/locales/zh.json +7 -4
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/RECORD +120 -121
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
- xinference/core/image_interface.py +0 -377
- xinference/model/llm/transformers/compression.py +0 -258
- xinference/model/llm/transformers/yi_vl.py +0 -239
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
- xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
- xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
- xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
- /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
-    "date": "2025-…
+    "date": "2025-05-16T20:05:54+0800",
     "dirty": false,
     "error": null,
-    "full-revisionid": "…
-    "version": "1.…
+    "full-revisionid": "81a24f4646ace8f41c85a810237491d9c0ad5282",
+    "version": "1.6.0"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py
CHANGED
@@ -56,6 +56,7 @@ from ..constants import (
     XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
     XINFERENCE_DEFAULT_ENDPOINT_PORT,
     XINFERENCE_DISABLE_METRICS,
+    XINFERENCE_SSE_PING_ATTEMPTS_SECONDS,
 )
 from ..core.event import Event, EventCollectorActor, EventType
 from ..core.supervisor import SupervisorActor
@@ -201,13 +202,13 @@ class BuildGradioInterfaceRequest(BaseModel):
     model_lang: List[str]
 
 
-class …
+class BuildGradioMediaInterfaceRequest(BaseModel):
     model_type: str
     model_name: str
     model_family: str
     model_id: str
     controlnet: Union[None, List[Dict[str, Union[str, dict, None]]]]
-    model_revision: str
+    model_revision: Optional[str]
     model_ability: List[str]
@@ -352,7 +353,27 @@ class RESTfulAPI(CancelMixin):
         )
         self._router.add_api_route(
             "/v1/ui/images/{model_uid}",
-            self.…
+            self.build_gradio_media_interface,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/v1/ui/audios/{model_uid}",
+            self.build_gradio_media_interface,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/v1/ui/videos/{model_uid}",
+            self.build_gradio_media_interface,
             methods=["POST"],
             dependencies=(
                 [Security(self._auth_service, scopes=["models:read"])]
@@ -676,6 +697,17 @@ class RESTfulAPI(CancelMixin):
                 else None
             ),
         )
+        self._router.add_api_route(
+            "/v1/video/generations/image",
+            self.create_videos_from_images,
+            methods=["POST"],
+            response_model=VideoList,
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
         self._router.add_api_route(
             "/v1/chat/completions",
             self.create_chat_completion,
@@ -1183,16 +1215,16 @@ class RESTfulAPI(CancelMixin):
 
         return JSONResponse(content={"model_uid": model_uid})
 
-    async def …
+    async def build_gradio_media_interface(
         self, model_uid: str, request: Request
     ) -> JSONResponse:
         """
         Build a Gradio interface for image processing models.
         """
         payload = await request.json()
-        body = …
+        body = BuildGradioMediaInterfaceRequest.parse_obj(payload)
         assert self._app is not None
-        assert body.model_type…
+        assert body.model_type in ("image", "video", "audio")
 
         # asyncio.Lock() behaves differently in 3.9 than 3.10+
         # A event loop is required in 3.9 but not 3.10+
@@ -1206,12 +1238,12 @@ class RESTfulAPI(CancelMixin):
         )
         asyncio.set_event_loop(asyncio.new_event_loop())
 
-        from ..core.…
+        from ..core.media_interface import MediaInterface
 
         try:
             access_token = request.headers.get("Authorization")
             internal_host = "localhost" if self._host == "0.0.0.0" else self._host
-            interface = …
+            interface = MediaInterface(
                 endpoint=f"http://{internal_host}:{self._port}",
                 model_uid=model_uid,
                 model_family=body.model_family,
@@ -1221,6 +1253,7 @@ class RESTfulAPI(CancelMixin):
                 controlnet=body.controlnet,
                 access_token=access_token,
                 model_ability=body.model_ability,
+                model_type=body.model_type,
             ).build()
 
             gr.mount_gradio_app(self._app, interface, f"/{model_uid}")
@@ -1338,7 +1371,9 @@ class RESTfulAPI(CancelMixin):
             finally:
                 await model.decrease_serve_count()
 
-            return EventSourceResponse(…
+            return EventSourceResponse(
+                stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
+            )
         else:
             try:
                 data = await model.generate(body.prompt, kwargs, raw_params=raw_kwargs)
@@ -1606,7 +1641,9 @@ class RESTfulAPI(CancelMixin):
                 await model.decrease_serve_count()
 
             return EventSourceResponse(
-                media_type="application/octet-stream",…
+                media_type="application/octet-stream",
+                content=stream_results(),
+                ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS,
             )
         else:
             return Response(media_type="application/octet-stream", content=out)
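The streaming hunks above attach a ping interval to the SSE responses: sse-starlette emits a comment-style keep-alive event on that cadence, so reverse proxies do not sever connections that stay quiet during long generations. A minimal sketch of the mechanism, assuming sse-starlette's documented `ping` parameter (the stream below is illustrative, not xinference code):

```python
import asyncio

from sse_starlette.sse import EventSourceResponse


async def slow_stream():
    # Long gaps between real chunks, as with a slow model generation.
    for chunk in ("partial", "result"):
        await asyncio.sleep(30)
        yield {"data": chunk}  # each dict becomes one SSE event


def build_response() -> EventSourceResponse:
    # With ping=600 (the XINFERENCE_SSE_PING_ATTEMPTS_SECONDS default),
    # sse-starlette sends a keep-alive ping every 600 seconds while no
    # real event is being emitted.
    return EventSourceResponse(slow_stream(), ping=600)
```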
@@ -1975,14 +2012,22 @@ class RESTfulAPI(CancelMixin):
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
 
+        request_id = None
         try:
             kwargs = json.loads(body.kwargs) if body.kwargs else {}
+            request_id = kwargs.get("request_id")
+            self._add_running_task(request_id)
             video_list = await model.text_to_video(
                 prompt=body.prompt,
                 n=body.n,
                 **kwargs,
             )
             return Response(content=video_list, media_type="application/json")
+        except asyncio.CancelledError:
+            err_str = f"The request has been cancelled: {request_id}"
+            logger.error(err_str)
+            await self._report_error_event(model_uid, err_str)
+            raise HTTPException(status_code=409, detail=err_str)
         except Exception as e:
             e = await self._get_model_last_error(model.uid, e)
             logger.error(e, exc_info=True)
@@ -1990,6 +2035,55 @@ class RESTfulAPI(CancelMixin):
             self.handle_request_limit_error(e)
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def create_videos_from_images(
+        self,
+        model: str = Form(...),
+        image: UploadFile = File(media_type="application/octet-stream"),
+        prompt: Optional[Union[str, List[str]]] = Form(None),
+        negative_prompt: Optional[Union[str, List[str]]] = Form(None),
+        n: Optional[int] = Form(1),
+        kwargs: Optional[str] = Form(None),
+    ) -> Response:
+        model_uid = model
+        try:
+            model_ref = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        request_id = None
+        try:
+            if kwargs is not None:
+                parsed_kwargs = json.loads(kwargs)
+            else:
+                parsed_kwargs = {}
+            request_id = parsed_kwargs.get("request_id")
+            self._add_running_task(request_id)
+            video_list = await model_ref.image_to_video(
+                image=Image.open(image.file),
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                n=n,
+                **parsed_kwargs,
+            )
+            return Response(content=video_list, media_type="application/json")
+        except asyncio.CancelledError:
+            err_str = f"The request has been cancelled: {request_id}"
+            logger.error(err_str)
+            await self._report_error_event(model_uid, err_str)
+            raise HTTPException(status_code=409, detail=err_str)
+        except Exception as e:
+            e = await self._get_model_last_error(model_ref.uid, e)
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            self.handle_request_limit_error(e)
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def create_chat_completion(self, request: Request) -> Response:
         raw_body = await request.json()
         body = CreateChatCompletion.parse_obj(raw_body)
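The handler above takes a multipart form, so the endpoint can be exercised directly over HTTP. A hedged sketch using `requests`; the host, port, and model uid are assumptions, and the `request_id` key inside `kwargs` is the value the handler registers through `_add_running_task` so the task becomes cancellable:

```python
import json

import requests

# Assumed local deployment and an already-launched image-to-video model uid.
resp = requests.post(
    "http://localhost:9997/v1/video/generations/image",
    data={
        "model": "my-i2v-model",
        "prompt": "a sailboat drifting at dusk",
        "n": 1,
        # Picked up by the handler and registered as a cancellable task.
        "kwargs": json.dumps({"request_id": "i2v-demo-1"}),
    },
    files={"image": ("image", open("frame.png", "rb"), "application/octet-stream")},
)
resp.raise_for_status()
print(resp.json())  # a VideoList payload
```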
@@ -2122,7 +2216,9 @@ class RESTfulAPI(CancelMixin):
             finally:
                 await model.decrease_serve_count()
 
-            return EventSourceResponse(…
+            return EventSourceResponse(
+                stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
+            )
         else:
             try:
                 data = await model.chat(
xinference/client/restful/restful_client.py
CHANGED

@@ -28,7 +28,6 @@ if TYPE_CHECKING:
     CompletionChunk,
     Embedding,
     ImageList,
-    LlamaCppGenerateConfig,
     PytorchGenerateConfig,
     VideoList,
 )
@@ -464,14 +463,59 @@ class RESTfulVideoModelHandle(RESTfulModelHandle):
         response_data = response.json()
         return response_data
 
+    def image_to_video(
+        self,
+        image: Union[str, bytes],
+        prompt: str,
+        negative_prompt: Optional[str] = None,
+        n: int = 1,
+        **kwargs,
+    ) -> "VideoList":
+        """
+        Creates a video by the input image and text.
+
+        Parameters
+        ----------
+        image: `Union[str, bytes]`
+            The input image to condition the generation on.
+        prompt: `str` or `List[str]`
+            The prompt or prompts to guide video generation. If not defined, you need to pass `prompt_embeds`.
+        negative_prompt (`str` or `List[str]`, *optional*):
+            The prompt or prompts not to guide the image generation.
+        n: `int`, defaults to 1
+            The number of videos to generate per prompt. Must be between 1 and 10.
+        Returns
+        -------
+        VideoList
+            A list of video objects.
+        """
+        url = f"{self._base_url}/v1/video/generations/image"
+        params = {
+            "model": self._model_uid,
+            "prompt": prompt,
+            "negative_prompt": negative_prompt,
+            "n": n,
+            "kwargs": json.dumps(kwargs),
+        }
+        files: List[Any] = []
+        for key, value in params.items():
+            files.append((key, (None, value)))
+        files.append(("image", ("image", image, "application/octet-stream")))
+        response = requests.post(url, files=files, headers=self.auth_headers)
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Failed to create the video from image, detail: {_get_error_string(response)}"
+            )
+
+        response_data = response.json()
+        return response_data
+
 
 class RESTfulGenerateModelHandle(RESTfulModelHandle):
     def generate(
         self,
         prompt: str,
-        generate_config: Optional[
-            Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]
-        ] = None,
+        generate_config: Optional["PytorchGenerateConfig"] = None,
     ) -> Union["Completion", Iterator["CompletionChunk"]]:
         """
         Creates a completion for the provided prompt and parameters via RESTful APIs.
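On the client side, the new `image_to_video` handle method wraps that endpoint. A usage sketch, assuming a server at localhost:9997 and a placeholder image-to-video model uid:

```python
from xinference.client import RESTfulClient

client = RESTfulClient("http://localhost:9997")
# For video models, get_model returns a RESTfulVideoModelHandle.
model = client.get_model("my-i2v-model")

with open("frame.png", "rb") as f:
    video_list = model.image_to_video(
        image=f.read(),
        prompt="a sailboat drifting at dusk",
        n=1,
    )
print(video_list)
```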
@@ -480,9 +524,8 @@ class RESTfulGenerateModelHandle(RESTfulModelHandle):
         ----------
         prompt: str
             The user's message or user's input.
-        generate_config: Optional[…
+        generate_config: Optional["PytorchGenerateConfig"]
             Additional configuration for the chat generation.
-            "LlamaCppGenerateConfig" -> Configuration for llama-cpp-python model
             "PytorchGenerateConfig" -> Configuration for pytorch model
 
         Returns
@@ -528,9 +571,7 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
         self,
         messages: List[Dict],
         tools: Optional[List[Dict]] = None,
-        generate_config: Optional[
-            Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]
-        ] = None,
+        generate_config: Optional["PytorchGenerateConfig"] = None,
     ) -> Union["ChatCompletion", Iterator["ChatCompletionChunk"]]:
         """
         Given a list of messages comprising a conversation, the model will return a response via RESTful APIs.
@@ -541,9 +582,8 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
             A list of messages comprising the conversation so far.
         tools: Optional[List[Dict]]
             A tool list.
-        generate_config: Optional[…
+        generate_config: Optional["PytorchGenerateConfig"]
             Additional configuration for the chat generation.
-            "LlamaCppGenerateConfig" -> configuration for llama-cpp-python model
             "PytorchGenerateConfig" -> configuration for pytorch model
 
         Returns
xinference/constants.py
CHANGED
@@ -29,7 +29,8 @@ XINFERENCE_ENV_DISABLE_HEALTH_CHECK = "XINFERENCE_DISABLE_HEALTH_CHECK"
 XINFERENCE_ENV_DISABLE_METRICS = "XINFERENCE_DISABLE_METRICS"
 XINFERENCE_ENV_DOWNLOAD_MAX_ATTEMPTS = "XINFERENCE_DOWNLOAD_MAX_ATTEMPTS"
 XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE = "XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE"
-XINFERENCE_ENV_VIRTUAL_ENV = "…
+XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_ENABLE_VIRTUAL_ENV"
+XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS = "XINFERENCE_SSE_PING_ATTEMPTS_SECONDS"
 
 
 def get_xinference_home() -> str:
@@ -89,6 +90,9 @@ XINFERENCE_DOWNLOAD_MAX_ATTEMPTS = int(
 XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE = os.environ.get(
     XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE, None
 )
+XINFERENCE_SSE_PING_ATTEMPTS_SECONDS = int(
+    os.environ.get(XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS, 600)
+)
 XINFERENCE_LAUNCH_MODEL_RETRY = 3
 XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION = 30
 XINFERENCE_ENABLE_VIRTUAL_ENV = bool(int(os.getenv(XINFERENCE_ENV_VIRTUAL_ENV, "0")))
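Since the module reads the environment once at import time, an override must be in place before `xinference.constants` is imported, for example by exporting the variable before the server starts. A small illustration:

```python
import os

# Must be set before xinference.constants is imported; the module reads
# the environment only once, at import time.
os.environ["XINFERENCE_SSE_PING_ATTEMPTS_SECONDS"] = "120"

from xinference.constants import XINFERENCE_SSE_PING_ATTEMPTS_SECONDS

print(XINFERENCE_SSE_PING_ATTEMPTS_SECONDS)  # 120
```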
|