xinference 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +50 -1
- xinference/client/restful/restful_client.py +82 -2
- xinference/constants.py +3 -0
- xinference/core/chat_interface.py +297 -83
- xinference/core/model.py +1 -0
- xinference/core/progress_tracker.py +16 -8
- xinference/core/supervisor.py +45 -1
- xinference/core/worker.py +262 -37
- xinference/deploy/cmdline.py +33 -1
- xinference/model/audio/core.py +11 -1
- xinference/model/audio/megatts.py +105 -0
- xinference/model/audio/model_spec.json +24 -1
- xinference/model/audio/model_spec_modelscope.json +26 -1
- xinference/model/core.py +14 -0
- xinference/model/embedding/core.py +6 -1
- xinference/model/flexible/core.py +6 -1
- xinference/model/image/core.py +6 -1
- xinference/model/image/model_spec.json +17 -1
- xinference/model/image/model_spec_modelscope.json +17 -1
- xinference/model/llm/__init__.py +0 -4
- xinference/model/llm/core.py +4 -0
- xinference/model/llm/llama_cpp/core.py +40 -16
- xinference/model/llm/llm_family.json +413 -84
- xinference/model/llm/llm_family.py +24 -1
- xinference/model/llm/llm_family_modelscope.json +447 -0
- xinference/model/llm/mlx/core.py +16 -2
- xinference/model/llm/transformers/__init__.py +14 -0
- xinference/model/llm/transformers/core.py +30 -6
- xinference/model/llm/transformers/gemma3.py +17 -2
- xinference/model/llm/transformers/intern_vl.py +28 -18
- xinference/model/llm/transformers/minicpmv26.py +21 -2
- xinference/model/llm/transformers/qwen-omni.py +308 -0
- xinference/model/llm/transformers/qwen2_audio.py +1 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -4
- xinference/model/llm/utils.py +11 -1
- xinference/model/llm/vllm/core.py +35 -0
- xinference/model/llm/vllm/distributed_executor.py +8 -2
- xinference/model/rerank/core.py +6 -1
- xinference/model/utils.py +118 -1
- xinference/model/video/core.py +6 -1
- xinference/thirdparty/megatts3/__init__.py +0 -0
- xinference/thirdparty/megatts3/tts/frontend_function.py +175 -0
- xinference/thirdparty/megatts3/tts/gradio_api.py +93 -0
- xinference/thirdparty/megatts3/tts/infer_cli.py +277 -0
- xinference/thirdparty/megatts3/tts/modules/aligner/whisper_small.py +318 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/ar_dur_predictor.py +362 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/layers.py +64 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/nar_tts_modules.py +73 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rel_transformer.py +403 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rot_transformer.py +649 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/seq_utils.py +342 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/transformer.py +767 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/cfm.py +309 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/dit.py +180 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/time_embedding.py +44 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/transformer.py +230 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/diag_gaussian.py +67 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/hifigan_modules.py +283 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/seanet_encoder.py +38 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/wavvae_v3.py +60 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/conv.py +154 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/lstm.py +51 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/seanet.py +126 -0
- xinference/thirdparty/megatts3/tts/utils/audio_utils/align.py +36 -0
- xinference/thirdparty/megatts3/tts/utils/audio_utils/io.py +95 -0
- xinference/thirdparty/megatts3/tts/utils/audio_utils/plot.py +90 -0
- xinference/thirdparty/megatts3/tts/utils/commons/ckpt_utils.py +171 -0
- xinference/thirdparty/megatts3/tts/utils/commons/hparams.py +215 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/dict.json +1 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/ph_tone_convert.py +94 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/split_text.py +90 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/text_encoder.py +280 -0
- xinference/types.py +10 -0
- xinference/utils.py +54 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.0f6523be.css +2 -0
- xinference/web/ui/build/static/css/main.0f6523be.css.map +1 -0
- xinference/web/ui/build/static/js/main.58bd483c.js +3 -0
- xinference/web/ui/build/static/js/main.58bd483c.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3bff8cbe9141f937f4d98879a9771b0f48e0e4e0dbee8e647adbfe23859e7048.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4500b1a622a031011f0a291701e306b87e08cbc749c50e285103536b85b6a914.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69081049f0c7447544b7cfd73dd13d8846c02fe5febe4d81587e95c89a412d5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bf2b211b0d1b6465eff512d64c869d748f803c5651a7c24e48de6ea3484a7bfe.json +1 -0
- xinference/web/ui/src/locales/en.json +2 -1
- xinference/web/ui/src/locales/zh.json +2 -1
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/METADATA +127 -114
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/RECORD +96 -60
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/WHEEL +1 -1
- xinference/web/ui/build/static/css/main.b494ae7e.css +0 -2
- xinference/web/ui/build/static/css/main.b494ae7e.css.map +0 -1
- xinference/web/ui/build/static/js/main.5ca4eea1.js +0 -3
- xinference/web/ui/build/static/js/main.5ca4eea1.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0967acaec5df1d45b80010949c258d64297ebbb0f44b8bb3afcbd45c6f0ec4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/68249645124f37d01eef83b1d897e751f895bea919b6fb466f907c1f87cebc84.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e547bbb18abb4a474b675a8d5782d25617566bea0af8caa9b836ce5649e2250a.json +0 -1
- /xinference/web/ui/build/static/js/{main.5ca4eea1.js.LICENSE.txt → main.58bd483c.js.LICENSE.txt} +0 -0
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info/licenses}/LICENSE +0 -0
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-04-
|
|
11
|
+
"date": "2025-04-19T20:32:22+0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "1.
|
|
14
|
+
"full-revisionid": "ee8d025e1c046b22b3b148e5e97c0e107c979ee3",
|
|
15
|
+
"version": "1.5.0"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
xinference/api/restful_api.py
CHANGED
|
@@ -468,6 +468,26 @@ class RESTfulAPI(CancelMixin):
|
|
|
468
468
|
else None
|
|
469
469
|
),
|
|
470
470
|
)
|
|
471
|
+
self._router.add_api_route(
|
|
472
|
+
"/v1/models/{model_uid}/progress",
|
|
473
|
+
self.get_launch_model_progress,
|
|
474
|
+
methods=["GET"],
|
|
475
|
+
dependencies=(
|
|
476
|
+
[Security(self._auth_service, scopes=["models:read"])]
|
|
477
|
+
if self.is_authenticated()
|
|
478
|
+
else None
|
|
479
|
+
),
|
|
480
|
+
)
|
|
481
|
+
self._router.add_api_route(
|
|
482
|
+
"/v1/models/{model_uid}/cancel",
|
|
483
|
+
self.cancel_launch_model,
|
|
484
|
+
methods=["POST"],
|
|
485
|
+
dependencies=(
|
|
486
|
+
[Security(self._auth_service, scopes=["models:stop"])]
|
|
487
|
+
if self.is_authenticated()
|
|
488
|
+
else None
|
|
489
|
+
),
|
|
490
|
+
)
|
|
471
491
|
self._router.add_api_route(
|
|
472
492
|
"/v1/completions",
|
|
473
493
|
self.create_completion,
|
|
@@ -1024,6 +1044,10 @@ class RESTfulAPI(CancelMixin):
|
|
|
1024
1044
|
except RuntimeError as re:
|
|
1025
1045
|
logger.error(str(re), exc_info=True)
|
|
1026
1046
|
raise HTTPException(status_code=503, detail=str(re))
|
|
1047
|
+
except asyncio.CancelledError as ce:
|
|
1048
|
+
# cancelled by user
|
|
1049
|
+
logger.error(str(ce), exc_info=True)
|
|
1050
|
+
raise HTTPException(status_code=499, detail=str(ce))
|
|
1027
1051
|
except Exception as e:
|
|
1028
1052
|
logger.error(str(e), exc_info=True)
|
|
1029
1053
|
raise HTTPException(status_code=500, detail=str(e))
|
|
@@ -1044,6 +1068,26 @@ class RESTfulAPI(CancelMixin):
|
|
|
1044
1068
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1045
1069
|
return JSONResponse(content=infos)
|
|
1046
1070
|
|
|
1071
|
+
async def get_launch_model_progress(self, model_uid: str) -> JSONResponse:
|
|
1072
|
+
try:
|
|
1073
|
+
progress = await (
|
|
1074
|
+
await self._get_supervisor_ref()
|
|
1075
|
+
).get_launch_builtin_model_progress(model_uid)
|
|
1076
|
+
except Exception as e:
|
|
1077
|
+
logger.error(str(e), exc_info=True)
|
|
1078
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
1079
|
+
return JSONResponse(content={"progress": progress})
|
|
1080
|
+
|
|
1081
|
+
async def cancel_launch_model(self, model_uid: str) -> JSONResponse:
|
|
1082
|
+
try:
|
|
1083
|
+
await (await self._get_supervisor_ref()).cancel_launch_builtin_model(
|
|
1084
|
+
model_uid
|
|
1085
|
+
)
|
|
1086
|
+
except Exception as e:
|
|
1087
|
+
logger.error(str(e), exc_info=True)
|
|
1088
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
1089
|
+
return JSONResponse(content=None)
|
|
1090
|
+
|
|
1047
1091
|
async def launch_model_by_version(
|
|
1048
1092
|
self, request: Request, wait_ready: bool = Query(True)
|
|
1049
1093
|
) -> JSONResponse:
|
|
@@ -1514,8 +1558,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1514
1558
|
prompt_speech: Optional[UploadFile] = File(
|
|
1515
1559
|
None, media_type="application/octet-stream"
|
|
1516
1560
|
),
|
|
1561
|
+
prompt_latent: Optional[UploadFile] = File(
|
|
1562
|
+
None, media_type="application/octet-stream"
|
|
1563
|
+
),
|
|
1517
1564
|
) -> Response:
|
|
1518
|
-
if prompt_speech:
|
|
1565
|
+
if prompt_speech or prompt_latent:
|
|
1519
1566
|
f = await request.form()
|
|
1520
1567
|
else:
|
|
1521
1568
|
f = await request.json()
|
|
@@ -1539,6 +1586,8 @@ class RESTfulAPI(CancelMixin):
|
|
|
1539
1586
|
parsed_kwargs = {}
|
|
1540
1587
|
if prompt_speech is not None:
|
|
1541
1588
|
parsed_kwargs["prompt_speech"] = await prompt_speech.read()
|
|
1589
|
+
if prompt_latent is not None:
|
|
1590
|
+
parsed_kwargs["prompt_latent"] = await prompt_latent.read()
|
|
1542
1591
|
out = await model.speech(
|
|
1543
1592
|
input=body.input,
|
|
1544
1593
|
voice=body.voice,
|
|
@@ -723,6 +723,7 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
|
|
|
723
723
|
speed: float = 1.0,
|
|
724
724
|
stream: bool = False,
|
|
725
725
|
prompt_speech: Optional[bytes] = None,
|
|
726
|
+
prompt_latent: Optional[bytes] = None,
|
|
726
727
|
**kwargs,
|
|
727
728
|
):
|
|
728
729
|
"""
|
|
@@ -743,6 +744,8 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
|
|
|
743
744
|
Use stream or not.
|
|
744
745
|
prompt_speech: bytes
|
|
745
746
|
The audio bytes to be provided to the model.
|
|
747
|
+
prompt_latent: bytes
|
|
748
|
+
The latent bytes to be provided to the model.
|
|
746
749
|
|
|
747
750
|
Returns
|
|
748
751
|
-------
|
|
@@ -759,14 +762,22 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
|
|
|
759
762
|
"stream": stream,
|
|
760
763
|
"kwargs": json.dumps(kwargs),
|
|
761
764
|
}
|
|
765
|
+
files: List[Any] = []
|
|
762
766
|
if prompt_speech:
|
|
763
|
-
files: List[Any] = []
|
|
764
767
|
files.append(
|
|
765
768
|
(
|
|
766
769
|
"prompt_speech",
|
|
767
770
|
("prompt_speech", prompt_speech, "application/octet-stream"),
|
|
768
771
|
)
|
|
769
772
|
)
|
|
773
|
+
if prompt_latent:
|
|
774
|
+
files.append(
|
|
775
|
+
(
|
|
776
|
+
"prompt_latent",
|
|
777
|
+
("prompt_latent", prompt_latent, "application/octet-stream"),
|
|
778
|
+
)
|
|
779
|
+
)
|
|
780
|
+
if files:
|
|
770
781
|
response = requests.post(
|
|
771
782
|
url, data=params, files=files, headers=self.auth_headers, stream=stream
|
|
772
783
|
)
|
|
@@ -999,10 +1010,17 @@ class Client:
|
|
|
999
1010
|
"model_path": model_path,
|
|
1000
1011
|
}
|
|
1001
1012
|
|
|
1013
|
+
wait_ready = kwargs.pop("wait_ready", True)
|
|
1014
|
+
|
|
1002
1015
|
for key, value in kwargs.items():
|
|
1003
1016
|
payload[str(key)] = value
|
|
1004
1017
|
|
|
1005
|
-
|
|
1018
|
+
if wait_ready:
|
|
1019
|
+
response = requests.post(url, json=payload, headers=self._headers)
|
|
1020
|
+
else:
|
|
1021
|
+
response = requests.post(
|
|
1022
|
+
url, json=payload, headers=self._headers, params={"wait_ready": False}
|
|
1023
|
+
)
|
|
1006
1024
|
if response.status_code != 200:
|
|
1007
1025
|
raise RuntimeError(
|
|
1008
1026
|
f"Failed to launch model, detail: {_get_error_string(response)}"
|
|
@@ -1035,6 +1053,68 @@ class Client:
|
|
|
1035
1053
|
f"Failed to terminate model, detail: {_get_error_string(response)}"
|
|
1036
1054
|
)
|
|
1037
1055
|
|
|
1056
|
+
def get_launch_model_progress(self, model_uid: str) -> dict:
|
|
1057
|
+
"""
|
|
1058
|
+
Get progress of the specific model.
|
|
1059
|
+
|
|
1060
|
+
Parameters
|
|
1061
|
+
----------
|
|
1062
|
+
model_uid: str
|
|
1063
|
+
The unique id that identify the model we want.
|
|
1064
|
+
|
|
1065
|
+
Returns
|
|
1066
|
+
-------
|
|
1067
|
+
result: dict
|
|
1068
|
+
Result that contains progress.
|
|
1069
|
+
|
|
1070
|
+
Raises
|
|
1071
|
+
------
|
|
1072
|
+
RuntimeError
|
|
1073
|
+
Report failure to get the wanted model with given model_uid. Provide details of failure through error message.
|
|
1074
|
+
"""
|
|
1075
|
+
url = f"{self.base_url}/v1/models/{model_uid}/progress"
|
|
1076
|
+
|
|
1077
|
+
response = requests.get(url, headers=self._headers)
|
|
1078
|
+
if response.status_code != 200:
|
|
1079
|
+
raise RuntimeError(
|
|
1080
|
+
f"Fail to get model launching progress, detail: {_get_error_string(response)}"
|
|
1081
|
+
)
|
|
1082
|
+
return response.json()
|
|
1083
|
+
|
|
1084
|
+
def cancel_launch_model(self, model_uid: str):
|
|
1085
|
+
"""
|
|
1086
|
+
Cancel launching model.
|
|
1087
|
+
|
|
1088
|
+
Parameters
|
|
1089
|
+
----------
|
|
1090
|
+
model_uid: str
|
|
1091
|
+
The unique id that identify the model we want.
|
|
1092
|
+
|
|
1093
|
+
Raises
|
|
1094
|
+
------
|
|
1095
|
+
RuntimeError
|
|
1096
|
+
Report failure to get the wanted model with given model_uid. Provide details of failure through error message.
|
|
1097
|
+
"""
|
|
1098
|
+
url = f"{self.base_url}/v1/models/{model_uid}/cancel"
|
|
1099
|
+
|
|
1100
|
+
response = requests.post(url, headers=self._headers)
|
|
1101
|
+
if response.status_code != 200:
|
|
1102
|
+
raise RuntimeError(
|
|
1103
|
+
f"Fail to cancel launching model, detail: {_get_error_string(response)}"
|
|
1104
|
+
)
|
|
1105
|
+
|
|
1106
|
+
def get_instance_info(self, model_name: str, model_uid: str):
|
|
1107
|
+
url = f"{self.base_url}/v1/models/instances"
|
|
1108
|
+
response = requests.get(
|
|
1109
|
+
url,
|
|
1110
|
+
headers=self._headers,
|
|
1111
|
+
params={"model_name": model_name, "model_uid": model_uid},
|
|
1112
|
+
)
|
|
1113
|
+
if response.status_code != 200:
|
|
1114
|
+
raise RuntimeError("Failed to get instance info")
|
|
1115
|
+
response_data = response.json()
|
|
1116
|
+
return response_data
|
|
1117
|
+
|
|
1038
1118
|
def _get_supervisor_internal_address(self):
|
|
1039
1119
|
url = f"{self.base_url}/v1/address"
|
|
1040
1120
|
response = requests.get(url, headers=self._headers)
|
xinference/constants.py
CHANGED
|
@@ -29,6 +29,7 @@ XINFERENCE_ENV_DISABLE_HEALTH_CHECK = "XINFERENCE_DISABLE_HEALTH_CHECK"
|
|
|
29
29
|
XINFERENCE_ENV_DISABLE_METRICS = "XINFERENCE_DISABLE_METRICS"
|
|
30
30
|
XINFERENCE_ENV_DOWNLOAD_MAX_ATTEMPTS = "XINFERENCE_DOWNLOAD_MAX_ATTEMPTS"
|
|
31
31
|
XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE = "XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE"
|
|
32
|
+
XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_EANBLE_VIRTUAL_ENV"
|
|
32
33
|
|
|
33
34
|
|
|
34
35
|
def get_xinference_home() -> str:
|
|
@@ -55,6 +56,7 @@ XINFERENCE_LOG_DIR = os.path.join(XINFERENCE_HOME, "logs")
|
|
|
55
56
|
XINFERENCE_IMAGE_DIR = os.path.join(XINFERENCE_HOME, "image")
|
|
56
57
|
XINFERENCE_VIDEO_DIR = os.path.join(XINFERENCE_HOME, "video")
|
|
57
58
|
XINFERENCE_AUTH_DIR = os.path.join(XINFERENCE_HOME, "auth")
|
|
59
|
+
XINFERENCE_VIRTUAL_ENV_DIR = os.path.join(XINFERENCE_HOME, "virtualenv")
|
|
58
60
|
XINFERENCE_CSG_ENDPOINT = str(
|
|
59
61
|
os.environ.get(XINFERENCE_ENV_CSG_ENDPOINT, "https://hub-stg.opencsg.com/")
|
|
60
62
|
)
|
|
@@ -89,3 +91,4 @@ XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE = os.environ.get(
|
|
|
89
91
|
)
|
|
90
92
|
XINFERENCE_LAUNCH_MODEL_RETRY = 3
|
|
91
93
|
XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION = 30
|
|
94
|
+
XINFERENCE_ENABLE_VIRTUAL_ENV = bool(int(os.getenv(XINFERENCE_ENV_VIRTUAL_ENV, "0")))
|