xinference 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +107 -11
- xinference/client/restful/restful_client.py +51 -11
- xinference/constants.py +5 -1
- xinference/core/media_interface.py +758 -0
- xinference/core/model.py +49 -9
- xinference/core/supervisor.py +1 -1
- xinference/core/utils.py +1 -1
- xinference/core/worker.py +33 -39
- xinference/deploy/cmdline.py +17 -0
- xinference/deploy/utils.py +0 -3
- xinference/model/audio/__init__.py +16 -27
- xinference/model/audio/core.py +2 -1
- xinference/model/audio/cosyvoice.py +4 -2
- xinference/model/audio/model_spec.json +63 -46
- xinference/model/audio/model_spec_modelscope.json +31 -14
- xinference/model/embedding/__init__.py +16 -24
- xinference/model/image/__init__.py +15 -25
- xinference/model/llm/__init__.py +40 -115
- xinference/model/llm/core.py +29 -6
- xinference/model/llm/llama_cpp/core.py +30 -347
- xinference/model/llm/llm_family.json +1674 -2203
- xinference/model/llm/llm_family.py +71 -7
- xinference/model/llm/llm_family_csghub.json +0 -32
- xinference/model/llm/llm_family_modelscope.json +1838 -2016
- xinference/model/llm/llm_family_openmind_hub.json +19 -325
- xinference/model/llm/lmdeploy/core.py +7 -2
- xinference/model/llm/mlx/core.py +23 -7
- xinference/model/llm/reasoning_parser.py +281 -5
- xinference/model/llm/sglang/core.py +39 -11
- xinference/model/llm/transformers/chatglm.py +9 -2
- xinference/model/llm/transformers/cogagent.py +10 -12
- xinference/model/llm/transformers/cogvlm2.py +6 -3
- xinference/model/llm/transformers/cogvlm2_video.py +3 -6
- xinference/model/llm/transformers/core.py +58 -60
- xinference/model/llm/transformers/deepseek_v2.py +4 -2
- xinference/model/llm/transformers/deepseek_vl.py +10 -4
- xinference/model/llm/transformers/deepseek_vl2.py +9 -4
- xinference/model/llm/transformers/gemma3.py +4 -5
- xinference/model/llm/transformers/glm4v.py +3 -21
- xinference/model/llm/transformers/glm_edge_v.py +3 -20
- xinference/model/llm/transformers/intern_vl.py +3 -6
- xinference/model/llm/transformers/internlm2.py +1 -1
- xinference/model/llm/transformers/minicpmv25.py +4 -2
- xinference/model/llm/transformers/minicpmv26.py +5 -3
- xinference/model/llm/transformers/omnilmm.py +1 -1
- xinference/model/llm/transformers/opt.py +1 -1
- xinference/model/llm/transformers/ovis2.py +302 -0
- xinference/model/llm/transformers/qwen-omni.py +8 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +5 -1
- xinference/model/llm/transformers/qwen_vl.py +5 -2
- xinference/model/llm/utils.py +96 -45
- xinference/model/llm/vllm/core.py +108 -24
- xinference/model/llm/vllm/distributed_executor.py +8 -7
- xinference/model/llm/vllm/xavier/allocator.py +1 -1
- xinference/model/llm/vllm/xavier/block_manager.py +1 -1
- xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
- xinference/model/llm/vllm/xavier/executor.py +1 -1
- xinference/model/llm/vllm/xavier/test/test_xavier.py +2 -11
- xinference/model/rerank/__init__.py +13 -24
- xinference/model/video/__init__.py +15 -25
- xinference/model/video/core.py +3 -3
- xinference/model/video/diffusers.py +157 -13
- xinference/model/video/model_spec.json +100 -0
- xinference/model/video/model_spec_modelscope.json +104 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
- xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
- xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
- xinference/thirdparty/cosyvoice/bin/train.py +7 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
- xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
- xinference/thirdparty/cosyvoice/cli/model.py +140 -155
- xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
- xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
- xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
- xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
- xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
- xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
- xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
- xinference/thirdparty/cosyvoice/utils/common.py +1 -1
- xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
- xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
- xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
- xinference/types.py +2 -71
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
- xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
- xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
- xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
- xinference/web/ui/src/locales/en.json +7 -4
- xinference/web/ui/src/locales/zh.json +7 -4
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/RECORD +120 -121
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
- xinference/core/image_interface.py +0 -377
- xinference/model/llm/transformers/compression.py +0 -258
- xinference/model/llm/transformers/yi_vl.py +0 -239
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
- xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
- xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
- xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
- /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
xinference/core/model.py
CHANGED
|
@@ -632,6 +632,8 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
632
632
|
return await _gen.__anext__() # noqa: F821
|
|
633
633
|
except StopAsyncIteration:
|
|
634
634
|
return stop
|
|
635
|
+
except Exception as e:
|
|
636
|
+
return e
|
|
635
637
|
|
|
636
638
|
def _wrapper(_gen):
|
|
637
639
|
# Avoid issue: https://github.com/python/cpython/issues/112182
|
|
@@ -639,6 +641,8 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
639
641
|
return next(_gen)
|
|
640
642
|
except StopIteration:
|
|
641
643
|
return stop
|
|
644
|
+
except Exception as e:
|
|
645
|
+
return e
|
|
642
646
|
|
|
643
647
|
while True:
|
|
644
648
|
try:
|
|
@@ -699,6 +703,8 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
699
703
|
o = stream_out.get()
|
|
700
704
|
if o is stop:
|
|
701
705
|
break
|
|
706
|
+
elif isinstance(o, Exception):
|
|
707
|
+
raise o
|
|
702
708
|
else:
|
|
703
709
|
yield o
|
|
704
710
|
|
|
@@ -715,6 +721,8 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
715
721
|
o = await stream_out.get()
|
|
716
722
|
if o is stop:
|
|
717
723
|
break
|
|
724
|
+
elif isinstance(o, Exception):
|
|
725
|
+
raise o
|
|
718
726
|
else:
|
|
719
727
|
yield o
|
|
720
728
|
|
|
@@ -1229,19 +1237,51 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
1229
1237
|
*args,
|
|
1230
1238
|
**kwargs,
|
|
1231
1239
|
):
|
|
1232
|
-
kwargs
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1240
|
+
progressor = kwargs["progressor"] = await self._get_progressor(
|
|
1241
|
+
kwargs.pop("request_id", None)
|
|
1242
|
+
)
|
|
1243
|
+
with progressor:
|
|
1244
|
+
if hasattr(self._model, "text_to_video"):
|
|
1245
|
+
return await self._call_wrapper_json(
|
|
1246
|
+
self._model.text_to_video,
|
|
1247
|
+
prompt,
|
|
1248
|
+
n,
|
|
1249
|
+
*args,
|
|
1250
|
+
**kwargs,
|
|
1251
|
+
)
|
|
1241
1252
|
raise AttributeError(
|
|
1242
1253
|
f"Model {self._model.model_spec} is not for creating video."
|
|
1243
1254
|
)
|
|
1244
1255
|
|
|
1256
|
+
@request_limit
|
|
1257
|
+
@log_async(logger=logger)
|
|
1258
|
+
async def image_to_video(
|
|
1259
|
+
self,
|
|
1260
|
+
image: "PIL.Image",
|
|
1261
|
+
prompt: str,
|
|
1262
|
+
negative_prompt: Optional[str] = None,
|
|
1263
|
+
n: int = 1,
|
|
1264
|
+
*args,
|
|
1265
|
+
**kwargs,
|
|
1266
|
+
):
|
|
1267
|
+
kwargs["negative_prompt"] = negative_prompt
|
|
1268
|
+
progressor = kwargs["progressor"] = await self._get_progressor(
|
|
1269
|
+
kwargs.pop("request_id", None)
|
|
1270
|
+
)
|
|
1271
|
+
with progressor:
|
|
1272
|
+
if hasattr(self._model, "image_to_video"):
|
|
1273
|
+
return await self._call_wrapper_json(
|
|
1274
|
+
self._model.image_to_video,
|
|
1275
|
+
image,
|
|
1276
|
+
prompt,
|
|
1277
|
+
n,
|
|
1278
|
+
*args,
|
|
1279
|
+
**kwargs,
|
|
1280
|
+
)
|
|
1281
|
+
raise AttributeError(
|
|
1282
|
+
f"Model {self._model.model_spec} is not for creating video from image."
|
|
1283
|
+
)
|
|
1284
|
+
|
|
1245
1285
|
async def record_metrics(self, name, op, kwargs):
|
|
1246
1286
|
worker_ref = await self._get_worker_ref()
|
|
1247
1287
|
await worker_ref.record_metrics(name, op, kwargs)
|
xinference/core/supervisor.py
CHANGED
|
@@ -1102,8 +1102,8 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
1102
1102
|
xavier_config=xavier_config,
|
|
1103
1103
|
**kwargs,
|
|
1104
1104
|
)
|
|
1105
|
-
await worker_ref.wait_for_load(_replica_model_uid)
|
|
1106
1105
|
self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
|
|
1106
|
+
await worker_ref.wait_for_load(_replica_model_uid)
|
|
1107
1107
|
return subpool_address
|
|
1108
1108
|
|
|
1109
1109
|
async def _launch_model():
|
xinference/core/utils.py
CHANGED
|
@@ -263,7 +263,7 @@ class CancelMixin:
|
|
|
263
263
|
_CANCEL_TASK_NAME = "abort_block"
|
|
264
264
|
|
|
265
265
|
def __init__(self):
|
|
266
|
-
self._running_tasks: weakref.WeakValueDictionary[
|
|
266
|
+
self._running_tasks: weakref.WeakValueDictionary[ # type: ignore
|
|
267
267
|
str, asyncio.Task
|
|
268
268
|
] = weakref.WeakValueDictionary()
|
|
269
269
|
|
xinference/core/worker.py
CHANGED
|
@@ -148,7 +148,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
148
148
|
elif metrics_exporter_host is not None or metrics_exporter_port is not None:
|
|
149
149
|
# metrics export server.
|
|
150
150
|
logger.info(
|
|
151
|
-
f"Starting metrics export server at {metrics_exporter_host}:{metrics_exporter_port}"
|
|
151
|
+
f"Starting metrics export server at {metrics_exporter_host}:{metrics_exporter_port}" # noqa: E231
|
|
152
152
|
)
|
|
153
153
|
q: queue.Queue = queue.Queue()
|
|
154
154
|
self._metrics_thread = threading.Thread(
|
|
@@ -162,7 +162,9 @@ class WorkerActor(xo.StatelessActor):
|
|
|
162
162
|
while self._metrics_thread.is_alive():
|
|
163
163
|
try:
|
|
164
164
|
host, port = q.get(block=False)[:2]
|
|
165
|
-
logger.info(
|
|
165
|
+
logger.info(
|
|
166
|
+
f"Metrics server is started at: http://{host}:{port}" # noqa: E231
|
|
167
|
+
)
|
|
166
168
|
break
|
|
167
169
|
except queue.Empty:
|
|
168
170
|
pass
|
|
@@ -584,6 +586,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
584
586
|
n_gpu: Optional[Union[int, str]] = "auto",
|
|
585
587
|
gpu_idx: Optional[List[int]] = None,
|
|
586
588
|
env: Optional[Dict[str, str]] = None,
|
|
589
|
+
start_python: Optional[str] = None,
|
|
587
590
|
) -> Tuple[str, List[str]]:
|
|
588
591
|
env = {} if env is None else env
|
|
589
592
|
devices = []
|
|
@@ -609,14 +612,8 @@ class WorkerActor(xo.StatelessActor):
|
|
|
609
612
|
)
|
|
610
613
|
env[env_name] = ",".join([str(dev) for dev in devices])
|
|
611
614
|
|
|
612
|
-
if os.name != "nt" and platform.system() != "Darwin":
|
|
613
|
-
# Linux
|
|
614
|
-
start_method = "forkserver"
|
|
615
|
-
else:
|
|
616
|
-
# Windows and macOS
|
|
617
|
-
start_method = "spawn"
|
|
618
615
|
subpool_address = await self._main_pool.append_sub_pool(
|
|
619
|
-
env=env,
|
|
616
|
+
env=env, start_python=start_python
|
|
620
617
|
)
|
|
621
618
|
return subpool_address, [str(dev) for dev in devices]
|
|
622
619
|
|
|
@@ -787,9 +784,9 @@ class WorkerActor(xo.StatelessActor):
|
|
|
787
784
|
elif model_type == "image":
|
|
788
785
|
return model.model_ability
|
|
789
786
|
elif model_type == "audio":
|
|
790
|
-
return
|
|
787
|
+
return model.model_ability
|
|
791
788
|
elif model_type == "video":
|
|
792
|
-
return
|
|
789
|
+
return model.model_ability
|
|
793
790
|
elif model_type == "flexible":
|
|
794
791
|
return ["flexible"]
|
|
795
792
|
else:
|
|
@@ -833,6 +830,8 @@ class WorkerActor(xo.StatelessActor):
|
|
|
833
830
|
virtual_env_manager: VirtualEnvManager = get_virtual_env_manager(
|
|
834
831
|
virtual_env_name or "uv", env_path
|
|
835
832
|
)
|
|
833
|
+
# create env
|
|
834
|
+
virtual_env_manager.create_env()
|
|
836
835
|
return virtual_env_manager
|
|
837
836
|
|
|
838
837
|
@classmethod
|
|
@@ -845,9 +844,6 @@ class WorkerActor(xo.StatelessActor):
|
|
|
845
844
|
# no settings or no packages
|
|
846
845
|
return
|
|
847
846
|
|
|
848
|
-
# create env
|
|
849
|
-
virtual_env_manager.create_env()
|
|
850
|
-
|
|
851
847
|
if settings.inherit_pip_config:
|
|
852
848
|
# inherit pip config
|
|
853
849
|
pip_config = get_pip_config_args()
|
|
@@ -1001,22 +997,26 @@ class WorkerActor(xo.StatelessActor):
|
|
|
1001
997
|
# virtualenv
|
|
1002
998
|
enable_virtual_env = kwargs.pop("enable_virtual_env", None)
|
|
1003
999
|
virtual_env_name = kwargs.pop("virtual_env_name", None)
|
|
1004
|
-
virtual_env_path = os.path.join(
|
|
1000
|
+
virtual_env_path = os.path.join(
|
|
1001
|
+
XINFERENCE_VIRTUAL_ENV_DIR, "v2", model_name
|
|
1002
|
+
)
|
|
1005
1003
|
virtual_env_manager = await asyncio.to_thread(
|
|
1006
1004
|
self._create_virtual_env_manager,
|
|
1007
1005
|
enable_virtual_env,
|
|
1008
1006
|
virtual_env_name,
|
|
1009
1007
|
virtual_env_path,
|
|
1010
1008
|
)
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
else None
|
|
1009
|
+
subpool_python_path = (
|
|
1010
|
+
None
|
|
1011
|
+
if virtual_env_manager is None
|
|
1012
|
+
else virtual_env_manager.get_python_path()
|
|
1016
1013
|
)
|
|
1017
|
-
|
|
1018
1014
|
subpool_address, devices = await self._create_subpool(
|
|
1019
|
-
model_uid,
|
|
1015
|
+
model_uid,
|
|
1016
|
+
model_type,
|
|
1017
|
+
n_gpu=n_gpu,
|
|
1018
|
+
gpu_idx=gpu_idx,
|
|
1019
|
+
start_python=subpool_python_path,
|
|
1020
1020
|
)
|
|
1021
1021
|
all_subpool_addresses = [subpool_address]
|
|
1022
1022
|
try:
|
|
@@ -1116,7 +1116,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
1116
1116
|
coros.append(
|
|
1117
1117
|
self._main_pool.append_sub_pool(
|
|
1118
1118
|
env={env_name: env_value},
|
|
1119
|
-
|
|
1119
|
+
start_python=subpool_python_path,
|
|
1120
1120
|
)
|
|
1121
1121
|
)
|
|
1122
1122
|
pool_addresses = await asyncio.gather(*coros)
|
|
@@ -1255,7 +1255,14 @@ class WorkerActor(xo.StatelessActor):
|
|
|
1255
1255
|
try:
|
|
1256
1256
|
logger.debug("Start to destroy model actor: %s", model_ref)
|
|
1257
1257
|
coro = xo.destroy_actor(model_ref)
|
|
1258
|
-
|
|
1258
|
+
# see https://github.com/xorbitsai/xoscar/pull/140
|
|
1259
|
+
# asyncio.wait_for cannot work for Xoscar actor call,
|
|
1260
|
+
# because when time out, the coroutine will be cancelled via raise CancelledError,
|
|
1261
|
+
# inside actor call, the error will be caught and
|
|
1262
|
+
# a CancelMessage will be sent to dest actor pool,
|
|
1263
|
+
# however the actor pool may be stuck already,
|
|
1264
|
+
# thus the timeout will never be raised
|
|
1265
|
+
await xo.wait_for(coro, timeout=5)
|
|
1259
1266
|
except Exception as e:
|
|
1260
1267
|
logger.debug(
|
|
1261
1268
|
"Destroy model actor failed, model uid: %s, error: %s", model_uid, e
|
|
@@ -1434,7 +1441,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
1434
1441
|
else:
|
|
1435
1442
|
logger.debug(f"{path} is not a valid path.")
|
|
1436
1443
|
except Exception as e:
|
|
1437
|
-
logger.error(f"Fail to delete {path} with error:{e}.")
|
|
1444
|
+
logger.error(f"Fail to delete {path} with error:{e}.") # noqa: E231
|
|
1438
1445
|
return False
|
|
1439
1446
|
await self._cache_tracker_ref.confirm_and_remove_model(
|
|
1440
1447
|
model_version, self.address
|
|
@@ -1467,26 +1474,13 @@ class WorkerActor(xo.StatelessActor):
|
|
|
1467
1474
|
model_ref = self._model_uid_to_model[rep_model_uid]
|
|
1468
1475
|
await model_ref.start_transfer_for_vllm(rank_addresses)
|
|
1469
1476
|
|
|
1470
|
-
@staticmethod
|
|
1471
|
-
def _get_start_method():
|
|
1472
|
-
if os.name != "nt" and platform.system() != "Darwin":
|
|
1473
|
-
# Linux
|
|
1474
|
-
start_method = "forkserver"
|
|
1475
|
-
else:
|
|
1476
|
-
# Windows and macOS
|
|
1477
|
-
start_method = "spawn"
|
|
1478
|
-
return start_method
|
|
1479
|
-
|
|
1480
1477
|
@log_async(logger=logger, level=logging.INFO)
|
|
1481
1478
|
async def launch_rank0_model(
|
|
1482
1479
|
self, rep_model_uid: str, xavier_config: Dict[str, Any]
|
|
1483
1480
|
) -> Tuple[str, int]:
|
|
1484
1481
|
from ..model.llm.vllm.xavier.collective_manager import Rank0ModelActor
|
|
1485
1482
|
|
|
1486
|
-
|
|
1487
|
-
subpool_address = await self._main_pool.append_sub_pool(
|
|
1488
|
-
start_method=start_method
|
|
1489
|
-
)
|
|
1483
|
+
subpool_address = await self._main_pool.append_sub_pool()
|
|
1490
1484
|
|
|
1491
1485
|
store_address = subpool_address.split(":")[0]
|
|
1492
1486
|
# Note that `store_port` needs to be generated on the worker,
|
xinference/deploy/cmdline.py
CHANGED
|
@@ -805,6 +805,14 @@ def remove_cache(
|
|
|
805
805
|
type=(str, str),
|
|
806
806
|
multiple=True,
|
|
807
807
|
)
|
|
808
|
+
@click.option(
|
|
809
|
+
"--quantization-config",
|
|
810
|
+
"-qc",
|
|
811
|
+
"quantization_config",
|
|
812
|
+
type=(str, str),
|
|
813
|
+
multiple=True,
|
|
814
|
+
help="bnb quantization config for `transformers` engine.",
|
|
815
|
+
)
|
|
808
816
|
@click.option(
|
|
809
817
|
"--worker-ip",
|
|
810
818
|
default=None,
|
|
@@ -853,6 +861,7 @@ def model_launch(
|
|
|
853
861
|
trust_remote_code: bool,
|
|
854
862
|
api_key: Optional[str],
|
|
855
863
|
model_path: Optional[str],
|
|
864
|
+
quantization_config: Optional[Tuple],
|
|
856
865
|
):
|
|
857
866
|
kwargs = {}
|
|
858
867
|
for i in range(0, len(ctx.args), 2):
|
|
@@ -884,6 +893,12 @@ def model_launch(
|
|
|
884
893
|
else:
|
|
885
894
|
_n_gpu = int(n_gpu)
|
|
886
895
|
|
|
896
|
+
bnb_quantization_config = (
|
|
897
|
+
{k: handle_click_args_type(v) for k, v in dict(quantization_config).items()}
|
|
898
|
+
if quantization_config
|
|
899
|
+
else None
|
|
900
|
+
)
|
|
901
|
+
|
|
887
902
|
image_lora_load_params = (
|
|
888
903
|
{k: handle_click_args_type(v) for k, v in dict(image_lora_load_kwargs).items()}
|
|
889
904
|
if image_lora_load_kwargs
|
|
@@ -929,6 +944,8 @@ def model_launch(
|
|
|
929
944
|
|
|
930
945
|
# do not wait for launching.
|
|
931
946
|
kwargs["wait_ready"] = False
|
|
947
|
+
if bnb_quantization_config:
|
|
948
|
+
kwargs["quantization_config"] = {**bnb_quantization_config}
|
|
932
949
|
|
|
933
950
|
model_uid = client.launch_model(
|
|
934
951
|
model_name=model_name,
|
xinference/deploy/utils.py
CHANGED
|
@@ -141,13 +141,10 @@ def get_config_dict(
|
|
|
141
141
|
async def create_worker_actor_pool(
|
|
142
142
|
address: str, logging_conf: Optional[dict] = None
|
|
143
143
|
) -> "MainActorPoolType":
|
|
144
|
-
subprocess_start_method = "forkserver" if os.name != "nt" else "spawn"
|
|
145
|
-
|
|
146
144
|
return await xo.create_actor_pool(
|
|
147
145
|
address=address,
|
|
148
146
|
n_process=0,
|
|
149
147
|
auto_recover="process",
|
|
150
|
-
subprocess_start_method=subprocess_start_method,
|
|
151
148
|
logging_conf={"dict": logging_conf},
|
|
152
149
|
)
|
|
153
150
|
|
|
@@ -66,31 +66,8 @@ def _need_filter(spec: dict):
|
|
|
66
66
|
|
|
67
67
|
|
|
68
68
|
def _install():
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
os.path.dirname(__file__), "model_spec_modelscope.json"
|
|
72
|
-
)
|
|
73
|
-
BUILTIN_AUDIO_MODELS.update(
|
|
74
|
-
dict(
|
|
75
|
-
(spec["model_name"], AudioModelFamilyV1(**spec))
|
|
76
|
-
for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
|
|
77
|
-
if not _need_filter(spec)
|
|
78
|
-
)
|
|
79
|
-
)
|
|
80
|
-
for model_name, model_spec in BUILTIN_AUDIO_MODELS.items():
|
|
81
|
-
MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
|
|
82
|
-
|
|
83
|
-
MODELSCOPE_AUDIO_MODELS.update(
|
|
84
|
-
dict(
|
|
85
|
-
(spec["model_name"], AudioModelFamilyV1(**spec))
|
|
86
|
-
for spec in json.load(
|
|
87
|
-
codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
|
|
88
|
-
)
|
|
89
|
-
if not _need_filter(spec)
|
|
90
|
-
)
|
|
91
|
-
)
|
|
92
|
-
for model_name, model_spec in MODELSCOPE_AUDIO_MODELS.items():
|
|
93
|
-
MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
|
|
69
|
+
load_model_family_from_json("model_spec.json", BUILTIN_AUDIO_MODELS)
|
|
70
|
+
load_model_family_from_json("model_spec_modelscope.json", MODELSCOPE_AUDIO_MODELS)
|
|
94
71
|
|
|
95
72
|
# register model description after recording model revision
|
|
96
73
|
for model_spec_info in [BUILTIN_AUDIO_MODELS, MODELSCOPE_AUDIO_MODELS]:
|
|
@@ -104,5 +81,17 @@ def _install():
|
|
|
104
81
|
for ud_audio in get_user_defined_audios():
|
|
105
82
|
AUDIO_MODEL_DESCRIPTIONS.update(generate_audio_description(ud_audio))
|
|
106
83
|
|
|
107
|
-
|
|
108
|
-
|
|
84
|
+
|
|
85
|
+
def load_model_family_from_json(json_filename, target_families):
|
|
86
|
+
json_path = os.path.join(os.path.dirname(__file__), json_filename)
|
|
87
|
+
target_families.update(
|
|
88
|
+
dict(
|
|
89
|
+
(spec["model_name"], AudioModelFamilyV1(**spec))
|
|
90
|
+
for spec in json.load(codecs.open(json_path, "r", encoding="utf-8"))
|
|
91
|
+
if not _need_filter(spec)
|
|
92
|
+
)
|
|
93
|
+
)
|
|
94
|
+
for model_name, model_spec in target_families.items():
|
|
95
|
+
MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
|
|
96
|
+
|
|
97
|
+
del json_path
|
xinference/model/audio/core.py
CHANGED
|
@@ -52,7 +52,7 @@ class AudioModelFamilyV1(CacheableModelSpec):
|
|
|
52
52
|
model_revision: Optional[str]
|
|
53
53
|
multilingual: bool
|
|
54
54
|
language: Optional[str]
|
|
55
|
-
model_ability: Optional[str]
|
|
55
|
+
model_ability: Optional[List[str]]
|
|
56
56
|
default_model_config: Optional[Dict[str, Any]]
|
|
57
57
|
default_transcription_config: Optional[Dict[str, Any]]
|
|
58
58
|
engine: Optional[str]
|
|
@@ -82,6 +82,7 @@ class AudioModelDescription(ModelDescription):
|
|
|
82
82
|
"model_name": self._model_spec.model_name,
|
|
83
83
|
"model_family": self._model_spec.model_family,
|
|
84
84
|
"model_revision": self._model_spec.model_revision,
|
|
85
|
+
"model_ability": self._model_spec.model_ability,
|
|
85
86
|
}
|
|
86
87
|
|
|
87
88
|
def to_version_info(self):
|
|
@@ -55,10 +55,12 @@ class CosyVoiceModel:
|
|
|
55
55
|
thirdparty_dir = os.path.join(os.path.dirname(__file__), "../../thirdparty")
|
|
56
56
|
sys.path.insert(0, thirdparty_dir)
|
|
57
57
|
|
|
58
|
+
kwargs = {}
|
|
58
59
|
if "CosyVoice2" in self._model_spec.model_name:
|
|
59
60
|
from cosyvoice.cli.cosyvoice import CosyVoice2 as CosyVoice
|
|
60
61
|
|
|
61
62
|
self._is_cosyvoice2 = True
|
|
63
|
+
kwargs = {"use_flow_cache": self._kwargs.get("use_flow_cache", False)}
|
|
62
64
|
else:
|
|
63
65
|
from cosyvoice.cli.cosyvoice import CosyVoice
|
|
64
66
|
|
|
@@ -69,7 +71,7 @@ class CosyVoiceModel:
|
|
|
69
71
|
"compile", False
|
|
70
72
|
)
|
|
71
73
|
logger.info("Loading CosyVoice model, compile=%s...", load_jit)
|
|
72
|
-
self._model = CosyVoice(self._model_path, load_jit=load_jit)
|
|
74
|
+
self._model = CosyVoice(self._model_path, load_jit=load_jit, **kwargs)
|
|
73
75
|
if self._is_cosyvoice2:
|
|
74
76
|
spk2info_file = os.path.join(thirdparty_dir, "cosyvoice/bin/spk2info.pt")
|
|
75
77
|
self._model.frontend.spk2info = torch.load(
|
|
@@ -112,7 +114,7 @@ class CosyVoiceModel:
|
|
|
112
114
|
input, prompt_speech_16k, stream=stream
|
|
113
115
|
)
|
|
114
116
|
else:
|
|
115
|
-
available_speakers = self._model.
|
|
117
|
+
available_speakers = self._model.list_available_spks()
|
|
116
118
|
if not voice:
|
|
117
119
|
voice = available_speakers[0]
|
|
118
120
|
logger.info("Auto select speaker: %s", voice)
|