xinference 0.14.4.post1__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_compat.py +51 -0
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +5 -39
- xinference/client/restful/restful_client.py +3 -24
- xinference/conftest.py +1 -1
- xinference/constants.py +5 -0
- xinference/core/cache_tracker.py +1 -1
- xinference/core/chat_interface.py +8 -14
- xinference/core/event.py +1 -1
- xinference/core/model.py +82 -31
- xinference/core/scheduler.py +37 -37
- xinference/core/status_guard.py +1 -1
- xinference/core/supervisor.py +11 -10
- xinference/core/utils.py +80 -22
- xinference/core/worker.py +17 -16
- xinference/deploy/cmdline.py +8 -16
- xinference/deploy/local.py +1 -1
- xinference/deploy/supervisor.py +1 -1
- xinference/deploy/utils.py +1 -1
- xinference/deploy/worker.py +1 -1
- xinference/model/audio/cosyvoice.py +86 -41
- xinference/model/embedding/core.py +52 -31
- xinference/model/image/stable_diffusion/core.py +18 -1
- xinference/model/llm/__init__.py +21 -11
- xinference/model/llm/llama_cpp/core.py +16 -33
- xinference/model/llm/llm_family.json +619 -1297
- xinference/model/llm/llm_family.py +31 -52
- xinference/model/llm/llm_family_csghub.json +18 -35
- xinference/model/llm/llm_family_modelscope.json +573 -1119
- xinference/model/llm/lmdeploy/core.py +56 -88
- xinference/model/llm/mlx/core.py +46 -69
- xinference/model/llm/sglang/core.py +33 -18
- xinference/model/llm/transformers/chatglm.py +167 -305
- xinference/model/llm/transformers/cogvlm2.py +36 -63
- xinference/model/llm/transformers/cogvlm2_video.py +33 -223
- xinference/model/llm/transformers/core.py +49 -50
- xinference/model/llm/transformers/deepseek_vl.py +53 -96
- xinference/model/llm/transformers/glm4v.py +55 -111
- xinference/model/llm/transformers/intern_vl.py +39 -70
- xinference/model/llm/transformers/internlm2.py +32 -54
- xinference/model/llm/transformers/minicpmv25.py +22 -55
- xinference/model/llm/transformers/minicpmv26.py +158 -68
- xinference/model/llm/transformers/omnilmm.py +5 -28
- xinference/model/llm/transformers/qwen2_vl.py +208 -0
- xinference/model/llm/transformers/qwen_vl.py +34 -86
- xinference/model/llm/transformers/utils.py +32 -38
- xinference/model/llm/transformers/yi_vl.py +32 -72
- xinference/model/llm/utils.py +195 -489
- xinference/model/llm/vllm/core.py +153 -100
- xinference/model/rerank/core.py +41 -8
- xinference/model/rerank/model_spec.json +7 -0
- xinference/model/rerank/model_spec_modelscope.json +7 -1
- xinference/model/utils.py +1 -31
- xinference/thirdparty/cosyvoice/bin/export_jit.py +64 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.py +8 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +38 -22
- xinference/thirdparty/cosyvoice/cli/model.py +139 -26
- xinference/thirdparty/cosyvoice/flow/flow.py +15 -9
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +20 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +8 -4
- xinference/thirdparty/cosyvoice/llm/llm.py +14 -13
- xinference/thirdparty/cosyvoice/transformer/attention.py +7 -3
- xinference/thirdparty/cosyvoice/transformer/decoder.py +1 -1
- xinference/thirdparty/cosyvoice/transformer/embedding.py +4 -3
- xinference/thirdparty/cosyvoice/transformer/encoder.py +4 -2
- xinference/thirdparty/cosyvoice/utils/common.py +36 -0
- xinference/thirdparty/cosyvoice/utils/file_utils.py +16 -0
- xinference/thirdparty/deepseek_vl/serve/assets/Kelpy-Codos.js +100 -0
- xinference/thirdparty/deepseek_vl/serve/assets/avatar.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/assets/custom.css +355 -0
- xinference/thirdparty/deepseek_vl/serve/assets/custom.js +22 -0
- xinference/thirdparty/deepseek_vl/serve/assets/favicon.ico +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/app.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/chart.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/mirror.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/pipeline.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/puzzle.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/rap.jpeg +0 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/base.yaml +87 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +34 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/r_8_alpha_16.yaml +4 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +83 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text-data.proto +24 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/README.md +27 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/.gitignore +114 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/README.md +36 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/css/style.css +161 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/html/footer.html +11 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/js/animate.js +69 -0
- xinference/thirdparty/fish_speech/tools/sensevoice/README.md +59 -0
- xinference/thirdparty/matcha/VERSION +1 -0
- xinference/thirdparty/matcha/hifigan/LICENSE +21 -0
- xinference/thirdparty/matcha/hifigan/README.md +101 -0
- xinference/thirdparty/omnilmm/LICENSE +201 -0
- xinference/thirdparty/whisper/__init__.py +156 -0
- xinference/thirdparty/whisper/__main__.py +3 -0
- xinference/thirdparty/whisper/assets/gpt2.tiktoken +50256 -0
- xinference/thirdparty/whisper/assets/mel_filters.npz +0 -0
- xinference/thirdparty/whisper/assets/multilingual.tiktoken +50257 -0
- xinference/thirdparty/whisper/audio.py +157 -0
- xinference/thirdparty/whisper/decoding.py +826 -0
- xinference/thirdparty/whisper/model.py +314 -0
- xinference/thirdparty/whisper/normalizers/__init__.py +2 -0
- xinference/thirdparty/whisper/normalizers/basic.py +76 -0
- xinference/thirdparty/whisper/normalizers/english.json +1741 -0
- xinference/thirdparty/whisper/normalizers/english.py +550 -0
- xinference/thirdparty/whisper/timing.py +386 -0
- xinference/thirdparty/whisper/tokenizer.py +395 -0
- xinference/thirdparty/whisper/transcribe.py +605 -0
- xinference/thirdparty/whisper/triton_ops.py +109 -0
- xinference/thirdparty/whisper/utils.py +316 -0
- xinference/thirdparty/whisper/version.py +1 -0
- xinference/types.py +7 -49
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.4bafd904.css → main.632e9148.css} +2 -2
- xinference/web/ui/build/static/css/main.632e9148.css.map +1 -0
- xinference/web/ui/build/static/js/main.9cfafbd6.js +3 -0
- xinference/web/ui/build/static/js/{main.eb13fe95.js.LICENSE.txt → main.9cfafbd6.js.LICENSE.txt} +2 -0
- xinference/web/ui/build/static/js/main.9cfafbd6.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/01d6d198156bacbd436c51435edbd4b2cacd47a79db929105eba30f74b67d48d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/10c69dc7a296779fcffedeff9393d832dfcb0013c36824adf623d3c518b801ff.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/59eb25f514afcc4fefd1b309d192b2455f1e0aec68a9de598ca4b2333fe2c774.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/77d50223f3e734d4485cca538cb098a8c3a7a0a1a9f01f58cdda3af42fe1adf5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a56d5a642409a84988891089c98ca28ad0546432dfbae8aaa51bc5a280e1cdd2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9ff696a3e3471f01b46c63d18af32e491eb5dc0e43cb30202c96871466df57f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +37 -0
- xinference/web/ui/node_modules/a-sync-waterfall/package.json +21 -0
- xinference/web/ui/node_modules/nunjucks/node_modules/commander/package.json +48 -0
- xinference/web/ui/node_modules/nunjucks/package.json +112 -0
- xinference/web/ui/package-lock.json +38 -0
- xinference/web/ui/package.json +1 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.0.dist-info}/METADATA +8 -8
- {xinference-0.14.4.post1.dist-info → xinference-0.15.0.dist-info}/RECORD +141 -87
- xinference/model/llm/transformers/llama_2.py +0 -108
- xinference/web/ui/build/static/css/main.4bafd904.css.map +0 -1
- xinference/web/ui/build/static/js/main.eb13fe95.js +0 -3
- xinference/web/ui/build/static/js/main.eb13fe95.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0b11a5339468c13b2d31ac085e7effe4303259b2071abd46a0a8eb8529233a5e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +0 -1
- {xinference-0.14.4.post1.dist-info → xinference-0.15.0.dist-info}/LICENSE +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.0.dist-info}/WHEEL +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.0.dist-info}/top_level.txt +0 -0
xinference/_compat.py
CHANGED
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Dict, Iterable, List, Literal, Optional, Union
+
 from pydantic.version import VERSION as PYDANTIC_VERSION

 PYDANTIC_V2 = PYDANTIC_VERSION.startswith("2.")
@@ -50,3 +52,52 @@ else:
     from pydantic.parse import load_str_bytes  # noqa: F401
     from pydantic.types import StrBytes  # noqa: F401
     from pydantic.utils import ROOT_KEY  # noqa: F401
+
+from openai.types.chat.chat_completion_named_tool_choice_param import (
+    ChatCompletionNamedToolChoiceParam,
+)
+from openai.types.chat.chat_completion_stream_options_param import (
+    ChatCompletionStreamOptionsParam,
+)
+from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
+
+OpenAIChatCompletionStreamOptionsParam = create_model_from_typeddict(
+    ChatCompletionStreamOptionsParam
+)
+OpenAIChatCompletionToolParam = create_model_from_typeddict(ChatCompletionToolParam)
+OpenAIChatCompletionNamedToolChoiceParam = create_model_from_typeddict(
+    ChatCompletionNamedToolChoiceParam
+)
+
+
+class CreateChatCompletionOpenAI(BaseModel):
+    """
+    Comes from source code: https://github.com/openai/openai-python/blob/main/src/openai/types/chat/completion_create_params.py
+    """
+
+    messages: List[Dict]
+    model: str
+    frequency_penalty: Optional[float]
+    logit_bias: Optional[Dict[str, int]]
+    logprobs: Optional[bool]
+    max_tokens: Optional[int]
+    n: Optional[int]
+    parallel_tool_calls: Optional[bool]
+    presence_penalty: Optional[float]
+    # we do not support this
+    # response_format: ResponseFormat
+    seed: Optional[int]
+    service_tier: Optional[Literal["auto", "default"]]
+    stop: Union[Optional[str], List[str]]
+    stream_options: Optional[OpenAIChatCompletionStreamOptionsParam]  # type: ignore
+    temperature: Optional[float]
+    tool_choice: Optional[  # type: ignore
+        Union[
+            Literal["none", "auto", "required"],
+            OpenAIChatCompletionNamedToolChoiceParam,
+        ]
+    ]
+    tools: Optional[Iterable[OpenAIChatCompletionToolParam]]  # type: ignore
+    top_logprobs: Optional[int]
+    top_p: Optional[float]
+    user: Optional[str]
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
- "date": "2024-09-
+ "date": "2024-09-06T16:29:42+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "
- "version": "0.
+ "full-revisionid": "e2618be96293f112709c9ceed639a3443455a0e7",
+ "version": "0.15.0"
 }
 ''' # END VERSION_JSON
xinference/api/restful_api.py
CHANGED
@@ -57,9 +57,7 @@ from ..core.event import Event, EventCollectorActor, EventType
 from ..core.supervisor import SupervisorActor
 from ..core.utils import json_dumps
 from ..types import (
-    SPECIAL_TOOL_PROMPT,
     ChatCompletion,
-    ChatCompletionMessage,
     Completion,
     CreateChatCompletion,
     CreateCompletion,
@@ -199,14 +197,14 @@ class RESTfulAPI:
     async def _get_supervisor_ref(self) -> xo.ActorRefType[SupervisorActor]:
         if self._supervisor_ref is None:
             self._supervisor_ref = await xo.actor_ref(
-                address=self._supervisor_address, uid=SupervisorActor.
+                address=self._supervisor_address, uid=SupervisorActor.default_uid()
             )
         return self._supervisor_ref

     async def _get_event_collector_ref(self) -> xo.ActorRefType[EventCollectorActor]:
         if self._event_collector_ref is None:
             self._event_collector_ref = await xo.actor_ref(
-                address=self._supervisor_address, uid=EventCollectorActor.
+                address=self._supervisor_address, uid=EventCollectorActor.default_uid()
             )
         return self._event_collector_ref

@@ -1627,33 +1625,7 @@ class RESTfulAPI:
                 status_code=400, detail="Invalid input. Please specify the prompt."
             )

-        system_messages: List["ChatCompletionMessage"] = []
-        system_messages_contents = []
-        non_system_messages = []
-        for msg in messages:
-            assert (
-                msg.get("content") != SPECIAL_TOOL_PROMPT
-            ), f"Invalid message content {SPECIAL_TOOL_PROMPT}"
-            if msg["role"] == "system":
-                system_messages_contents.append(msg["content"])
-            else:
-                non_system_messages.append(msg)
-        system_messages.append(
-            {"role": "system", "content": ". ".join(system_messages_contents)}
-        )
-
         has_tool_message = messages[-1].get("role") == "tool"
-        if has_tool_message:
-            prompt = SPECIAL_TOOL_PROMPT
-            system_prompt = system_messages[0]["content"] if system_messages else None
-            chat_history = non_system_messages  # exclude the prompt
-        else:
-            prompt = None
-            if non_system_messages:
-                prompt = non_system_messages[-1]["content"]
-            system_prompt = system_messages[0]["content"] if system_messages else None
-            chat_history = non_system_messages[:-1]  # exclude the prompt
-
         model_uid = body.model

         try:
@@ -1681,9 +1653,7 @@ class RESTfulAPI:
         from ..model.llm.utils import GLM4_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY

         model_family = desc.get("model_family", "")
-        function_call_models = (
-            ["gorilla-openfunctions-v1"] + QWEN_TOOL_CALL_FAMILY + GLM4_TOOL_CALL_FAMILY
-        )
+        function_call_models = QWEN_TOOL_CALL_FAMILY + GLM4_TOOL_CALL_FAMILY

         if model_family not in function_call_models:
             if body.tools:
@@ -1716,9 +1686,7 @@ class RESTfulAPI:
         try:
             try:
                 iterator = await model.chat(
-
-                    system_prompt,
-                    chat_history,
+                    messages,
                     kwargs,
                     raw_params=raw_kwargs,
                 )
@@ -1750,9 +1718,7 @@ class RESTfulAPI:
         else:
             try:
                 data = await model.chat(
-
-                    system_prompt,
-                    chat_history,
+                    messages,
                     kwargs,
                     raw_params=raw_kwargs,
                 )
xinference/client/restful/restful_client.py
CHANGED
@@ -13,7 +13,6 @@
 # limitations under the License.
 import json
 import typing
-import warnings
 from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union

 import requests
@@ -470,9 +469,7 @@ class RESTfulGenerateModelHandle(RESTfulModelHandle):
 class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
     def chat(
         self,
-
-        system_prompt: Optional[str] = None,
-        chat_history: Optional[List["ChatCompletionMessage"]] = None,
+        messages: List[Dict],
         tools: Optional[List[Dict]] = None,
         generate_config: Optional[
             Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]
@@ -483,11 +480,7 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):

         Parameters
         ----------
-
-            The user's input.
-        system_prompt: Optional[str]
-            The system context provide to Model prior to any chats.
-        chat_history: Optional[List["ChatCompletionMessage"]]
+        messages: List[Dict]
             A list of messages comprising the conversation so far.
         tools: Optional[List[Dict]]
             A tool list.
@@ -509,25 +502,11 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
             Report the failure to generate the chat from the server. Detailed information provided in error message.

         """
-        warnings.warn(
-            "The parameters `prompt`, `system_prompt` and `chat_history` will be deprecated in version v0.15.0, "
-            "and will be replaced by the parameter `messages`, "
-            "similar to the OpenAI API: https://platform.openai.com/docs/guides/chat-completions/getting-started",
-            category=DeprecationWarning,
-            stacklevel=2,
-        )
-
         url = f"{self._base_url}/v1/chat/completions"

-        if chat_history is None:
-            chat_history = []
-
-        chat_history = handle_system_prompts(chat_history, system_prompt)
-        chat_history.append({"role": "user", "content": prompt})  # type: ignore
-
         request_body: Dict[str, Any] = {
             "model": self._model_uid,
-            "messages":
+            "messages": messages,
         }
         if tools is not None:
             request_body["tools"] = tools
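The client change above mirrors the server-side API: `RESTfulChatModelHandle.chat()` now takes an OpenAI-style `messages` list instead of `prompt`/`system_prompt`/`chat_history`, and forwards it to `/v1/chat/completions` unchanged. A hedged usage sketch of the new call shape; the endpoint URL and model UID below are placeholders, not values taken from this diff:

# Sketch of calling the 0.15.0 messages-based chat API from the REST client.
# Endpoint and model UID are placeholders; adjust to your deployment.
from xinference.client import Client

client = Client("http://127.0.0.1:9997")
model = client.get_model("my-chat-model-uid")  # a RESTfulChatModelHandle for chat models

completion = model.chat(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"},
    ],
    generate_config={"max_tokens": 128, "temperature": 0.7},
)
print(completion["choices"][0]["message"]["content"])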
xinference/conftest.py
CHANGED
@@ -144,7 +144,7 @@ async def _start_test_cluster(
         address=f"test://{address}", logging_conf=logging_conf
     )
     await xo.create_actor(
-        SupervisorActor, address=address, uid=SupervisorActor.
+        SupervisorActor, address=address, uid=SupervisorActor.default_uid()
     )
     await start_worker_components(
         address=address,
xinference/constants.py
CHANGED
@@ -38,6 +38,10 @@ def get_xinference_home() -> str:
         # if user has already set `XINFERENCE_HOME` env, change huggingface and modelscope default download path
         os.environ["HUGGINGFACE_HUB_CACHE"] = os.path.join(home_path, "huggingface")
         os.environ["MODELSCOPE_CACHE"] = os.path.join(home_path, "modelscope")
+        # In multi-tenant mode,
+        # gradio's temporary files are stored in their respective home directories,
+        # to prevent insufficient permissions
+        os.environ["GRADIO_TEMP_DIR"] = os.path.join(home_path, "tmp", "gradio")
     return home_path


@@ -59,6 +63,7 @@ XINFERENCE_DEFAULT_ENDPOINT_PORT = 9997
 XINFERENCE_DEFAULT_LOG_FILE_NAME = "xinference.log"
 XINFERENCE_LOG_MAX_BYTES = 100 * 1024 * 1024
 XINFERENCE_LOG_BACKUP_COUNT = 30
+XINFERENCE_LOG_ARG_MAX_LENGTH = 100
 XINFERENCE_HEALTH_CHECK_FAILURE_THRESHOLD = int(
     os.environ.get(XINFERENCE_ENV_HEALTH_CHECK_FAILURE_THRESHOLD, 5)
 )
xinference/core/cache_tracker.py
CHANGED
xinference/core/chat_interface.py
CHANGED
@@ -16,7 +16,7 @@ import base64
 import logging
 import os
 from io import BytesIO
-from typing import Generator, List, Optional
+from typing import Dict, Generator, List, Optional

 import gradio as gr
 import PIL.Image
@@ -27,7 +27,6 @@ from ..client.restful.restful_client import (
     RESTfulChatModelHandle,
     RESTfulGenerateModelHandle,
 )
-from ..types import ChatCompletionMessage

 logger = logging.getLogger(__name__)

@@ -96,11 +95,11 @@ class GradioInterface:
                 flat_list += row
             return flat_list

-        def to_chat(lst: List[str]) -> List[
+        def to_chat(lst: List[str]) -> List[Dict]:
             res = []
             for i in range(len(lst)):
                 role = "assistant" if i % 2 == 1 else "user"
-                res.append(
+                res.append(dict(role=role, content=lst[i]))
             return res

         def generate_wrapper(
@@ -116,11 +115,12 @@ class GradioInterface:
             client._set_token(self._access_token)
             model = client.get_model(self.model_uid)
             assert isinstance(model, RESTfulChatModelHandle)
+            messages = to_chat(flatten(history))
+            messages.append(dict(role="user", content=message))

             response_content = ""
             for chunk in model.chat(
-
-                chat_history=to_chat(flatten(history)),
+                messages,
                 generate_config={
                     "max_tokens": int(max_tokens),
                     "temperature": temperature,
@@ -191,15 +191,10 @@ class GradioInterface:
             model = client.get_model(self.model_uid)
             assert isinstance(model, RESTfulChatModelHandle)

-            prompt = history[-1]
-            assert prompt["role"] == "user"
-            prompt = prompt["content"]
-            # multimodal chat does not support stream.
             if stream:
                 response_content = ""
                 for chunk in model.chat(
-
-                    chat_history=history[:-1],
+                    messages=history,
                     generate_config={
                         "max_tokens": max_tokens,
                         "temperature": temperature,
@@ -224,8 +219,7 @@ class GradioInterface:
                 yield history, bot
             else:
                 response = model.chat(
-
-                    chat_history=history[:-1],
+                    messages=history,
                     generate_config={
                         "max_tokens": max_tokens,
                         "temperature": temperature,
xinference/core/event.py
CHANGED
xinference/core/model.py
CHANGED
@@ -19,6 +19,7 @@ import json
 import os
 import time
 import types
+import uuid
 import weakref
 from asyncio.queues import Queue
 from asyncio.tasks import wait_for
@@ -65,7 +66,12 @@ except ImportError:
     OutOfMemoryError = _OutOfMemoryError


-XINFERENCE_BATCHING_ALLOWED_VISION_MODELS = [
+XINFERENCE_BATCHING_ALLOWED_VISION_MODELS = [
+    "qwen-vl-chat",
+    "cogvlm2",
+    "glm-4v",
+    "MiniCPM-V-2.6",
+]


 def request_limit(fn):
@@ -265,7 +271,7 @@ class ModelActor(xo.StatelessActor):

         if self._worker_ref is None:
             self._worker_ref = await xo.actor_ref(
-                address=self._worker_address, uid=WorkerActor.
+                address=self._worker_address, uid=WorkerActor.default_uid()
             )
         return self._worker_ref

@@ -434,23 +440,35 @@ class ModelActor(xo.StatelessActor):
             assert output_type == "binary", f"Unknown output type '{output_type}'"
         return ret

-    @log_async(logger=logger)
     @request_limit
     @xo.generator
+    @log_async(logger=logger)
     async def generate(self, prompt: str, *args, **kwargs):
         if self.allow_batching():
+            # not support request_id
+            kwargs.pop("request_id", None)
             return await self.handle_batching_request(
                 prompt, "generate", *args, **kwargs
             )
         else:
             kwargs.pop("raw_params", None)
             if hasattr(self._model, "generate"):
+                # not support request_id
+                kwargs.pop("request_id", None)
                 return await self._call_wrapper_json(
                     self._model.generate, prompt, *args, **kwargs
                 )
             if hasattr(self._model, "async_generate"):
+                if "request_id" not in kwargs:
+                    kwargs["request_id"] = str(uuid.uuid1())
+                else:
+                    # model only accept string
+                    kwargs["request_id"] = str(kwargs["request_id"])
                 return await self._call_wrapper_json(
-                    self._model.async_generate,
+                    self._model.async_generate,
+                    prompt,
+                    *args,
+                    **kwargs,
                 )
             raise AttributeError(f"Model {self._model.model_spec} is not for generate.")

@@ -481,22 +499,27 @@ class ModelActor(xo.StatelessActor):
             yield res

     @staticmethod
-    def _get_stream_from_args(
-
-
-            return False if args[2] is None else args[2].get("stream", False)
-        else:
-            assert args[0] is None or isinstance(args[0], dict)
-            return False if args[0] is None else args[0].get("stream", False)
+    def _get_stream_from_args(*args) -> bool:
+        assert args[0] is None or isinstance(args[0], dict)
+        return False if args[0] is None else args[0].get("stream", False)

-    async def handle_batching_request(
-
+    async def handle_batching_request(
+        self, prompt_or_messages: Union[str, List[Dict]], call_ability, *args, **kwargs
+    ):
+        """
+        The input parameter `prompt_or_messages`:
+        - when the model_ability is `generate`, it's `prompt`, which is str type.
+        - when the model_ability is `chat`, it's `messages`, which is List[Dict] type.
+        """
+        stream = self._get_stream_from_args(*args)
         assert self._scheduler_ref is not None
         if stream:
             assert self._scheduler_ref is not None
             queue: Queue[Any] = Queue()
             ret = self._queue_consumer(queue)
-            await self._scheduler_ref.add_request(
+            await self._scheduler_ref.add_request(
+                prompt_or_messages, queue, call_ability, *args, **kwargs
+            )
             gen = self._to_async_gen("json", ret)
             self._current_generator = weakref.ref(gen)
             return gen
@@ -505,7 +528,9 @@ class ModelActor(xo.StatelessActor):

             assert self._loop is not None
             future = ConcurrentFuture()
-            await self._scheduler_ref.add_request(
+            await self._scheduler_ref.add_request(
+                prompt_or_messages, future, call_ability, *args, **kwargs
+            )
             fut = asyncio.wrap_future(future, loop=self._loop)
             result = await fut
             if result == XINFERENCE_NON_STREAMING_ABORT_FLAG:
@@ -514,27 +539,36 @@ class ModelActor(xo.StatelessActor):
                 )
             return await asyncio.to_thread(json_dumps, result)

-    @log_async(logger=logger)
     @request_limit
     @xo.generator
-
+    @log_async(logger=logger)
+    async def chat(self, messages: List[Dict], *args, **kwargs):
         start_time = time.time()
         response = None
         try:
             if self.allow_batching():
+                # not support request_id
+                kwargs.pop("request_id", None)
                 return await self.handle_batching_request(
-
+                    messages, "chat", *args, **kwargs
                 )
             else:
                 kwargs.pop("raw_params", None)
                 if hasattr(self._model, "chat"):
+                    # not support request_id
+                    kwargs.pop("request_id", None)
                     response = await self._call_wrapper_json(
-                        self._model.chat,
+                        self._model.chat, messages, *args, **kwargs
                     )
                     return response
                 if hasattr(self._model, "async_chat"):
+                    if "request_id" not in kwargs:
+                        kwargs["request_id"] = str(uuid.uuid1())
+                    else:
+                        # model only accept string
+                        kwargs["request_id"] = str(kwargs["request_id"])
                     response = await self._call_wrapper_json(
-                        self._model.async_chat,
+                        self._model.async_chat, messages, *args, **kwargs
                     )
                     return response
             raise AttributeError(f"Model {self._model.model_spec} is not for chat.")
@@ -565,9 +599,10 @@ class ModelActor(xo.StatelessActor):
             return await self._scheduler_ref.abort_request(request_id)
         return AbortRequestMessage.NO_OP.name

-    @log_async(logger=logger)
     @request_limit
+    @log_async(logger=logger)
     async def create_embedding(self, input: Union[str, List[str]], *args, **kwargs):
+        kwargs.pop("request_id", None)
         if hasattr(self._model, "create_embedding"):
             return await self._call_wrapper_json(
                 self._model.create_embedding, input, *args, **kwargs
@@ -577,8 +612,8 @@ class ModelActor(xo.StatelessActor):
                 f"Model {self._model.model_spec} is not for creating embedding."
             )

-    @log_async(logger=logger)
     @request_limit
+    @log_async(logger=logger)
     async def rerank(
         self,
         documents: List[str],
@@ -590,6 +625,7 @@ class ModelActor(xo.StatelessActor):
         *args,
         **kwargs,
     ):
+        kwargs.pop("request_id", None)
         if hasattr(self._model, "rerank"):
             return await self._call_wrapper_json(
                 self._model.rerank,
@@ -604,8 +640,8 @@ class ModelActor(xo.StatelessActor):
             )
         raise AttributeError(f"Model {self._model.model_spec} is not for reranking.")

-    @log_async(logger=logger, args_formatter=lambda _, kwargs: kwargs.pop("audio"))
     @request_limit
+    @log_async(logger=logger, ignore_kwargs=["audio"])
     async def transcriptions(
         self,
         audio: bytes,
@@ -614,7 +650,9 @@ class ModelActor(xo.StatelessActor):
         response_format: str = "json",
         temperature: float = 0,
         timestamp_granularities: Optional[List[str]] = None,
+        **kwargs,
     ):
+        kwargs.pop("request_id", None)
         if hasattr(self._model, "transcriptions"):
             return await self._call_wrapper_json(
                 self._model.transcriptions,
@@ -629,8 +667,8 @@ class ModelActor(xo.StatelessActor):
                 f"Model {self._model.model_spec} is not for creating transcriptions."
             )

-    @log_async(logger=logger, args_formatter=lambda _, kwargs: kwargs.pop("audio"))
     @request_limit
+    @log_async(logger=logger, ignore_kwargs=["audio"])
     async def translations(
         self,
         audio: bytes,
@@ -639,7 +677,9 @@ class ModelActor(xo.StatelessActor):
         response_format: str = "json",
         temperature: float = 0,
         timestamp_granularities: Optional[List[str]] = None,
+        **kwargs,
     ):
+        kwargs.pop("request_id", None)
         if hasattr(self._model, "translations"):
             return await self._call_wrapper_json(
                 self._model.translations,
@@ -654,12 +694,9 @@ class ModelActor(xo.StatelessActor):
             f"Model {self._model.model_spec} is not for creating translations."
         )

-    @log_async(
-        logger=logger,
-        args_formatter=lambda _, kwargs: kwargs.pop("prompt_speech", None),
-    )
     @request_limit
     @xo.generator
+    @log_async(logger=logger, ignore_kwargs=["prompt_speech"])
     async def speech(
         self,
         input: str,
@@ -669,6 +706,7 @@ class ModelActor(xo.StatelessActor):
         stream: bool = False,
         **kwargs,
     ):
+        kwargs.pop("request_id", None)
         if hasattr(self._model, "speech"):
             return await self._call_wrapper_binary(
                 self._model.speech,
@@ -683,8 +721,8 @@ class ModelActor(xo.StatelessActor):
                 f"Model {self._model.model_spec} is not for creating speech."
             )

-    @log_async(logger=logger)
     @request_limit
+    @log_async(logger=logger)
     async def text_to_image(
         self,
         prompt: str,
@@ -694,6 +732,7 @@ class ModelActor(xo.StatelessActor):
         *args,
         **kwargs,
     ):
+        kwargs.pop("request_id", None)
         if hasattr(self._model, "text_to_image"):
             return await self._call_wrapper_json(
                 self._model.text_to_image,
@@ -708,6 +747,10 @@ class ModelActor(xo.StatelessActor):
                 f"Model {self._model.model_spec} is not for creating image."
             )

+    @log_async(
+        logger=logger,
+        ignore_kwargs=["image"],
+    )
     async def image_to_image(
         self,
         image: "PIL.Image",
@@ -719,6 +762,7 @@ class ModelActor(xo.StatelessActor):
         *args,
         **kwargs,
     ):
+        kwargs.pop("request_id", None)
         if hasattr(self._model, "image_to_image"):
             return await self._call_wrapper_json(
                 self._model.image_to_image,
@@ -735,6 +779,10 @@ class ModelActor(xo.StatelessActor):
                 f"Model {self._model.model_spec} is not for creating image."
             )

+    @log_async(
+        logger=logger,
+        ignore_kwargs=["image"],
+    )
     async def inpainting(
         self,
         image: "PIL.Image",
@@ -747,6 +795,7 @@ class ModelActor(xo.StatelessActor):
         *args,
         **kwargs,
     ):
+        kwargs.pop("request_id", None)
         if hasattr(self._model, "inpainting"):
             return await self._call_wrapper_json(
                 self._model.inpainting,
@@ -764,12 +813,13 @@ class ModelActor(xo.StatelessActor):
             f"Model {self._model.model_spec} is not for creating image."
         )

-    @log_async(logger=logger)
     @request_limit
+    @log_async(logger=logger, ignore_kwargs=["image"])
     async def infer(
         self,
         **kwargs,
     ):
+        kwargs.pop("request_id", None)
         if hasattr(self._model, "infer"):
             return await self._call_wrapper_json(
                 self._model.infer,
@@ -779,8 +829,8 @@ class ModelActor(xo.StatelessActor):
             f"Model {self._model.model_spec} is not for flexible infer."
         )

-    @log_async(logger=logger)
     @request_limit
+    @log_async(logger=logger)
     async def text_to_video(
         self,
         prompt: str,
@@ -788,6 +838,7 @@ class ModelActor(xo.StatelessActor):
         *args,
         **kwargs,
     ):
+        kwargs.pop("request_id", None)
         if hasattr(self._model, "text_to_video"):
             return await self._call_wrapper_json(
                 self._model.text_to_video,