xinference 0.14.4.post1__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Potentially problematic release. This version of xinference might be problematic.
- xinference/_compat.py +51 -0
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +209 -40
- xinference/client/restful/restful_client.py +7 -26
- xinference/conftest.py +1 -1
- xinference/constants.py +5 -0
- xinference/core/cache_tracker.py +1 -1
- xinference/core/chat_interface.py +8 -14
- xinference/core/event.py +1 -1
- xinference/core/image_interface.py +28 -0
- xinference/core/model.py +110 -31
- xinference/core/scheduler.py +37 -37
- xinference/core/status_guard.py +1 -1
- xinference/core/supervisor.py +17 -10
- xinference/core/utils.py +80 -22
- xinference/core/worker.py +17 -16
- xinference/deploy/cmdline.py +8 -16
- xinference/deploy/local.py +1 -1
- xinference/deploy/supervisor.py +1 -1
- xinference/deploy/utils.py +1 -1
- xinference/deploy/worker.py +1 -1
- xinference/model/audio/cosyvoice.py +86 -41
- xinference/model/audio/fish_speech.py +9 -9
- xinference/model/audio/model_spec.json +9 -9
- xinference/model/audio/whisper.py +4 -1
- xinference/model/embedding/core.py +52 -31
- xinference/model/image/core.py +2 -1
- xinference/model/image/model_spec.json +16 -4
- xinference/model/image/model_spec_modelscope.json +16 -4
- xinference/model/image/sdapi.py +136 -0
- xinference/model/image/stable_diffusion/core.py +164 -19
- xinference/model/llm/__init__.py +29 -11
- xinference/model/llm/llama_cpp/core.py +16 -33
- xinference/model/llm/llm_family.json +1011 -1296
- xinference/model/llm/llm_family.py +34 -53
- xinference/model/llm/llm_family_csghub.json +18 -35
- xinference/model/llm/llm_family_modelscope.json +981 -1122
- xinference/model/llm/lmdeploy/core.py +56 -88
- xinference/model/llm/mlx/core.py +46 -69
- xinference/model/llm/sglang/core.py +36 -18
- xinference/model/llm/transformers/chatglm.py +168 -306
- xinference/model/llm/transformers/cogvlm2.py +36 -63
- xinference/model/llm/transformers/cogvlm2_video.py +33 -223
- xinference/model/llm/transformers/core.py +55 -50
- xinference/model/llm/transformers/deepseek_v2.py +340 -0
- xinference/model/llm/transformers/deepseek_vl.py +53 -96
- xinference/model/llm/transformers/glm4v.py +55 -111
- xinference/model/llm/transformers/intern_vl.py +39 -70
- xinference/model/llm/transformers/internlm2.py +32 -54
- xinference/model/llm/transformers/minicpmv25.py +22 -55
- xinference/model/llm/transformers/minicpmv26.py +158 -68
- xinference/model/llm/transformers/omnilmm.py +5 -28
- xinference/model/llm/transformers/qwen2_audio.py +168 -0
- xinference/model/llm/transformers/qwen2_vl.py +234 -0
- xinference/model/llm/transformers/qwen_vl.py +34 -86
- xinference/model/llm/transformers/utils.py +32 -38
- xinference/model/llm/transformers/yi_vl.py +32 -72
- xinference/model/llm/utils.py +280 -554
- xinference/model/llm/vllm/core.py +161 -100
- xinference/model/rerank/core.py +41 -8
- xinference/model/rerank/model_spec.json +7 -0
- xinference/model/rerank/model_spec_modelscope.json +7 -1
- xinference/model/utils.py +1 -31
- xinference/thirdparty/cosyvoice/bin/export_jit.py +64 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.py +8 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +38 -22
- xinference/thirdparty/cosyvoice/cli/model.py +139 -26
- xinference/thirdparty/cosyvoice/flow/flow.py +15 -9
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +20 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +8 -4
- xinference/thirdparty/cosyvoice/llm/llm.py +14 -13
- xinference/thirdparty/cosyvoice/transformer/attention.py +7 -3
- xinference/thirdparty/cosyvoice/transformer/decoder.py +1 -1
- xinference/thirdparty/cosyvoice/transformer/embedding.py +4 -3
- xinference/thirdparty/cosyvoice/transformer/encoder.py +4 -2
- xinference/thirdparty/cosyvoice/utils/common.py +36 -0
- xinference/thirdparty/cosyvoice/utils/file_utils.py +16 -0
- xinference/thirdparty/deepseek_vl/serve/assets/Kelpy-Codos.js +100 -0
- xinference/thirdparty/deepseek_vl/serve/assets/avatar.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/assets/custom.css +355 -0
- xinference/thirdparty/deepseek_vl/serve/assets/custom.js +22 -0
- xinference/thirdparty/deepseek_vl/serve/assets/favicon.ico +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/app.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/chart.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/mirror.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/pipeline.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/puzzle.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/rap.jpeg +0 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/base.yaml +87 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +33 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/r_8_alpha_16.yaml +4 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +83 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text-data.proto +24 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/README.md +27 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/.gitignore +114 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/README.md +36 -0
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/css/style.css +161 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/html/footer.html +11 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/js/animate.js +69 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
- xinference/thirdparty/fish_speech/tools/api.py +79 -134
- xinference/thirdparty/fish_speech/tools/commons.py +35 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
- xinference/thirdparty/fish_speech/tools/file.py +17 -0
- xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
- xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
- xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
- xinference/thirdparty/fish_speech/tools/sensevoice/README.md +59 -0
- xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
- xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
- xinference/thirdparty/fish_speech/tools/webui.py +12 -146
- xinference/thirdparty/matcha/VERSION +1 -0
- xinference/thirdparty/matcha/hifigan/LICENSE +21 -0
- xinference/thirdparty/matcha/hifigan/README.md +101 -0
- xinference/thirdparty/omnilmm/LICENSE +201 -0
- xinference/thirdparty/whisper/__init__.py +156 -0
- xinference/thirdparty/whisper/__main__.py +3 -0
- xinference/thirdparty/whisper/assets/gpt2.tiktoken +50256 -0
- xinference/thirdparty/whisper/assets/mel_filters.npz +0 -0
- xinference/thirdparty/whisper/assets/multilingual.tiktoken +50257 -0
- xinference/thirdparty/whisper/audio.py +157 -0
- xinference/thirdparty/whisper/decoding.py +826 -0
- xinference/thirdparty/whisper/model.py +314 -0
- xinference/thirdparty/whisper/normalizers/__init__.py +2 -0
- xinference/thirdparty/whisper/normalizers/basic.py +76 -0
- xinference/thirdparty/whisper/normalizers/english.json +1741 -0
- xinference/thirdparty/whisper/normalizers/english.py +550 -0
- xinference/thirdparty/whisper/timing.py +386 -0
- xinference/thirdparty/whisper/tokenizer.py +395 -0
- xinference/thirdparty/whisper/transcribe.py +605 -0
- xinference/thirdparty/whisper/triton_ops.py +109 -0
- xinference/thirdparty/whisper/utils.py +316 -0
- xinference/thirdparty/whisper/version.py +1 -0
- xinference/types.py +14 -53
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.4bafd904.css → main.5061c4c3.css} +2 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
- xinference/web/ui/build/static/js/main.754740c0.js +3 -0
- xinference/web/ui/build/static/js/{main.eb13fe95.js.LICENSE.txt → main.754740c0.js.LICENSE.txt} +2 -0
- xinference/web/ui/build/static/js/main.754740c0.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/10c69dc7a296779fcffedeff9393d832dfcb0013c36824adf623d3c518b801ff.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/77d50223f3e734d4485cca538cb098a8c3a7a0a1a9f01f58cdda3af42fe1adf5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a56d5a642409a84988891089c98ca28ad0546432dfbae8aaa51bc5a280e1cdd2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9ff696a3e3471f01b46c63d18af32e491eb5dc0e43cb30202c96871466df57f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +37 -0
- xinference/web/ui/node_modules/a-sync-waterfall/package.json +21 -0
- xinference/web/ui/node_modules/nunjucks/node_modules/commander/package.json +48 -0
- xinference/web/ui/node_modules/nunjucks/package.json +112 -0
- xinference/web/ui/package-lock.json +38 -0
- xinference/web/ui/package.json +1 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/METADATA +16 -10
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/RECORD +179 -127
- xinference/model/llm/transformers/llama_2.py +0 -108
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
- xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
- xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
- xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
- xinference/web/ui/build/static/css/main.4bafd904.css.map +0 -1
- xinference/web/ui/build/static/js/main.eb13fe95.js +0 -3
- xinference/web/ui/build/static/js/main.eb13fe95.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0b11a5339468c13b2d31ac085e7effe4303259b2071abd46a0a8eb8529233a5e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +0 -1
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/LICENSE +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/WHEEL +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/top_level.txt +0 -0
xinference/_compat.py
CHANGED
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Dict, Iterable, List, Literal, Optional, Union
+
 from pydantic.version import VERSION as PYDANTIC_VERSION

 PYDANTIC_V2 = PYDANTIC_VERSION.startswith("2.")
@@ -50,3 +52,52 @@ else:
     from pydantic.parse import load_str_bytes  # noqa: F401
     from pydantic.types import StrBytes  # noqa: F401
     from pydantic.utils import ROOT_KEY  # noqa: F401
+
+from openai.types.chat.chat_completion_named_tool_choice_param import (
+    ChatCompletionNamedToolChoiceParam,
+)
+from openai.types.chat.chat_completion_stream_options_param import (
+    ChatCompletionStreamOptionsParam,
+)
+from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
+
+OpenAIChatCompletionStreamOptionsParam = create_model_from_typeddict(
+    ChatCompletionStreamOptionsParam
+)
+OpenAIChatCompletionToolParam = create_model_from_typeddict(ChatCompletionToolParam)
+OpenAIChatCompletionNamedToolChoiceParam = create_model_from_typeddict(
+    ChatCompletionNamedToolChoiceParam
+)
+
+
+class CreateChatCompletionOpenAI(BaseModel):
+    """
+    Comes from source code: https://github.com/openai/openai-python/blob/main/src/openai/types/chat/completion_create_params.py
+    """
+
+    messages: List[Dict]
+    model: str
+    frequency_penalty: Optional[float]
+    logit_bias: Optional[Dict[str, int]]
+    logprobs: Optional[bool]
+    max_tokens: Optional[int]
+    n: Optional[int]
+    parallel_tool_calls: Optional[bool]
+    presence_penalty: Optional[float]
+    # we do not support this
+    # response_format: ResponseFormat
+    seed: Optional[int]
+    service_tier: Optional[Literal["auto", "default"]]
+    stop: Union[Optional[str], List[str]]
+    stream_options: Optional[OpenAIChatCompletionStreamOptionsParam]  # type: ignore
+    temperature: Optional[float]
+    tool_choice: Optional[  # type: ignore
+        Union[
+            Literal["none", "auto", "required"],
+            OpenAIChatCompletionNamedToolChoiceParam,
+        ]
+    ]
+    tools: Optional[Iterable[OpenAIChatCompletionToolParam]]  # type: ignore
+    top_logprobs: Optional[int]
+    top_p: Optional[float]
+    user: Optional[str]
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
- "date": "2024-09-
+ "date": "2024-09-14T13:22:13+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "
- "version": "0.14.4.post1"
+ "full-revisionid": "961d355102007e3cd7963a353105b2422a31d4fd",
+ "version": "0.15.1"
 }
 '''  # END VERSION_JSON
xinference/api/restful_api.py
CHANGED
@@ -57,14 +57,13 @@ from ..core.event import Event, EventCollectorActor, EventType
 from ..core.supervisor import SupervisorActor
 from ..core.utils import json_dumps
 from ..types import (
-    SPECIAL_TOOL_PROMPT,
     ChatCompletion,
-    ChatCompletionMessage,
     Completion,
     CreateChatCompletion,
     CreateCompletion,
     ImageList,
     PeftModelConfig,
+    SDAPIResult,
     VideoList,
     max_tokens_field,
 )
@@ -124,6 +123,43 @@ class TextToImageRequest(BaseModel):
     user: Optional[str] = None


+class SDAPIOptionsRequest(BaseModel):
+    sd_model_checkpoint: Optional[str] = None
+
+
+class SDAPITxt2imgRequst(BaseModel):
+    model: Optional[str]
+    prompt: Optional[str] = ""
+    negative_prompt: Optional[str] = ""
+    steps: Optional[int] = None
+    seed: Optional[int] = -1
+    cfg_scale: Optional[float] = 7.0
+    override_settings: Optional[dict] = {}
+    width: Optional[int] = 512
+    height: Optional[int] = 512
+    sampler_name: Optional[str] = None
+    denoising_strength: Optional[float] = None
+    kwargs: Optional[str] = None
+    user: Optional[str] = None
+
+
+class SDAPIImg2imgRequst(BaseModel):
+    model: Optional[str]
+    init_images: Optional[list]
+    prompt: Optional[str] = ""
+    negative_prompt: Optional[str] = ""
+    steps: Optional[int] = None
+    seed: Optional[int] = -1
+    cfg_scale: Optional[float] = 7.0
+    override_settings: Optional[dict] = {}
+    width: Optional[int] = 512
+    height: Optional[int] = 512
+    sampler_name: Optional[str] = None
+    denoising_strength: Optional[float] = None
+    kwargs: Optional[str] = None
+    user: Optional[str] = None
+
+
 class TextToVideoRequest(BaseModel):
     model: str
     prompt: Union[str, List[str]] = Field(description="The input to embed.")
@@ -165,7 +201,7 @@ class BuildGradioImageInterfaceRequest(BaseModel):
     model_name: str
     model_family: str
     model_id: str
-    controlnet: Union[None, List[Dict[str, Union[str, None]]]]
+    controlnet: Union[None, List[Dict[str, Union[str, dict, None]]]]
     model_revision: str
     model_ability: List[str]

@@ -199,14 +235,14 @@ class RESTfulAPI:
     async def _get_supervisor_ref(self) -> xo.ActorRefType[SupervisorActor]:
         if self._supervisor_ref is None:
             self._supervisor_ref = await xo.actor_ref(
-                address=self._supervisor_address, uid=SupervisorActor.uid()
+                address=self._supervisor_address, uid=SupervisorActor.default_uid()
             )
         return self._supervisor_ref

     async def _get_event_collector_ref(self) -> xo.ActorRefType[EventCollectorActor]:
         if self._event_collector_ref is None:
             self._event_collector_ref = await xo.actor_ref(
-                address=self._supervisor_address, uid=EventCollectorActor.uid()
+                address=self._supervisor_address, uid=EventCollectorActor.default_uid()
             )
         return self._event_collector_ref

@@ -521,6 +557,59 @@ class RESTfulAPI:
                 else None
             ),
         )
+        # SD WebUI API
+        self._router.add_api_route(
+            "/sdapi/v1/options",
+            self.sdapi_options,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/sdapi/v1/sd-models",
+            self.sdapi_sd_models,
+            methods=["GET"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/sdapi/v1/samplers",
+            self.sdapi_samplers,
+            methods=["GET"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/sdapi/v1/txt2img",
+            self.sdapi_txt2img,
+            methods=["POST"],
+            response_model=SDAPIResult,
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/sdapi/v1/img2img",
+            self.sdapi_img2img,
+            methods=["POST"],
+            response_model=SDAPIResult,
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
         self._router.add_api_route(
             "/v1/video/generations",
             self.create_videos,
@@ -1431,6 +1520,118 @@ class RESTfulAPI:
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))

+    async def sdapi_options(self, request: Request) -> Response:
+        body = SDAPIOptionsRequest.parse_obj(await request.json())
+        model_uid = body.sd_model_checkpoint
+
+        try:
+            if not model_uid:
+                raise ValueError("Unknown model")
+            await (await self._get_supervisor_ref()).get_model(model_uid)
+            return Response()
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+    async def sdapi_sd_models(self, request: Request) -> Response:
+        try:
+            models = await (await self._get_supervisor_ref()).list_models()
+            sd_models = []
+            for model_name, info in models.items():
+                if info["model_type"] != "image":
+                    continue
+                sd_models.append({"model_name": model_name, "config": None})
+            return JSONResponse(content=sd_models)
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+
+    async def sdapi_samplers(self, request: Request) -> Response:
+        try:
+            from ..model.image.stable_diffusion.core import SAMPLING_METHODS
+
+            samplers = [
+                {"name": sample_method, "alias": [], "options": {}}
+                for sample_method in SAMPLING_METHODS
+            ]
+            return JSONResponse(content=samplers)
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+
+    async def sdapi_txt2img(self, request: Request) -> Response:
+        body = SDAPITxt2imgRequst.parse_obj(await request.json())
+        model_uid = body.model or body.override_settings.get("sd_model_checkpoint")
+
+        try:
+            if not model_uid:
+                raise ValueError("Unknown model")
+            model = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        try:
+            kwargs = dict(body)
+            kwargs.update(json.loads(body.kwargs) if body.kwargs else {})
+            image_list = await model.txt2img(
+                **kwargs,
+            )
+            return Response(content=image_list, media_type="application/json")
+        except RuntimeError as re:
+            logger.error(re, exc_info=True)
+            await self._report_error_event(model_uid, str(re))
+            self.handle_request_limit_error(re)
+            raise HTTPException(status_code=400, detail=str(re))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+    async def sdapi_img2img(self, request: Request) -> Response:
+        body = SDAPIImg2imgRequst.parse_obj(await request.json())
+        model_uid = body.model or body.override_settings.get("sd_model_checkpoint")
+
+        try:
+            if not model_uid:
+                raise ValueError("Unknown model")
+            model = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        try:
+            kwargs = dict(body)
+            kwargs.update(json.loads(body.kwargs) if body.kwargs else {})
+            image_list = await model.img2img(
+                **kwargs,
+            )
+            return Response(content=image_list, media_type="application/json")
+        except RuntimeError as re:
+            logger.error(re, exc_info=True)
+            await self._report_error_event(model_uid, str(re))
+            self.handle_request_limit_error(re)
+            raise HTTPException(status_code=400, detail=str(re))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def create_variations(
         self,
         model: str = Form(...),
@@ -1627,33 +1828,7 @@
             status_code=400, detail="Invalid input. Please specify the prompt."
         )

-        system_messages: List["ChatCompletionMessage"] = []
-        system_messages_contents = []
-        non_system_messages = []
-        for msg in messages:
-            assert (
-                msg.get("content") != SPECIAL_TOOL_PROMPT
-            ), f"Invalid message content {SPECIAL_TOOL_PROMPT}"
-            if msg["role"] == "system":
-                system_messages_contents.append(msg["content"])
-            else:
-                non_system_messages.append(msg)
-        system_messages.append(
-            {"role": "system", "content": ". ".join(system_messages_contents)}
-        )
-
         has_tool_message = messages[-1].get("role") == "tool"
-        if has_tool_message:
-            prompt = SPECIAL_TOOL_PROMPT
-            system_prompt = system_messages[0]["content"] if system_messages else None
-            chat_history = non_system_messages  # exclude the prompt
-        else:
-            prompt = None
-            if non_system_messages:
-                prompt = non_system_messages[-1]["content"]
-            system_prompt = system_messages[0]["content"] if system_messages else None
-            chat_history = non_system_messages[:-1]  # exclude the prompt
-
         model_uid = body.model

         try:
@@ -1681,9 +1856,7 @@
         from ..model.llm.utils import GLM4_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY

         model_family = desc.get("model_family", "")
-        function_call_models = (
-            ["gorilla-openfunctions-v1"] + QWEN_TOOL_CALL_FAMILY + GLM4_TOOL_CALL_FAMILY
-        )
+        function_call_models = QWEN_TOOL_CALL_FAMILY + GLM4_TOOL_CALL_FAMILY

         if model_family not in function_call_models:
             if body.tools:
@@ -1716,9 +1889,7 @@
         try:
             try:
                 iterator = await model.chat(
-                    prompt,
-                    system_prompt,
-                    chat_history,
+                    messages,
                     kwargs,
                     raw_params=raw_kwargs,
                 )
@@ -1750,9 +1921,7 @@
         else:
             try:
                 data = await model.chat(
-                    prompt,
-                    system_prompt,
-                    chat_history,
+                    messages,
                     kwargs,
                     raw_params=raw_kwargs,
                 )
xinference/client/restful/restful_client.py
CHANGED
@@ -13,7 +13,6 @@
 # limitations under the License.
 import json
 import typing
-import warnings
 from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union

 import requests
@@ -470,9 +469,7 @@ class RESTfulGenerateModelHandle(RESTfulModelHandle):
 class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
     def chat(
         self,
-        prompt: str,
-        system_prompt: Optional[str] = None,
-        chat_history: Optional[List["ChatCompletionMessage"]] = None,
+        messages: List[Dict],
         tools: Optional[List[Dict]] = None,
         generate_config: Optional[
             Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]
@@ -483,11 +480,7 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):

         Parameters
         ----------
-        prompt: str
-            The user's input.
-        system_prompt: Optional[str]
-            The system context provide to Model prior to any chats.
-        chat_history: Optional[List["ChatCompletionMessage"]]
+        messages: List[Dict]
             A list of messages comprising the conversation so far.
         tools: Optional[List[Dict]]
             A tool list.
@@ -509,25 +502,11 @@
             Report the failure to generate the chat from the server. Detailed information provided in error message.

         """
-        warnings.warn(
-            "The parameters `prompt`, `system_prompt` and `chat_history` will be deprecated in version v0.15.0, "
-            "and will be replaced by the parameter `messages`, "
-            "similar to the OpenAI API: https://platform.openai.com/docs/guides/chat-completions/getting-started",
-            category=DeprecationWarning,
-            stacklevel=2,
-        )
-
         url = f"{self._base_url}/v1/chat/completions"

-        if chat_history is None:
-            chat_history = []
-
-        chat_history = handle_system_prompts(chat_history, system_prompt)
-        chat_history.append({"role": "user", "content": prompt})  # type: ignore
-
         request_body: Dict[str, Any] = {
             "model": self._model_uid,
-            "messages": chat_history,
+            "messages": messages,
         }
         if tools is not None:
             request_body["tools"] = tools
@@ -730,10 +709,12 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
                 )
             )
             response = requests.post(
-                url, data=params, files=files, headers=self.auth_headers
+                url, data=params, files=files, headers=self.auth_headers, stream=stream
             )
         else:
-            response = requests.post(
+            response = requests.post(
+                url, json=params, headers=self.auth_headers, stream=stream
+            )
         if response.status_code != 200:
             raise RuntimeError(
                 f"Failed to speech the text, detail: {_get_error_string(response)}"
xinference/conftest.py
CHANGED
@@ -144,7 +144,7 @@ async def _start_test_cluster(
         address=f"test://{address}", logging_conf=logging_conf
     )
     await xo.create_actor(
-        SupervisorActor, address=address, uid=SupervisorActor.uid()
+        SupervisorActor, address=address, uid=SupervisorActor.default_uid()
     )
     await start_worker_components(
         address=address,
xinference/constants.py
CHANGED
@@ -38,6 +38,10 @@ def get_xinference_home() -> str:
         # if user has already set `XINFERENCE_HOME` env, change huggingface and modelscope default download path
         os.environ["HUGGINGFACE_HUB_CACHE"] = os.path.join(home_path, "huggingface")
         os.environ["MODELSCOPE_CACHE"] = os.path.join(home_path, "modelscope")
+        # In multi-tenant mode,
+        # gradio's temporary files are stored in their respective home directories,
+        # to prevent insufficient permissions
+        os.environ["GRADIO_TEMP_DIR"] = os.path.join(home_path, "tmp", "gradio")
     return home_path


@@ -59,6 +63,7 @@ XINFERENCE_DEFAULT_ENDPOINT_PORT = 9997
 XINFERENCE_DEFAULT_LOG_FILE_NAME = "xinference.log"
 XINFERENCE_LOG_MAX_BYTES = 100 * 1024 * 1024
 XINFERENCE_LOG_BACKUP_COUNT = 30
+XINFERENCE_LOG_ARG_MAX_LENGTH = 100
 XINFERENCE_HEALTH_CHECK_FAILURE_THRESHOLD = int(
     os.environ.get(XINFERENCE_ENV_HEALTH_CHECK_FAILURE_THRESHOLD, 5)
 )
xinference/core/chat_interface.py
CHANGED
@@ -16,7 +16,7 @@ import base64
 import logging
 import os
 from io import BytesIO
-from typing import Generator, List, Optional
+from typing import Dict, Generator, List, Optional

 import gradio as gr
 import PIL.Image
@@ -27,7 +27,6 @@ from ..client.restful.restful_client import (
     RESTfulChatModelHandle,
     RESTfulGenerateModelHandle,
 )
-from ..types import ChatCompletionMessage

 logger = logging.getLogger(__name__)

@@ -96,11 +95,11 @@ class GradioInterface:
                 flat_list += row
             return flat_list

-        def to_chat(lst: List[str]) -> List[ChatCompletionMessage]:
+        def to_chat(lst: List[str]) -> List[Dict]:
             res = []
             for i in range(len(lst)):
                 role = "assistant" if i % 2 == 1 else "user"
-                res.append(ChatCompletionMessage(role=role, content=lst[i]))
+                res.append(dict(role=role, content=lst[i]))
             return res

        def generate_wrapper(
@@ -116,11 +115,12 @@ class GradioInterface:
            client._set_token(self._access_token)
            model = client.get_model(self.model_uid)
            assert isinstance(model, RESTfulChatModelHandle)
+           messages = to_chat(flatten(history))
+           messages.append(dict(role="user", content=message))

            response_content = ""
            for chunk in model.chat(
-               prompt=message,
-               chat_history=to_chat(flatten(history)),
+               messages,
                generate_config={
                    "max_tokens": int(max_tokens),
                    "temperature": temperature,
@@ -191,15 +191,10 @@ class GradioInterface:
            model = client.get_model(self.model_uid)
            assert isinstance(model, RESTfulChatModelHandle)

-           prompt = history[-1]
-           assert prompt["role"] == "user"
-           prompt = prompt["content"]
-           # multimodal chat does not support stream.
            if stream:
                response_content = ""
                for chunk in model.chat(
-                   prompt=prompt,
-                   chat_history=history[:-1],
+                   messages=history,
                    generate_config={
                        "max_tokens": max_tokens,
                        "temperature": temperature,
@@ -224,8 +219,7 @@ class GradioInterface:
                yield history, bot
            else:
                response = model.chat(
-                   prompt=prompt,
-                   chat_history=history[:-1],
+                   messages=history,
                    generate_config={
                        "max_tokens": max_tokens,
                        "temperature": temperature,