xinference 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference was flagged as potentially problematic by the registry.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +4 -7
- xinference/client/handlers.py +3 -0
- xinference/client/restful/restful_client.py +9 -1
- xinference/core/model.py +19 -0
- xinference/core/resource.py +7 -1
- xinference/core/scheduler.py +4 -7
- xinference/core/status_guard.py +1 -0
- xinference/core/supervisor.py +228 -19
- xinference/core/utils.py +1 -29
- xinference/core/worker.py +28 -2
- xinference/deploy/cmdline.py +33 -3
- xinference/deploy/local.py +2 -1
- xinference/deploy/test/test_cmdline.py +32 -0
- xinference/device_utils.py +43 -1
- xinference/model/audio/core.py +5 -0
- xinference/model/audio/kokoro.py +122 -0
- xinference/model/audio/model_spec.json +8 -0
- xinference/model/audio/model_spec_modelscope.json +9 -0
- xinference/model/image/stable_diffusion/core.py +15 -6
- xinference/model/llm/llama_cpp/core.py +21 -14
- xinference/model/llm/llm_family.json +866 -46
- xinference/model/llm/llm_family.py +7 -2
- xinference/model/llm/llm_family_modelscope.json +873 -16
- xinference/model/llm/mlx/core.py +11 -3
- xinference/model/llm/reasoning_parsers/__init__.py +13 -0
- xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +98 -0
- xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py +140 -0
- xinference/model/llm/sglang/core.py +99 -11
- xinference/model/llm/transformers/core.py +9 -1
- xinference/model/llm/transformers/intern_vl.py +23 -14
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -3
- xinference/model/llm/transformers/utils.py +22 -11
- xinference/model/llm/utils.py +164 -20
- xinference/model/llm/vllm/core.py +36 -4
- xinference/model/llm/vllm/xavier/executor.py +2 -2
- xinference/model/llm/vllm/xavier/scheduler.py +3 -3
- xinference/thirdparty/internvl/conversation.py +26 -17
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.f8177338.css +2 -0
- xinference/web/ui/build/static/css/main.f8177338.css.map +1 -0
- xinference/web/ui/build/static/js/main.ad42919c.js +3 -0
- xinference/web/ui/build/static/js/main.ad42919c.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/074a42304bbbaa79e1bfc3b28502457a390df55708de9006f4cc8e35c60aea87.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0acb065326560592b10888234242f94f67efe28458b90f273d4d4fba9daa0cd2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/279ace390216236a82b3d8995c78eca4d637ac9a523e9f521a2d9c76607a43d7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/630a7bd592596cc6e291fc32238ce7c08238038a64ed8ccee0eb0c13c9902910.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6cb9f6c62ab4042f0b11c5d75e51187188e9d6f5f08b1d63e796e051bafdb457.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/914c33e91c1012e3bcd3e96f3a25884cbef148290632d0266dab972b8cc1e95f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b7939cd3a48adf12fccfdd0803019b5cc235ff7de3a297dae70ce635e0eea13e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fecf076bcd198a458c2a6ab0e85e40dc1c99994c353164e79c469be162cb74c9.json +1 -0
- xinference/web/ui/src/locales/en.json +14 -1
- xinference/web/ui/src/locales/zh.json +14 -1
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/METADATA +18 -17
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/RECORD +67 -60
- xinference/web/ui/build/static/css/main.51a587ff.css +0 -2
- xinference/web/ui/build/static/css/main.51a587ff.css.map +0 -1
- xinference/web/ui/build/static/js/main.b0936c54.js +0 -3
- xinference/web/ui/build/static/js/main.b0936c54.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0c2fb5375667931c4a331c99e0d87dc145e8f327cea3f44d6e56f54c7c1d4020.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a7f1a71f6580dfe810c685a9c1d68e318f71e1fa258fbe50b87a6ac37cc0a598.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +0 -1
- /xinference/web/ui/build/static/js/{main.b0936c54.js.LICENSE.txt → main.ad42919c.js.LICENSE.txt} +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/LICENSE +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/WHEEL +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/top_level.txt +0 -0
xinference/deploy/cmdline.py
CHANGED
@@ -770,11 +770,17 @@ def remove_cache(
     type=int,
     help="The replica count of the model, default is 1.",
 )
+@click.option(
+    "--n-worker",
+    default=1,
+    type=int,
+    help="The number of workers used by the model, default is 1.",
+)
 @click.option(
     "--n-gpu",
     default="auto",
     type=str,
-    help='The number of GPUs used by the model, default is "auto".',
+    help='The number of GPUs used by the model, if n_worker>1, means number of GPUs per worker, default is "auto".',
 )
 @click.option(
     "--lora-modules",
@@ -815,6 +821,12 @@ def remove_cache(
     type=bool,
     help="Whether or not to allow for custom models defined on the Hub in their own modeling files.",
 )
+@click.option(
+    "--reasoning-content",
+    default=False,
+    type=bool,
+    help="Whether or not to enable reasoning content in model responses.",
+)
 @click.option(
     "--api-key",
     "-ak",
@@ -822,6 +834,7 @@ def remove_cache(
     type=str,
     help="Api-Key for access xinference api with authorization.",
 )
+@click.option("--model-path", "-mp", default=None, type=str, help="Model path to run.")
 @click.pass_context
 def model_launch(
     ctx,
@@ -834,6 +847,7 @@ def model_launch(
     model_format: str,
     quantization: str,
     replica: int,
+    n_worker: int,
     n_gpu: str,
     lora_modules: Optional[Tuple],
     image_lora_load_kwargs: Optional[Tuple],
@@ -841,15 +855,28 @@ def model_launch(
     worker_ip: Optional[str],
     gpu_idx: Optional[str],
     trust_remote_code: bool,
+    reasoning_content: bool,
     api_key: Optional[str],
+    model_path: Optional[str],
 ):
     kwargs = {}
     for i in range(0, len(ctx.args), 2):
         if not ctx.args[i].startswith("--"):
             raise ValueError(
-                f"You must specify extra kwargs with `--` prefix.
+                f"You must specify extra kwargs with `--` prefix. "
+                f"There is an error in parameter passing that is {ctx.args[i]}."
             )
-
+        param_name = ctx.args[i][2:]
+        param_value = handle_click_args_type(ctx.args[i + 1])
+        if param_name == "model_path":
+            # fix for --model_path which is the old fashion to set model_path,
+            # now model_path is a builtin option, try to make it compatible
+            if model_path is None:
+                model_path = param_value
+                continue
+            else:
+                raise ValueError("Cannot set both for --model-path and --model_path")
+        kwargs[param_name] = param_value
     print(f"Launch model name: {model_name} with kwargs: {kwargs}", file=sys.stderr)
 
     if model_type == "LLM" and model_engine is None:
@@ -914,11 +941,14 @@ def model_launch(
         model_format=model_format,
         quantization=quantization,
         replica=replica,
+        n_worker=n_worker,
         n_gpu=_n_gpu,
         peft_model_config=peft_model_config,
         worker_ip=worker_ip,
         gpu_idx=_gpu_idx,
         trust_remote_code=trust_remote_code,
+        model_path=model_path,
+        reasoning_content=reasoning_content,
         **kwargs,
     )
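For reference, the new launch options map one-to-one onto keyword arguments of the client.launch_model(...) call shown in the last hunk. A minimal sketch, assuming a locally running endpoint; the endpoint, model name and quantization below are placeholders, not values taken from this diff:

from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # placeholder endpoint
model_uid = client.launch_model(
    model_name="qwen2.5-instruct",   # placeholder model
    model_engine="llama.cpp",
    model_format="ggufv2",
    quantization="Q4_K_M",
    n_worker=1,                      # new in 1.3.0: number of workers used by the model
    n_gpu="auto",                    # with n_worker > 1 this is GPUs per worker
    model_path="/path/to/model",     # new builtin option, replaces the ad-hoc --model_path
    reasoning_content=True,          # new in 1.3.0: enable reasoning content in responses
)
print(model_uid)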
xinference/deploy/local.py
CHANGED
xinference/deploy/test/test_cmdline.py
CHANGED
@@ -147,6 +147,38 @@ def test_cmdline(setup, stream, model_uid):
     assert model_uid not in result.stdout
 
 
+def test_cmdline_model_path_error(setup):
+    endpoint, _ = setup
+    runner = CliRunner(mix_stderr=False)
+
+    # launch model
+    result = runner.invoke(
+        model_launch,
+        [
+            "--endpoint",
+            endpoint,
+            "--model-name",
+            "tiny-llama",
+            "--size-in-billions",
+            1,
+            "--model-format",
+            "ggufv2",
+            "--quantization",
+            "Q2_K",
+            "--model-path",
+            "/path/to/model",
+            "--model_path",
+            "/path/to/model",
+        ],
+    )
+    assert result.exit_code > 0
+    with pytest.raises(
+        ValueError, match="Cannot set both for --model-path and --model_path"
+    ):
+        t, e, tb = result.exc_info
+        raise e.with_traceback(tb)
+
+
 def test_cmdline_of_custom_model(setup):
     endpoint, _ = setup
     runner = CliRunner()
xinference/device_utils.py
CHANGED
@@ -13,9 +13,9 @@
 # limitations under the License.
 
 import os
+from typing import Dict, Literal, Union
 
 import torch
-from typing_extensions import Literal, Union
 
 DeviceType = Literal["cuda", "mps", "xpu", "npu", "cpu"]
 DEVICE_TO_ENV_NAME = {
@@ -122,3 +122,45 @@ def gpu_count():
         return torch.npu.device_count()
     else:
         return 0
+
+
+def _get_nvidia_gpu_mem_info(gpu_id: int) -> Dict[str, float]:
+    from pynvml import (
+        nvmlDeviceGetHandleByIndex,
+        nvmlDeviceGetMemoryInfo,
+        nvmlDeviceGetName,
+        nvmlDeviceGetUtilizationRates,
+    )
+
+    handler = nvmlDeviceGetHandleByIndex(gpu_id)
+    gpu_name = nvmlDeviceGetName(handler)
+    mem_info = nvmlDeviceGetMemoryInfo(handler)
+    utilization = nvmlDeviceGetUtilizationRates(handler)
+    return {
+        "name": gpu_name,
+        "total": mem_info.total,
+        "used": mem_info.used,
+        "free": mem_info.free,
+        "util": utilization.gpu,
+    }
+
+
+def get_nvidia_gpu_info() -> Dict:
+    from pynvml import nvmlDeviceGetCount, nvmlInit, nvmlShutdown
+
+    try:
+        nvmlInit()
+        device_count = nvmlDeviceGetCount()
+        res = {}
+        for i in range(device_count):
+            res[f"gpu-{i}"] = _get_nvidia_gpu_mem_info(i)
+        return res
+    except:
+        # TODO: add log here
+        # logger.debug(f"Cannot init nvml. Maybe due to lack of NVIDIA GPUs or incorrect installation of CUDA.")
+        return {}
+    finally:
+        try:
+            nvmlShutdown()
+        except:
+            pass
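A small usage sketch of the new helper, assuming pynvml is installed and at least one NVIDIA GPU is visible (otherwise the function returns an empty dict):

from xinference.device_utils import get_nvidia_gpu_info

info = get_nvidia_gpu_info()
# e.g. {"gpu-0": {"name": ..., "total": ..., "used": ..., "free": ..., "util": ...}}
for gpu, stats in info.items():
    print(gpu, f"{stats['used'] / stats['total']:.0%} memory used, util {stats['util']}%")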
xinference/model/audio/core.py
CHANGED
@@ -25,6 +25,7 @@ from .f5tts import F5TTSModel
 from .f5tts_mlx import F5TTSMLXModel
 from .fish_speech import FishSpeechModel
 from .funasr import FunASRModel
+from .kokoro import KokoroModel
 from .melotts import MeloTTSModel
 from .whisper import WhisperModel
 from .whisper_mlx import WhisperMLXModel
@@ -176,6 +177,7 @@ def create_audio_model_instance(
         F5TTSModel,
         F5TTSMLXModel,
         MeloTTSModel,
+        KokoroModel,
     ],
     AudioModelDescription,
 ]:
@@ -192,6 +194,7 @@ def create_audio_model_instance(
         F5TTSModel,
         F5TTSMLXModel,
         MeloTTSModel,
+        KokoroModel,
     ]
     if model_spec.model_family == "whisper":
         if not model_spec.engine:
@@ -212,6 +215,8 @@ def create_audio_model_instance(
         model = F5TTSMLXModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "MeloTTS":
         model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
+    elif model_spec.model_family == "Kokoro":
+        model = KokoroModel(model_uid, model_path, model_spec, **kwargs)
     else:
         raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
     model_description = AudioModelDescription(
xinference/model/audio/kokoro.py
ADDED
@@ -0,0 +1,122 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional
+
+import numpy as np
+
+from ...device_utils import get_available_device, is_device_available
+
+if TYPE_CHECKING:
+    from .core import AudioModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class KokoroModel:
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "AudioModelFamilyV1",
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._device = device
+        self._model = None
+        self._kwargs = kwargs
+
+    @property
+    def model_ability(self):
+        return self._model_spec.model_ability
+
+    def load(self):
+        if self._device is None:
+            self._device = get_available_device()
+        else:
+            if not is_device_available(self._device):
+                raise ValueError(f"Device {self._device} is not available!")
+
+        import os
+
+        from kokoro import KModel, KPipeline
+
+        config_path = os.path.join(self._model_path, "config.json")
+        model_path = os.path.join(self._model_path, "kokoro-v1_0.pth")
+        # LANG_CODES = dict(
+        #     # pip install misaki[en]
+        #     a='American English',
+        #     b='British English',
+        #
+        #     # espeak-ng
+        #     e='es',
+        #     f='fr-fr',
+        #     h='hi',
+        #     i='it',
+        #     p='pt-br',
+        #
+        #     # pip install misaki[ja]
+        #     j='Japanese',
+        #
+        #     # pip install misaki[zh]
+        #     z='Mandarin Chinese',
+        # )
+        lang_code = self._kwargs.get("lang_code", "a")
+        logger.info("Launching Kokoro model with language code: %s", lang_code)
+        self._model = KPipeline(
+            lang_code=lang_code,
+            model=KModel(config=config_path, model=model_path),
+            device=self._device,
+        )
+
+    def speech(
+        self,
+        input: str,
+        voice: str,
+        response_format: str = "mp3",
+        speed: float = 1.0,
+        stream: bool = False,
+        **kwargs,
+    ):
+        import soundfile
+
+        if stream:
+            raise Exception("Kokoro does not support stream mode.")
+        assert self._model is not None
+        if not voice:
+            voice = "af_alloy"
+            logger.info("Auto select speaker: %s", voice)
+        elif voice.endswith(".pt"):
+            logger.info("Using custom voice pt: %s", voice)
+        else:
+            logger.info("Using voice: %s", voice)
+        logger.info("Speech kwargs: %s", kwargs)
+        generator = self._model(text=input, voice=voice, speed=speed, **kwargs)
+        results = list(generator)
+        audio = np.concatenate([r[2] for r in results])
+        # Save the generated audio
+        with BytesIO() as out:
+            with soundfile.SoundFile(
+                out,
+                "w",
+                24000,
+                1,
+                format=response_format.upper(),
+            ) as f:
+                f.write(audio)
+            return out.getvalue()
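A minimal sketch of driving the new model through the client, assuming the audio model handle exposes a speech() call mirroring the signature above (voice, response_format, speed; Kokoro rejects stream=True). The endpoint and output path are placeholders:

from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # placeholder endpoint
model_uid = client.launch_model(model_name="Kokoro-82M", model_type="audio")
model = client.get_model(model_uid)

# lang_code defaults to "a" (American English); see the commented LANG_CODES table in load()
audio_bytes = model.speech("Hello from Kokoro.", voice="af_alloy", response_format="mp3")
with open("hello.mp3", "wb") as f:
    f.write(audio_bytes)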
xinference/model/audio/model_spec.json
CHANGED
@@ -338,5 +338,13 @@
     "model_ability": "text-to-audio",
     "multilingual": false,
     "language": "KR"
+  },
+  {
+    "model_name": "Kokoro-82M",
+    "model_family": "Kokoro",
+    "model_id": "hexgrad/Kokoro-82M",
+    "model_revision": "7884269d6fd3f9beabc271b6f1308e5699281fa9",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]
xinference/model/audio/model_spec_modelscope.json
CHANGED
@@ -100,5 +100,14 @@
     "model_revision": "master",
     "model_ability": "text-to-audio",
     "multilingual": true
+  },
+  {
+    "model_name": "Kokoro-82M",
+    "model_family": "Kokoro",
+    "model_hub": "modelscope",
+    "model_id": "AI-ModelScope/Kokoro-82M",
+    "model_revision": "master",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]
xinference/model/image/stable_diffusion/core.py
CHANGED
@@ -22,7 +22,6 @@ import logging
 import os
 import re
 import sys
-import warnings
 from glob import glob
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
@@ -412,12 +411,22 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         else:
             raise ValueError(f"Unknown sampler: {sampler_name}")
 
-
+    def _need_set_scheduler(self, scheduler: Any) -> bool:
+        """Determine whether it is necessary to set up a scheduler"""
+        if self._model_spec is None:
+            return False
+        if scheduler is None:
+            return False
+        if "FLUX" in self._model_spec.model_name:
+            logger.warning("FLUX model, skipping scheduler setup")
+            return False
+        return True
+
     @contextlib.contextmanager
-    def _reset_when_done(model: Any, sampler_name: str):
-        assert model is not None
+    def _reset_when_done(self, model: Any, sampler_name: str):
         scheduler = DiffusionModel._get_scheduler(model, sampler_name)
-        if scheduler:
+        if self._need_set_scheduler(scheduler):
+            logger.debug("Use scheduler %s", scheduler)
             default_scheduler = model.scheduler
             model.scheduler = scheduler
             try:
@@ -517,7 +526,7 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         for key in list(kwargs):
             allow_key = model_accept_param(key, model)
             if not allow_key:
-
+                logger.warning(f"{type(model)} cannot accept `{key}`, will ignore it")
                 kwargs.pop(key)
 
     def text_to_image(
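The _reset_when_done change keeps the existing swap-and-restore behaviour but moves the decision into the _need_set_scheduler gate shown above. The underlying pattern, sketched in isolation (the names here are illustrative and not taken from the diff):

import contextlib

@contextlib.contextmanager
def swap_scheduler(pipeline, scheduler):
    # Temporarily install `scheduler` on `pipeline`, restoring the original afterwards.
    if scheduler is None:
        yield
        return
    original = pipeline.scheduler
    pipeline.scheduler = scheduler
    try:
        yield
    finally:
        pipeline.scheduler = original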
xinference/model/llm/llama_cpp/core.py
CHANGED
@@ -28,7 +28,7 @@ from ....types import (
 )
 from ..core import LLM
 from ..llm_family import LLMFamilyV1, LLMSpecV1
-from ..utils import QWEN_TOOL_CALL_FAMILY, ChatModelMixin
+from ..utils import DEEPSEEK_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelMixin
 
 logger = logging.getLogger(__name__)
 
@@ -123,18 +123,22 @@ class LlamaCppModel(LLM):
 
             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
 
-
-
-        os.path.
-
-
-
-
+        if os.path.isfile(self.model_path):
+            # mostly passed from --model_path
+            model_path = os.path.realpath(self.model_path)
+        else:
+            # handle legacy cache.
+            model_path = os.path.realpath(
+                os.path.join(
+                    self.model_path,
+                    self.model_spec.model_file_name_template.format(
+                        quantization=self.quantization
+                    ),
+                )
             )
-
-
-
-            model_path = legacy_model_file_path
+            legacy_model_file_path = os.path.join(self.model_path, "model.bin")
+            if os.path.exists(legacy_model_file_path):
+                model_path = legacy_model_file_path
 
         try:
             self._llm = Llama(
@@ -272,8 +276,11 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
         model_family = self.model_family.model_family or self.model_family.model_name
         tools = generate_config.pop("tools", []) if generate_config else None
         full_context_kwargs = {}
-        if tools
-
+        if tools:
+            if model_family in QWEN_TOOL_CALL_FAMILY:
+                full_context_kwargs["tools"] = tools
+            elif model_family in DEEPSEEK_TOOL_CALL_FAMILY:
+                self._tools_to_messages_for_deepseek(messages, tools)
         assert self.model_family.chat_template is not None
         full_prompt = self.get_full_context(
             messages, self.model_family.chat_template, **full_context_kwargs
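From a caller's point of view nothing changes: tools are still attached to the request, and the branch above decides whether they are handed to the chat template (Qwen families) or folded into the messages for DeepSeek. A hedged sketch against xinference's OpenAI-compatible endpoint; the tool definition, endpoint and model uid below are placeholders:

from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:9997/v1", api_key="not-needed")
response = client.chat.completions.create(
    model="my-model-uid",  # placeholder: uid returned by launch_model
    messages=[{"role": "user", "content": "What is the weather in Paris?"}],
    tools=[{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Look up the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }],
)
print(response.choices[0].message.tool_calls)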