xinference 0.14.2__py3-none-any.whl → 0.14.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/core/chat_interface.py +1 -1
- xinference/core/image_interface.py +9 -0
- xinference/core/model.py +4 -1
- xinference/core/worker.py +60 -44
- xinference/model/audio/chattts.py +25 -9
- xinference/model/audio/core.py +8 -2
- xinference/model/audio/cosyvoice.py +4 -3
- xinference/model/audio/custom.py +4 -5
- xinference/model/audio/fish_speech.py +228 -0
- xinference/model/audio/model_spec.json +8 -0
- xinference/model/embedding/core.py +25 -1
- xinference/model/embedding/custom.py +4 -5
- xinference/model/flexible/core.py +5 -1
- xinference/model/image/custom.py +4 -5
- xinference/model/image/model_spec.json +2 -1
- xinference/model/image/model_spec_modelscope.json +2 -1
- xinference/model/image/stable_diffusion/core.py +66 -3
- xinference/model/llm/__init__.py +6 -0
- xinference/model/llm/llm_family.json +54 -9
- xinference/model/llm/llm_family.py +7 -6
- xinference/model/llm/llm_family_modelscope.json +56 -10
- xinference/model/llm/lmdeploy/__init__.py +0 -0
- xinference/model/llm/lmdeploy/core.py +557 -0
- xinference/model/llm/sglang/core.py +7 -1
- xinference/model/llm/transformers/cogvlm2.py +4 -45
- xinference/model/llm/transformers/cogvlm2_video.py +524 -0
- xinference/model/llm/transformers/core.py +3 -0
- xinference/model/llm/transformers/glm4v.py +2 -23
- xinference/model/llm/transformers/intern_vl.py +94 -11
- xinference/model/llm/transformers/minicpmv25.py +2 -23
- xinference/model/llm/transformers/minicpmv26.py +2 -22
- xinference/model/llm/transformers/yi_vl.py +2 -24
- xinference/model/llm/utils.py +13 -1
- xinference/model/llm/vllm/core.py +1 -34
- xinference/model/rerank/custom.py +4 -5
- xinference/model/utils.py +41 -1
- xinference/model/video/core.py +3 -1
- xinference/model/video/diffusers.py +41 -38
- xinference/model/video/model_spec.json +24 -1
- xinference/model/video/model_spec_modelscope.json +25 -1
- xinference/thirdparty/fish_speech/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/callbacks/__init__.py +3 -0
- xinference/thirdparty/fish_speech/fish_speech/callbacks/grad_norm.py +113 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +2 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/concat_repeat.py +53 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_pb2.py +33 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_stream.py +36 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/semantic.py +496 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/vqgan.py +147 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/__init__.py +3 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/core.py +40 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +122 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +122 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +123 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +133 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +122 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/scan.py +122 -0
- xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lit_module.py +202 -0
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +779 -0
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lora.py +92 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +3 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +442 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +44 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +625 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +139 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +115 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +225 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/utils.py +94 -0
- xinference/thirdparty/fish_speech/fish_speech/scheduler.py +40 -0
- xinference/thirdparty/fish_speech/fish_speech/text/__init__.py +4 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_class.py +172 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_constant.py +30 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_util.py +342 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/cardinal.py +32 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/date.py +75 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/digit.py +32 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/fraction.py +35 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/money.py +43 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/percentage.py +33 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/telephone.py +51 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +177 -0
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +69 -0
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +130 -0
- xinference/thirdparty/fish_speech/fish_speech/train.py +139 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +23 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/braceexpand.py +217 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/context.py +13 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/file.py +16 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/instantiators.py +50 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/logger.py +55 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/logging_utils.py +48 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/rich_utils.py +100 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/spectrogram.py +122 -0
- xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +114 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +120 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1237 -0
- xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/api.py +495 -0
- xinference/thirdparty/fish_speech/tools/auto_rerank.py +159 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +55 -0
- xinference/thirdparty/fish_speech/tools/extract_model.py +21 -0
- xinference/thirdparty/fish_speech/tools/file.py +108 -0
- xinference/thirdparty/fish_speech/tools/gen_ref.py +36 -0
- xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +169 -0
- xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +171 -0
- xinference/thirdparty/fish_speech/tools/llama/generate.py +698 -0
- xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +95 -0
- xinference/thirdparty/fish_speech/tools/llama/quantize.py +497 -0
- xinference/thirdparty/fish_speech/tools/llama/rebuild_tokenizer.py +57 -0
- xinference/thirdparty/fish_speech/tools/merge_asr_files.py +55 -0
- xinference/thirdparty/fish_speech/tools/post_api.py +164 -0
- xinference/thirdparty/fish_speech/tools/sensevoice/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/sensevoice/auto_model.py +573 -0
- xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +332 -0
- xinference/thirdparty/fish_speech/tools/sensevoice/vad_utils.py +61 -0
- xinference/thirdparty/fish_speech/tools/smart_pad.py +47 -0
- xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/vqgan/create_train_split.py +83 -0
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +227 -0
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +120 -0
- xinference/thirdparty/fish_speech/tools/webui.py +619 -0
- xinference/thirdparty/fish_speech/tools/whisper_asr.py +176 -0
- xinference/thirdparty/matcha/__init__.py +0 -0
- xinference/thirdparty/matcha/app.py +357 -0
- xinference/thirdparty/matcha/cli.py +419 -0
- xinference/thirdparty/matcha/data/__init__.py +0 -0
- xinference/thirdparty/matcha/data/components/__init__.py +0 -0
- xinference/thirdparty/matcha/data/text_mel_datamodule.py +274 -0
- xinference/thirdparty/matcha/hifigan/__init__.py +0 -0
- xinference/thirdparty/matcha/hifigan/config.py +28 -0
- xinference/thirdparty/matcha/hifigan/denoiser.py +64 -0
- xinference/thirdparty/matcha/hifigan/env.py +17 -0
- xinference/thirdparty/matcha/hifigan/meldataset.py +217 -0
- xinference/thirdparty/matcha/hifigan/models.py +368 -0
- xinference/thirdparty/matcha/hifigan/xutils.py +60 -0
- xinference/thirdparty/matcha/models/__init__.py +0 -0
- xinference/thirdparty/matcha/models/baselightningmodule.py +210 -0
- xinference/thirdparty/matcha/models/components/__init__.py +0 -0
- xinference/thirdparty/matcha/models/components/decoder.py +443 -0
- xinference/thirdparty/matcha/models/components/flow_matching.py +132 -0
- xinference/thirdparty/matcha/models/components/text_encoder.py +410 -0
- xinference/thirdparty/matcha/models/components/transformer.py +316 -0
- xinference/thirdparty/matcha/models/matcha_tts.py +244 -0
- xinference/thirdparty/matcha/onnx/__init__.py +0 -0
- xinference/thirdparty/matcha/onnx/export.py +181 -0
- xinference/thirdparty/matcha/onnx/infer.py +168 -0
- xinference/thirdparty/matcha/text/__init__.py +53 -0
- xinference/thirdparty/matcha/text/cleaners.py +121 -0
- xinference/thirdparty/matcha/text/numbers.py +71 -0
- xinference/thirdparty/matcha/text/symbols.py +17 -0
- xinference/thirdparty/matcha/train.py +122 -0
- xinference/thirdparty/matcha/utils/__init__.py +5 -0
- xinference/thirdparty/matcha/utils/audio.py +82 -0
- xinference/thirdparty/matcha/utils/generate_data_statistics.py +112 -0
- xinference/thirdparty/matcha/utils/get_durations_from_trained_model.py +195 -0
- xinference/thirdparty/matcha/utils/instantiators.py +56 -0
- xinference/thirdparty/matcha/utils/logging_utils.py +53 -0
- xinference/thirdparty/matcha/utils/model.py +90 -0
- xinference/thirdparty/matcha/utils/monotonic_align/__init__.py +22 -0
- xinference/thirdparty/matcha/utils/monotonic_align/core.pyx +47 -0
- xinference/thirdparty/matcha/utils/monotonic_align/setup.py +7 -0
- xinference/thirdparty/matcha/utils/pylogger.py +21 -0
- xinference/thirdparty/matcha/utils/rich_utils.py +101 -0
- xinference/thirdparty/matcha/utils/utils.py +259 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.ffc26121.js → main.661c7b0a.js} +3 -3
- xinference/web/ui/build/static/js/main.661c7b0a.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +1 -0
- {xinference-0.14.2.dist-info → xinference-0.14.4.dist-info}/METADATA +31 -11
- {xinference-0.14.2.dist-info → xinference-0.14.4.dist-info}/RECORD +189 -49
- xinference/web/ui/build/static/js/main.ffc26121.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2f40209b32e7e46a2eab6b8c8a355eb42c3caa8bc3228dd929f32fd2b3940294.json +0 -1
- /xinference/web/ui/build/static/js/{main.ffc26121.js.LICENSE.txt → main.661c7b0a.js.LICENSE.txt} +0 -0
- {xinference-0.14.2.dist-info → xinference-0.14.4.dist-info}/LICENSE +0 -0
- {xinference-0.14.2.dist-info → xinference-0.14.4.dist-info}/WHEEL +0 -0
- {xinference-0.14.2.dist-info → xinference-0.14.4.dist-info}/entry_points.txt +0 -0
- {xinference-0.14.2.dist-info → xinference-0.14.4.dist-info}/top_level.txt +0 -0
xinference/model/audio/model_spec.json
CHANGED
@@ -146,5 +146,13 @@
     "model_revision": "fb5f676733139f35670bed9b59a77d476b1aa898",
     "ability": "text-to-audio",
     "multilingual": true
+  },
+  {
+    "model_name": "FishSpeech-1.2-SFT",
+    "model_family": "FishAudio",
+    "model_id": "fishaudio/fish-speech-1.2-sft",
+    "model_revision": "180288e21ec5c50cfc564023a22f789e4b88a0e0",
+    "ability": "text-to-audio",
+    "multilingual": true
   }
 ]
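The new entry registers FishSpeech-1.2-SFT as a builtin text-to-audio model. A minimal sketch of exercising it through the Python client (the endpoint address and the output handling are assumptions for illustration, not part of this diff):

# Assumes a running Xinference endpoint with the FishSpeech-1.2-SFT spec above available.
from xinference.client import Client

client = Client("http://127.0.0.1:9997")
uid = client.launch_model(model_name="FishSpeech-1.2-SFT", model_type="audio")
audio_bytes = client.get_model(uid).speech("Hello from Xinference!")  # encoded audio bytes
with open("hello.mp3", "wb") as f:
    f.write(audio_bytes)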
xinference/model/embedding/core.py
CHANGED
@@ -124,6 +124,7 @@ class EmbeddingModel:
         model_path: str,
         model_spec: EmbeddingModelSpec,
         device: Optional[str] = None,
+        **kwargs,
     ):
         self._model_uid = model_uid
         self._model_path = model_path
@@ -131,6 +132,7 @@ class EmbeddingModel:
         self._model = None
         self._counter = 0
         self._model_spec = model_spec
+        self._kwargs = kwargs
 
     def load(self):
         try:
@@ -154,10 +156,32 @@ class EmbeddingModel:
                 "gte" in self._model_spec.model_name.lower()
                 and "qwen2" in self._model_spec.model_name.lower()
             ):
+                import torch
+
+                torch_dtype_str = self._kwargs.get("torch_dtype")
+                if torch_dtype_str is not None:
+                    try:
+                        torch_dtype = getattr(torch, torch_dtype_str)
+                        if torch_dtype not in [
+                            torch.float16,
+                            torch.float32,
+                            torch.bfloat16,
+                        ]:
+                            logger.warning(
+                                f"Load embedding model with unsupported torch dtype : {torch_dtype_str}. Using default torch dtype: fp32."
+                            )
+                            torch_dtype = torch.float32
+                    except AttributeError:
+                        logger.warning(
+                            f"Load embedding model with unknown torch dtype '{torch_dtype_str}'. Using default torch dtype: fp32."
+                        )
+                        torch_dtype = torch.float32
+                else:
+                    torch_dtype = "auto"
                 self._model = XSentenceTransformer(
                     self._model_path,
                     device=self._device,
-                    model_kwargs={"device_map": "auto"},
+                    model_kwargs={"device_map": "auto", "torch_dtype": torch_dtype},
                 )
             else:
                 self._model = SentenceTransformer(self._model_path, device=self._device)
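With **kwargs now threaded into EmbeddingModel, a dtype hint can be passed straight through at launch time. A minimal sketch via the Python client (the endpoint and the gte-Qwen2-7B-instruct model name are assumptions for illustration):

# Assumes a running Xinference endpoint and a GTE/Qwen2 embedding spec that
# takes the XSentenceTransformer code path shown above.
from xinference.client import Client

client = Client("http://127.0.0.1:9997")
uid = client.launch_model(
    model_name="gte-Qwen2-7B-instruct",
    model_type="embedding",
    torch_dtype="float16",  # anything other than float16/float32/bfloat16 falls back to fp32
)
result = client.get_model(uid).create_embedding("hello world")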
xinference/model/embedding/custom.py
CHANGED
@@ -47,6 +47,10 @@ def register_embedding(model_spec: CustomEmbeddingModelSpec, persist: bool):
     if not is_valid_model_name(model_spec.model_name):
         raise ValueError(f"Invalid model name {model_spec.model_name}.")
 
+    model_uri = model_spec.model_uri
+    if model_uri and not is_valid_model_uri(model_uri):
+        raise ValueError(f"Invalid model URI {model_uri}.")
+
     with UD_EMBEDDING_LOCK:
         for model_name in (
             list(BUILTIN_EMBEDDING_MODELS.keys())
@@ -61,11 +65,6 @@ def register_embedding(model_spec: CustomEmbeddingModelSpec, persist: bool):
         UD_EMBEDDINGS.append(model_spec)
 
     if persist:
-        # We only validate model URL when persist is True.
-        model_uri = model_spec.model_uri
-        if model_uri and not is_valid_model_uri(model_uri):
-            raise ValueError(f"Invalid model URI {model_uri}.")
-
         persist_path = os.path.join(
             XINFERENCE_MODEL_DIR, "embedding", f"{model_spec.model_name}.json"
         )
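The URI check now runs for every registration, not only when persist is True; the same move appears below for flexible, image and LLM models. A quick sketch of the new behaviour through the REST client (the spec JSON is a hypothetical custom embedding model, not part of this diff):

import json

from xinference.client import Client

spec = {
    "model_name": "my-embedder",
    "dimensions": 768,
    "max_tokens": 512,
    "language": ["en"],
    "model_uri": "/nonexistent/path/to/model",  # path does not exist, so the URI is invalid
}
client = Client("http://127.0.0.1:9997")
# With this change the call fails with "Invalid model URI ..." even though persist=False.
client.register_model(model_type="embedding", model=json.dumps(spec), persist=False)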
xinference/model/flexible/core.py
CHANGED
@@ -99,11 +99,15 @@ def get_flexible_model_descriptions():
 
 
 def register_flexible_model(model_spec: FlexibleModelSpec, persist: bool):
-    from ..utils import is_valid_model_name
+    from ..utils import is_valid_model_name, is_valid_model_uri
 
     if not is_valid_model_name(model_spec.model_name):
         raise ValueError(f"Invalid model name {model_spec.model_name}.")
 
+    model_uri = model_spec.model_uri
+    if model_uri and not is_valid_model_uri(model_uri):
+        raise ValueError(f"Invalid model URI {model_uri}.")
+
     if model_spec.launcher_args:
         try:
             model_spec.parser_args()
xinference/model/image/custom.py
CHANGED
@@ -47,6 +47,10 @@ def register_image(model_spec: CustomImageModelFamilyV1, persist: bool):
     if not is_valid_model_name(model_spec.model_name):
         raise ValueError(f"Invalid model name {model_spec.model_name}.")
 
+    model_uri = model_spec.model_uri
+    if model_uri and not is_valid_model_uri(model_uri):
+        raise ValueError(f"Invalid model URI {model_uri}")
+
     with UD_IMAGE_LOCK:
         for model_name in (
             list(BUILTIN_IMAGE_MODELS.keys())
@@ -60,11 +64,6 @@ def register_image(model_spec: CustomImageModelFamilyV1, persist: bool):
         UD_IMAGES.append(model_spec)
 
     if persist:
-        # We only validate model URL when persist is True.
-        model_uri = model_spec.model_uri
-        if model_uri and not is_valid_model_uri(model_uri):
-            raise ValueError(f"Invalid model URI {model_uri}")
-
         persist_path = os.path.join(
             XINFERENCE_MODEL_DIR, "image", f"{model_spec.model_name}.json"
         )
xinference/model/image/stable_diffusion/core.py
CHANGED
@@ -24,6 +24,9 @@ from functools import partial
 from io import BytesIO
 from typing import Dict, List, Optional, Union
 
+import PIL.Image
+from PIL import ImageOps
+
 from ....constants import XINFERENCE_IMAGE_DIR
 from ....device_utils import move_model_to_available_device
 from ....types import Image, ImageList, LoRA
@@ -46,8 +49,13 @@ class DiffusionModel:
         self._model_uid = model_uid
         self._model_path = model_path
         self._device = device
+        # when a model has text2image ability,
+        # it will be loaded as AutoPipelineForText2Image
+        # for image2image and inpainting,
+        # we convert to the corresponding model
         self._model = None
         self._i2i_model = None  # image to image model
+        self._inpainting_model = None  # inpainting model
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
@@ -152,6 +160,10 @@ class DiffusionModel:
         model=None,
         **kwargs,
     ):
+        import gc
+
+        from ....device_utils import empty_cache
+
         logger.debug(
             "stable diffusion args: %s",
             kwargs,
@@ -159,6 +171,11 @@ class DiffusionModel:
         model = model if model is not None else self._model
         assert callable(model)
         images = model(**kwargs).images
+
+        # clean cache
+        gc.collect()
+        empty_cache()
+
         if response_format == "url":
             os.makedirs(XINFERENCE_IMAGE_DIR, exist_ok=True)
             image_list = []
@@ -209,9 +226,17 @@ class DiffusionModel:
             **kwargs,
         )
 
+    @staticmethod
+    def pad_to_multiple(image, multiple=8):
+        x, y = image.size
+        padding_x = (multiple - x % multiple) % multiple
+        padding_y = (multiple - y % multiple) % multiple
+        padding = (0, 0, padding_x, padding_y)
+        return ImageOps.expand(image, padding)
+
     def image_to_image(
         self,
-        image:
+        image: PIL.Image,
         prompt: Optional[Union[str, List[str]]] = None,
         negative_prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
@@ -232,10 +257,19 @@ class DiffusionModel:
                 self._i2i_model = model = AutoPipelineForImage2Image.from_pipe(
                     self._model
                 )
+
+        if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
+            # Model like SD3 image to image requires image's height and width is times of 16
+            # padding the image if specified
+            image = self.pad_to_multiple(image, multiple=int(padding_image_to_multiple))
+
         if size:
             width, height = map(int, re.split(r"[^\d]+", size))
+            if padding_image_to_multiple:
+                width, height = image.size
             kwargs["width"] = width
             kwargs["height"] = height
+
         self._filter_kwargs(kwargs)
         return self._call_model(
             image=image,
@@ -249,8 +283,8 @@ class DiffusionModel:
 
     def inpainting(
         self,
-        image:
-        mask_image:
+        image: PIL.Image,
+        mask_image: PIL.Image,
         prompt: Optional[Union[str, List[str]]] = None,
         negative_prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
@@ -258,7 +292,35 @@ class DiffusionModel:
         response_format: str = "url",
         **kwargs,
     ):
+        if "inpainting" not in self._abilities:
+            raise RuntimeError(f"{self._model_uid} does not support inpainting")
+
+        if (
+            "text2image" in self._abilities or "image2image" in self._abilities
+        ) and self._model is not None:
+            from diffusers import AutoPipelineForInpainting
+
+            if self._inpainting_model is not None:
+                model = self._inpainting_model
+            else:
+                model = self._inpainting_model = AutoPipelineForInpainting.from_pipe(
+                    self._model
+                )
+        else:
+            model = self._model
+
         width, height = map(int, re.split(r"[^\d]+", size))
+
+        if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
+            # Model like SD3 inpainting requires image's height and width is times of 16
+            # padding the image if specified
+            image = self.pad_to_multiple(image, multiple=int(padding_image_to_multiple))
+            mask_image = self.pad_to_multiple(
+                mask_image, multiple=int(padding_image_to_multiple)
+            )
+            # calculate actual image size after padding
+            width, height = image.size
+
         return self._call_model(
             image=image,
             mask_image=mask_image,
@@ -268,5 +330,6 @@ class DiffusionModel:
             width=width,
             num_images_per_prompt=n,
             response_format=response_format,
+            model=model,
             **kwargs,
         )
xinference/model/llm/__init__.py
CHANGED
@@ -34,6 +34,7 @@ from .llm_family import (
     BUILTIN_MODELSCOPE_LLM_FAMILIES,
     LLAMA_CLASSES,
     LLM_ENGINES,
+    LMDEPLOY_CLASSES,
     MLX_CLASSES,
     SGLANG_CLASSES,
     SUPPORTED_ENGINES,
@@ -113,10 +114,12 @@ def generate_engine_config_by_model_family(model_family):
 
 def _install():
     from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel
+    from .lmdeploy.core import LMDeployChatModel, LMDeployModel
     from .mlx.core import MLXChatModel, MLXModel
     from .sglang.core import SGLANGChatModel, SGLANGModel
     from .transformers.chatglm import ChatglmPytorchChatModel
     from .transformers.cogvlm2 import CogVLM2Model
+    from .transformers.cogvlm2_video import CogVLM2VideoModel
     from .transformers.core import PytorchChatModel, PytorchModel
     from .transformers.deepseek_vl import DeepSeekVLChatModel
     from .transformers.glm4v import Glm4VModel
@@ -147,6 +150,7 @@ def _install():
     SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
     VLLM_CLASSES.extend([VLLMModel, VLLMChatModel, VLLMVisionModel])
     MLX_CLASSES.extend([MLXModel, MLXChatModel])
+    LMDEPLOY_CLASSES.extend([LMDeployModel, LMDeployChatModel])
     TRANSFORMERS_CLASSES.extend(
         [
             ChatglmPytorchChatModel,
@@ -160,6 +164,7 @@ def _install():
             InternVLChatModel,
             PytorchModel,
             CogVLM2Model,
+            CogVLM2VideoModel,
             MiniCPMV25Model,
             MiniCPMV26Model,
             Glm4VModel,
@@ -174,6 +179,7 @@ def _install():
     SUPPORTED_ENGINES["Transformers"] = TRANSFORMERS_CLASSES
     SUPPORTED_ENGINES["llama.cpp"] = LLAMA_CLASSES
     SUPPORTED_ENGINES["MLX"] = MLX_CLASSES
+    SUPPORTED_ENGINES["LMDEPLOY"] = LMDEPLOY_CLASSES
 
     json_path = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "llm_family.json"
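LMDEPLOY is now a selectable engine alongside Transformers, vLLM, SGLang, llama.cpp and MLX. A minimal sketch of picking it at launch time (the endpoint, the internvl2 family name and its AWQ spec are assumptions for illustration; the chosen family must list LMDEPLOY among its engines):

# Assumes a running Xinference endpoint with lmdeploy installed on the worker.
from xinference.client import Client

client = Client("http://127.0.0.1:9997")
uid = client.launch_model(
    model_name="internvl2",
    model_engine="LMDEPLOY",
    model_format="awq",
    model_size_in_billions=8,
    quantization="Int4",
)
model = client.get_model(uid)  # chat requests then go through the LMDeploy backend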
xinference/model/llm/llm_family.json
CHANGED
@@ -7189,15 +7189,6 @@
         "model_id": "OpenGVLab/InternVL2-4B",
         "model_revision": "b50544dafada6c41e80bfde2f57cc9b0140fc21c"
       },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 4,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_id": "OpenGVLab/InternVL2-8B-AWQ",
-        "model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
-      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 8,
@@ -7209,6 +7200,15 @@
         "model_id": "OpenGVLab/InternVL2-8B",
         "model_revision": "3bfd3664dea4f3da628785f5125d30f889701253"
       },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "OpenGVLab/InternVL2-8B-AWQ",
+        "model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 26,
@@ -7342,6 +7342,51 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "cogvlm2-video-llama3-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "THUDM/cogvlm2-video-llama3-chat",
+        "model_revision": "f375ead7d8202ebe2c3d09f1068abdddeb2929fa"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "LLAMA3",
+      "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n\n",
+      "inter_message_sep": "<|eot_id|>",
+      "stop_token_ids": [
+        128001,
+        128009
+      ],
+      "stop": [
+        "<|end_of_text|>",
+        "<|eot_id|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 8192,
xinference/model/llm/llm_family.py
CHANGED
@@ -271,6 +271,8 @@ VLLM_CLASSES: List[Type[LLM]] = []
 
 MLX_CLASSES: List[Type[LLM]] = []
 
+LMDEPLOY_CLASSES: List[Type[LLM]] = []
+
 LLM_ENGINES: Dict[str, Dict[str, List[Dict[str, Any]]]] = {}
 SUPPORTED_ENGINES: Dict[str, List[Type[LLM]]] = {}
 
@@ -1002,6 +1004,11 @@ def register_llm(llm_family: LLMFamilyV1, persist: bool):
     if not is_valid_model_name(llm_family.model_name):
         raise ValueError(f"Invalid model name {llm_family.model_name}.")
 
+    for spec in llm_family.model_specs:
+        model_uri = spec.model_uri
+        if model_uri and not is_valid_model_uri(model_uri):
+            raise ValueError(f"Invalid model URI {model_uri}.")
+
     with UD_LLM_FAMILIES_LOCK:
         for family in BUILTIN_LLM_FAMILIES + UD_LLM_FAMILIES:
             if llm_family.model_name == family.model_name:
@@ -1013,12 +1020,6 @@ def register_llm(llm_family: LLMFamilyV1, persist: bool):
     generate_engine_config_by_model_family(llm_family)
 
     if persist:
-        # We only validate model URL when persist is True.
-        for spec in llm_family.model_specs:
-            model_uri = spec.model_uri
-            if model_uri and not is_valid_model_uri(model_uri):
-                raise ValueError(f"Invalid model URI {model_uri}.")
-
         persist_path = os.path.join(
             XINFERENCE_MODEL_DIR, "llm", f"{llm_family.model_name}.json"
         )
xinference/model/llm/llm_family_modelscope.json
CHANGED
@@ -4778,10 +4778,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "
+        "model_format": "awq",
         "model_size_in_billions": 2,
         "quantizations": [
-          "
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-2B-AWQ",
@@ -4812,10 +4812,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "
+        "model_format": "awq",
         "model_size_in_billions": 8,
         "quantizations": [
-          "
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-8B-AWQ",
@@ -4834,10 +4834,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "
+        "model_format": "awq",
         "model_size_in_billions": 26,
         "quantizations": [
-          "
+          "Int4"
        ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-26B-AWQ",
@@ -4856,10 +4856,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "
+        "model_format": "awq",
         "model_size_in_billions": 40,
         "quantizations": [
-          "
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-40B-AWQ",
@@ -4878,10 +4878,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "
+        "model_format": "awq",
         "model_size_in_billions": 76,
         "quantizations": [
-          "
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
@@ -4962,6 +4962,52 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "cogvlm2-video-llama3-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/cogvlm2-video-llama3-chat",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "LLAMA3",
+      "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n\n",
+      "inter_message_sep": "<|eot_id|>",
+      "stop_token_ids": [
+        128001,
+        128009
+      ],
+      "stop": [
+        "<|end_of_text|>",
+        "<|eot_id|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 8192,