xinference 0.12.3__py3-none-any.whl → 0.13.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +56 -8
- xinference/client/restful/restful_client.py +49 -4
- xinference/core/model.py +36 -4
- xinference/core/scheduler.py +2 -0
- xinference/core/supervisor.py +132 -15
- xinference/core/worker.py +239 -53
- xinference/deploy/cmdline.py +5 -0
- xinference/deploy/utils.py +33 -2
- xinference/model/audio/chattts.py +6 -6
- xinference/model/audio/core.py +23 -15
- xinference/model/core.py +12 -3
- xinference/model/embedding/core.py +25 -16
- xinference/model/flexible/__init__.py +40 -0
- xinference/model/flexible/core.py +228 -0
- xinference/model/flexible/launchers/__init__.py +15 -0
- xinference/model/flexible/launchers/transformers_launcher.py +63 -0
- xinference/model/flexible/utils.py +33 -0
- xinference/model/image/core.py +18 -14
- xinference/model/image/custom.py +1 -1
- xinference/model/llm/__init__.py +5 -2
- xinference/model/llm/core.py +3 -2
- xinference/model/llm/ggml/llamacpp.py +1 -10
- xinference/model/llm/llm_family.json +292 -36
- xinference/model/llm/llm_family.py +102 -53
- xinference/model/llm/llm_family_modelscope.json +247 -27
- xinference/model/llm/mlx/__init__.py +13 -0
- xinference/model/llm/mlx/core.py +408 -0
- xinference/model/llm/pytorch/chatglm.py +2 -9
- xinference/model/llm/pytorch/cogvlm2.py +206 -21
- xinference/model/llm/pytorch/core.py +213 -120
- xinference/model/llm/pytorch/glm4v.py +171 -15
- xinference/model/llm/pytorch/qwen_vl.py +168 -7
- xinference/model/llm/pytorch/utils.py +53 -62
- xinference/model/llm/utils.py +28 -7
- xinference/model/rerank/core.py +29 -25
- xinference/thirdparty/deepseek_vl/serve/__init__.py +13 -0
- xinference/thirdparty/deepseek_vl/serve/app_deepseek.py +510 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/__init__.py +13 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/gradio_utils.py +94 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/overwrites.py +81 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/presets.py +96 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/utils.py +229 -0
- xinference/thirdparty/deepseek_vl/serve/inference.py +170 -0
- xinference/types.py +0 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.95c1d652.js +3 -0
- xinference/web/ui/build/static/js/main.95c1d652.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +1 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/METADATA +10 -11
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/RECORD +71 -69
- xinference/model/llm/ggml/chatglm.py +0 -457
- xinference/thirdparty/ChatTTS/__init__.py +0 -1
- xinference/thirdparty/ChatTTS/core.py +0 -200
- xinference/thirdparty/ChatTTS/experimental/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/experimental/llm.py +0 -40
- xinference/thirdparty/ChatTTS/infer/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/infer/api.py +0 -125
- xinference/thirdparty/ChatTTS/model/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/model/dvae.py +0 -155
- xinference/thirdparty/ChatTTS/model/gpt.py +0 -265
- xinference/thirdparty/ChatTTS/utils/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/utils/gpu_utils.py +0 -23
- xinference/thirdparty/ChatTTS/utils/infer_utils.py +0 -141
- xinference/thirdparty/ChatTTS/utils/io_utils.py +0 -14
- xinference/web/ui/build/static/js/main.77dd47c3.js +0 -3
- xinference/web/ui/build/static/js/main.77dd47c3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0cd591866aa345566e0b63fb51ff2043e163a770af6fdc2f3bad395d046353e2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/37c1476717199863bbba1530e3513a9368f8f73001b75b4a85c2075956308027.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3da7d55e87882a4af923e187b1351160e34ca102f589086439c15131a227fb6e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/46edc1fe657dfedb2e673148332bb442c6eb98f09f2592c389209e376510afa5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/72bcecc71c5267250edeb89608859d449b586f13ff9923a5e70e7172976ec403.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/82db357f3fd5b32215d747ee593f69ff06c95ad6cde37f71a96c8290aaab64c0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/935efd2867664c58230378fdf2ff1ea85e58d853b7214014e20dfbca8dab7b05.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bc6da27195ec4607bb472bf61f97c928ad4966fa64e4c2247661bedb7400abba.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c2abe75f04ad82fba68f35ed9cbe2e287762c876684fddccccfa73f739489b65.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f118f99c22b713c678c1209c4e1dd43fe86e3f6e801a4c0c35d3bbf41fd05fe6.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f51bf63ddaa7afd125ef2254a105789333eecc1c94fdf5157a9b88ef7ad0a5bd.json +0 -1
- /xinference/web/ui/build/static/js/{main.77dd47c3.js.LICENSE.txt → main.95c1d652.js.LICENSE.txt} +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/LICENSE +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/WHEEL +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/top_level.txt +0 -0
|
@@ -16,7 +16,7 @@ import gc
|
|
|
16
16
|
import logging
|
|
17
17
|
import os
|
|
18
18
|
from collections import defaultdict
|
|
19
|
-
from typing import Dict, List, Optional, Tuple, Union, no_type_check
|
|
19
|
+
from typing import Dict, List, Literal, Optional, Tuple, Union, no_type_check
|
|
20
20
|
|
|
21
21
|
import numpy as np
|
|
22
22
|
|
|
@@ -305,7 +305,10 @@ class EmbeddingModel:
|
|
|
305
305
|
)
|
|
306
306
|
|
|
307
307
|
|
|
308
|
-
def match_embedding(
|
|
308
|
+
def match_embedding(
|
|
309
|
+
model_name: str,
|
|
310
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
311
|
+
) -> EmbeddingModelSpec:
|
|
309
312
|
from ..utils import download_from_modelscope
|
|
310
313
|
from . import BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS
|
|
311
314
|
from .custom import get_user_defined_embeddings
|
|
@@ -315,29 +318,35 @@ def match_embedding(model_name: str) -> EmbeddingModelSpec:
|
|
|
315
318
|
if model_name == model_spec.model_name:
|
|
316
319
|
return model_spec
|
|
317
320
|
|
|
318
|
-
if
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
321
|
+
if download_hub == "modelscope" and model_name in MODELSCOPE_EMBEDDING_MODELS:
|
|
322
|
+
logger.debug(f"Embedding model {model_name} found in ModelScope.")
|
|
323
|
+
return MODELSCOPE_EMBEDDING_MODELS[model_name]
|
|
324
|
+
elif download_hub == "huggingface" and model_name in BUILTIN_EMBEDDING_MODELS:
|
|
325
|
+
logger.debug(f"Embedding model {model_name} found in Huggingface.")
|
|
326
|
+
return BUILTIN_EMBEDDING_MODELS[model_name]
|
|
327
|
+
elif download_from_modelscope() and model_name in MODELSCOPE_EMBEDDING_MODELS:
|
|
328
|
+
logger.debug(f"Embedding model {model_name} found in ModelScope.")
|
|
329
|
+
return MODELSCOPE_EMBEDDING_MODELS[model_name]
|
|
330
|
+
elif model_name in BUILTIN_EMBEDDING_MODELS:
|
|
331
|
+
logger.debug(f"Embedding model {model_name} found in Huggingface.")
|
|
329
332
|
return BUILTIN_EMBEDDING_MODELS[model_name]
|
|
330
333
|
else:
|
|
331
334
|
raise ValueError(
|
|
332
335
|
f"Embedding model {model_name} not found, available"
|
|
333
|
-
f"
|
|
336
|
+
f"Huggingface: {BUILTIN_EMBEDDING_MODELS.keys()}"
|
|
337
|
+
f"ModelScope: {MODELSCOPE_EMBEDDING_MODELS.keys()}"
|
|
334
338
|
)
|
|
335
339
|
|
|
336
340
|
|
|
337
341
|
def create_embedding_model_instance(
|
|
338
|
-
subpool_addr: str,
|
|
342
|
+
subpool_addr: str,
|
|
343
|
+
devices: List[str],
|
|
344
|
+
model_uid: str,
|
|
345
|
+
model_name: str,
|
|
346
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
347
|
+
**kwargs,
|
|
339
348
|
) -> Tuple[EmbeddingModel, EmbeddingModelDescription]:
|
|
340
|
-
model_spec = match_embedding(model_name)
|
|
349
|
+
model_spec = match_embedding(model_name, download_hub)
|
|
341
350
|
model_path = cache(model_spec)
|
|
342
351
|
model = EmbeddingModel(model_uid, model_path, **kwargs)
|
|
343
352
|
model_description = EmbeddingModelDescription(
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Copyright 2022-2024 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import codecs
|
|
16
|
+
import json
|
|
17
|
+
import os
|
|
18
|
+
|
|
19
|
+
from ...constants import XINFERENCE_MODEL_DIR
|
|
20
|
+
from .core import (
|
|
21
|
+
FLEXIBLE_MODEL_DESCRIPTIONS,
|
|
22
|
+
FlexibleModel,
|
|
23
|
+
FlexibleModelSpec,
|
|
24
|
+
generate_flexible_model_description,
|
|
25
|
+
get_flexible_model_descriptions,
|
|
26
|
+
get_flexible_models,
|
|
27
|
+
register_flexible_model,
|
|
28
|
+
unregister_flexible_model,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Directory that holds the persisted JSON specs of user-registered flexible models.
model_dir = os.path.join(XINFERENCE_MODEL_DIR, "flexible")
if os.path.isdir(model_dir):
    for f in os.listdir(model_dir):
        _spec_path = os.path.join(model_dir, f)
        # Specs are persisted as "<model_name>.json". Skip everything else
        # (sub-directories, editor backups, OS metadata files) so that a stray
        # entry cannot make importing this package fail.
        if not (f.endswith(".json") and os.path.isfile(_spec_path)):
            continue
        with codecs.open(_spec_path, encoding="utf-8") as fd:
            model_spec = FlexibleModelSpec.parse_obj(json.load(fd))
        # Already on disk, so register in memory only.
        register_flexible_model(model_spec, persist=False)

# register model description
for model in get_flexible_models():
    FLEXIBLE_MODEL_DESCRIPTIONS.update(generate_flexible_model_description(model))
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# Copyright 2022-2024 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
import os
|
|
18
|
+
from collections import defaultdict
|
|
19
|
+
from threading import Lock
|
|
20
|
+
from typing import Dict, List, Optional, Tuple
|
|
21
|
+
|
|
22
|
+
from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
|
|
23
|
+
from ..core import CacheableModelSpec, ModelDescription
|
|
24
|
+
from .utils import get_launcher
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
FLEXIBLE_MODEL_LOCK = Lock()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class FlexibleModelSpec(CacheableModelSpec):
    """Specification of a user-registered flexible model.

    The spec names a launcher and, optionally, JSON-encoded arguments for it.
    """

    # Optional here; the "type: ignore" suggests the base class declares it
    # differently -- TODO confirm against CacheableModelSpec.
    model_id: Optional[str]  # type: ignore
    model_description: Optional[str]
    # Used as the model path when an instance is created.
    model_uri: Optional[str]
    # Name that is resolved to a launcher callable.
    launcher: str
    # JSON document (as a string) with extra arguments for the launcher.
    launcher_args: Optional[str]

    def parser_args(self):
        """Decode ``launcher_args`` from JSON.

        Returns:
            The decoded JSON value, or an empty dict when no launcher
            arguments were provided (``None`` or an empty string).

        Raises:
            json.JSONDecodeError: if ``launcher_args`` is not valid JSON.
        """
        # launcher_args is optional; json.loads(None) would raise TypeError.
        if not self.launcher_args:
            return {}
        return json.loads(self.launcher_args)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class FlexibleModelDescription(ModelDescription):
    """Describes a flexible model using the fields of its spec."""

    def __init__(
        self,
        address: Optional[str],
        devices: Optional[List[str]],
        model_spec: FlexibleModelSpec,
        model_path: Optional[str] = None,
    ):
        # address, devices and model_path are handled by the base class.
        super().__init__(address, devices, model_path=model_path)
        self._model_spec = model_spec

    def to_dict(self):
        """Return the description of this model as a plain dict."""
        spec = self._model_spec
        return dict(
            model_type="flexible",
            address=self.address,
            accelerators=self.devices,
            model_name=spec.model_name,
            launcher=spec.launcher,
            launcher_args=spec.launcher_args,
        )

    def get_model_version(self) -> str:
        """Return the version string, which is simply the model name."""
        name = self._model_spec.model_name
        return f"{name}"

    def to_version_info(self):
        """Return the version-info dict for this model."""
        spec = self._model_spec
        version = self.get_model_version()
        info = {
            "model_version": version,
            # Always reported as cached.
            "cache_status": True,
            "model_file_location": spec.model_uri,
            "launcher": spec.launcher,
            "launcher_args": spec.launcher_args,
        }
        return info
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def generate_flexible_model_description(
    model_spec: FlexibleModelSpec,
) -> Dict[str, List[Dict]]:
    """Build the version-info mapping for a single flexible model spec.

    Returns:
        A ``defaultdict(list)`` with one key, the model name, whose value is a
        one-element list holding the spec's version info.
    """
    descriptions: Dict[str, List[Dict]] = defaultdict(list)
    versions = descriptions[model_spec.model_name]
    # No address or devices: the description is not bound to a running instance.
    description = FlexibleModelDescription(None, None, model_spec)
    versions.append(description.to_version_info())
    return descriptions
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
FLEXIBLE_MODELS: List[FlexibleModelSpec] = []
|
|
87
|
+
FLEXIBLE_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def get_flexible_models():
    """Return a shallow copy of the registered flexible model specs.

    The copy is taken while holding ``FLEXIBLE_MODEL_LOCK``.
    """
    with FLEXIBLE_MODEL_LOCK:
        snapshot = list(FLEXIBLE_MODELS)
    return snapshot
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def get_flexible_model_descriptions():
    """Return a deep copy of ``FLEXIBLE_MODEL_DESCRIPTIONS``.

    Callers can modify the result without affecting the module-level mapping.
    """
    from copy import deepcopy

    return deepcopy(FLEXIBLE_MODEL_DESCRIPTIONS)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def register_flexible_model(model_spec: FlexibleModelSpec, persist: bool):
    """Register a flexible model spec and optionally persist it to disk.

    Args:
        model_spec: The spec to register.
        persist: When true, write the spec as JSON to
            ``<XINFERENCE_MODEL_DIR>/flexible/<model_name>.json``.

    Raises:
        ValueError: if the model name is invalid, the launcher args cannot be
            parsed, or a model with the same name is already registered.
    """
    from ..utils import is_valid_model_name

    if not is_valid_model_name(model_spec.model_name):
        raise ValueError(f"Invalid model name {model_spec.model_name}.")

    if model_spec.launcher_args:
        try:
            model_spec.parser_args()
        except Exception as e:
            # Keep the parsing error as the cause so the real problem stays visible.
            raise ValueError(
                f"Invalid model launcher args {model_spec.launcher_args}."
            ) from e

    with FLEXIBLE_MODEL_LOCK:
        if any(spec.model_name == model_spec.model_name for spec in FLEXIBLE_MODELS):
            raise ValueError(
                f"Model name conflicts with existing model {model_spec.model_name}"
            )
        FLEXIBLE_MODELS.append(model_spec)

    if persist:
        persist_path = os.path.join(
            XINFERENCE_MODEL_DIR, "flexible", f"{model_spec.model_name}.json"
        )
        os.makedirs(os.path.dirname(persist_path), exist_ok=True)
        # Persisted specs are read back as UTF-8, so write them as UTF-8 too
        # instead of relying on the platform default encoding.
        with open(persist_path, mode="w", encoding="utf-8") as fd:
            fd.write(model_spec.json())
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def unregister_flexible_model(model_name: str, raise_error: bool = True):
    """Remove a registered flexible model and its persisted spec file.

    Args:
        model_name: Name of the model to unregister.
        raise_error: If the model is not registered, raise when true;
            otherwise only log a warning.

    Raises:
        ValueError: if the model is not registered and ``raise_error`` is true.
    """
    with FLEXIBLE_MODEL_LOCK:
        registered = next(
            (spec for spec in FLEXIBLE_MODELS if spec.model_name == model_name),
            None,
        )

        if not registered:
            if raise_error:
                raise ValueError(f"Model {model_name} not found")
            logger.warning(f"Model {model_name} not found")
            return

        FLEXIBLE_MODELS.remove(registered)

        spec_file = os.path.join(
            XINFERENCE_MODEL_DIR, "flexible", f"{registered.model_name}.json"
        )
        if os.path.exists(spec_file):
            os.remove(spec_file)

        cache_dir = os.path.join(XINFERENCE_CACHE_DIR, registered.model_name)
        if not os.path.exists(cache_dir):
            return

        logger.warning(
            f"Remove the cache of user-defined model {registered.model_name}. "
            f"Cache directory: {cache_dir}"
        )
        # Only a symlinked cache entry is removed; a real directory is left
        # for the user to delete.
        if os.path.islink(cache_dir):
            os.remove(cache_dir)
        else:
            logger.warning(
                "Cache directory is not a soft link, please remove it manually."
            )
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class FlexibleModel:
    """Base class for flexible models.

    Stores the model uid, path, device and config and exposes them through
    read-only properties. Subclasses provide ``infer`` and may override ``load``.
    """

    def __init__(
        self,
        model_uid: str,
        model_path: str,
        device: Optional[str] = None,
        config: Optional[Dict] = None,
    ):
        self._model_uid, self._model_path = model_uid, model_path
        self._device, self._config = device, config

    def load(self):
        """
        Load the model. The base implementation does nothing.
        """
        return None

    def infer(self, **kwargs):
        """
        Run inference. Must be implemented by subclasses.
        """
        raise NotImplementedError("infer method not implemented.")

    @property
    def model_uid(self):
        """The uid given at construction."""
        return self._model_uid

    @property
    def model_path(self):
        """The model path given at construction."""
        return self._model_path

    @property
    def device(self):
        """The device given at construction, or ``None``."""
        return self._device

    @property
    def config(self):
        """The config dict given at construction, or ``None``."""
        return self._config
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def match_flexible_model(model_name):
    """Return the registered spec whose name equals ``model_name``.

    Returns ``None`` when no registered flexible model has that name.
    """
    candidates = (
        spec for spec in get_flexible_models() if model_name == spec.model_name
    )
    return next(candidates, None)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def create_flexible_model_instance(
    subpool_addr: str, devices: List[str], model_uid: str, model_name: str, **kwargs
) -> Tuple[FlexibleModel, FlexibleModelDescription]:
    """Create a flexible model through its launcher, plus its description.

    Args:
        subpool_addr: Address recorded in the returned description.
        devices: Devices recorded in the returned description.
        model_uid: Uid passed to the launcher.
        model_name: Name of a registered flexible model.
        **kwargs: Extra launcher keyword arguments. Entries from the spec's
            ``launcher_args`` override entries with the same key.

    Returns:
        A ``(model, model_description)`` tuple.

    Raises:
        ValueError: if no flexible model named ``model_name`` is registered.
    """
    model_spec = match_flexible_model(model_name)
    if model_spec is None:
        # The matcher returns None for unknown names; fail clearly here rather
        # than with an AttributeError on the next line.
        raise ValueError(f"Flexible model {model_name} not found")

    model_path = model_spec.model_uri
    launcher_name = model_spec.launcher
    # launcher_args is optional; only decode it when it is set.
    launcher_args = model_spec.parser_args() if model_spec.launcher_args else {}
    kwargs.update(launcher_args)

    model = get_launcher(launcher_name)(
        model_uid=model_uid, model_spec=model_spec, **kwargs
    )

    model_description = FlexibleModelDescription(
        subpool_addr, devices, model_spec, model_path=model_path
    )
    return model, model_description
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 2022-2024 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .transformers_launcher import launcher as transformers
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Copyright 2022-2024 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from transformers import pipeline
|
|
16
|
+
|
|
17
|
+
from ..core import FlexibleModel, FlexibleModelSpec
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class MockModel(FlexibleModel):
    """Flexible model whose ``infer`` returns its keyword arguments unchanged."""

    def infer(self, **kwargs):
        """Return the received keyword arguments as a dict."""
        echoed = kwargs
        return echoed
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AutoModel(FlexibleModel):
    """Flexible model backed by a ``transformers`` pipeline."""

    def load(self):
        """Create the pipeline from the model path, device and config."""
        # Copy so the dict held by the instance is not modified.
        config = dict(self.config or {})
        # The config may itself carry a "device" entry (the launcher passes
        # its kwargs through as config). Passing it alongside the explicit
        # device= argument raises "got multiple values for keyword argument
        # 'device'", so the explicit device takes precedence.
        config.pop("device", None)
        self._pipeline = pipeline(model=self.model_path, device=self.device, **config)

    def infer(self, **kwargs):
        """Call the pipeline with the given keyword arguments and return its result."""
        return self._pipeline(**kwargs)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class TransformersTextClassificationModel(FlexibleModel):
    """Flexible model backed by a ``transformers`` pipeline; the launcher selects it for the "text-classification" task."""

    def load(self):
        """Create the pipeline from the model path, device and config."""
        # Copy so the dict held by the instance is not modified.
        config = dict(self.config or {})
        # The config may itself carry a "device" entry (the launcher passes
        # its kwargs through as config). Passing it alongside the explicit
        # device= argument raises "got multiple values for keyword argument
        # 'device'", so the explicit device takes precedence.
        config.pop("device", None)

        self._pipeline = pipeline(model=self._model_path, device=self._device, **config)

    def infer(self, **kwargs):
        """Call the pipeline with the given keyword arguments and return its result."""
        return self._pipeline(**kwargs)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def launcher(model_uid: str, model_spec: FlexibleModelSpec, **kwargs) -> FlexibleModel:
    """Build the flexible model that matches the requested task.

    The ``task`` entry of ``kwargs`` selects the class:
    ``"text-classification"`` gives ``TransformersTextClassificationModel``,
    ``"mock"`` gives ``MockModel``, and anything else gives ``AutoModel``.
    The full ``kwargs`` dict is passed to the model as its config.

    Raises:
        ValueError: if the spec has no ``model_uri``.
    """
    model_path = model_spec.model_uri
    if model_path is None:
        raise ValueError("model_path required")

    task = kwargs.get("task")
    if task == "text-classification":
        model_cls = TransformersTextClassificationModel
    elif task == "mock":
        model_cls = MockModel
    else:
        model_cls = AutoModel

    return model_cls(
        model_uid=model_uid,
        model_path=model_path,
        device=kwargs.get("device"),
        config=kwargs,
    )
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Copyright 2022-2024 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import importlib
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_launcher(launcher_name: str):
    """Resolve a launcher from its dotted path.

    Args:
        launcher_name: ``"<module path>.<attribute>"``. The part before the
            last dot is imported as a module and the part after it is looked
            up on that module.

    Returns:
        The object found under that attribute name.

    Raises:
        ImportError: if the module cannot be imported.
        ValueError: if the module has no such attribute, or if
            ``launcher_name`` contains no dot.
    """
    module_name, dot, attr_name = launcher_name.rpartition(".")
    try:
        if dot:
            module = importlib.import_module(module_name)
            # Default to None so a missing attribute is reported through the
            # ValueError below instead of leaking an AttributeError.
            fn = getattr(module, attr_name, None)
        else:
            # A bare name has no attribute part to look up. The import is kept
            # so a non-existent module is still reported as ImportError.
            # NOTE(review): bare names never resolved to a launcher before
            # either; confirm what they were meant to resolve to.
            importlib.import_module(launcher_name)
            fn = None
    except ImportError as e:
        raise ImportError(f"Failed to import {launcher_name}: {e}") from e

    if fn is None:
        raise ValueError(f"Launcher {launcher_name} not found.")

    return fn
|
xinference/model/image/core.py
CHANGED
|
@@ -15,7 +15,7 @@ import collections.abc
|
|
|
15
15
|
import logging
|
|
16
16
|
import os
|
|
17
17
|
from collections import defaultdict
|
|
18
|
-
from typing import Dict, List, Optional, Tuple
|
|
18
|
+
from typing import Dict, List, Literal, Optional, Tuple
|
|
19
19
|
|
|
20
20
|
from ...constants import XINFERENCE_CACHE_DIR
|
|
21
21
|
from ...types import PeftModelConfig
|
|
@@ -117,7 +117,10 @@ def generate_image_description(
|
|
|
117
117
|
return res
|
|
118
118
|
|
|
119
119
|
|
|
120
|
-
def match_diffusion(
|
|
120
|
+
def match_diffusion(
|
|
121
|
+
model_name: str,
|
|
122
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
123
|
+
) -> ImageModelFamilyV1:
|
|
121
124
|
from ..utils import download_from_modelscope
|
|
122
125
|
from . import BUILTIN_IMAGE_MODELS, MODELSCOPE_IMAGE_MODELS
|
|
123
126
|
from .custom import get_user_defined_images
|
|
@@ -126,17 +129,17 @@ def match_diffusion(model_name: str) -> ImageModelFamilyV1:
|
|
|
126
129
|
if model_spec.model_name == model_name:
|
|
127
130
|
return model_spec
|
|
128
131
|
|
|
129
|
-
if
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
132
|
+
if download_hub == "modelscope" and model_name in MODELSCOPE_IMAGE_MODELS:
|
|
133
|
+
logger.debug(f"Image model {model_name} found in ModelScope.")
|
|
134
|
+
return MODELSCOPE_IMAGE_MODELS[model_name]
|
|
135
|
+
elif download_hub == "huggingface" and model_name in BUILTIN_IMAGE_MODELS:
|
|
136
|
+
logger.debug(f"Image model {model_name} found in Huggingface.")
|
|
137
|
+
return BUILTIN_IMAGE_MODELS[model_name]
|
|
138
|
+
elif download_from_modelscope() and model_name in MODELSCOPE_IMAGE_MODELS:
|
|
139
|
+
logger.debug(f"Image model {model_name} found in ModelScope.")
|
|
140
|
+
return MODELSCOPE_IMAGE_MODELS[model_name]
|
|
141
|
+
elif model_name in BUILTIN_IMAGE_MODELS:
|
|
142
|
+
logger.debug(f"Image model {model_name} found in Huggingface.")
|
|
140
143
|
return BUILTIN_IMAGE_MODELS[model_name]
|
|
141
144
|
else:
|
|
142
145
|
raise ValueError(
|
|
@@ -183,9 +186,10 @@ def create_image_model_instance(
|
|
|
183
186
|
model_uid: str,
|
|
184
187
|
model_name: str,
|
|
185
188
|
peft_model_config: Optional[PeftModelConfig] = None,
|
|
189
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
186
190
|
**kwargs,
|
|
187
191
|
) -> Tuple[DiffusionModel, ImageModelDescription]:
|
|
188
|
-
model_spec = match_diffusion(model_name)
|
|
192
|
+
model_spec = match_diffusion(model_name, download_hub)
|
|
189
193
|
controlnet = kwargs.get("controlnet")
|
|
190
194
|
# Handle controlnet
|
|
191
195
|
if controlnet is not None:
|
xinference/model/image/custom.py
CHANGED
|
@@ -66,7 +66,7 @@ def register_image(model_spec: CustomImageModelFamilyV1, persist: bool):
|
|
|
66
66
|
raise ValueError(f"Invalid model URI {model_uri}")
|
|
67
67
|
|
|
68
68
|
persist_path = os.path.join(
|
|
69
|
-
XINFERENCE_MODEL_DIR, "image", f"{model_spec.
|
|
69
|
+
XINFERENCE_MODEL_DIR, "image", f"{model_spec.model_name}.json"
|
|
70
70
|
)
|
|
71
71
|
os.makedirs(os.path.dirname(persist_path), exist_ok=True)
|
|
72
72
|
with open(persist_path, "w") as f:
|
xinference/model/llm/__init__.py
CHANGED
|
@@ -34,6 +34,7 @@ from .llm_family import (
|
|
|
34
34
|
BUILTIN_MODELSCOPE_LLM_FAMILIES,
|
|
35
35
|
LLAMA_CLASSES,
|
|
36
36
|
LLM_ENGINES,
|
|
37
|
+
MLX_CLASSES,
|
|
37
38
|
SGLANG_CLASSES,
|
|
38
39
|
SUPPORTED_ENGINES,
|
|
39
40
|
TRANSFORMERS_CLASSES,
|
|
@@ -42,6 +43,7 @@ from .llm_family import (
|
|
|
42
43
|
GgmlLLMSpecV1,
|
|
43
44
|
LLMFamilyV1,
|
|
44
45
|
LLMSpecV1,
|
|
46
|
+
MLXLLMSpecV1,
|
|
45
47
|
PromptStyleV1,
|
|
46
48
|
PytorchLLMSpecV1,
|
|
47
49
|
get_cache_status,
|
|
@@ -110,8 +112,8 @@ def generate_engine_config_by_model_family(model_family):
|
|
|
110
112
|
|
|
111
113
|
|
|
112
114
|
def _install():
|
|
113
|
-
from .ggml.chatglm import ChatglmCppChatModel
|
|
114
115
|
from .ggml.llamacpp import LlamaCppChatModel, LlamaCppModel
|
|
116
|
+
from .mlx.core import MLXChatModel, MLXModel
|
|
115
117
|
from .pytorch.baichuan import BaichuanPytorchChatModel
|
|
116
118
|
from .pytorch.chatglm import ChatglmPytorchChatModel
|
|
117
119
|
from .pytorch.cogvlm2 import CogVLM2Model
|
|
@@ -140,13 +142,13 @@ def _install():
|
|
|
140
142
|
# register llm classes.
|
|
141
143
|
LLAMA_CLASSES.extend(
|
|
142
144
|
[
|
|
143
|
-
ChatglmCppChatModel,
|
|
144
145
|
LlamaCppChatModel,
|
|
145
146
|
LlamaCppModel,
|
|
146
147
|
]
|
|
147
148
|
)
|
|
148
149
|
SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
|
|
149
150
|
VLLM_CLASSES.extend([VLLMModel, VLLMChatModel])
|
|
151
|
+
MLX_CLASSES.extend([MLXModel, MLXChatModel])
|
|
150
152
|
TRANSFORMERS_CLASSES.extend(
|
|
151
153
|
[
|
|
152
154
|
BaichuanPytorchChatModel,
|
|
@@ -176,6 +178,7 @@ def _install():
|
|
|
176
178
|
SUPPORTED_ENGINES["SGLang"] = SGLANG_CLASSES
|
|
177
179
|
SUPPORTED_ENGINES["Transformers"] = TRANSFORMERS_CLASSES
|
|
178
180
|
SUPPORTED_ENGINES["llama.cpp"] = LLAMA_CLASSES
|
|
181
|
+
SUPPORTED_ENGINES["MLX"] = MLX_CLASSES
|
|
179
182
|
|
|
180
183
|
json_path = os.path.join(
|
|
181
184
|
os.path.dirname(os.path.abspath(__file__)), "llm_family.json"
|
xinference/model/llm/core.py
CHANGED
|
@@ -20,7 +20,7 @@ import platform
|
|
|
20
20
|
from abc import abstractmethod
|
|
21
21
|
from collections import defaultdict
|
|
22
22
|
from functools import lru_cache
|
|
23
|
-
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
|
|
23
|
+
from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union
|
|
24
24
|
|
|
25
25
|
from ...core.utils import parse_replica_model_uid
|
|
26
26
|
from ...types import PeftModelConfig
|
|
@@ -193,6 +193,7 @@ def create_llm_model_instance(
|
|
|
193
193
|
model_size_in_billions: Optional[Union[int, str]] = None,
|
|
194
194
|
quantization: Optional[str] = None,
|
|
195
195
|
peft_model_config: Optional[PeftModelConfig] = None,
|
|
196
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
196
197
|
**kwargs,
|
|
197
198
|
) -> Tuple[LLM, LLMDescription]:
|
|
198
199
|
from .llm_family import cache, check_engine_by_spec_parameters, match_llm
|
|
@@ -200,7 +201,7 @@ def create_llm_model_instance(
|
|
|
200
201
|
if model_engine is None:
|
|
201
202
|
raise ValueError("model_engine is required for LLM model")
|
|
202
203
|
match_result = match_llm(
|
|
203
|
-
model_name, model_format, model_size_in_billions, quantization
|
|
204
|
+
model_name, model_format, model_size_in_billions, quantization, download_hub
|
|
204
205
|
)
|
|
205
206
|
|
|
206
207
|
if not match_result:
|
|
@@ -25,7 +25,6 @@ from ....types import (
|
|
|
25
25
|
CompletionChunk,
|
|
26
26
|
CompletionUsage,
|
|
27
27
|
CreateCompletionLlamaCpp,
|
|
28
|
-
Embedding,
|
|
29
28
|
LlamaCppGenerateConfig,
|
|
30
29
|
LlamaCppModelConfig,
|
|
31
30
|
)
|
|
@@ -65,7 +64,6 @@ class LlamaCppModel(LLM):
|
|
|
65
64
|
|
|
66
65
|
if self.model_family.context_length:
|
|
67
66
|
llamacpp_model_config.setdefault("n_ctx", self.model_family.context_length)
|
|
68
|
-
llamacpp_model_config.setdefault("embedding", True)
|
|
69
67
|
llamacpp_model_config.setdefault("use_mmap", False)
|
|
70
68
|
llamacpp_model_config.setdefault("use_mlock", True)
|
|
71
69
|
|
|
@@ -185,7 +183,7 @@ class LlamaCppModel(LLM):
|
|
|
185
183
|
) -> bool:
|
|
186
184
|
if llm_spec.model_format not in ["ggmlv3", "ggufv2"]:
|
|
187
185
|
return False
|
|
188
|
-
if "
|
|
186
|
+
if "qwen" in llm_family.model_name:
|
|
189
187
|
return False
|
|
190
188
|
if "generate" not in llm_family.model_ability:
|
|
191
189
|
return False
|
|
@@ -261,11 +259,6 @@ class LlamaCppModel(LLM):
|
|
|
261
259
|
else:
|
|
262
260
|
return generator_wrapper(prompt, generate_config)
|
|
263
261
|
|
|
264
|
-
def create_embedding(self, input: Union[str, List[str]]) -> Embedding:
|
|
265
|
-
assert self._llm is not None
|
|
266
|
-
embedding = self._llm.create_embedding(input)
|
|
267
|
-
return embedding
|
|
268
|
-
|
|
269
262
|
|
|
270
263
|
class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
|
|
271
264
|
def __init__(
|
|
@@ -292,8 +285,6 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
|
|
|
292
285
|
) -> bool:
|
|
293
286
|
if llm_spec.model_format not in ["ggmlv3", "ggufv2"]:
|
|
294
287
|
return False
|
|
295
|
-
if "chatglm" in llm_family.model_name:
|
|
296
|
-
return False
|
|
297
288
|
if "chat" not in llm_family.model_ability:
|
|
298
289
|
return False
|
|
299
290
|
return True
|