xinference 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of xinference has been flagged as potentially problematic.
- xinference/_version.py +3 -3
- xinference/client/restful/async_restful_client.py +8 -13
- xinference/client/restful/restful_client.py +6 -2
- xinference/core/chat_interface.py +6 -4
- xinference/core/media_interface.py +5 -0
- xinference/core/model.py +1 -5
- xinference/core/supervisor.py +117 -68
- xinference/core/worker.py +49 -37
- xinference/deploy/test/test_cmdline.py +2 -6
- xinference/model/audio/__init__.py +26 -23
- xinference/model/audio/chattts.py +3 -2
- xinference/model/audio/core.py +49 -98
- xinference/model/audio/cosyvoice.py +3 -2
- xinference/model/audio/custom.py +28 -73
- xinference/model/audio/f5tts.py +3 -2
- xinference/model/audio/f5tts_mlx.py +3 -2
- xinference/model/audio/fish_speech.py +3 -2
- xinference/model/audio/funasr.py +17 -4
- xinference/model/audio/kokoro.py +3 -2
- xinference/model/audio/megatts.py +3 -2
- xinference/model/audio/melotts.py +3 -2
- xinference/model/audio/model_spec.json +572 -171
- xinference/model/audio/utils.py +0 -6
- xinference/model/audio/whisper.py +3 -2
- xinference/model/audio/whisper_mlx.py +3 -2
- xinference/model/cache_manager.py +141 -0
- xinference/model/core.py +6 -49
- xinference/model/custom.py +174 -0
- xinference/model/embedding/__init__.py +67 -56
- xinference/model/embedding/cache_manager.py +35 -0
- xinference/model/embedding/core.py +104 -84
- xinference/model/embedding/custom.py +55 -78
- xinference/model/embedding/embed_family.py +80 -31
- xinference/model/embedding/flag/core.py +21 -5
- xinference/model/embedding/llama_cpp/__init__.py +0 -0
- xinference/model/embedding/llama_cpp/core.py +234 -0
- xinference/model/embedding/model_spec.json +968 -103
- xinference/model/embedding/sentence_transformers/core.py +30 -20
- xinference/model/embedding/vllm/core.py +11 -5
- xinference/model/flexible/__init__.py +8 -2
- xinference/model/flexible/core.py +26 -119
- xinference/model/flexible/custom.py +69 -0
- xinference/model/flexible/launchers/image_process_launcher.py +1 -0
- xinference/model/flexible/launchers/modelscope_launcher.py +5 -1
- xinference/model/flexible/launchers/transformers_launcher.py +15 -3
- xinference/model/flexible/launchers/yolo_launcher.py +5 -1
- xinference/model/image/__init__.py +20 -20
- xinference/model/image/cache_manager.py +62 -0
- xinference/model/image/core.py +70 -182
- xinference/model/image/custom.py +28 -72
- xinference/model/image/model_spec.json +402 -119
- xinference/model/image/ocr/got_ocr2.py +3 -2
- xinference/model/image/stable_diffusion/core.py +22 -7
- xinference/model/image/stable_diffusion/mlx.py +6 -6
- xinference/model/image/utils.py +2 -2
- xinference/model/llm/__init__.py +71 -94
- xinference/model/llm/cache_manager.py +292 -0
- xinference/model/llm/core.py +37 -111
- xinference/model/llm/custom.py +88 -0
- xinference/model/llm/llama_cpp/core.py +5 -7
- xinference/model/llm/llm_family.json +16260 -8151
- xinference/model/llm/llm_family.py +138 -839
- xinference/model/llm/lmdeploy/core.py +5 -7
- xinference/model/llm/memory.py +3 -4
- xinference/model/llm/mlx/core.py +6 -8
- xinference/model/llm/reasoning_parser.py +3 -1
- xinference/model/llm/sglang/core.py +32 -14
- xinference/model/llm/transformers/chatglm.py +3 -7
- xinference/model/llm/transformers/core.py +49 -27
- xinference/model/llm/transformers/deepseek_v2.py +2 -2
- xinference/model/llm/transformers/gemma3.py +2 -2
- xinference/model/llm/transformers/multimodal/cogagent.py +2 -2
- xinference/model/llm/transformers/multimodal/deepseek_vl2.py +2 -2
- xinference/model/llm/transformers/multimodal/gemma3.py +2 -2
- xinference/model/llm/transformers/multimodal/glm4_1v.py +167 -0
- xinference/model/llm/transformers/multimodal/glm4v.py +2 -2
- xinference/model/llm/transformers/multimodal/intern_vl.py +2 -2
- xinference/model/llm/transformers/multimodal/minicpmv26.py +3 -3
- xinference/model/llm/transformers/multimodal/ovis2.py +2 -2
- xinference/model/llm/transformers/multimodal/qwen-omni.py +2 -2
- xinference/model/llm/transformers/multimodal/qwen2_audio.py +2 -2
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
- xinference/model/llm/transformers/opt.py +3 -7
- xinference/model/llm/utils.py +34 -49
- xinference/model/llm/vllm/core.py +77 -27
- xinference/model/llm/vllm/xavier/engine.py +5 -3
- xinference/model/llm/vllm/xavier/scheduler.py +10 -6
- xinference/model/llm/vllm/xavier/transfer.py +1 -1
- xinference/model/rerank/__init__.py +26 -25
- xinference/model/rerank/core.py +47 -87
- xinference/model/rerank/custom.py +25 -71
- xinference/model/rerank/model_spec.json +158 -33
- xinference/model/rerank/utils.py +2 -2
- xinference/model/utils.py +115 -54
- xinference/model/video/__init__.py +13 -17
- xinference/model/video/core.py +44 -102
- xinference/model/video/diffusers.py +4 -3
- xinference/model/video/model_spec.json +90 -21
- xinference/types.py +5 -3
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.7d24df53.js +3 -0
- xinference/web/ui/build/static/js/main.7d24df53.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2704ff66a5f73ca78b341eb3edec60154369df9d87fbc8c6dd60121abc5e1b0a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/607dfef23d33e6b594518c0c6434567639f24f356b877c80c60575184ec50ed0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9be3d56173aacc3efd0b497bcb13c4f6365de30069176ee9403b40e717542326.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f9dd6c32c78a222d07da5987ae902effe16bcf20aac00774acdccc4de3c9ff2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b2ab5ee972c60d15eb9abf5845705f8ab7e1d125d324d9a9b1bcae5d6fd7ffb2.json +1 -0
- xinference/web/ui/src/locales/en.json +0 -1
- xinference/web/ui/src/locales/ja.json +0 -1
- xinference/web/ui/src/locales/ko.json +0 -1
- xinference/web/ui/src/locales/zh.json +0 -1
- {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/METADATA +9 -11
- {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/RECORD +119 -119
- xinference/model/audio/model_spec_modelscope.json +0 -231
- xinference/model/embedding/model_spec_modelscope.json +0 -293
- xinference/model/embedding/utils.py +0 -18
- xinference/model/image/model_spec_modelscope.json +0 -375
- xinference/model/llm/llama_cpp/memory.py +0 -457
- xinference/model/llm/llm_family_csghub.json +0 -56
- xinference/model/llm/llm_family_modelscope.json +0 -8700
- xinference/model/llm/llm_family_openmind_hub.json +0 -1019
- xinference/model/rerank/model_spec_modelscope.json +0 -85
- xinference/model/video/model_spec_modelscope.json +0 -184
- xinference/web/ui/build/static/js/main.9b12b7f9.js +0 -3
- xinference/web/ui/build/static/js/main.9b12b7f9.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1460361af6975e63576708039f1cb732faf9c672d97c494d4055fc6331460be0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4efd8dda58fda83ed9546bf2f587df67f8d98e639117bee2d9326a9a1d9bebb2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5b2dafe5aa9e1105e0244a2b6751807342fa86aa0144b4e84d947a1686102715.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +0 -1
- /xinference/web/ui/build/static/js/{main.9b12b7f9.js.LICENSE.txt → main.7d24df53.js.LICENSE.txt} +0 -0
- {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/WHEEL +0 -0
- {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/top_level.txt +0 -0
xinference/model/rerank/custom.py
CHANGED
@@ -12,98 +12,52 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-import os
-from threading import Lock
 from typing import List, Literal, Optional

-from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
-from .core import RerankModelSpec
+from ..custom import ModelRegistry
+from .core import RerankModelFamilyV2

 logger = logging.getLogger(__name__)


-UD_RERANK_LOCK = Lock()
-
-
-class CustomRerankModelSpec(RerankModelSpec):
+class CustomRerankModelFamilyV2(RerankModelFamilyV2):
+    version: Literal[2] = 2
     model_id: Optional[str]  # type: ignore
     model_revision: Optional[str]  # type: ignore
     model_uri: Optional[str]
     model_type: Literal["rerank"] = "rerank"  # for frontend


-UD_RERANKS: List[CustomRerankModelSpec] = []
+UD_RERANKS: List[CustomRerankModelFamilyV2] = []


-def get_user_defined_reranks() -> List[CustomRerankModelSpec]:
-    with UD_RERANK_LOCK:
-        return UD_RERANKS.copy()
+class RerankModelRegistry(ModelRegistry):
+    model_type = "rerank"

+    def __init__(self):
+        from . import BUILTIN_RERANK_MODELS

-def register_rerank(model_spec: CustomRerankModelSpec, persist: bool):
-    from ..utils import is_valid_model_name, is_valid_model_uri
-    from . import BUILTIN_RERANK_MODELS, MODELSCOPE_RERANK_MODELS
+        super().__init__()
+        self.models = UD_RERANKS
+        self.builtin_models = list(BUILTIN_RERANK_MODELS.keys())

-    if not is_valid_model_name(model_spec.model_name):
-        raise ValueError(f"Invalid model name {model_spec.model_name}.")

-    model_uri = model_spec.model_uri
-    if model_uri and not is_valid_model_uri(model_uri):
-        raise ValueError(f"Invalid model URI {model_uri}.")
+def get_user_defined_reranks() -> List[CustomRerankModelFamilyV2]:
+    from ..custom import RegistryManager

-    with UD_RERANK_LOCK:
-        for model_name in (
-            list(BUILTIN_RERANK_MODELS.keys())
-            + list(MODELSCOPE_RERANK_MODELS.keys())
-            + [spec.model_name for spec in UD_RERANKS]
-        ):
-            if model_spec.model_name == model_name:
-                raise ValueError(
-                    f"Model name conflicts with existing model {model_spec.model_name}"
-                )
+    registry = RegistryManager.get_registry("rerank")
+    return registry.get_custom_models()

-        UD_RERANKS.append(model_spec)

-    if persist:
-        persist_path = os.path.join(
-            XINFERENCE_MODEL_DIR, "rerank", f"{model_spec.model_name}.json"
-        )
-        with open(persist_path, mode="w") as fd:
-            fd.write(model_spec.json())
+def register_rerank(model_spec: CustomRerankModelFamilyV2, persist: bool):
+    from ..custom import RegistryManager
+
+    registry = RegistryManager.get_registry("rerank")
+    registry.register(model_spec, persist)


 def unregister_rerank(model_name: str, raise_error: bool = True):
-    with UD_RERANK_LOCK:
-        model_spec = None
-        for i, f in enumerate(UD_RERANKS):
-            if f.model_name == model_name:
-                model_spec = f
-                break
-        if model_spec:
-            UD_RERANKS.remove(model_spec)
-
-            persist_path = os.path.join(
-                XINFERENCE_MODEL_DIR, "rerank", f"{model_spec.model_name}.json"
-            )
-            if os.path.exists(persist_path):
-                os.remove(persist_path)
-
-            cache_dir = os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-            if os.path.exists(cache_dir):
-                logger.warning(
-                    f"Remove the cache of user-defined model {model_spec.model_name}. "
-                    f"Cache directory: {cache_dir}"
-                )
-                if os.path.islink(cache_dir):
-                    os.remove(cache_dir)
-                else:
-                    logger.warning(
-                        f"Cache directory is not a soft link, please remove it manually."
-                    )
-        else:
-            if raise_error:
-                raise ValueError(f"Model {model_name} not found")
-            else:
-                logger.warning(f"Custom rerank model {model_name} not found")
+    from ..custom import RegistryManager
+
+    registry = RegistryManager.get_registry("rerank")
+    registry.unregister(model_name, raise_error)
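Note: custom rerank registration now delegates to the shared ModelRegistry / RegistryManager machinery added in xinference/model/custom.py (+174 lines, see the file list), replacing the module-local locking, validation, and persistence code removed above. A minimal usage sketch, assuming CustomRerankModelFamilyV2 accepts the fields shown in this diff and in model_spec.json (all values below are illustrative, not from the release):

# Illustrative sketch of the new registry-backed API from the diff above.
from xinference.model.rerank.custom import (
    CustomRerankModelFamilyV2,
    get_user_defined_reranks,
    register_rerank,
    unregister_rerank,
)

spec = CustomRerankModelFamilyV2(
    model_name="my-local-reranker",  # user-chosen name (hypothetical)
    type="normal",
    language=["en"],
    max_tokens=512,
    model_id=None,
    model_revision=None,
    model_uri="/data/models/my-local-reranker",  # hypothetical local path
)

register_rerank(spec, persist=False)  # routed through RegistryManager.get_registry("rerank")
print([s.model_name for s in get_user_defined_reranks()])
unregister_rerank("my-local-reranker")

Name-conflict checks against the builtin model list now happen inside the registry (note the builtin_models list set in RerankModelRegistry.__init__) rather than inline in register_rerank.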
xinference/model/rerank/model_spec.json
CHANGED
@@ -1,90 +1,215 @@
 [
   {
+    "version": 2,
     "model_name": "bge-reranker-large",
     "type": "normal",
-    "language": ["en", "zh"],
+    "language": [
+      "en",
+      "zh"
+    ],
     "max_tokens": 512,
-    "model_id": "BAAI/bge-reranker-large",
-    "model_revision": "27c9168d479987529781de8474dff94d69beca11"
+    "model_src": {
+      "huggingface": {
+        "model_id": "BAAI/bge-reranker-large",
+        "model_revision": "27c9168d479987529781de8474dff94d69beca11"
+      },
+      "modelscope": {
+        "model_id": "Xorbits/bge-reranker-large",
+        "model_revision": "v0.0.1"
+      }
+    }
   },
   {
+    "version": 2,
     "model_name": "bge-reranker-base",
     "type": "normal",
-    "language": ["en", "zh"],
+    "language": [
+      "en",
+      "zh"
+    ],
     "max_tokens": 512,
-    "model_id": "BAAI/bge-reranker-base",
-    "model_revision": "465b4b7ddf2be0a020c8ad6e525b9bb1dbb708ae"
+    "model_src": {
+      "huggingface": {
+        "model_id": "BAAI/bge-reranker-base",
+        "model_revision": "465b4b7ddf2be0a020c8ad6e525b9bb1dbb708ae"
+      },
+      "modelscope": {
+        "model_id": "Xorbits/bge-reranker-base",
+        "model_revision": "v0.0.1"
+      }
+    }
   },
   {
+    "version": 2,
     "model_name": "bce-reranker-base_v1",
     "type": "normal",
-    "language": ["en", "zh"],
+    "language": [
+      "en",
+      "zh"
+    ],
     "max_tokens": 512,
-    "model_id": "maidalun1020/bce-reranker-base_v1",
-    "model_revision": "eaa31a577a0574e87a08959bd229ca14ce1b5496"
+    "model_src": {
+      "huggingface": {
+        "model_id": "maidalun1020/bce-reranker-base_v1",
+        "model_revision": "eaa31a577a0574e87a08959bd229ca14ce1b5496"
+      },
+      "modelscope": {
+        "model_id": "maidalun/bce-reranker-base_v1",
+        "model_revision": "v0.0.1"
+      }
+    }
   },
   {
+    "version": 2,
     "model_name": "bge-reranker-v2-m3",
     "type": "normal",
-    "language": ["en", "zh", "multilingual"],
+    "language": [
+      "en",
+      "zh",
+      "multilingual"
+    ],
     "max_tokens": 8192,
-    "model_id": "BAAI/bge-reranker-v2-m3",
-    "model_revision": "12e974610ba9083ed95f3edf08d7e899581f4de4"
+    "model_src": {
+      "huggingface": {
+        "model_id": "BAAI/bge-reranker-v2-m3",
+        "model_revision": "12e974610ba9083ed95f3edf08d7e899581f4de4"
+      },
+      "modelscope": {
+        "model_id": "AI-ModelScope/bge-reranker-v2-m3"
+      }
+    }
   },
   {
+    "version": 2,
     "model_name": "bge-reranker-v2-gemma",
     "type": "LLM-based",
-    "language": ["en", "zh", "multilingual"],
+    "language": [
+      "en",
+      "zh",
+      "multilingual"
+    ],
     "max_tokens": 8192,
-    "model_id": "BAAI/bge-reranker-v2-gemma",
-    "model_revision": "1787044f8b6fb740a9de4557c3a12377f84d9e17"
+    "model_src": {
+      "huggingface": {
+        "model_id": "BAAI/bge-reranker-v2-gemma",
+        "model_revision": "1787044f8b6fb740a9de4557c3a12377f84d9e17"
+      },
+      "modelscope": {
+        "model_id": "AI-ModelScope/bge-reranker-v2-gemma"
+      }
+    }
   },
   {
+    "version": 2,
     "model_name": "bge-reranker-v2-minicpm-layerwise",
     "type": "LLM-based layerwise",
-    "language": ["en", "zh", "multilingual"],
+    "language": [
+      "en",
+      "zh",
+      "multilingual"
+    ],
     "max_tokens": 2048,
-    "model_id": "BAAI/bge-reranker-v2-minicpm-layerwise",
-    "model_revision": "47b5332b296c4d8cb6ee2c60502cc62a0d708881"
+    "model_src": {
+      "huggingface": {
+        "model_id": "BAAI/bge-reranker-v2-minicpm-layerwise",
+        "model_revision": "47b5332b296c4d8cb6ee2c60502cc62a0d708881"
+      },
+      "modelscope": {
+        "model_id": "mirror013/bge-reranker-v2-minicpm-layerwise"
+      }
+    }
   },
   {
+    "version": 2,
     "model_name": "jina-reranker-v2",
     "type": "normal",
-    "language": ["en", "zh", "multilingual"],
+    "language": [
+      "en",
+      "zh",
+      "multilingual"
+    ],
     "max_tokens": 1024,
-    "model_id": "jinaai/jina-reranker-v2-base-multilingual",
-    "model_revision": "298e48cada4a9318650d7fbd795f63827f884087"
+    "model_src": {
+      "huggingface": {
+        "model_id": "jinaai/jina-reranker-v2-base-multilingual",
+        "model_revision": "298e48cada4a9318650d7fbd795f63827f884087"
+      }
+    }
   },
   {
+    "version": 2,
     "model_name": "minicpm-reranker",
     "type": "normal",
-    "language": ["en", "zh"],
+    "language": [
+      "en",
+      "zh"
+    ],
     "max_tokens": 1024,
-    "model_id": "openbmb/MiniCPM-Reranker",
-    "model_revision": "5d2fd7345b6444c89d4c0fa59c92272888f3f2d0"
+    "model_src": {
+      "huggingface": {
+        "model_id": "openbmb/MiniCPM-Reranker",
+        "model_revision": "5d2fd7345b6444c89d4c0fa59c92272888f3f2d0"
+      },
+      "modelscope": {
+        "model_id": "OpenBMB/MiniCPM-Reranker"
+      }
+    }
   },
   {
+    "version": 2,
     "model_name": "Qwen3-Reranker-0.6B",
     "type": "normal",
-    "language": ["en", "zh"],
+    "language": [
+      "en",
+      "zh"
+    ],
     "max_tokens": 32768,
-    "model_id": "Qwen/Qwen3-Reranker-0.6B",
-    "model_revision": "6e9e69830b95c52b5fd889b7690dda3329508de3"
+    "model_src": {
+      "huggingface": {
+        "model_id": "Qwen/Qwen3-Reranker-0.6B",
+        "model_revision": "6e9e69830b95c52b5fd889b7690dda3329508de3"
+      },
+      "modelscope": {
+        "model_id": "Qwen/Qwen3-Reranker-0.6B"
+      }
+    }
   },
   {
+    "version": 2,
     "model_name": "Qwen3-Reranker-4B",
     "type": "normal",
-    "language": ["en", "zh"],
+    "language": [
+      "en",
+      "zh"
+    ],
     "max_tokens": 32768,
-    "model_id": "Qwen/Qwen3-Reranker-4B",
-    "model_revision": "f16fc5d5d2b9b1d0db8280929242745d79794ef5"
+    "model_src": {
+      "huggingface": {
+        "model_id": "Qwen/Qwen3-Reranker-4B",
+        "model_revision": "f16fc5d5d2b9b1d0db8280929242745d79794ef5"
+      },
+      "modelscope": {
+        "model_id": "Qwen/Qwen3-Reranker-4B"
+      }
+    }
   },
   {
+    "version": 2,
     "model_name": "Qwen3-Reranker-8B",
     "type": "normal",
-    "language": ["en", "zh"],
+    "language": [
+      "en",
+      "zh"
+    ],
     "max_tokens": 32768,
-    "model_id": "Qwen/Qwen3-Reranker-8B",
-    "model_revision": "5fa94080caafeaa45a15d11f969d7978e087a3db"
+    "model_src": {
+      "huggingface": {
+        "model_id": "Qwen/Qwen3-Reranker-8B",
+        "model_revision": "5fa94080caafeaa45a15d11f969d7978e087a3db"
+      },
+      "modelscope": {
+        "model_id": "Qwen/Qwen3-Reranker-8B"
+      }
+    }
   }
 ]
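With the v2 schema, a single entry describes every hub under model_src, which is why the per-hub model_spec_modelscope.json files are deleted in this release (see the file list). Loaders expand each entry into one flat record per hub; the sketch below mirrors flatten_model_src from xinference/model/utils.py (shown later in this diff) on a trimmed-down entry:

# Self-contained sketch; mirrors flatten_model_src from xinference/model/utils.py.
def flatten_model_src(input_json: dict):
    flattened = []
    base_info = {k: v for k, v in input_json.items() if k != "model_src"}
    for model_hub, hub_info in input_json["model_src"].items():
        record = base_info.copy()   # shared fields (name, type, max_tokens, ...)
        record.update(hub_info)     # hub-specific model_id / model_revision
        record["model_hub"] = model_hub
        flattened.append(record)
    return flattened

entry = {
    "version": 2,
    "model_name": "bge-reranker-large",
    "max_tokens": 512,
    "model_src": {
        "huggingface": {"model_id": "BAAI/bge-reranker-large"},
        "modelscope": {"model_id": "Xorbits/bge-reranker-large"},
    },
}

for record in flatten_model_src(entry):
    print(record["model_hub"], record["model_id"])
# huggingface BAAI/bge-reranker-large
# modelscope Xorbits/bge-reranker-large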
xinference/model/rerank/utils.py
CHANGED
@@ -14,10 +14,10 @@
 from typing import TYPE_CHECKING, Any

 if TYPE_CHECKING:
-    from .core import RerankModelSpec
+    from .core import RerankModelFamilyV2


-def get_model_version(rerank_model: "RerankModelSpec") -> str:
+def get_model_version(rerank_model: "RerankModelFamilyV2") -> str:
     return rerank_model.model_name

xinference/model/utils.py
CHANGED
@@ -18,10 +18,22 @@ import logging
 import os
 import random
 import threading
+from abc import ABC, abstractmethod
 from copy import deepcopy
 from json import JSONDecodeError
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Set,
+    Tuple,
+    Type,
+    Union,
+)

 import huggingface_hub
 import numpy as np
@@ -36,6 +48,10 @@ from ..constants import (
 from ..device_utils import get_available_device, is_device_available
 from .core import CacheableModelSpec

+if TYPE_CHECKING:
+    from .embedding.core import LlamaCppEmbeddingSpecV1
+    from .llm.llm_family import LlamaCppLLMSpecV2
+
 logger = logging.getLogger(__name__)
 IS_NEW_HUGGINGFACE_HUB: bool = huggingface_hub.__version__ >= "0.23.0"

@@ -262,59 +278,6 @@ def cache_from_uri(model_spec: CacheableModelSpec) -> str:
     raise ValueError(f"Unsupported URL scheme: {src_scheme}")


-def cache(model_spec: CacheableModelSpec, model_description_type: type):
-    if (
-        hasattr(model_spec, "model_uri")
-        and getattr(model_spec, "model_uri", None) is not None
-    ):
-        logger.info(f"Model caching from URI: {model_spec.model_uri}")
-        return cache_from_uri(model_spec=model_spec)
-
-    cache_dir = os.path.realpath(
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
-    if not os.path.exists(cache_dir):
-        os.makedirs(cache_dir, exist_ok=True)
-    meta_path = os.path.join(cache_dir, "__valid_download")
-    if valid_model_revision(meta_path, model_spec.model_revision, model_spec.model_hub):
-        return cache_dir
-
-    from_modelscope: bool = model_spec.model_hub == "modelscope"
-    if from_modelscope:
-        from modelscope.hub.snapshot_download import snapshot_download as ms_download
-
-        download_dir = retry_download(
-            ms_download,
-            model_spec.model_name,
-            None,
-            model_spec.model_id,
-            revision=model_spec.model_revision,
-        )
-        create_symlink(download_dir, cache_dir)
-    else:
-        from huggingface_hub import snapshot_download as hf_download
-
-        use_symlinks = {}
-        if not IS_NEW_HUGGINGFACE_HUB:
-            use_symlinks = {"local_dir_use_symlinks": True, "local_dir": cache_dir}
-        download_dir = retry_download(
-            hf_download,
-            model_spec.model_name,
-            None,
-            model_spec.model_id,
-            revision=model_spec.model_revision,
-            **use_symlinks,
-        )
-        if IS_NEW_HUGGINGFACE_HUB:
-            create_symlink(download_dir, cache_dir)
-    with open(meta_path, "w") as f:
-        import json
-
-        desc = model_description_type(None, None, model_spec)
-        json.dump(desc.to_dict(), f)
-    return cache_dir
-
-
 def select_device(device):
     try:
         import torch  # noqa: F401
@@ -497,3 +460,101 @@ def get_engine_params_by_name(
             f"Cannot support model_engine for {model_type}, "
             f"only available for LLM, embedding"
         )
+
+
+def generate_model_file_names_with_quantization_parts(
+    model_spec: Union["LlamaCppLLMSpecV2", "LlamaCppEmbeddingSpecV1"],
+    multimodal_projector: Optional[str] = None,
+) -> Tuple[List[str], str, bool]:
+    file_names = []
+    final_file_name = model_spec.model_file_name_template.format(
+        quantization=model_spec.quantization
+    )
+    need_merge = False
+
+    if (
+        model_spec.quantization_parts is None
+        or model_spec.quantization not in model_spec.quantization_parts
+    ):
+        file_names.append(final_file_name)
+    elif (
+        model_spec.quantization is not None
+        and model_spec.quantization in model_spec.quantization_parts
+    ):
+        parts = model_spec.quantization_parts[model_spec.quantization]
+        need_merge = True
+
+        logger.info(
+            f"Model {model_spec.model_id} {model_spec.model_format} {model_spec.quantization} has {len(parts)} parts."
+        )
+
+        if model_spec.model_file_name_split_template is None:
+            raise ValueError(
+                f"No model_file_name_split_template for model spec {model_spec.model_id}"
+            )
+
+        for part in parts:
+            file_name = model_spec.model_file_name_split_template.format(
+                quantization=model_spec.quantization, part=part
+            )
+            file_names.append(file_name)
+    if multimodal_projector:
+        file_names.append(multimodal_projector)
+
+    return file_names, final_file_name, need_merge
+
+
+def merge_cached_files(
+    cache_dir: str, input_file_names: List[str], output_file_name: str
+):
+    # now llama.cpp can find the gguf parts automatically
+    # we only need to provide the first part
+    # thus we create the symlink to the first part
+    symlink_local_file(
+        os.path.join(cache_dir, input_file_names[0]), cache_dir, output_file_name
+    )
+
+    logger.info(f"Merge complete.")
+
+
+def flatten_model_src(input_json: dict):
+    flattened = []
+    base_info = {key: value for key, value in input_json.items() if key != "model_src"}
+    for model_hub, hub_info in input_json["model_src"].items():
+        record = base_info.copy()
+        hub_info.pop("model_hub", None)
+        record.update(hub_info)
+        record["model_hub"] = model_hub
+        flattened.append(record)
+    return flattened
+
+
+def flatten_quantizations(input_json: dict):
+    flattened = []
+
+    base_info = {key: value for key, value in input_json.items() if key != "model_src"}
+
+    for model_hub, hub_info in input_json["model_src"].items():
+        quantizations = hub_info["quantizations"]
+
+        for quant in quantizations:
+            record = base_info.copy()
+            record["model_hub"] = model_hub
+            record["quantization"] = quant
+
+            for key, value in hub_info.items():
+                if key != "quantizations":
+                    record[key] = value
+
+            flattened.append(record)
+    return flattened
+
+
+class ModelInstanceInfoMixin(ABC):
+    @abstractmethod
+    def to_description(self):
+        """"""
+
+    @abstractmethod
+    def to_version_info(self):
+        """"""
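A sketch of how the relocated GGUF helpers above behave for a split-file quantization. A types.SimpleNamespace stands in for the real LlamaCppLLMSpecV2 / LlamaCppEmbeddingSpecV1 spec objects; only the attributes the helper reads are set, and all values are illustrative:

from types import SimpleNamespace

from xinference.model.utils import generate_model_file_names_with_quantization_parts

spec = SimpleNamespace(
    model_id="example/gguf-model",  # illustrative id
    model_format="ggufv2",
    quantization="Q4_K_M",
    quantization_parts={"Q4_K_M": ["00001-of-00002", "00002-of-00002"]},
    model_file_name_template="model-{quantization}.gguf",
    model_file_name_split_template="model-{quantization}-{part}.gguf",
)

file_names, final_name, need_merge = generate_model_file_names_with_quantization_parts(spec)
# file_names -> ["model-Q4_K_M-00001-of-00002.gguf", "model-Q4_K_M-00002-of-00002.gguf"]
# final_name -> "model-Q4_K_M.gguf"
# need_merge -> True

Despite its name, merge_cached_files() no longer concatenates the parts: per its comments, it symlinks the final file name to the first part, since llama.cpp now discovers the remaining GGUF shards on its own.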
xinference/model/video/__init__.py
CHANGED
@@ -15,40 +15,36 @@
 import codecs
 import json
 import os
-from itertools import chain

+from ..utils import flatten_model_src
 from .core import (
     BUILTIN_VIDEO_MODELS,
-    MODEL_NAME_TO_REVISION,
-    MODELSCOPE_VIDEO_MODELS,
     VIDEO_MODEL_DESCRIPTIONS,
-    VideoModelFamilyV1,
+    VideoModelFamilyV2,
     generate_video_description,
-    get_cache_status,
     get_video_model_descriptions,
 )


 def _install():
     load_model_family_from_json("model_spec.json", BUILTIN_VIDEO_MODELS)
-    load_model_family_from_json("model_spec_modelscope.json", MODELSCOPE_VIDEO_MODELS)

     # register model description
-    for model_name, model_spec in chain(
-        BUILTIN_VIDEO_MODELS.items(), MODELSCOPE_VIDEO_MODELS.items()
-    ):
+    for model_name, model_specs in BUILTIN_VIDEO_MODELS.items():
+        model_spec = [x for x in model_specs if x.model_hub == "huggingface"][0]
         VIDEO_MODEL_DESCRIPTIONS.update(generate_video_description(model_spec))


 def load_model_family_from_json(json_filename, target_families):
     json_path = os.path.join(os.path.dirname(__file__), json_filename)
-
-
-
-
-
-
-
-
+    flattened_model_specs = []
+    for spec in json.load(codecs.open(json_path, "r", encoding="utf-8")):
+        flattened_model_specs.extend(flatten_model_src(spec))
+
+    for spec in flattened_model_specs:
+        if spec["model_name"] not in target_families:
+            target_families[spec["model_name"]] = [VideoModelFamilyV2(**spec)]
+        else:
+            target_families[spec["model_name"]].append(VideoModelFamilyV2(**spec))

     del json_path
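After _install(), BUILTIN_VIDEO_MODELS maps each model name to a list of per-hub VideoModelFamilyV2 entries, which is why the description loop above picks the huggingface variant out of the list. A toy sketch of the grouping, with plain dicts standing in for VideoModelFamilyV2:

# Toy illustration of the grouping performed by load_model_family_from_json.
flattened = [
    {"model_name": "example-video-model", "model_hub": "huggingface"},  # illustrative
    {"model_name": "example-video-model", "model_hub": "modelscope"},
]

families: dict = {}
for spec in flattened:
    families.setdefault(spec["model_name"], []).append(spec)

hf = [x for x in families["example-video-model"] if x["model_hub"] == "huggingface"][0]
print(hf["model_hub"])  # -> huggingface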
|