xinference 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +132 -0
- xinference/api/restful_api.py +282 -78
- xinference/client/handlers.py +3 -0
- xinference/client/restful/restful_client.py +108 -75
- xinference/constants.py +14 -4
- xinference/core/cache_tracker.py +102 -0
- xinference/core/chat_interface.py +10 -4
- xinference/core/event.py +56 -0
- xinference/core/model.py +44 -0
- xinference/core/resource.py +19 -12
- xinference/core/status_guard.py +4 -0
- xinference/core/supervisor.py +278 -87
- xinference/core/utils.py +68 -3
- xinference/core/worker.py +98 -8
- xinference/deploy/cmdline.py +6 -3
- xinference/deploy/local.py +2 -2
- xinference/deploy/supervisor.py +2 -2
- xinference/model/audio/__init__.py +27 -0
- xinference/model/audio/core.py +161 -0
- xinference/model/audio/model_spec.json +79 -0
- xinference/model/audio/utils.py +18 -0
- xinference/model/audio/whisper.py +132 -0
- xinference/model/core.py +18 -13
- xinference/model/embedding/__init__.py +27 -2
- xinference/model/embedding/core.py +43 -3
- xinference/model/embedding/model_spec.json +24 -0
- xinference/model/embedding/model_spec_modelscope.json +24 -0
- xinference/model/embedding/utils.py +18 -0
- xinference/model/image/__init__.py +12 -1
- xinference/model/image/core.py +63 -9
- xinference/model/image/utils.py +26 -0
- xinference/model/llm/__init__.py +20 -1
- xinference/model/llm/core.py +43 -2
- xinference/model/llm/ggml/chatglm.py +15 -6
- xinference/model/llm/llm_family.json +197 -6
- xinference/model/llm/llm_family.py +9 -7
- xinference/model/llm/llm_family_modelscope.json +189 -4
- xinference/model/llm/pytorch/chatglm.py +3 -3
- xinference/model/llm/pytorch/core.py +4 -2
- xinference/model/{multimodal → llm/pytorch}/qwen_vl.py +10 -8
- xinference/model/llm/pytorch/utils.py +21 -9
- xinference/model/llm/pytorch/yi_vl.py +246 -0
- xinference/model/llm/utils.py +57 -4
- xinference/model/llm/vllm/core.py +5 -4
- xinference/model/rerank/__init__.py +25 -2
- xinference/model/rerank/core.py +51 -9
- xinference/model/rerank/model_spec.json +6 -0
- xinference/model/rerank/model_spec_modelscope.json +7 -0
- xinference/{api/oauth2/common.py → model/rerank/utils.py} +6 -2
- xinference/model/utils.py +5 -3
- xinference/thirdparty/__init__.py +0 -0
- xinference/thirdparty/llava/__init__.py +1 -0
- xinference/thirdparty/llava/conversation.py +205 -0
- xinference/thirdparty/llava/mm_utils.py +122 -0
- xinference/thirdparty/llava/model/__init__.py +1 -0
- xinference/thirdparty/llava/model/clip_encoder/__init__.py +0 -0
- xinference/thirdparty/llava/model/clip_encoder/builder.py +11 -0
- xinference/thirdparty/llava/model/clip_encoder/clip_encoder.py +86 -0
- xinference/thirdparty/llava/model/constants.py +6 -0
- xinference/thirdparty/llava/model/llava_arch.py +385 -0
- xinference/thirdparty/llava/model/llava_llama.py +163 -0
- xinference/thirdparty/llava/model/multimodal_projector/__init__.py +0 -0
- xinference/thirdparty/llava/model/multimodal_projector/builder.py +64 -0
- xinference/types.py +1 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.15822aeb.js +3 -0
- xinference/web/ui/build/static/js/main.15822aeb.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/139e5e4adf436923107d2b02994c7ff6dba2aac1989e9b6638984f0dfe782c4a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/64accc515dc6cd584a2873796cd7da6f93de57f7e465eb5423cca9a2f3fe3eff.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/65ca3ba225b8c8dac907210545b51f2fcdb2591f0feeb7195f1c037f2bc956a0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b80db1012318b97c329c4e3e72454f7512fb107e57c444b437dbe4ba1a3faa5a.json +1 -0
- {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/METADATA +33 -23
- {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/RECORD +81 -64
- xinference/api/oauth2/core.py +0 -93
- xinference/model/multimodal/__init__.py +0 -52
- xinference/model/multimodal/core.py +0 -467
- xinference/model/multimodal/model_spec.json +0 -43
- xinference/model/multimodal/model_spec_modelscope.json +0 -45
- xinference/web/ui/build/static/js/main.b83095c2.js +0 -3
- xinference/web/ui/build/static/js/main.b83095c2.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/101923c539819f26ad11fbcbd6f6e56436b285efbb090dcc7dd648c6e924c4a8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4942da6bc03bf7373af068e22f916341aabc5b5df855d73c1d348c696724ce37.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/52a6136cb2dbbf9c51d461724d9b283ebe74a73fb19d5df7ba8e13c42bd7174d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/71493aadd34d568fbe605cacaba220aa69bd09273251ee4ba27930f8d01fccd8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8b071db2a5a9ef68dc14d5f606540bd23d9785e365a11997c510656764d2dccf.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a4d72d3b806ba061919115f0c513738726872e3c79cf258f007519d3f91d1a16.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f037ffef5992af0892d6d991053c1dace364cd39a3f11f1a41f92776e8a59459.json +0 -1
- /xinference/web/ui/build/static/js/{main.b83095c2.js.LICENSE.txt → main.15822aeb.js.LICENSE.txt} +0 -0
- {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/LICENSE +0 -0
- {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/WHEEL +0 -0
- {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/entry_points.txt +0 -0
- {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/top_level.txt +0 -0
xinference/model/audio/whisper.py
ADDED
@@ -0,0 +1,132 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import TYPE_CHECKING, Dict, Optional
+
+if TYPE_CHECKING:
+    from .core import AudioModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class WhisperModel:
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "AudioModelFamilyV1",
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._device = device
+        self._model = None
+        self._kwargs = kwargs
+
+    def load(self):
+        import torch
+        from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+
+        device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+        model = AutoModelForSpeechSeq2Seq.from_pretrained(
+            self._model_path,
+            torch_dtype=torch_dtype,
+            low_cpu_mem_usage=True,
+            use_safetensors=True,
+        )
+        model.to(device)
+
+        processor = AutoProcessor.from_pretrained(self._model_path)
+
+        self._model = pipeline(
+            "automatic-speech-recognition",
+            model=model,
+            tokenizer=processor.tokenizer,
+            feature_extractor=processor.feature_extractor,
+            max_new_tokens=128,
+            chunk_length_s=30,
+            batch_size=16,
+            return_timestamps=False,
+            torch_dtype=torch_dtype,
+            device=device,
+        )
+
+    def _call_model(
+        self,
+        audio: bytes,
+        generate_kwargs: Dict,
+        response_format: str,
+    ):
+        if response_format == "json":
+            logger.debug("Call whisper model with generate_kwargs: %s", generate_kwargs)
+            assert callable(self._model)
+            result = self._model(audio, generate_kwargs=generate_kwargs)
+            return {"text": result["text"]}
+        else:
+            raise ValueError(f"Unsupported response format: {response_format}")
+
+    def transcriptions(
+        self,
+        audio: bytes,
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: str = "json",
+        temperature: float = 0,
+    ):
+        if temperature != 0:
+            logger.warning(
+                "Temperature for whisper transcriptions will be ignored: %s.",
+                temperature,
+            )
+        if prompt is not None:
+            logger.warning(
+                "Prompt for whisper transcriptions will be ignored: %s", prompt
+            )
+        return self._call_model(
+            audio=audio,
+            generate_kwargs={"language": language, "task": "transcribe"}
+            if language is not None
+            else {"task": "transcribe"},
+            response_format=response_format,
+        )
+
+    def translations(
+        self,
+        audio: bytes,
+        prompt: Optional[str] = None,
+        response_format: str = "json",
+        temperature: float = 0,
+    ):
+        if not self._model_spec.multilingual:
+            raise RuntimeError(
+                f"Model {self._model_spec.model_name} is not suitable for translations."
+            )
+        if temperature != 0:
+            logger.warning(
+                "Temperature for whisper transcriptions will be ignored: %s.",
+                temperature,
+            )
+        if prompt is not None:
+            logger.warning(
+                "Prompt for whisper transcriptions will be ignored: %s", prompt
+            )
+        return self._call_model(
+            audio=audio,
+            generate_kwargs={"task": "translate"},
+            response_format=response_format,
+        )
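The new audio entry point is easiest to see in isolation. Below is a minimal, hypothetical sketch of driving WhisperModel directly; the checkpoint path and audio file are placeholders, the SimpleNamespace stands in for an AudioModelFamilyV1 spec, and in a real deployment the class is constructed for you when an audio model is launched through xinference.

# Hypothetical standalone use of the new WhisperModel; paths are placeholders.
from types import SimpleNamespace

from xinference.model.audio.whisper import WhisperModel

spec = SimpleNamespace(model_name="whisper-large-v3", multilingual=True)
model = WhisperModel(
    model_uid="whisper-demo",
    model_path="/path/to/whisper-large-v3",  # local HF-format checkpoint
    model_spec=spec,
)
model.load()  # builds the transformers ASR pipeline

with open("sample.wav", "rb") as f:
    audio = f.read()

# Both endpoints return {"text": ...} for the default "json" response format.
print(model.transcriptions(audio, language="en")["text"])
print(model.translations(audio)["text"])  # rejected unless spec.multilingual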
xinference/model/core.py
CHANGED
@@ -12,14 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from abc import ABC
+from abc import ABC, abstractmethod
 from typing import Any, List, Optional, Tuple


 class ModelDescription(ABC):
-    def __init__(self, address: Optional[str], devices: Optional[List[str]]):
+    def __init__(
+        self,
+        address: Optional[str],
+        devices: Optional[List[str]],
+        model_path: Optional[str] = None,
+    ):
         self.address = address
         self.devices = devices
+        self._model_path = model_path

     def to_dict(self):
         """
@@ -28,6 +34,12 @@ class ModelDescription(ABC):
         """
         raise NotImplementedError

+    @abstractmethod
+    def to_version_info(self):
+        """
+        Return a dict to describe version info about a model instance
+        """
+

 def create_model_instance(
     subpool_addr: str,
@@ -41,10 +53,10 @@ def create_model_instance(
     is_local_deployment: bool = False,
     **kwargs,
 ) -> Tuple[Any, ModelDescription]:
+    from .audio.core import create_audio_model_instance
     from .embedding.core import create_embedding_model_instance
     from .image.core import create_image_model_instance
     from .llm.core import create_llm_model_instance
-    from .multimodal.core import create_multimodal_model_instance
     from .rerank.core import create_rerank_model_instance

     if model_type == "LLM":
@@ -75,17 +87,10 @@ def create_model_instance(
         return create_rerank_model_instance(
             subpool_addr, devices, model_uid, model_name, **kwargs
         )
-    elif model_type == "multimodal":
+    elif model_type == "audio":
         kwargs.pop("trust_remote_code", None)
-        return create_multimodal_model_instance(
-            subpool_addr,
-            devices,
-            model_uid,
-            model_name,
-            model_format,
-            model_size_in_billions,
-            quantization,
-            **kwargs,
+        return create_audio_model_instance(
+            subpool_addr, devices, model_uid, model_name, **kwargs
         )
     else:
         raise ValueError(f"Unsupported model type: {model_type}.")
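ModelDescription now carries an optional model_path and an abstract to_version_info(); every concrete description must report a version string, file location, and cache status. A toy subclass (invented here, not part of the package) shows the contract:

from xinference.model.core import ModelDescription


class DummyDescription(ModelDescription):
    """Illustrative subclass only; not shipped with xinference."""

    def to_dict(self):
        return {"address": self.address, "devices": self.devices}

    def to_version_info(self):
        # The new hook: report where the model lives and whether it is cached.
        return {
            "model_version": "dummy--v1",
            "model_file_location": self._model_path or "<not downloaded>",
            "cache_status": self._model_path is not None,
        }


desc = DummyDescription(address=None, devices=None, model_path="/tmp/dummy")
print(desc.to_version_info())  # cache_status True, location /tmp/dummy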
xinference/model/embedding/__init__.py
CHANGED
@@ -16,8 +16,20 @@ import codecs
 import json
 import os

-from .core import MODEL_NAME_TO_REVISION, EmbeddingModelSpec, get_cache_status
-from .custom import CustomEmbeddingModelSpec, register_embedding, unregister_embedding
+from .core import (
+    EMBEDDING_MODEL_DESCRIPTIONS,
+    MODEL_NAME_TO_REVISION,
+    EmbeddingModelSpec,
+    generate_embedding_description,
+    get_cache_status,
+    get_embedding_model_descriptions,
+)
+from .custom import (
+    CustomEmbeddingModelSpec,
+    get_user_defined_embeddings,
+    register_embedding,
+    unregister_embedding,
+)

 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
 _model_spec_modelscope_json = os.path.join(
@@ -29,6 +41,7 @@ BUILTIN_EMBEDDING_MODELS = dict(
 )
 for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items():
     MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
+
 MODELSCOPE_EMBEDDING_MODELS = dict(
     (spec["model_name"], EmbeddingModelSpec(**spec))
     for spec in json.load(
@@ -38,6 +51,14 @@ MODELSCOPE_EMBEDDING_MODELS = dict(
 for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items():
     MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)

+# register model description after recording model revision
+for model_spec_info in [BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS]:
+    for model_name, model_spec in model_spec_info.items():
+        if model_spec.model_name not in EMBEDDING_MODEL_DESCRIPTIONS:
+            EMBEDDING_MODEL_DESCRIPTIONS.update(
+                generate_embedding_description(model_spec)
+            )
+
 from ...constants import XINFERENCE_MODEL_DIR

 user_defined_llm_dir = os.path.join(XINFERENCE_MODEL_DIR, "embedding")
@@ -47,5 +68,9 @@ if os.path.isdir(user_defined_llm_dir):
            user_defined_llm_family = CustomEmbeddingModelSpec.parse_obj(json.load(fd))
            register_embedding(user_defined_llm_family, persist=False)

+# register model description
+for ud_embedding in get_user_defined_embeddings():
+    EMBEDDING_MODEL_DESCRIPTIONS.update(generate_embedding_description(ud_embedding))
+
 del _model_spec_json
 del _model_spec_modelscope_json
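With these loops in place, importing xinference.model.embedding registers a version-info entry for every built-in, ModelScope, and user-defined spec. A quick way to inspect the result (a sketch, assuming the 0.8.3 wheel is installed):

from xinference.model.embedding import get_embedding_model_descriptions

descriptions = get_embedding_model_descriptions()
# Keys are model names; values are lists of version-info dicts.
for info in descriptions.get("bge-m3", []):
    print(info["model_version"], info["cache_status"])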
xinference/model/embedding/core.py
CHANGED
@@ -24,7 +24,7 @@ from pydantic import BaseModel
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import Embedding, EmbeddingData, EmbeddingUsage
 from ..core import ModelDescription
-from ..utils import is_model_cached, valid_model_revision
+from ..utils import get_cache_dir, is_model_cached, valid_model_revision

 logger = logging.getLogger(__name__)

@@ -34,6 +34,15 @@ SUPPORTED_SCHEMES = ["s3"]
 MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)


+EMBEDDING_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
+
+
+def get_embedding_model_descriptions():
+    import copy
+
+    return copy.deepcopy(EMBEDDING_MODEL_DESCRIPTIONS)
+
+
 class EmbeddingModelSpec(BaseModel):
     model_name: str
     dimensions: int
@@ -50,8 +59,9 @@ class EmbeddingModelDescription(ModelDescription):
         address: Optional[str],
         devices: Optional[List[str]],
         model_spec: EmbeddingModelSpec,
+        model_path: Optional[str] = None,
     ):
-        super().__init__(address, devices)
+        super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec

     def to_dict(self):
@@ -66,6 +76,34 @@ class EmbeddingModelDescription(ModelDescription):
             "model_revision": self._model_spec.model_revision,
         }

+    def to_version_info(self):
+        from .utils import get_model_version
+
+        if self._model_path is None:
+            is_cached = get_cache_status(self._model_spec)
+            file_location = get_cache_dir(self._model_spec)
+        else:
+            is_cached = True
+            file_location = self._model_path
+
+        return {
+            "model_version": get_model_version(self._model_spec),
+            "model_file_location": file_location,
+            "cache_status": is_cached,
+            "dimensions": self._model_spec.dimensions,
+            "max_tokens": self._model_spec.max_tokens,
+        }
+
+
+def generate_embedding_description(
+    model_spec: EmbeddingModelSpec,
+) -> Dict[str, List[Dict]]:
+    res = defaultdict(list)
+    res[model_spec.model_name].append(
+        EmbeddingModelDescription(None, None, model_spec).to_version_info()
+    )
+    return res
+

 def cache_from_uri(
     model_spec: EmbeddingModelSpec,
@@ -421,5 +459,7 @@
     model_spec = match_embedding(model_name)
     model_path = cache(model_spec)
     model = EmbeddingModel(model_uid, model_path, **kwargs)
-    model_description = EmbeddingModelDescription(subpool_addr, devices, model_spec)
+    model_description = EmbeddingModelDescription(
+        subpool_addr, devices, model_spec, model_path=model_path
+    )
     return model, model_description
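The model_path branch in to_version_info is what distinguishes a launched model from a merely registered one: with an explicit path the files are by definition present, otherwise the shared cache directory is probed. A sketch using the bge-m3 spec added later in this diff (the path below is hypothetical):

from xinference.model.embedding import BUILTIN_EMBEDDING_MODELS
from xinference.model.embedding.core import EmbeddingModelDescription

spec = BUILTIN_EMBEDDING_MODELS["bge-m3"]

# Passing model_path marks the model as cached at that location.
desc = EmbeddingModelDescription(None, None, spec, model_path="/models/bge-m3")
info = desc.to_version_info()
print(info["model_version"])        # bge-m3--8192--1024
print(info["cache_status"])         # True
print(info["model_file_location"])  # /models/bge-m3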
xinference/model/embedding/model_spec.json
CHANGED
@@ -143,6 +143,14 @@
     "model_id": "jinaai/jina-embeddings-v2-base-en",
     "model_revision": "7302ac470bed880590f9344bfeee32ff8722d0e5"
   },
+  {
+    "model_name": "jina-embeddings-v2-base-zh",
+    "dimensions": 768,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "jinaai/jina-embeddings-v2-base-zh",
+    "model_revision": "67974cbef5cf50562eadd745de8afc661c52c96f"
+  },
   {
     "model_name": "text2vec-large-chinese",
     "dimensions": 1024,
@@ -182,5 +190,21 @@
     "language": ["zh"],
     "model_id": "shibing624/text2vec-base-multilingual",
     "model_revision": "f241877385fa56ebcc75f04d1850e1579cfa661d"
+  },
+  {
+    "model_name": "bge-m3",
+    "dimensions": 1024,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "BAAI/bge-m3",
+    "model_revision": "73a15ad29ab604f3bdc31601849a9defe86d563f"
+  },
+  {
+    "model_name": "bce-embedding-base_v1",
+    "dimensions": 768,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "maidalun1020/bce-embedding-base_v1",
+    "model_revision": "236d9024fc1b4046f03848723f934521a66a9323"
   }
 ]
xinference/model/embedding/model_spec_modelscope.json
CHANGED
@@ -161,6 +161,14 @@
     "model_revision": "v0.0.1",
     "model_hub": "modelscope"
   },
+  {
+    "model_name": "jina-embeddings-v2-base-zh",
+    "dimensions": 768,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "jinaai/jina-embeddings-v2-base-zh",
+    "model_hub": "modelscope"
+  },
   {
     "model_name": "text2vec-large-chinese",
     "dimensions": 1024,
@@ -184,5 +192,21 @@
     "language": ["zh"],
     "model_id": "mwei23/text2vec-base-chinese-paraphrase",
     "model_hub": "modelscope"
+  },
+  {
+    "model_name": "bge-m3",
+    "dimensions": 1024,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "Xorbits/bge-m3",
+    "model_hub": "modelscope"
+  },
+  {
+    "model_name": "bce-embedding-base_v1",
+    "dimensions": 768,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "maidalun/bce-embedding-base_v1",
+    "model_hub": "modelscope"
   }
 ]
xinference/model/embedding/utils.py
ADDED
@@ -0,0 +1,18 @@
+# Copyright 2022-2024 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .core import EmbeddingModelSpec
+
+
+def get_model_version(embedding_model: EmbeddingModelSpec) -> str:
+    return f"{embedding_model.model_name}--{embedding_model.max_tokens}--{embedding_model.dimensions}"
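So an embedding model's version string is just name, context length, and dimensionality joined by double dashes. A sketch building it from a spec mirroring the jina-embeddings-v2-base-zh entry above (the revision value is a placeholder):

from xinference.model.embedding.core import EmbeddingModelSpec
from xinference.model.embedding.utils import get_model_version

spec = EmbeddingModelSpec(
    model_name="jina-embeddings-v2-base-zh",
    dimensions=768,
    max_tokens=8192,
    language=["zh", "en"],
    model_id="jinaai/jina-embeddings-v2-base-zh",
    model_revision="<placeholder>",
)
print(get_model_version(spec))  # jina-embeddings-v2-base-zh--8192--768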
xinference/model/image/__init__.py
CHANGED
@@ -16,11 +16,22 @@ import codecs
 import json
 import os

-from .core import ImageModelFamilyV1, get_cache_status
+from .core import (
+    IMAGE_MODEL_DESCRIPTIONS,
+    ImageModelFamilyV1,
+    generate_image_description,
+    get_cache_status,
+    get_image_model_descriptions,
+)

 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
 BUILTIN_IMAGE_MODELS = dict(
     (spec["model_name"], ImageModelFamilyV1(**spec))
     for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
 )
+
+# register model description
+for model_name, model_spec in BUILTIN_IMAGE_MODELS.items():
+    IMAGE_MODEL_DESCRIPTIONS.update(generate_image_description(model_spec))
+
 del _model_spec_json
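As with embeddings, the image registry is filled at import time from model_spec.json, and get_image_model_descriptions() returns a deep copy, so callers can inspect it freely:

from xinference.model.image import get_image_model_descriptions

# Populated when the package is imported; mutation-safe deep copy.
print(sorted(get_image_model_descriptions().keys()))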
xinference/model/image/core.py
CHANGED
@@ -14,7 +14,8 @@
 import collections.abc
 import logging
 import os
-from typing import List, Optional, Tuple
+from collections import defaultdict
+from typing import Dict, List, Optional, Tuple

 from pydantic import BaseModel

@@ -27,6 +28,14 @@ MAX_ATTEMPTS = 3

 logger = logging.getLogger(__name__)

+IMAGE_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
+
+
+def get_image_model_descriptions():
+    import copy
+
+    return copy.deepcopy(IMAGE_MODEL_DESCRIPTIONS)
+

 class ImageModelFamilyV1(BaseModel):
     model_family: str
@@ -42,8 +51,9 @@ class ImageModelDescription(ModelDescription):
         address: Optional[str],
         devices: Optional[List[str]],
         model_spec: ImageModelFamilyV1,
+        model_path: Optional[str] = None,
     ):
-        super().__init__(address, devices)
+        super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec

     def to_dict(self):
@@ -57,6 +67,48 @@ class ImageModelDescription(ModelDescription):
             "controlnet": self._model_spec.controlnet,
         }

+    def to_version_info(self):
+        from .utils import get_model_version
+
+        if self._model_path is None:
+            is_cached = get_cache_status(self._model_spec)
+            file_location = get_cache_dir(self._model_spec)
+        else:
+            is_cached = True
+            file_location = self._model_path
+
+        if self._model_spec.controlnet is None:
+            return [
+                {
+                    "model_version": get_model_version(self._model_spec, None),
+                    "model_file_location": file_location,
+                    "cache_status": is_cached,
+                    "controlnet": "zoe-depth",
+                }
+            ]
+        else:
+            res = []
+            for cn in self._model_spec.controlnet:
+                res.append(
+                    {
+                        "model_version": get_model_version(self._model_spec, cn),
+                        "model_file_location": file_location,
+                        "cache_status": is_cached,
+                        "controlnet": cn.model_name,
+                    }
+                )
+            return res
+
+
+def generate_image_description(
+    image_model: ImageModelFamilyV1,
+) -> Dict[str, List[Dict]]:
+    res = defaultdict(list)
+    res[image_model.model_name].extend(
+        ImageModelDescription(None, None, image_model).to_version_info()
+    )
+    return res
+

 def match_diffusion(model_name: str) -> ImageModelFamilyV1:
     from . import BUILTIN_IMAGE_MODELS
@@ -74,9 +126,7 @@ def cache(model_spec: ImageModelFamilyV1):
     # TODO: cache from uri
     import huggingface_hub

-    cache_dir = os.path.realpath(
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
+    cache_dir = get_cache_dir(model_spec)
     if not os.path.exists(cache_dir):
         os.makedirs(cache_dir, exist_ok=True)

@@ -113,12 +163,14 @@ def cache(model_spec: ImageModelFamilyV1):
     return cache_dir


+def get_cache_dir(model_spec: ImageModelFamilyV1):
+    return os.path.realpath(os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name))
+
+
 def get_cache_status(
     model_spec: ImageModelFamilyV1,
 ) -> bool:
-    cache_dir = os.path.realpath(
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
+    cache_dir = get_cache_dir(model_spec)
     meta_path = os.path.join(cache_dir, "__valid_download")
     return valid_model_revision(meta_path, model_spec.model_revision)

@@ -157,5 +209,7 @@ def create_image_model_instance(
     kwargs["controlnet"] = controlnet_model_paths
     model_path = cache(model_spec)
     model = DiffusionModel(model_uid, model_path, **kwargs)
-    model_description = ImageModelDescription(subpool_addr, devices, model_spec)
+    model_description = ImageModelDescription(
+        subpool_addr, devices, model_spec, model_path=model_path
+    )
     return model, model_description
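Note the fan-out in ImageModelDescription.to_version_info(): a family with controlnets yields one version entry per controlnet, while a family without any gets a single entry (whose hard-coded "zoe-depth" value looks like a placeholder). A small sketch of enumerating them:

from xinference.model.image import BUILTIN_IMAGE_MODELS
from xinference.model.image.core import generate_image_description

for name, spec in BUILTIN_IMAGE_MODELS.items():
    for info in generate_image_description(spec)[name]:
        # model_version is "<name>" or "<name>--<controlnet>".
        print(name, info["model_version"], info["controlnet"])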
xinference/model/image/utils.py
ADDED
@@ -0,0 +1,26 @@
+# Copyright 2022-2024 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Optional
+
+from .core import ImageModelFamilyV1
+
+
+def get_model_version(
+    image_model: ImageModelFamilyV1, controlnet: Optional[ImageModelFamilyV1]
+) -> str:
+    return (
+        image_model.model_name
+        if controlnet is None
+        else f"{image_model.model_name}--{controlnet.model_name}"
+    )
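The image version string follows the same double-dash scheme; a quick duck-typed check (SimpleNamespace stands in for ImageModelFamilyV1, and "canny" is an invented controlnet name):

from types import SimpleNamespace

from xinference.model.image.utils import get_model_version

base = SimpleNamespace(model_name="stable-diffusion-xl-base-1.0")
cn = SimpleNamespace(model_name="canny")  # hypothetical controlnet
print(get_model_version(base, None))  # stable-diffusion-xl-base-1.0
print(get_model_version(base, cn))    # stable-diffusion-xl-base-1.0--canny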
xinference/model/llm/__init__.py
CHANGED
@@ -16,7 +16,13 @@ import codecs
 import json
 import os

-from .core import LLM
+from .core import (
+    LLM,
+    LLM_MODEL_DESCRIPTIONS,
+    LLMDescription,
+    generate_llm_description,
+    get_llm_model_descriptions,
+)
 from .llm_family import (
     BUILTIN_LLM_FAMILIES,
     BUILTIN_LLM_MODEL_CHAT_FAMILIES,
@@ -50,7 +56,9 @@ def _install():
     from .pytorch.falcon import FalconPytorchChatModel, FalconPytorchModel
     from .pytorch.internlm2 import Internlm2PytorchChatModel
     from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
+    from .pytorch.qwen_vl import QwenVLChatModel
     from .pytorch.vicuna import VicunaPytorchChatModel
+    from .pytorch.yi_vl import YiVLChatModel
     from .vllm.core import VLLMChatModel, VLLMModel

     # register llm classes.
@@ -82,6 +90,8 @@ def _install():
             PytorchChatModel,
             FalconPytorchModel,
             Internlm2PytorchChatModel,
+            QwenVLChatModel,
+            YiVLChatModel,
             PytorchModel,
         ]
     )
@@ -131,6 +141,11 @@ def _install():
         if "tool_call" in model_spec.model_ability:
             BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)

+    for llm_specs in [BUILTIN_LLM_FAMILIES, BUILTIN_MODELSCOPE_LLM_FAMILIES]:
+        for llm_spec in llm_specs:
+            if llm_spec.model_name not in LLM_MODEL_DESCRIPTIONS:
+                LLM_MODEL_DESCRIPTIONS.update(generate_llm_description(llm_spec))
+
     from ...constants import XINFERENCE_MODEL_DIR

     user_defined_llm_dir = os.path.join(XINFERENCE_MODEL_DIR, "llm")
@@ -141,3 +156,7 @@ def _install():
         ) as fd:
             user_defined_llm_family = CustomLLMFamilyV1.parse_obj(json.load(fd))
             register_llm(user_defined_llm_family, persist=False)
+
+    # register model description
+    for ud_llm in get_user_defined_llm_families():
+        LLM_MODEL_DESCRIPTIONS.update(generate_llm_description(ud_llm))
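The LLM side mirrors the embedding and image changes: built-in, ModelScope, and user-defined families all feed LLM_MODEL_DESCRIPTIONS, and the vision models (qwen_vl.py moved in from the removed multimodal package, plus the new yi_vl.py) are registered as ordinary chat model classes. A hedged sketch of inspecting the registry (model names and the exact shape of each entry depend on the bundled llm_family.json and generate_llm_description, which this diff does not show in full):

from xinference.model.llm import get_llm_model_descriptions

descriptions = get_llm_model_descriptions()
print(len(descriptions), "LLM families with version info")
# "qwen-vl-chat" is assumed to be among the registered names in 0.8.3.
for info in descriptions.get("qwen-vl-chat", []):
    print(info)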