xinference-0.8.1-py3-none-any.whl → xinference-0.8.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (95)
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +132 -0
  3. xinference/api/restful_api.py +282 -78
  4. xinference/client/handlers.py +3 -0
  5. xinference/client/restful/restful_client.py +108 -75
  6. xinference/constants.py +14 -4
  7. xinference/core/cache_tracker.py +102 -0
  8. xinference/core/chat_interface.py +10 -4
  9. xinference/core/event.py +56 -0
  10. xinference/core/model.py +44 -0
  11. xinference/core/resource.py +19 -12
  12. xinference/core/status_guard.py +4 -0
  13. xinference/core/supervisor.py +278 -87
  14. xinference/core/utils.py +68 -3
  15. xinference/core/worker.py +98 -8
  16. xinference/deploy/cmdline.py +6 -3
  17. xinference/deploy/local.py +2 -2
  18. xinference/deploy/supervisor.py +2 -2
  19. xinference/model/audio/__init__.py +27 -0
  20. xinference/model/audio/core.py +161 -0
  21. xinference/model/audio/model_spec.json +79 -0
  22. xinference/model/audio/utils.py +18 -0
  23. xinference/model/audio/whisper.py +132 -0
  24. xinference/model/core.py +18 -13
  25. xinference/model/embedding/__init__.py +27 -2
  26. xinference/model/embedding/core.py +43 -3
  27. xinference/model/embedding/model_spec.json +24 -0
  28. xinference/model/embedding/model_spec_modelscope.json +24 -0
  29. xinference/model/embedding/utils.py +18 -0
  30. xinference/model/image/__init__.py +12 -1
  31. xinference/model/image/core.py +63 -9
  32. xinference/model/image/utils.py +26 -0
  33. xinference/model/llm/__init__.py +20 -1
  34. xinference/model/llm/core.py +43 -2
  35. xinference/model/llm/ggml/chatglm.py +15 -6
  36. xinference/model/llm/llm_family.json +197 -6
  37. xinference/model/llm/llm_family.py +9 -7
  38. xinference/model/llm/llm_family_modelscope.json +189 -4
  39. xinference/model/llm/pytorch/chatglm.py +3 -3
  40. xinference/model/llm/pytorch/core.py +4 -2
  41. xinference/model/{multimodal → llm/pytorch}/qwen_vl.py +10 -8
  42. xinference/model/llm/pytorch/utils.py +21 -9
  43. xinference/model/llm/pytorch/yi_vl.py +246 -0
  44. xinference/model/llm/utils.py +57 -4
  45. xinference/model/llm/vllm/core.py +5 -4
  46. xinference/model/rerank/__init__.py +25 -2
  47. xinference/model/rerank/core.py +51 -9
  48. xinference/model/rerank/model_spec.json +6 -0
  49. xinference/model/rerank/model_spec_modelscope.json +7 -0
  50. xinference/{api/oauth2/common.py → model/rerank/utils.py} +6 -2
  51. xinference/model/utils.py +5 -3
  52. xinference/thirdparty/__init__.py +0 -0
  53. xinference/thirdparty/llava/__init__.py +1 -0
  54. xinference/thirdparty/llava/conversation.py +205 -0
  55. xinference/thirdparty/llava/mm_utils.py +122 -0
  56. xinference/thirdparty/llava/model/__init__.py +1 -0
  57. xinference/thirdparty/llava/model/clip_encoder/__init__.py +0 -0
  58. xinference/thirdparty/llava/model/clip_encoder/builder.py +11 -0
  59. xinference/thirdparty/llava/model/clip_encoder/clip_encoder.py +86 -0
  60. xinference/thirdparty/llava/model/constants.py +6 -0
  61. xinference/thirdparty/llava/model/llava_arch.py +385 -0
  62. xinference/thirdparty/llava/model/llava_llama.py +163 -0
  63. xinference/thirdparty/llava/model/multimodal_projector/__init__.py +0 -0
  64. xinference/thirdparty/llava/model/multimodal_projector/builder.py +64 -0
  65. xinference/types.py +1 -1
  66. xinference/web/ui/build/asset-manifest.json +3 -3
  67. xinference/web/ui/build/index.html +1 -1
  68. xinference/web/ui/build/static/js/main.15822aeb.js +3 -0
  69. xinference/web/ui/build/static/js/main.15822aeb.js.map +1 -0
  70. xinference/web/ui/node_modules/.cache/babel-loader/139e5e4adf436923107d2b02994c7ff6dba2aac1989e9b6638984f0dfe782c4a.json +1 -0
  71. xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +1 -0
  72. xinference/web/ui/node_modules/.cache/babel-loader/64accc515dc6cd584a2873796cd7da6f93de57f7e465eb5423cca9a2f3fe3eff.json +1 -0
  73. xinference/web/ui/node_modules/.cache/babel-loader/65ca3ba225b8c8dac907210545b51f2fcdb2591f0feeb7195f1c037f2bc956a0.json +1 -0
  74. xinference/web/ui/node_modules/.cache/babel-loader/b80db1012318b97c329c4e3e72454f7512fb107e57c444b437dbe4ba1a3faa5a.json +1 -0
  75. {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/METADATA +33 -23
  76. {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/RECORD +81 -64
  77. xinference/api/oauth2/core.py +0 -93
  78. xinference/model/multimodal/__init__.py +0 -52
  79. xinference/model/multimodal/core.py +0 -467
  80. xinference/model/multimodal/model_spec.json +0 -43
  81. xinference/model/multimodal/model_spec_modelscope.json +0 -45
  82. xinference/web/ui/build/static/js/main.b83095c2.js +0 -3
  83. xinference/web/ui/build/static/js/main.b83095c2.js.map +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/101923c539819f26ad11fbcbd6f6e56436b285efbb090dcc7dd648c6e924c4a8.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/4942da6bc03bf7373af068e22f916341aabc5b5df855d73c1d348c696724ce37.json +0 -1
  86. xinference/web/ui/node_modules/.cache/babel-loader/52a6136cb2dbbf9c51d461724d9b283ebe74a73fb19d5df7ba8e13c42bd7174d.json +0 -1
  87. xinference/web/ui/node_modules/.cache/babel-loader/71493aadd34d568fbe605cacaba220aa69bd09273251ee4ba27930f8d01fccd8.json +0 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/8b071db2a5a9ef68dc14d5f606540bd23d9785e365a11997c510656764d2dccf.json +0 -1
  89. xinference/web/ui/node_modules/.cache/babel-loader/a4d72d3b806ba061919115f0c513738726872e3c79cf258f007519d3f91d1a16.json +0 -1
  90. xinference/web/ui/node_modules/.cache/babel-loader/f037ffef5992af0892d6d991053c1dace364cd39a3f11f1a41f92776e8a59459.json +0 -1
  91. /xinference/web/ui/build/static/js/{main.b83095c2.js.LICENSE.txt → main.15822aeb.js.LICENSE.txt} +0 -0
  92. {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/LICENSE +0 -0
  93. {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/WHEEL +0 -0
  94. {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/entry_points.txt +0 -0
  95. {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,132 @@
1
+ # Copyright 2022-2023 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import logging
15
+ from typing import TYPE_CHECKING, Dict, Optional
16
+
17
+ if TYPE_CHECKING:
18
+ from .core import AudioModelFamilyV1
19
+
20
logger = logging.getLogger(__name__)


class WhisperModel:
    """Wrap a Hugging Face ``automatic-speech-recognition`` pipeline.

    The pipeline is created by :meth:`load`. :meth:`transcriptions` and
    :meth:`translations` run it on raw audio and return ``{"text": ...}``.
    """

    def __init__(
        self,
        model_uid: str,
        model_path: str,
        model_spec: "AudioModelFamilyV1",
        device: Optional[str] = None,
        **kwargs,
    ):
        self._model_uid = model_uid
        self._model_path = model_path
        self._model_spec = model_spec
        # NOTE(review): stored but never read by load(), which always picks
        # the device itself -- confirm whether callers expect it to be honored.
        self._device = device
        # Populated by load(); stays None until then.
        self._model = None
        self._kwargs = kwargs

    def load(self):
        """Build the speech-recognition pipeline from ``model_path``.

        Runs on ``cuda:0`` in float16 when CUDA is available, otherwise on the
        CPU in float32.
        """
        # Imported lazily so the module can be imported without torch/transformers.
        import torch
        from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

        device = "cuda:0" if torch.cuda.is_available() else "cpu"
        torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

        model = AutoModelForSpeechSeq2Seq.from_pretrained(
            self._model_path,
            torch_dtype=torch_dtype,
            low_cpu_mem_usage=True,
            use_safetensors=True,
        )
        model.to(device)

        processor = AutoProcessor.from_pretrained(self._model_path)

        self._model = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
            max_new_tokens=128,
            chunk_length_s=30,
            batch_size=16,
            return_timestamps=False,
            torch_dtype=torch_dtype,
            device=device,
        )

    @staticmethod
    def _warn_ignored_options(
        endpoint: str, prompt: Optional[str], temperature: float
    ) -> None:
        """Log a warning for each request option that ``endpoint`` does not use."""
        if temperature != 0:
            logger.warning(
                "Temperature for whisper %s will be ignored: %s.",
                endpoint,
                temperature,
            )
        if prompt is not None:
            logger.warning(
                "Prompt for whisper %s will be ignored: %s", endpoint, prompt
            )

    def _call_model(
        self,
        audio: bytes,
        generate_kwargs: Dict,
        response_format: str,
    ):
        """Run the pipeline on ``audio`` and return ``{"text": <result text>}``.

        Raises:
            ValueError: if ``response_format`` is anything other than ``"json"``.
            RuntimeError: if the pipeline has not been created by :meth:`load`.
        """
        if response_format != "json":
            raise ValueError(f"Unsupported response format: {response_format}")

        logger.debug("Call whisper model with generate_kwargs: %s", generate_kwargs)
        # An explicit check rather than ``assert``: asserts are removed under
        # ``python -O``, which would turn this into an opaque TypeError.
        if not callable(self._model):
            raise RuntimeError("Whisper model is not loaded; call load() first.")
        result = self._model(audio, generate_kwargs=generate_kwargs)
        return {"text": result["text"]}

    def transcriptions(
        self,
        audio: bytes,
        language: Optional[str] = None,
        prompt: Optional[str] = None,
        response_format: str = "json",
        temperature: float = 0,
    ):
        """Transcribe ``audio`` and return ``{"text": ...}``.

        ``language`` is forwarded to generation only when given. ``prompt`` and
        a non-zero ``temperature`` are accepted but ignored, with a warning.
        """
        self._warn_ignored_options("transcriptions", prompt, temperature)
        generate_kwargs: Dict = {"task": "transcribe"}
        if language is not None:
            generate_kwargs = {"language": language, "task": "transcribe"}
        return self._call_model(
            audio=audio,
            generate_kwargs=generate_kwargs,
            response_format=response_format,
        )

    def translations(
        self,
        audio: bytes,
        prompt: Optional[str] = None,
        response_format: str = "json",
        temperature: float = 0,
    ):
        """Run the pipeline with ``task="translate"`` and return ``{"text": ...}``.

        ``prompt`` and a non-zero ``temperature`` are accepted but ignored,
        with a warning.

        Raises:
            RuntimeError: if the model spec is not flagged ``multilingual``.
        """
        if not self._model_spec.multilingual:
            raise RuntimeError(
                f"Model {self._model_spec.model_name} is not suitable for translations."
            )
        self._warn_ignored_options("translations", prompt, temperature)
        return self._call_model(
            audio=audio,
            generate_kwargs={"task": "translate"},
            response_format=response_format,
        )
xinference/model/core.py CHANGED
@@ -12,14 +12,20 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from abc import ABC
15
+ from abc import ABC, abstractmethod
16
16
  from typing import Any, List, Optional, Tuple
17
17
 
18
18
 
19
19
  class ModelDescription(ABC):
20
- def __init__(self, address: Optional[str], devices: Optional[List[str]]):
20
+ def __init__(
21
+ self,
22
+ address: Optional[str],
23
+ devices: Optional[List[str]],
24
+ model_path: Optional[str] = None,
25
+ ):
21
26
  self.address = address
22
27
  self.devices = devices
28
+ self._model_path = model_path
23
29
 
24
30
  def to_dict(self):
25
31
  """
@@ -28,6 +34,12 @@ class ModelDescription(ABC):
28
34
  """
29
35
  raise NotImplementedError
30
36
 
37
+ @abstractmethod
38
+ def to_version_info(self):
39
+ """
40
+ Return a dict to describe version info about a model instance
41
+ """
42
+
31
43
 
32
44
  def create_model_instance(
33
45
  subpool_addr: str,
@@ -41,10 +53,10 @@ def create_model_instance(
41
53
  is_local_deployment: bool = False,
42
54
  **kwargs,
43
55
  ) -> Tuple[Any, ModelDescription]:
56
+ from .audio.core import create_audio_model_instance
44
57
  from .embedding.core import create_embedding_model_instance
45
58
  from .image.core import create_image_model_instance
46
59
  from .llm.core import create_llm_model_instance
47
- from .multimodal.core import create_multimodal_model_instance
48
60
  from .rerank.core import create_rerank_model_instance
49
61
 
50
62
  if model_type == "LLM":
@@ -75,17 +87,10 @@ def create_model_instance(
75
87
  return create_rerank_model_instance(
76
88
  subpool_addr, devices, model_uid, model_name, **kwargs
77
89
  )
78
- elif model_type == "multimodal":
90
+ elif model_type == "audio":
79
91
  kwargs.pop("trust_remote_code", None)
80
- return create_multimodal_model_instance(
81
- subpool_addr,
82
- devices,
83
- model_uid,
84
- model_name,
85
- model_format,
86
- model_size_in_billions,
87
- quantization,
88
- **kwargs,
92
+ return create_audio_model_instance(
93
+ subpool_addr, devices, model_uid, model_name, **kwargs
89
94
  )
90
95
  else:
91
96
  raise ValueError(f"Unsupported model type: {model_type}.")
@@ -16,8 +16,20 @@ import codecs
16
16
  import json
17
17
  import os
18
18
 
19
- from .core import MODEL_NAME_TO_REVISION, EmbeddingModelSpec, get_cache_status
20
- from .custom import CustomEmbeddingModelSpec, register_embedding, unregister_embedding
19
+ from .core import (
20
+ EMBEDDING_MODEL_DESCRIPTIONS,
21
+ MODEL_NAME_TO_REVISION,
22
+ EmbeddingModelSpec,
23
+ generate_embedding_description,
24
+ get_cache_status,
25
+ get_embedding_model_descriptions,
26
+ )
27
+ from .custom import (
28
+ CustomEmbeddingModelSpec,
29
+ get_user_defined_embeddings,
30
+ register_embedding,
31
+ unregister_embedding,
32
+ )
21
33
 
22
34
  _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
23
35
  _model_spec_modelscope_json = os.path.join(
@@ -29,6 +41,7 @@ BUILTIN_EMBEDDING_MODELS = dict(
29
41
  )
30
42
  for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items():
31
43
  MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
44
+
32
45
  MODELSCOPE_EMBEDDING_MODELS = dict(
33
46
  (spec["model_name"], EmbeddingModelSpec(**spec))
34
47
  for spec in json.load(
@@ -38,6 +51,14 @@ MODELSCOPE_EMBEDDING_MODELS = dict(
38
51
  for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items():
39
52
  MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
40
53
 
54
+ # register model description after recording model revision
55
+ for model_spec_info in [BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS]:
56
+ for model_name, model_spec in model_spec_info.items():
57
+ if model_spec.model_name not in EMBEDDING_MODEL_DESCRIPTIONS:
58
+ EMBEDDING_MODEL_DESCRIPTIONS.update(
59
+ generate_embedding_description(model_spec)
60
+ )
61
+
41
62
  from ...constants import XINFERENCE_MODEL_DIR
42
63
 
43
64
  user_defined_llm_dir = os.path.join(XINFERENCE_MODEL_DIR, "embedding")
@@ -47,5 +68,9 @@ if os.path.isdir(user_defined_llm_dir):
47
68
  user_defined_llm_family = CustomEmbeddingModelSpec.parse_obj(json.load(fd))
48
69
  register_embedding(user_defined_llm_family, persist=False)
49
70
 
71
+ # register model description
72
+ for ud_embedding in get_user_defined_embeddings():
73
+ EMBEDDING_MODEL_DESCRIPTIONS.update(generate_embedding_description(ud_embedding))
74
+
50
75
  del _model_spec_json
51
76
  del _model_spec_modelscope_json
@@ -24,7 +24,7 @@ from pydantic import BaseModel
24
24
  from ...constants import XINFERENCE_CACHE_DIR
25
25
  from ...types import Embedding, EmbeddingData, EmbeddingUsage
26
26
  from ..core import ModelDescription
27
- from ..utils import is_model_cached, valid_model_revision
27
+ from ..utils import get_cache_dir, is_model_cached, valid_model_revision
28
28
 
29
29
  logger = logging.getLogger(__name__)
30
30
 
@@ -34,6 +34,15 @@ SUPPORTED_SCHEMES = ["s3"]
34
34
  MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
35
35
 
36
36
 
37
EMBEDDING_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)


def get_embedding_model_descriptions():
    """Return a deep copy of ``EMBEDDING_MODEL_DESCRIPTIONS``.

    Callers receive an independent snapshot, so mutating the result does not
    affect the module-level registry.
    """
    from copy import deepcopy

    snapshot = deepcopy(EMBEDDING_MODEL_DESCRIPTIONS)
    return snapshot
44
+
45
+
37
46
  class EmbeddingModelSpec(BaseModel):
38
47
  model_name: str
39
48
  dimensions: int
@@ -50,8 +59,9 @@ class EmbeddingModelDescription(ModelDescription):
50
59
  address: Optional[str],
51
60
  devices: Optional[List[str]],
52
61
  model_spec: EmbeddingModelSpec,
62
+ model_path: Optional[str] = None,
53
63
  ):
54
- super().__init__(address, devices)
64
+ super().__init__(address, devices, model_path=model_path)
55
65
  self._model_spec = model_spec
56
66
 
57
67
  def to_dict(self):
@@ -66,6 +76,34 @@ class EmbeddingModelDescription(ModelDescription):
66
76
  "model_revision": self._model_spec.model_revision,
67
77
  }
68
78
 
79
def to_version_info(self):
    """Return a dict describing this embedding model's version.

    When no explicit model path was given, the cache status and location are
    looked up from the model spec. Otherwise the model is reported as cached
    at the given path.
    """
    from .utils import get_model_version

    spec = self._model_spec
    if self._model_path is not None:
        cached, location = True, self._model_path
    else:
        cached = get_cache_status(spec)
        location = get_cache_dir(spec)

    version_info = {
        "model_version": get_model_version(spec),
        "model_file_location": location,
        "cache_status": cached,
        "dimensions": spec.dimensions,
        "max_tokens": spec.max_tokens,
    }
    return version_info
96
+
97
+
98
def generate_embedding_description(
    model_spec: EmbeddingModelSpec,
) -> Dict[str, List[Dict]]:
    """Build a ``{model_name: [version_info]}`` mapping for one embedding spec.

    The description is created without an address, devices or model path, so
    its version info reflects the spec's cache state.
    """
    descriptions = defaultdict(list)
    entries = descriptions[model_spec.model_name]
    description = EmbeddingModelDescription(None, None, model_spec)
    entries.append(description.to_version_info())
    return descriptions
106
+
69
107
 
70
108
  def cache_from_uri(
71
109
  model_spec: EmbeddingModelSpec,
@@ -421,5 +459,7 @@ def create_embedding_model_instance(
421
459
  model_spec = match_embedding(model_name)
422
460
  model_path = cache(model_spec)
423
461
  model = EmbeddingModel(model_uid, model_path, **kwargs)
424
- model_description = EmbeddingModelDescription(subpool_addr, devices, model_spec)
462
+ model_description = EmbeddingModelDescription(
463
+ subpool_addr, devices, model_spec, model_path=model_path
464
+ )
425
465
  return model, model_description
@@ -143,6 +143,14 @@
143
143
  "model_id": "jinaai/jina-embeddings-v2-base-en",
144
144
  "model_revision": "7302ac470bed880590f9344bfeee32ff8722d0e5"
145
145
  },
146
+ {
147
+ "model_name": "jina-embeddings-v2-base-zh",
148
+ "dimensions": 768,
149
+ "max_tokens": 8192,
150
+ "language": ["zh", "en"],
151
+ "model_id": "jinaai/jina-embeddings-v2-base-zh",
152
+ "model_revision": "67974cbef5cf50562eadd745de8afc661c52c96f"
153
+ },
146
154
  {
147
155
  "model_name": "text2vec-large-chinese",
148
156
  "dimensions": 1024,
@@ -182,5 +190,21 @@
182
190
  "language": ["zh"],
183
191
  "model_id": "shibing624/text2vec-base-multilingual",
184
192
  "model_revision": "f241877385fa56ebcc75f04d1850e1579cfa661d"
193
+ },
194
+ {
195
+ "model_name": "bge-m3",
196
+ "dimensions": 1024,
197
+ "max_tokens": 8192,
198
+ "language": ["zh", "en"],
199
+ "model_id": "BAAI/bge-m3",
200
+ "model_revision": "73a15ad29ab604f3bdc31601849a9defe86d563f"
201
+ },
202
+ {
203
+ "model_name": "bce-embedding-base_v1",
204
+ "dimensions": 768,
205
+ "max_tokens": 512,
206
+ "language": ["zh", "en"],
207
+ "model_id": "maidalun1020/bce-embedding-base_v1",
208
+ "model_revision": "236d9024fc1b4046f03848723f934521a66a9323"
185
209
  }
186
210
  ]
@@ -161,6 +161,14 @@
161
161
  "model_revision": "v0.0.1",
162
162
  "model_hub": "modelscope"
163
163
  },
164
+ {
165
+ "model_name": "jina-embeddings-v2-base-zh",
166
+ "dimensions": 768,
167
+ "max_tokens": 8192,
168
+ "language": ["zh", "en"],
169
+ "model_id": "jinaai/jina-embeddings-v2-base-zh",
170
+ "model_hub": "modelscope"
171
+ },
164
172
  {
165
173
  "model_name": "text2vec-large-chinese",
166
174
  "dimensions": 1024,
@@ -184,5 +192,21 @@
184
192
  "language": ["zh"],
185
193
  "model_id": "mwei23/text2vec-base-chinese-paraphrase",
186
194
  "model_hub": "modelscope"
195
+ },
196
+ {
197
+ "model_name": "bge-m3",
198
+ "dimensions": 1024,
199
+ "max_tokens": 8192,
200
+ "language": ["zh", "en"],
201
+ "model_id": "Xorbits/bge-m3",
202
+ "model_hub": "modelscope"
203
+ },
204
+ {
205
+ "model_name": "bce-embedding-base_v1",
206
+ "dimensions": 768,
207
+ "max_tokens": 512,
208
+ "language": ["zh", "en"],
209
+ "model_id": "maidalun/bce-embedding-base_v1",
210
+ "model_hub": "modelscope"
187
211
  }
188
212
  ]
@@ -0,0 +1,18 @@
1
+ # Copyright 2022-2024 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from .core import EmbeddingModelSpec
15
+
16
+
17
def get_model_version(embedding_model: EmbeddingModelSpec) -> str:
    """Return ``<model_name>--<max_tokens>--<dimensions>`` for the given spec."""
    return "{}--{}--{}".format(
        embedding_model.model_name,
        embedding_model.max_tokens,
        embedding_model.dimensions,
    )
@@ -16,11 +16,22 @@ import codecs
16
16
  import json
17
17
  import os
18
18
 
19
- from .core import ImageModelFamilyV1, get_cache_status
19
+ from .core import (
20
+ IMAGE_MODEL_DESCRIPTIONS,
21
+ ImageModelFamilyV1,
22
+ generate_image_description,
23
+ get_cache_status,
24
+ get_image_model_descriptions,
25
+ )
20
26
 
21
27
  _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
22
28
  BUILTIN_IMAGE_MODELS = dict(
23
29
  (spec["model_name"], ImageModelFamilyV1(**spec))
24
30
  for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
25
31
  )
32
+
33
+ # register model description
34
+ for model_name, model_spec in BUILTIN_IMAGE_MODELS.items():
35
+ IMAGE_MODEL_DESCRIPTIONS.update(generate_image_description(model_spec))
36
+
26
37
  del _model_spec_json
@@ -14,7 +14,8 @@
14
14
  import collections.abc
15
15
  import logging
16
16
  import os
17
- from typing import List, Optional, Tuple
17
+ from collections import defaultdict
18
+ from typing import Dict, List, Optional, Tuple
18
19
 
19
20
  from pydantic import BaseModel
20
21
 
@@ -27,6 +28,14 @@ MAX_ATTEMPTS = 3
27
28
 
28
29
  logger = logging.getLogger(__name__)
29
30
 
31
IMAGE_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)


def get_image_model_descriptions():
    """Return a deep copy of ``IMAGE_MODEL_DESCRIPTIONS``.

    Callers receive an independent snapshot, so mutating the result does not
    affect the module-level registry.
    """
    from copy import deepcopy

    snapshot = deepcopy(IMAGE_MODEL_DESCRIPTIONS)
    return snapshot
38
+
30
39
 
31
40
  class ImageModelFamilyV1(BaseModel):
32
41
  model_family: str
@@ -42,8 +51,9 @@ class ImageModelDescription(ModelDescription):
42
51
  address: Optional[str],
43
52
  devices: Optional[List[str]],
44
53
  model_spec: ImageModelFamilyV1,
54
+ model_path: Optional[str] = None,
45
55
  ):
46
- super().__init__(address, devices)
56
+ super().__init__(address, devices, model_path=model_path)
47
57
  self._model_spec = model_spec
48
58
 
49
59
  def to_dict(self):
@@ -57,6 +67,48 @@ class ImageModelDescription(ModelDescription):
57
67
  "controlnet": self._model_spec.controlnet,
58
68
  }
59
69
 
70
def to_version_info(self):
    """Return a list of version-info dicts for this image model.

    A spec without controlnets yields a single entry whose ``controlnet`` is
    ``None``. Otherwise one entry is produced per controlnet in the spec.

    When no explicit model path was given, the cache status and location are
    looked up from the model spec. Otherwise the model is reported as cached
    at the given path.
    """
    from .utils import get_model_version

    if self._model_path is None:
        is_cached = get_cache_status(self._model_spec)
        file_location = get_cache_dir(self._model_spec)
    else:
        is_cached = True
        file_location = self._model_path

    controlnets = self._model_spec.controlnet
    if controlnets is None:
        return [
            {
                "model_version": get_model_version(self._model_spec, None),
                "model_file_location": file_location,
                "cache_status": is_cached,
                # This branch is the "no controlnet" case, so report none
                # instead of the previously hard-coded "zoe-depth".
                "controlnet": None,
            }
        ]

    return [
        {
            "model_version": get_model_version(self._model_spec, cn),
            "model_file_location": file_location,
            "cache_status": is_cached,
            "controlnet": cn.model_name,
        }
        for cn in controlnets
    ]
101
+
102
+
103
def generate_image_description(
    image_model: ImageModelFamilyV1,
) -> Dict[str, List[Dict]]:
    """Build a ``{model_name: [version_info, ...]}`` mapping for one image spec.

    The description is created without an address, devices or model path, so
    its version info reflects the spec's cache state.
    """
    descriptions = defaultdict(list)
    entries = descriptions[image_model.model_name]
    description = ImageModelDescription(None, None, image_model)
    entries.extend(description.to_version_info())
    return descriptions
111
+
60
112
 
61
113
  def match_diffusion(model_name: str) -> ImageModelFamilyV1:
62
114
  from . import BUILTIN_IMAGE_MODELS
@@ -74,9 +126,7 @@ def cache(model_spec: ImageModelFamilyV1):
74
126
  # TODO: cache from uri
75
127
  import huggingface_hub
76
128
 
77
- cache_dir = os.path.realpath(
78
- os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
79
- )
129
+ cache_dir = get_cache_dir(model_spec)
80
130
  if not os.path.exists(cache_dir):
81
131
  os.makedirs(cache_dir, exist_ok=True)
82
132
 
@@ -113,12 +163,14 @@ def cache(model_spec: ImageModelFamilyV1):
113
163
  return cache_dir
114
164
 
115
165
 
166
def get_cache_dir(model_spec: ImageModelFamilyV1):
    """Return the resolved cache directory for ``model_spec``.

    The directory is ``XINFERENCE_CACHE_DIR/<model_name>`` passed through
    ``os.path.realpath``.
    """
    joined = os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
    return os.path.realpath(joined)
168
+
169
+
116
170
  def get_cache_status(
117
171
  model_spec: ImageModelFamilyV1,
118
172
  ) -> bool:
119
- cache_dir = os.path.realpath(
120
- os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
121
- )
173
+ cache_dir = get_cache_dir(model_spec)
122
174
  meta_path = os.path.join(cache_dir, "__valid_download")
123
175
  return valid_model_revision(meta_path, model_spec.model_revision)
124
176
 
@@ -157,5 +209,7 @@ def create_image_model_instance(
157
209
  kwargs["controlnet"] = controlnet_model_paths
158
210
  model_path = cache(model_spec)
159
211
  model = DiffusionModel(model_uid, model_path, **kwargs)
160
- model_description = ImageModelDescription(subpool_addr, devices, model_spec)
212
+ model_description = ImageModelDescription(
213
+ subpool_addr, devices, model_spec, model_path=model_path
214
+ )
161
215
  return model, model_description
@@ -0,0 +1,26 @@
1
+ # Copyright 2022-2024 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from typing import Optional
15
+
16
+ from .core import ImageModelFamilyV1
17
+
18
+
19
def get_model_version(
    image_model: ImageModelFamilyV1, controlnet: Optional[ImageModelFamilyV1]
) -> str:
    """Return the version string for an image model.

    This is the bare model name when ``controlnet`` is ``None``, otherwise
    ``<model_name>--<controlnet model_name>``.
    """
    if controlnet is None:
        return image_model.model_name
    return f"{image_model.model_name}--{controlnet.model_name}"
@@ -16,7 +16,13 @@ import codecs
16
16
  import json
17
17
  import os
18
18
 
19
- from .core import LLM
19
+ from .core import (
20
+ LLM,
21
+ LLM_MODEL_DESCRIPTIONS,
22
+ LLMDescription,
23
+ generate_llm_description,
24
+ get_llm_model_descriptions,
25
+ )
20
26
  from .llm_family import (
21
27
  BUILTIN_LLM_FAMILIES,
22
28
  BUILTIN_LLM_MODEL_CHAT_FAMILIES,
@@ -50,7 +56,9 @@ def _install():
50
56
  from .pytorch.falcon import FalconPytorchChatModel, FalconPytorchModel
51
57
  from .pytorch.internlm2 import Internlm2PytorchChatModel
52
58
  from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
59
+ from .pytorch.qwen_vl import QwenVLChatModel
53
60
  from .pytorch.vicuna import VicunaPytorchChatModel
61
+ from .pytorch.yi_vl import YiVLChatModel
54
62
  from .vllm.core import VLLMChatModel, VLLMModel
55
63
 
56
64
  # register llm classes.
@@ -82,6 +90,8 @@ def _install():
82
90
  PytorchChatModel,
83
91
  FalconPytorchModel,
84
92
  Internlm2PytorchChatModel,
93
+ QwenVLChatModel,
94
+ YiVLChatModel,
85
95
  PytorchModel,
86
96
  ]
87
97
  )
@@ -131,6 +141,11 @@ def _install():
131
141
  if "tool_call" in model_spec.model_ability:
132
142
  BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
133
143
 
144
+ for llm_specs in [BUILTIN_LLM_FAMILIES, BUILTIN_MODELSCOPE_LLM_FAMILIES]:
145
+ for llm_spec in llm_specs:
146
+ if llm_spec.model_name not in LLM_MODEL_DESCRIPTIONS:
147
+ LLM_MODEL_DESCRIPTIONS.update(generate_llm_description(llm_spec))
148
+
134
149
  from ...constants import XINFERENCE_MODEL_DIR
135
150
 
136
151
  user_defined_llm_dir = os.path.join(XINFERENCE_MODEL_DIR, "llm")
@@ -141,3 +156,7 @@ def _install():
141
156
  ) as fd:
142
157
  user_defined_llm_family = CustomLLMFamilyV1.parse_obj(json.load(fd))
143
158
  register_llm(user_defined_llm_family, persist=False)
159
+
160
+ # register model description
161
+ for ud_llm in get_user_defined_llm_families():
162
+ LLM_MODEL_DESCRIPTIONS.update(generate_llm_description(ud_llm))