xinference 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic.

Files changed (95)
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +132 -0
  3. xinference/api/restful_api.py +282 -78
  4. xinference/client/handlers.py +3 -0
  5. xinference/client/restful/restful_client.py +108 -75
  6. xinference/constants.py +14 -4
  7. xinference/core/cache_tracker.py +102 -0
  8. xinference/core/chat_interface.py +10 -4
  9. xinference/core/event.py +56 -0
  10. xinference/core/model.py +44 -0
  11. xinference/core/resource.py +19 -12
  12. xinference/core/status_guard.py +4 -0
  13. xinference/core/supervisor.py +278 -87
  14. xinference/core/utils.py +68 -3
  15. xinference/core/worker.py +98 -8
  16. xinference/deploy/cmdline.py +6 -3
  17. xinference/deploy/local.py +2 -2
  18. xinference/deploy/supervisor.py +2 -2
  19. xinference/model/audio/__init__.py +27 -0
  20. xinference/model/audio/core.py +161 -0
  21. xinference/model/audio/model_spec.json +79 -0
  22. xinference/model/audio/utils.py +18 -0
  23. xinference/model/audio/whisper.py +132 -0
  24. xinference/model/core.py +18 -13
  25. xinference/model/embedding/__init__.py +27 -2
  26. xinference/model/embedding/core.py +43 -3
  27. xinference/model/embedding/model_spec.json +24 -0
  28. xinference/model/embedding/model_spec_modelscope.json +24 -0
  29. xinference/model/embedding/utils.py +18 -0
  30. xinference/model/image/__init__.py +12 -1
  31. xinference/model/image/core.py +63 -9
  32. xinference/model/image/utils.py +26 -0
  33. xinference/model/llm/__init__.py +20 -1
  34. xinference/model/llm/core.py +43 -2
  35. xinference/model/llm/ggml/chatglm.py +15 -6
  36. xinference/model/llm/llm_family.json +197 -6
  37. xinference/model/llm/llm_family.py +9 -7
  38. xinference/model/llm/llm_family_modelscope.json +189 -4
  39. xinference/model/llm/pytorch/chatglm.py +3 -3
  40. xinference/model/llm/pytorch/core.py +4 -2
  41. xinference/model/{multimodal → llm/pytorch}/qwen_vl.py +10 -8
  42. xinference/model/llm/pytorch/utils.py +21 -9
  43. xinference/model/llm/pytorch/yi_vl.py +246 -0
  44. xinference/model/llm/utils.py +57 -4
  45. xinference/model/llm/vllm/core.py +5 -4
  46. xinference/model/rerank/__init__.py +25 -2
  47. xinference/model/rerank/core.py +51 -9
  48. xinference/model/rerank/model_spec.json +6 -0
  49. xinference/model/rerank/model_spec_modelscope.json +7 -0
  50. xinference/{api/oauth2/common.py → model/rerank/utils.py} +6 -2
  51. xinference/model/utils.py +5 -3
  52. xinference/thirdparty/__init__.py +0 -0
  53. xinference/thirdparty/llava/__init__.py +1 -0
  54. xinference/thirdparty/llava/conversation.py +205 -0
  55. xinference/thirdparty/llava/mm_utils.py +122 -0
  56. xinference/thirdparty/llava/model/__init__.py +1 -0
  57. xinference/thirdparty/llava/model/clip_encoder/__init__.py +0 -0
  58. xinference/thirdparty/llava/model/clip_encoder/builder.py +11 -0
  59. xinference/thirdparty/llava/model/clip_encoder/clip_encoder.py +86 -0
  60. xinference/thirdparty/llava/model/constants.py +6 -0
  61. xinference/thirdparty/llava/model/llava_arch.py +385 -0
  62. xinference/thirdparty/llava/model/llava_llama.py +163 -0
  63. xinference/thirdparty/llava/model/multimodal_projector/__init__.py +0 -0
  64. xinference/thirdparty/llava/model/multimodal_projector/builder.py +64 -0
  65. xinference/types.py +1 -1
  66. xinference/web/ui/build/asset-manifest.json +3 -3
  67. xinference/web/ui/build/index.html +1 -1
  68. xinference/web/ui/build/static/js/main.15822aeb.js +3 -0
  69. xinference/web/ui/build/static/js/main.15822aeb.js.map +1 -0
  70. xinference/web/ui/node_modules/.cache/babel-loader/139e5e4adf436923107d2b02994c7ff6dba2aac1989e9b6638984f0dfe782c4a.json +1 -0
  71. xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +1 -0
  72. xinference/web/ui/node_modules/.cache/babel-loader/64accc515dc6cd584a2873796cd7da6f93de57f7e465eb5423cca9a2f3fe3eff.json +1 -0
  73. xinference/web/ui/node_modules/.cache/babel-loader/65ca3ba225b8c8dac907210545b51f2fcdb2591f0feeb7195f1c037f2bc956a0.json +1 -0
  74. xinference/web/ui/node_modules/.cache/babel-loader/b80db1012318b97c329c4e3e72454f7512fb107e57c444b437dbe4ba1a3faa5a.json +1 -0
  75. {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/METADATA +33 -23
  76. {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/RECORD +81 -64
  77. xinference/api/oauth2/core.py +0 -93
  78. xinference/model/multimodal/__init__.py +0 -52
  79. xinference/model/multimodal/core.py +0 -467
  80. xinference/model/multimodal/model_spec.json +0 -43
  81. xinference/model/multimodal/model_spec_modelscope.json +0 -45
  82. xinference/web/ui/build/static/js/main.b83095c2.js +0 -3
  83. xinference/web/ui/build/static/js/main.b83095c2.js.map +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/101923c539819f26ad11fbcbd6f6e56436b285efbb090dcc7dd648c6e924c4a8.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/4942da6bc03bf7373af068e22f916341aabc5b5df855d73c1d348c696724ce37.json +0 -1
  86. xinference/web/ui/node_modules/.cache/babel-loader/52a6136cb2dbbf9c51d461724d9b283ebe74a73fb19d5df7ba8e13c42bd7174d.json +0 -1
  87. xinference/web/ui/node_modules/.cache/babel-loader/71493aadd34d568fbe605cacaba220aa69bd09273251ee4ba27930f8d01fccd8.json +0 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/8b071db2a5a9ef68dc14d5f606540bd23d9785e365a11997c510656764d2dccf.json +0 -1
  89. xinference/web/ui/node_modules/.cache/babel-loader/a4d72d3b806ba061919115f0c513738726872e3c79cf258f007519d3f91d1a16.json +0 -1
  90. xinference/web/ui/node_modules/.cache/babel-loader/f037ffef5992af0892d6d991053c1dace364cd39a3f11f1a41f92776e8a59459.json +0 -1
  91. /xinference/web/ui/build/static/js/{main.b83095c2.js.LICENSE.txt → main.15822aeb.js.LICENSE.txt} +0 -0
  92. {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/LICENSE +0 -0
  93. {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/WHEEL +0 -0
  94. {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/entry_points.txt +0 -0
  95. {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/top_level.txt +0 -0
xinference/model/multimodal/core.py
@@ -1,467 +0,0 @@
- # Copyright 2022-2023 XProbe Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import abc
- import logging
- import os
- from abc import abstractmethod
- from collections import defaultdict
- from typing import Dict, Iterator, List, Literal, Optional, Tuple, Type, Union
-
- from pydantic import BaseModel, validator
-
- from ...constants import XINFERENCE_CACHE_DIR
- from ...core.utils import parse_replica_model_uid
- from ...types import ChatCompletion, ChatCompletionChunk
- from ..core import ModelDescription
- from ..utils import (
-     download_from_modelscope,
-     is_model_cached,
-     retry_download,
-     symlink_local_file,
-     valid_model_revision,
- )
-
- logger = logging.getLogger(__name__)
-
- DEFAULT_CONTEXT_LENGTH = 2048
- # Used for check whether the model is cached.
- # Init when registering all the builtin models.
- MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
-
-
- class LVLMSpecV1(BaseModel):
-     model_format: Literal["pytorch", "gptq"]
-     # Must in order that `str` first, then `int`
-     model_size_in_billions: Union[str, int]
-     quantizations: List[str]
-     model_id: str
-     model_hub: str = "huggingface"
-     model_uri: Optional[str]
-     model_revision: Optional[str]
-
-     @validator("model_size_in_billions", pre=False)
-     def validate_model_size_with_radix(cls, v: object) -> object:
-         if isinstance(v, str):
-             if (
-                 "_" in v
-             ):  # for example, "1_8" just returns "1_8", otherwise int("1_8") returns 18
-                 return v
-             else:
-                 return int(v)
-         return v
-
-
- class LVLMPromptStyleV1(BaseModel):
-     style_name: str
-     system_prompt: str = ""
-     roles: List[str]
-
-
- class LVLMFamilyV1(BaseModel):
-     version: Literal[1]
-     context_length: Optional[int] = DEFAULT_CONTEXT_LENGTH
-     model_name: str
-     model_lang: List[str]
-     model_ability: List[Literal["chat"]]
-     model_description: Optional[str]
-     model_specs: List["LVLMSpecV1"]
-     prompt_style: Optional["LVLMPromptStyleV1"]
-
-
- class LVLMDescription(ModelDescription):
-     def __init__(
-         self,
-         address: Optional[str],
-         devices: Optional[List[str]],
-         model_family: "LVLMFamilyV1",
-         model_spec: "LVLMSpecV1",
-         quantization: Optional[str],
-     ):
-         super().__init__(address, devices)
-         self._model_family = model_family
-         self._model_spec = model_spec
-         self._quantization = quantization
-
-     def to_dict(self):
-         return {
-             "model_type": "multimodal",
-             "address": self.address,
-             "accelerators": self.devices,
-             "model_name": self._model_family.model_name,
-             "model_lang": self._model_family.model_lang,
-             "model_ability": self._model_family.model_ability,
-             "model_description": self._model_family.model_description,
-             "model_format": self._model_spec.model_format,
-             "model_size_in_billions": self._model_spec.model_size_in_billions,
-             "quantization": self._quantization,
-             "model_hub": self._model_spec.model_hub,
-             "revision": self._model_spec.model_revision,
-             "context_length": self._model_family.context_length,
-         }
-
-
- class LVLM(abc.ABC):
-     def __init__(
-         self,
-         replica_model_uid: str,
-         model_family: "LVLMFamilyV1",
-         model_spec: "LVLMSpecV1",
-         quantization: str,
-         model_path: str,
-         kwargs: Dict,
-     ):
-         self.model_uid, self.replica, self.rep_id = parse_replica_model_uid(
-             replica_model_uid
-         )
-         self.model_family = model_family
-         self.model_spec = model_spec
-         self.quantization = quantization
-         self.model_path = model_path
-         self.kwargs = kwargs
-         logger.info("Init model %s with kwargs: %s", self.model_uid, kwargs)
-
-     @abstractmethod
-     def load(self):
-         raise NotImplementedError
-
-     @abstractmethod
-     def chat(
-         self,
-         prompt: str,
-         system_prompt: Optional[str] = None,
-         chat_history: Optional[List[Dict]] = None,
-         generate_config: Optional[Dict] = None,
-     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
-         raise NotImplementedError
-
-     @classmethod
-     def match(
-         cls, model_family: "LVLMFamilyV1", model_spec: "LVLMSpecV1", quantization: str
-     ) -> bool:
-         raise NotImplementedError
-
-
- BUILTIN_LVLM_FAMILIES: List["LVLMFamilyV1"] = []
- BUILTIN_MODELSCOPE_LVLM_FAMILIES: List["LVLMFamilyV1"] = []
-
-
- def match_multimodal(
-     model_name: str,
-     model_format: Optional[str] = None,
-     model_size_in_billions: Optional[int] = None,
-     quantization: Optional[str] = None,
- ) -> Optional[Tuple[LVLMFamilyV1, LVLMSpecV1, str]]:
-     """
-     Find an multimodal family, spec, and quantization that satisfy given criteria.
-     """
-
-     def _match_quantization(q: Union[str, None], quantizations: List[str]):
-         # Currently, the quantization name could include both uppercase and lowercase letters,
-         # so it is necessary to ensure that the case sensitivity does not
-         # affect the matching results.
-         if q is None:
-             return q
-         for quant in quantizations:
-             if q.lower() == quant.lower():
-                 return quant
-
-     def _apply_format_to_model_id(spec: LVLMSpecV1, q: str) -> LVLMSpecV1:
-         # Different quantized versions of some models use different model ids,
-         # Here we check the `{}` in the model id to format the id.
-         if "{" in spec.model_id:
-             spec.model_id = spec.model_id.format(quantization=q)
-         return spec
-
-     if download_from_modelscope():
-         all_families = BUILTIN_MODELSCOPE_LVLM_FAMILIES + BUILTIN_LVLM_FAMILIES
-     else:
-         all_families = BUILTIN_LVLM_FAMILIES
-
-     for family in all_families:
-         if model_name != family.model_name:
-             continue
-         for spec in family.model_specs:
-             matched_quantization = _match_quantization(quantization, spec.quantizations)
-             if (
-                 model_format
-                 and model_format != spec.model_format
-                 or model_size_in_billions
-                 and model_size_in_billions != spec.model_size_in_billions
-                 or quantization
-                 and matched_quantization is None
-             ):
-                 continue
-             # Copy spec to avoid _apply_format_to_model_id modify the original spec.
-             spec = spec.copy()
-             if quantization:
-                 return (
-                     family,
-                     _apply_format_to_model_id(spec, matched_quantization),
-                     matched_quantization,
-                 )
-             else:
-                 return family, _apply_format_to_model_id(spec, "none"), "none"
-     return None
-
-
- def create_multimodal_model_instance(
-     subpool_addr: str,
-     devices: List[str],
-     model_uid: str,
-     model_name: str,
-     model_format: Optional[str] = None,
-     model_size_in_billions: Optional[int] = None,
-     quantization: Optional[str] = None,
-     **kwargs,
- ) -> Tuple[LVLM, LVLMDescription]:
-     match_result = match_multimodal(
-         model_name,
-         model_format,
-         model_size_in_billions,
-         quantization,
-     )
-     if not match_result:
-         raise ValueError(
-             f"Model not found, name: {model_name}, format: {model_format},"
-             f" size: {model_size_in_billions}, quantization: {quantization}"
-         )
-     model_family, model_spec, quantization = match_result
-
-     assert quantization is not None
-     save_path = cache(model_family, model_spec, quantization)
-
-     cls = match_cls(model_family, model_spec, quantization)
-     logger.debug(f"Launching {model_uid} with {cls.__name__}")
-
-     model = cls(model_uid, model_family, model_spec, quantization, save_path, kwargs)
-     return model, LVLMDescription(
-         subpool_addr, devices, model_family, model_spec, quantization
-     )
-
-
- MODEL_CLASSES: List[Type[LVLM]] = []
-
-
- def match_cls(
-     model_family: LVLMFamilyV1, model_spec: "LVLMSpecV1", quantization: str
- ) -> Type[LVLM]:
-     """
-     Find an multimodal implementation for given multimodal family and spec.
-     """
-     for cls in MODEL_CLASSES:
-         if cls.match(model_family, model_spec, quantization):
-             return cls
-     raise Exception(f"Model {model_family.model_name} is not supported")
-
-
- def _get_cache_dir(
-     model_family: LVLMFamilyV1,
-     model_spec: "LVLMSpecV1",
-     create_if_not_exist=True,
- ):
-     cache_dir_name = (
-         f"{model_family.model_name}-{model_spec.model_format}"
-         f"-{model_spec.model_size_in_billions}b"
-     )
-     cache_dir = os.path.realpath(os.path.join(XINFERENCE_CACHE_DIR, cache_dir_name))
-     if create_if_not_exist and not os.path.exists(cache_dir):
-         os.makedirs(cache_dir, exist_ok=True)
-     return cache_dir
-
-
- def _get_meta_path(
-     cache_dir: str,
-     model_format: str,
-     model_hub: str,
-     quantization: Optional[str] = None,
- ):
-     if model_format == "pytorch":
-         if model_hub == "huggingface":
-             return os.path.join(cache_dir, "__valid_download")
-         else:
-             return os.path.join(cache_dir, f"__valid_download_{model_hub}")
-     elif model_format in ["ggmlv3", "ggufv2", "gptq"]:
-         assert quantization is not None
-         if model_hub == "huggingface":
-             return os.path.join(cache_dir, f"__valid_download_{quantization}")
-         else:
-             return os.path.join(
-                 cache_dir, f"__valid_download_{model_hub}_{quantization}"
-             )
-     else:
-         raise ValueError(f"Unsupported format: {model_format}")
-
-
- def _skip_download(
-     cache_dir: str,
-     model_format: str,
-     model_hub: str,
-     model_revision: Optional[str],
-     quantization: Optional[str] = None,
- ) -> bool:
-     if model_format == "pytorch":
-         model_hub_to_meta_path = {
-             "huggingface": _get_meta_path(
-                 cache_dir, model_format, "huggingface", quantization
-             ),
-             "modelscope": _get_meta_path(
-                 cache_dir, model_format, "modelscope", quantization
-             ),
-         }
-         if valid_model_revision(model_hub_to_meta_path[model_hub], model_revision):
-             logger.info(f"Cache {cache_dir} exists")
-             return True
-         else:
-             for hub, meta_path in model_hub_to_meta_path.items():
-                 if hub != model_hub and os.path.exists(meta_path):
-                     # PyTorch models from modelscope can also be loaded by transformers.
-                     logger.warning(f"Cache {cache_dir} exists, but it was from {hub}")
-                     return True
-             return False
-     elif model_format in ["ggmlv3", "ggufv2", "gptq"]:
-         assert quantization is not None
-         return os.path.exists(
-             _get_meta_path(cache_dir, model_format, model_hub, quantization)
-         )
-     else:
-         raise ValueError(f"Unsupported format: {model_format}")
-
-
- def _generate_meta_file(
-     meta_path: str,
-     model_family: "LVLMFamilyV1",
-     model_spec: "LVLMSpecV1",
-     quantization: Optional[str] = None,
- ):
-     assert not valid_model_revision(
-         meta_path, model_spec.model_revision
-     ), f"meta file {meta_path} should not be valid"
-     with open(meta_path, "w") as f:
-         import json
-
-         desc = LVLMDescription(None, None, model_family, model_spec, quantization)
-         json.dump(desc.to_dict(), f)
-
-
- def cache_from_modelscope(
-     model_family: LVLMFamilyV1,
-     model_spec: "LVLMSpecV1",
-     quantization: Optional[str] = None,
- ) -> str:
-     """
-     Cache model from Modelscope. Return the cache directory.
-     """
-     from modelscope.hub.snapshot_download import snapshot_download
-
-     cache_dir = _get_cache_dir(model_family, model_spec)
-     if _skip_download(
-         cache_dir,
-         model_spec.model_format,
-         model_spec.model_hub,
-         model_spec.model_revision,
-         quantization,
-     ):
-         return cache_dir
-
-     if model_spec.model_format in ["pytorch", "gptq"]:
-         download_dir = retry_download(
-             snapshot_download,
-             model_family.model_name,
-             {
-                 "model_size": model_spec.model_size_in_billions,
-                 "model_format": model_spec.model_format,
-             },
-             model_spec.model_id,
-             revision=model_spec.model_revision,
-         )
-         for subdir, dirs, files in os.walk(download_dir):
-             for file in files:
-                 relpath = os.path.relpath(os.path.join(subdir, file), download_dir)
-                 symlink_local_file(os.path.join(subdir, file), cache_dir, relpath)
-     else:
-         raise ValueError(f"Unsupported format: {model_spec.model_format}")
-
-     meta_path = _get_meta_path(
-         cache_dir, model_spec.model_format, model_spec.model_hub, quantization
-     )
-     _generate_meta_file(meta_path, model_family, model_spec, quantization)
-
-     return cache_dir
-
-
- def cache_from_huggingface(
-     model_family: LVLMFamilyV1,
-     model_spec: "LVLMSpecV1",
-     quantization: Optional[str] = None,
- ) -> str:
-     """
-     Cache model from Hugging Face. Return the cache directory.
-     """
-     import huggingface_hub
-
-     cache_dir = _get_cache_dir(model_family, model_spec)
-     if _skip_download(
-         cache_dir,
-         model_spec.model_format,
-         model_spec.model_hub,
-         model_spec.model_revision,
-         quantization,
-     ):
-         return cache_dir
-
-     if model_spec.model_format in ["pytorch", "gptq"]:
-         assert isinstance(model_spec, LVLMSpecV1)
-         retry_download(
-             huggingface_hub.snapshot_download,
-             model_family.model_name,
-             {
-                 "model_size": model_spec.model_size_in_billions,
-                 "model_format": model_spec.model_format,
-             },
-             model_spec.model_id,
-             revision=model_spec.model_revision,
-             local_dir=cache_dir,
-             local_dir_use_symlinks=True,
-         )
-     else:
-         raise ValueError(f"Unsupported model format: {model_spec.model_format}")
-
-     meta_path = _get_meta_path(
-         cache_dir, model_spec.model_format, model_spec.model_hub, quantization
-     )
-     _generate_meta_file(meta_path, model_family, model_spec, quantization)
-
-     return cache_dir
-
-
- def cache(
-     model_family: LVLMFamilyV1,
-     model_spec: "LVLMSpecV1",
-     quantization: Optional[str] = None,
- ) -> str:
-     if model_spec.model_hub == "huggingface":
-         logger.info(f"Caching from Hugging Face: {model_spec.model_id}")
-         return cache_from_huggingface(model_family, model_spec, quantization)
-     elif model_spec.model_hub == "modelscope":
-         logger.info(f"Caching from Modelscope: {model_spec.model_id}")
-         return cache_from_modelscope(model_family, model_spec, quantization)
-     else:
-         raise ValueError(f"Unknown model hub: {model_spec.model_hub}")
-
-
- def get_cache_status(
-     model_spec: LVLMSpecV1,
- ) -> bool:
-     return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
xinference/model/multimodal/model_spec.json
@@ -1,43 +0,0 @@
- [
-   {
-     "version": 1,
-     "context_length": 4096,
-     "model_name": "qwen-vl-chat",
-     "model_lang": [
-       "en",
-       "zh"
-     ],
-     "model_ability": [
-       "chat"
-     ],
-     "model_description": "Qwen-VL-Chat supports more flexible interaction, such as multiple image inputs, multi-round question answering, and creative capabilities.",
-     "model_specs": [
-       {
-         "model_format": "pytorch",
-         "model_size_in_billions": 7,
-         "quantizations": [
-           "none"
-         ],
-         "model_id": "Qwen/Qwen-VL-Chat",
-         "model_revision": "6665c780ade5ff3f08853b4262dcb9c8f9598d42"
-       },
-       {
-         "model_format": "gptq",
-         "model_size_in_billions": 7,
-         "quantizations": [
-           "Int4"
-         ],
-         "model_id": "Qwen/Qwen-VL-Chat-{quantization}",
-         "model_revision": "5d3a5aa033ed2c502300d426c81cc5b13bcd1409"
-       }
-     ],
-     "prompt_style": {
-       "style_name": "QWEN",
-       "system_prompt": "You are a helpful assistant.",
-       "roles": [
-         "user",
-         "assistant"
-       ]
-     }
-   }
- ]
xinference/model/multimodal/model_spec_modelscope.json
@@ -1,45 +0,0 @@
- [
-   {
-     "version": 1,
-     "context_length": 4096,
-     "model_name": "qwen-vl-chat",
-     "model_lang": [
-       "en",
-       "zh"
-     ],
-     "model_ability": [
-       "chat"
-     ],
-     "model_description": "Qwen-VL-Chat supports more flexible interaction, such as multiple image inputs, multi-round question answering, and creative capabilities.",
-     "model_specs": [
-       {
-         "model_format": "pytorch",
-         "model_size_in_billions": 7,
-         "quantizations": [
-           "none"
-         ],
-         "model_hub": "modelscope",
-         "model_id": "Qwen/Qwen-VL-Chat",
-         "model_revision": "master"
-       },
-       {
-         "model_format": "gptq",
-         "model_size_in_billions": 7,
-         "quantizations": [
-           "Int4"
-         ],
-         "model_hub": "modelscope",
-         "model_id": "Qwen/Qwen-VL-Chat-{quantization}",
-         "model_revision": "master"
-       }
-     ],
-     "prompt_style": {
-       "style_name": "QWEN",
-       "system_prompt": "You are a helpful assistant.",
-       "roles": [
-         "user",
-         "assistant"
-       ]
-     }
-   }
- ]