xinference 0.14.2__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (137)
  1. xinference/_version.py +3 -3
  2. xinference/core/chat_interface.py +1 -1
  3. xinference/core/image_interface.py +9 -0
  4. xinference/core/model.py +4 -1
  5. xinference/core/worker.py +48 -41
  6. xinference/model/audio/chattts.py +24 -9
  7. xinference/model/audio/core.py +8 -2
  8. xinference/model/audio/fish_speech.py +228 -0
  9. xinference/model/audio/model_spec.json +8 -0
  10. xinference/model/embedding/core.py +23 -1
  11. xinference/model/image/model_spec.json +2 -1
  12. xinference/model/image/model_spec_modelscope.json +2 -1
  13. xinference/model/image/stable_diffusion/core.py +49 -1
  14. xinference/model/llm/__init__.py +6 -0
  15. xinference/model/llm/llm_family.json +54 -9
  16. xinference/model/llm/llm_family.py +2 -0
  17. xinference/model/llm/llm_family_modelscope.json +56 -10
  18. xinference/model/llm/lmdeploy/__init__.py +0 -0
  19. xinference/model/llm/lmdeploy/core.py +557 -0
  20. xinference/model/llm/transformers/cogvlm2.py +4 -45
  21. xinference/model/llm/transformers/cogvlm2_video.py +524 -0
  22. xinference/model/llm/transformers/core.py +1 -0
  23. xinference/model/llm/transformers/glm4v.py +2 -23
  24. xinference/model/llm/transformers/intern_vl.py +94 -11
  25. xinference/model/llm/transformers/minicpmv25.py +2 -23
  26. xinference/model/llm/transformers/minicpmv26.py +2 -22
  27. xinference/model/llm/transformers/yi_vl.py +2 -24
  28. xinference/model/llm/utils.py +10 -1
  29. xinference/model/llm/vllm/core.py +1 -1
  30. xinference/thirdparty/fish_speech/__init__.py +0 -0
  31. xinference/thirdparty/fish_speech/fish_speech/__init__.py +0 -0
  32. xinference/thirdparty/fish_speech/fish_speech/callbacks/__init__.py +3 -0
  33. xinference/thirdparty/fish_speech/fish_speech/callbacks/grad_norm.py +113 -0
  34. xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
  35. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  36. xinference/thirdparty/fish_speech/fish_speech/conversation.py +2 -0
  37. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  38. xinference/thirdparty/fish_speech/fish_speech/datasets/concat_repeat.py +53 -0
  39. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  40. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_pb2.py +33 -0
  41. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_stream.py +36 -0
  42. xinference/thirdparty/fish_speech/fish_speech/datasets/semantic.py +496 -0
  43. xinference/thirdparty/fish_speech/fish_speech/datasets/vqgan.py +147 -0
  44. xinference/thirdparty/fish_speech/fish_speech/i18n/__init__.py +3 -0
  45. xinference/thirdparty/fish_speech/fish_speech/i18n/core.py +40 -0
  46. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  47. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +122 -0
  48. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +122 -0
  49. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +123 -0
  50. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +133 -0
  51. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +122 -0
  52. xinference/thirdparty/fish_speech/fish_speech/i18n/scan.py +122 -0
  53. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  54. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/__init__.py +0 -0
  55. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lit_module.py +202 -0
  56. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +779 -0
  57. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lora.py +92 -0
  58. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +3 -0
  59. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +442 -0
  60. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  61. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +44 -0
  62. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +625 -0
  63. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +139 -0
  64. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +115 -0
  65. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +225 -0
  66. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/utils.py +94 -0
  67. xinference/thirdparty/fish_speech/fish_speech/scheduler.py +40 -0
  68. xinference/thirdparty/fish_speech/fish_speech/text/__init__.py +4 -0
  69. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/__init__.py +0 -0
  70. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_class.py +172 -0
  71. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_constant.py +30 -0
  72. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_util.py +342 -0
  73. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/cardinal.py +32 -0
  74. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/date.py +75 -0
  75. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/digit.py +32 -0
  76. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/fraction.py +35 -0
  77. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/money.py +43 -0
  78. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/percentage.py +33 -0
  79. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/telephone.py +51 -0
  80. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +177 -0
  81. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +69 -0
  82. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +130 -0
  83. xinference/thirdparty/fish_speech/fish_speech/train.py +139 -0
  84. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +23 -0
  85. xinference/thirdparty/fish_speech/fish_speech/utils/braceexpand.py +217 -0
  86. xinference/thirdparty/fish_speech/fish_speech/utils/context.py +13 -0
  87. xinference/thirdparty/fish_speech/fish_speech/utils/file.py +16 -0
  88. xinference/thirdparty/fish_speech/fish_speech/utils/instantiators.py +50 -0
  89. xinference/thirdparty/fish_speech/fish_speech/utils/logger.py +55 -0
  90. xinference/thirdparty/fish_speech/fish_speech/utils/logging_utils.py +48 -0
  91. xinference/thirdparty/fish_speech/fish_speech/utils/rich_utils.py +100 -0
  92. xinference/thirdparty/fish_speech/fish_speech/utils/spectrogram.py +122 -0
  93. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +114 -0
  94. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  95. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +120 -0
  96. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1237 -0
  97. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  98. xinference/thirdparty/fish_speech/tools/api.py +495 -0
  99. xinference/thirdparty/fish_speech/tools/auto_rerank.py +159 -0
  100. xinference/thirdparty/fish_speech/tools/download_models.py +55 -0
  101. xinference/thirdparty/fish_speech/tools/extract_model.py +21 -0
  102. xinference/thirdparty/fish_speech/tools/file.py +108 -0
  103. xinference/thirdparty/fish_speech/tools/gen_ref.py +36 -0
  104. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  105. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +169 -0
  106. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +171 -0
  107. xinference/thirdparty/fish_speech/tools/llama/generate.py +698 -0
  108. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +95 -0
  109. xinference/thirdparty/fish_speech/tools/llama/quantize.py +497 -0
  110. xinference/thirdparty/fish_speech/tools/llama/rebuild_tokenizer.py +57 -0
  111. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +55 -0
  112. xinference/thirdparty/fish_speech/tools/post_api.py +164 -0
  113. xinference/thirdparty/fish_speech/tools/sensevoice/__init__.py +0 -0
  114. xinference/thirdparty/fish_speech/tools/sensevoice/auto_model.py +573 -0
  115. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +332 -0
  116. xinference/thirdparty/fish_speech/tools/sensevoice/vad_utils.py +61 -0
  117. xinference/thirdparty/fish_speech/tools/smart_pad.py +47 -0
  118. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  119. xinference/thirdparty/fish_speech/tools/vqgan/create_train_split.py +83 -0
  120. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +227 -0
  121. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +120 -0
  122. xinference/thirdparty/fish_speech/tools/webui.py +619 -0
  123. xinference/thirdparty/fish_speech/tools/whisper_asr.py +176 -0
  124. xinference/web/ui/build/asset-manifest.json +3 -3
  125. xinference/web/ui/build/index.html +1 -1
  126. xinference/web/ui/build/static/js/{main.ffc26121.js → main.661c7b0a.js} +3 -3
  127. xinference/web/ui/build/static/js/main.661c7b0a.js.map +1 -0
  128. xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +1 -0
  129. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/METADATA +18 -6
  130. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/RECORD +135 -37
  131. xinference/web/ui/build/static/js/main.ffc26121.js.map +0 -1
  132. xinference/web/ui/node_modules/.cache/babel-loader/2f40209b32e7e46a2eab6b8c8a355eb42c3caa8bc3228dd929f32fd2b3940294.json +0 -1
  133. /xinference/web/ui/build/static/js/{main.ffc26121.js.LICENSE.txt → main.661c7b0a.js.LICENSE.txt} +0 -0
  134. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/LICENSE +0 -0
  135. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/WHEEL +0 -0
  136. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/entry_points.txt +0 -0
  137. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/top_level.txt +0 -0
xinference/model/image/stable_diffusion/core.py

@@ -24,6 +24,9 @@ from functools import partial
 from io import BytesIO
 from typing import Dict, List, Optional, Union
 
+import PIL.Image
+from PIL import ImageOps
+
 from ....constants import XINFERENCE_IMAGE_DIR
 from ....device_utils import move_model_to_available_device
 from ....types import Image, ImageList, LoRA
@@ -46,8 +49,13 @@ class DiffusionModel:
         self._model_uid = model_uid
         self._model_path = model_path
         self._device = device
+        # when a model has text2image ability,
+        # it will be loaded as AutoPipelineForText2Image
+        # for image2image and inpainting,
+        # we convert to the corresponding model
         self._model = None
         self._i2i_model = None  # image to image model
+        self._inpainting_model = None  # inpainting model
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
@@ -152,6 +160,10 @@
         model=None,
         **kwargs,
     ):
+        import gc
+
+        from ....device_utils import empty_cache
+
         logger.debug(
             "stable diffusion args: %s",
             kwargs,
@@ -159,6 +171,11 @@
         model = model if model is not None else self._model
         assert callable(model)
         images = model(**kwargs).images
+
+        # clean cache
+        gc.collect()
+        empty_cache()
+
         if response_format == "url":
             os.makedirs(XINFERENCE_IMAGE_DIR, exist_ok=True)
             image_list = []
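
The cleanup above calls empty_cache from xinference's device_utils module, whose body is not part of this diff. As a rough sketch of what such a device-agnostic cache flush typically does (the function body below is an assumption, not the shipped implementation):

    import gc

    import torch


    def empty_cache():
        # Assumed sketch of a device-agnostic cache flush; the real
        # xinference.device_utils.empty_cache may cover more backends.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
            torch.mps.empty_cache()


    # Mirrors the cleanup added after image generation in _call_model.
    gc.collect()
    empty_cache()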
@@ -209,9 +226,17 @@
             **kwargs,
         )
 
+    @staticmethod
+    def pad_to_multiple(image, multiple=8):
+        x, y = image.size
+        padding_x = (multiple - x % multiple) % multiple
+        padding_y = (multiple - y % multiple) % multiple
+        padding = (0, 0, padding_x, padding_y)
+        return ImageOps.expand(image, padding)
+
     def image_to_image(
         self,
-        image: bytes,
+        image: PIL.Image,
         prompt: Optional[Union[str, List[str]]] = None,
         negative_prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
@@ -236,6 +261,11 @@
             width, height = map(int, re.split(r"[^\d]+", size))
             kwargs["width"] = width
             kwargs["height"] = height
+        if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
+            # Model like SD3 image to image requires image's height and width is times of 16
+            # padding the image if specified
+            image = self.pad_to_multiple(image, multiple=int(padding_image_to_multiple))
+
         self._filter_kwargs(kwargs)
         return self._call_model(
             image=image,
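
Because the new padding_image_to_multiple option is just a thin wrapper around pad_to_multiple, its effect is easy to verify in isolation with plain Pillow (the image sizes below are arbitrary examples):

    from PIL import Image, ImageOps


    def pad_to_multiple(image, multiple=8):
        # Same logic as the new DiffusionModel.pad_to_multiple staticmethod:
        # pad the right/bottom edges until both dimensions are multiples of `multiple`.
        x, y = image.size
        padding_x = (multiple - x % multiple) % multiple
        padding_y = (multiple - y % multiple) % multiple
        return ImageOps.expand(image, (0, 0, padding_x, padding_y))


    img = Image.new("RGB", (500, 333))
    padded = pad_to_multiple(img, multiple=16)
    print(padded.size)  # (512, 336) -- both dimensions now divisible by 16,
                        # as SD3-style image-to-image pipelines expect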
@@ -258,6 +288,23 @@
         response_format: str = "url",
         **kwargs,
     ):
+        if "inpainting" not in self._abilities:
+            raise RuntimeError(f"{self._model_uid} does not support inpainting")
+
+        if (
+            "text2image" in self._abilities or "image2image" in self._abilities
+        ) and self._model is not None:
+            from diffusers import AutoPipelineForInpainting
+
+            if self._inpainting_model is not None:
+                model = self._inpainting_model
+            else:
+                model = self._inpainting_model = AutoPipelineForInpainting.from_pipe(
+                    self._model
+                )
+        else:
+            model = self._model
+
         width, height = map(int, re.split(r"[^\d]+", size))
         return self._call_model(
             image=image,
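
The inpainting branch reuses the already-loaded pipeline through diffusers' from_pipe, which derives an inpainting pipeline from the same weights instead of loading a second copy. A minimal standalone sketch of that pattern (the model id is a placeholder, not something prescribed by this release):

    import torch
    from diffusers import AutoPipelineForInpainting, AutoPipelineForText2Image

    # Loaded once as text-to-image, as xinference does for models with the
    # "text2image" ability.
    t2i = AutoPipelineForText2Image.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",  # placeholder model id
        torch_dtype=torch.float16,
    )

    # Derive an inpainting pipeline that shares the loaded components; this is
    # what the new `_inpainting_model` attribute caches above.
    inpaint = AutoPipelineForInpainting.from_pipe(t2i)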
@@ -268,5 +315,6 @@
             width=width,
             num_images_per_prompt=n,
             response_format=response_format,
+            model=model,
             **kwargs,
         )
xinference/model/llm/__init__.py

@@ -34,6 +34,7 @@ from .llm_family import (
     BUILTIN_MODELSCOPE_LLM_FAMILIES,
     LLAMA_CLASSES,
     LLM_ENGINES,
+    LMDEPLOY_CLASSES,
     MLX_CLASSES,
     SGLANG_CLASSES,
     SUPPORTED_ENGINES,
@@ -113,10 +114,12 @@ def generate_engine_config_by_model_family(model_family):
 
 def _install():
     from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel
+    from .lmdeploy.core import LMDeployChatModel, LMDeployModel
     from .mlx.core import MLXChatModel, MLXModel
     from .sglang.core import SGLANGChatModel, SGLANGModel
     from .transformers.chatglm import ChatglmPytorchChatModel
     from .transformers.cogvlm2 import CogVLM2Model
+    from .transformers.cogvlm2_video import CogVLM2VideoModel
     from .transformers.core import PytorchChatModel, PytorchModel
     from .transformers.deepseek_vl import DeepSeekVLChatModel
     from .transformers.glm4v import Glm4VModel
@@ -147,6 +150,7 @@ def _install():
     SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
     VLLM_CLASSES.extend([VLLMModel, VLLMChatModel, VLLMVisionModel])
     MLX_CLASSES.extend([MLXModel, MLXChatModel])
+    LMDEPLOY_CLASSES.extend([LMDeployModel, LMDeployChatModel])
     TRANSFORMERS_CLASSES.extend(
         [
             ChatglmPytorchChatModel,
@@ -160,6 +164,7 @@ def _install():
             InternVLChatModel,
             PytorchModel,
             CogVLM2Model,
+            CogVLM2VideoModel,
             MiniCPMV25Model,
             MiniCPMV26Model,
             Glm4VModel,
@@ -174,6 +179,7 @@ def _install():
     SUPPORTED_ENGINES["Transformers"] = TRANSFORMERS_CLASSES
     SUPPORTED_ENGINES["llama.cpp"] = LLAMA_CLASSES
     SUPPORTED_ENGINES["MLX"] = MLX_CLASSES
+    SUPPORTED_ENGINES["LMDEPLOY"] = LMDEPLOY_CLASSES
 
     json_path = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "llm_family.json"
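
With LMDEPLOY now registered in SUPPORTED_ENGINES, the engine can be requested at launch time. A hedged sketch from the Python client (the endpoint and model choice are placeholders; which families actually ship LMDeploy specs depends on the family definitions, so verify before relying on this):

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")  # placeholder endpoint
    model_uid = client.launch_model(
        model_name="internvl2",          # placeholder: a family with LMDeploy support
        model_engine="LMDEPLOY",         # engine key added in this release
        model_format="awq",
        quantization="Int4",
        model_size_in_billions=8,
    )
    model = client.get_model(model_uid)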
xinference/model/llm/llm_family.json

@@ -7189,15 +7189,6 @@
         "model_id": "OpenGVLab/InternVL2-4B",
         "model_revision": "b50544dafada6c41e80bfde2f57cc9b0140fc21c"
       },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 4,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_id": "OpenGVLab/InternVL2-8B-AWQ",
-        "model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
-      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 8,
@@ -7209,6 +7200,15 @@
         "model_id": "OpenGVLab/InternVL2-8B",
         "model_revision": "3bfd3664dea4f3da628785f5125d30f889701253"
       },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "OpenGVLab/InternVL2-8B-AWQ",
+        "model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 26,
@@ -7342,6 +7342,51 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "cogvlm2-video-llama3-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "THUDM/cogvlm2-video-llama3-chat",
+        "model_revision": "f375ead7d8202ebe2c3d09f1068abdddeb2929fa"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "LLAMA3",
+      "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n\n",
+      "inter_message_sep": "<|eot_id|>",
+      "stop_token_ids": [
+        128001,
+        128009
+      ],
+      "stop": [
+        "<|end_of_text|>",
+        "<|eot_id|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 8192,
xinference/model/llm/llm_family.py

@@ -271,6 +271,8 @@ VLLM_CLASSES: List[Type[LLM]] = []
 
 MLX_CLASSES: List[Type[LLM]] = []
 
+LMDEPLOY_CLASSES: List[Type[LLM]] = []
+
 LLM_ENGINES: Dict[str, Dict[str, List[Dict[str, Any]]]] = {}
 SUPPORTED_ENGINES: Dict[str, List[Type[LLM]]] = {}
 
xinference/model/llm/llm_family_modelscope.json

@@ -4778,10 +4778,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "pytorch",
+        "model_format": "awq",
         "model_size_in_billions": 2,
         "quantizations": [
-          "none"
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-2B-AWQ",
@@ -4812,10 +4812,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "pytorch",
+        "model_format": "awq",
         "model_size_in_billions": 8,
         "quantizations": [
-          "none"
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-8B-AWQ",
@@ -4834,10 +4834,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "pytorch",
+        "model_format": "awq",
         "model_size_in_billions": 26,
         "quantizations": [
-          "none"
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-26B-AWQ",
@@ -4856,10 +4856,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "pytorch",
+        "model_format": "awq",
         "model_size_in_billions": 40,
         "quantizations": [
-          "none"
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-40B-AWQ",
@@ -4878,10 +4878,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "pytorch",
+        "model_format": "awq",
         "model_size_in_billions": 76,
         "quantizations": [
-          "none"
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
@@ -4962,6 +4962,52 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "cogvlm2-video-llama3-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/cogvlm2-video-llama3-chat",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "LLAMA3",
+      "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n\n",
+      "inter_message_sep": "<|eot_id|>",
+      "stop_token_ids": [
+        128001,
+        128009
+      ],
+      "stop": [
+        "<|end_of_text|>",
+        "<|eot_id|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 8192,