xinference-0.13.0-py3-none-any.whl → xinference-0.13.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +123 -3
- xinference/client/restful/restful_client.py +131 -2
- xinference/core/model.py +93 -24
- xinference/core/supervisor.py +132 -15
- xinference/core/worker.py +165 -8
- xinference/deploy/cmdline.py +5 -0
- xinference/model/audio/chattts.py +46 -14
- xinference/model/audio/core.py +23 -15
- xinference/model/core.py +12 -3
- xinference/model/embedding/core.py +25 -16
- xinference/model/flexible/__init__.py +40 -0
- xinference/model/flexible/core.py +228 -0
- xinference/model/flexible/launchers/__init__.py +15 -0
- xinference/model/flexible/launchers/transformers_launcher.py +63 -0
- xinference/model/flexible/utils.py +33 -0
- xinference/model/image/core.py +21 -14
- xinference/model/image/custom.py +1 -1
- xinference/model/image/model_spec.json +14 -0
- xinference/model/image/stable_diffusion/core.py +43 -6
- xinference/model/llm/__init__.py +0 -2
- xinference/model/llm/core.py +3 -2
- xinference/model/llm/ggml/llamacpp.py +1 -10
- xinference/model/llm/llm_family.json +292 -36
- xinference/model/llm/llm_family.py +97 -52
- xinference/model/llm/llm_family_modelscope.json +220 -27
- xinference/model/llm/pytorch/core.py +0 -80
- xinference/model/llm/sglang/core.py +7 -2
- xinference/model/llm/utils.py +4 -2
- xinference/model/llm/vllm/core.py +3 -0
- xinference/model/rerank/core.py +24 -25
- xinference/types.py +0 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.0fb6f3ab.js → main.95c1d652.js} +3 -3
- xinference/web/ui/build/static/js/main.95c1d652.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +1 -0
- {xinference-0.13.0.dist-info → xinference-0.13.2.dist-info}/METADATA +9 -11
- {xinference-0.13.0.dist-info → xinference-0.13.2.dist-info}/RECORD +49 -58
- xinference/model/llm/ggml/chatglm.py +0 -457
- xinference/thirdparty/ChatTTS/__init__.py +0 -1
- xinference/thirdparty/ChatTTS/core.py +0 -200
- xinference/thirdparty/ChatTTS/experimental/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/experimental/llm.py +0 -40
- xinference/thirdparty/ChatTTS/infer/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/infer/api.py +0 -125
- xinference/thirdparty/ChatTTS/model/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/model/dvae.py +0 -155
- xinference/thirdparty/ChatTTS/model/gpt.py +0 -265
- xinference/thirdparty/ChatTTS/utils/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/utils/gpu_utils.py +0 -23
- xinference/thirdparty/ChatTTS/utils/infer_utils.py +0 -141
- xinference/thirdparty/ChatTTS/utils/io_utils.py +0 -14
- xinference/web/ui/build/static/js/main.0fb6f3ab.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f6b391abec76271137faad13a3793fe7acc1024e8cd2269c147b653ecd3a73b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/30a0c79d8025d6441eb75b2df5bc2750a14f30119c869ef02570d294dff65c2f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/40486e655c3c5801f087e2cf206c0b5511aaa0dfdba78046b7181bf9c17e54c5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b5507cd57f16a3a230aa0128e39fe103e928de139ea29e2679e4c64dcbba3b3a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d779b915f83f9c7b5a72515b6932fdd114f1822cef90ae01cc0d12bca59abc2d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d87824cb266194447a9c0c69ebab2d507bfc3e3148976173760d18c035e9dd26.json +0 -1
- xinference/web/ui/build/static/js/{main.0fb6f3ab.js.LICENSE.txt → main.95c1d652.js.LICENSE.txt} +0 -0
- {xinference-0.13.0.dist-info → xinference-0.13.2.dist-info}/LICENSE +0 -0
- {xinference-0.13.0.dist-info → xinference-0.13.2.dist-info}/WHEEL +0 -0
- {xinference-0.13.0.dist-info → xinference-0.13.2.dist-info}/entry_points.txt +0 -0
- {xinference-0.13.0.dist-info → xinference-0.13.2.dist-info}/top_level.txt +0 -0
|
@@ -92,5 +92,19 @@
|
|
|
92
92
|
"model_revision": "62134b9d8e703b5d6f74f1534457287a8bba77ef"
|
|
93
93
|
}
|
|
94
94
|
]
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
"model_name": "stable-diffusion-inpainting",
|
|
98
|
+
"model_family": "stable_diffusion",
|
|
99
|
+
"model_id": "runwayml/stable-diffusion-inpainting",
|
|
100
|
+
"model_revision": "51388a731f57604945fddd703ecb5c50e8e7b49d",
|
|
101
|
+
"ability": "inpainting"
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"model_name": "stable-diffusion-2-inpainting",
|
|
105
|
+
"model_family": "stable_diffusion",
|
|
106
|
+
"model_id": "stabilityai/stable-diffusion-2-inpainting",
|
|
107
|
+
"model_revision": "81a84f49b15956b60b4272a405ad3daef3da4590",
|
|
108
|
+
"ability": "inpainting"
|
|
95
109
|
}
|
|
96
110
|
]
|
|
@@ -16,6 +16,7 @@ import base64
|
|
|
16
16
|
import logging
|
|
17
17
|
import os
|
|
18
18
|
import re
|
|
19
|
+
import sys
|
|
19
20
|
import time
|
|
20
21
|
import uuid
|
|
21
22
|
from concurrent.futures import ThreadPoolExecutor
|
|
@@ -39,6 +40,7 @@ class DiffusionModel:
|
|
|
39
40
|
lora_model: Optional[List[LoRA]] = None,
|
|
40
41
|
lora_load_kwargs: Optional[Dict] = None,
|
|
41
42
|
lora_fuse_kwargs: Optional[Dict] = None,
|
|
43
|
+
ability: Optional[str] = None,
|
|
42
44
|
**kwargs,
|
|
43
45
|
):
|
|
44
46
|
self._model_uid = model_uid
|
|
@@ -48,6 +50,7 @@ class DiffusionModel:
|
|
|
48
50
|
self._lora_model = lora_model
|
|
49
51
|
self._lora_load_kwargs = lora_load_kwargs or {}
|
|
50
52
|
self._lora_fuse_kwargs = lora_fuse_kwargs or {}
|
|
53
|
+
self._ability = ability
|
|
51
54
|
self._kwargs = kwargs
|
|
52
55
|
|
|
53
56
|
def _apply_lora(self):
|
|
@@ -64,8 +67,14 @@ class DiffusionModel:
|
|
|
64
67
|
logger.info(f"Successfully loaded the LoRA for model {self._model_uid}.")
|
|
65
68
|
|
|
66
69
|
def load(self):
|
|
67
|
-
|
|
68
|
-
|
|
70
|
+
import torch
|
|
71
|
+
|
|
72
|
+
if self._ability in [None, "text2image", "image2image"]:
|
|
73
|
+
from diffusers import AutoPipelineForText2Image as AutoPipelineModel
|
|
74
|
+
elif self._ability == "inpainting":
|
|
75
|
+
from diffusers import AutoPipelineForInpainting as AutoPipelineModel
|
|
76
|
+
else:
|
|
77
|
+
raise ValueError(f"Unknown ability: {self._ability}")
|
|
69
78
|
|
|
70
79
|
controlnet = self._kwargs.get("controlnet")
|
|
71
80
|
if controlnet is not None:
|
|
@@ -74,12 +83,16 @@ class DiffusionModel:
|
|
|
74
83
|
logger.debug("Loading controlnet %s", controlnet)
|
|
75
84
|
self._kwargs["controlnet"] = ControlNetModel.from_pretrained(controlnet)
|
|
76
85
|
|
|
77
|
-
|
|
86
|
+
torch_dtype = self._kwargs.get("torch_dtype")
|
|
87
|
+
if sys.platform != "darwin" and torch_dtype is None:
|
|
88
|
+
# The following params crashes on Mac M2
|
|
89
|
+
self._kwargs["torch_dtype"] = torch.float16
|
|
90
|
+
self._kwargs["use_safetensors"] = True
|
|
91
|
+
|
|
92
|
+
logger.debug("Loading model %s", AutoPipelineModel)
|
|
93
|
+
self._model = AutoPipelineModel.from_pretrained(
|
|
78
94
|
self._model_path,
|
|
79
95
|
**self._kwargs,
|
|
80
|
-
# The following params crashes on Mac M2
|
|
81
|
-
# torch_dtype=torch.float16,
|
|
82
|
-
# use_safetensors=True,
|
|
83
96
|
)
|
|
84
97
|
self._model = move_model_to_available_device(self._model)
|
|
85
98
|
# Recommended if your computer has < 64 GB of RAM
|
|
@@ -174,3 +187,27 @@ class DiffusionModel:
|
|
|
174
187
|
response_format=response_format,
|
|
175
188
|
**kwargs,
|
|
176
189
|
)
|
|
190
|
+
|
|
191
|
+
def inpainting(
|
|
192
|
+
self,
|
|
193
|
+
image: bytes,
|
|
194
|
+
mask_image: bytes,
|
|
195
|
+
prompt: Optional[Union[str, List[str]]] = None,
|
|
196
|
+
negative_prompt: Optional[Union[str, List[str]]] = None,
|
|
197
|
+
n: int = 1,
|
|
198
|
+
size: str = "1024*1024",
|
|
199
|
+
response_format: str = "url",
|
|
200
|
+
**kwargs,
|
|
201
|
+
):
|
|
202
|
+
width, height = map(int, re.split(r"[^\d]+", size))
|
|
203
|
+
return self._call_model(
|
|
204
|
+
image=image,
|
|
205
|
+
mask_image=mask_image,
|
|
206
|
+
prompt=prompt,
|
|
207
|
+
negative_prompt=negative_prompt,
|
|
208
|
+
height=height,
|
|
209
|
+
width=width,
|
|
210
|
+
num_images_per_prompt=n,
|
|
211
|
+
response_format=response_format,
|
|
212
|
+
**kwargs,
|
|
213
|
+
)
|
xinference/model/llm/__init__.py
CHANGED
|
@@ -112,7 +112,6 @@ def generate_engine_config_by_model_family(model_family):
|
|
|
112
112
|
|
|
113
113
|
|
|
114
114
|
def _install():
|
|
115
|
-
from .ggml.chatglm import ChatglmCppChatModel
|
|
116
115
|
from .ggml.llamacpp import LlamaCppChatModel, LlamaCppModel
|
|
117
116
|
from .mlx.core import MLXChatModel, MLXModel
|
|
118
117
|
from .pytorch.baichuan import BaichuanPytorchChatModel
|
|
@@ -143,7 +142,6 @@ def _install():
|
|
|
143
142
|
# register llm classes.
|
|
144
143
|
LLAMA_CLASSES.extend(
|
|
145
144
|
[
|
|
146
|
-
ChatglmCppChatModel,
|
|
147
145
|
LlamaCppChatModel,
|
|
148
146
|
LlamaCppModel,
|
|
149
147
|
]
|
xinference/model/llm/core.py
CHANGED
|
@@ -20,7 +20,7 @@ import platform
|
|
|
20
20
|
from abc import abstractmethod
|
|
21
21
|
from collections import defaultdict
|
|
22
22
|
from functools import lru_cache
|
|
23
|
-
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
|
|
23
|
+
from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union
|
|
24
24
|
|
|
25
25
|
from ...core.utils import parse_replica_model_uid
|
|
26
26
|
from ...types import PeftModelConfig
|
|
@@ -193,6 +193,7 @@ def create_llm_model_instance(
|
|
|
193
193
|
model_size_in_billions: Optional[Union[int, str]] = None,
|
|
194
194
|
quantization: Optional[str] = None,
|
|
195
195
|
peft_model_config: Optional[PeftModelConfig] = None,
|
|
196
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
196
197
|
**kwargs,
|
|
197
198
|
) -> Tuple[LLM, LLMDescription]:
|
|
198
199
|
from .llm_family import cache, check_engine_by_spec_parameters, match_llm
|
|
@@ -200,7 +201,7 @@ def create_llm_model_instance(
|
|
|
200
201
|
if model_engine is None:
|
|
201
202
|
raise ValueError("model_engine is required for LLM model")
|
|
202
203
|
match_result = match_llm(
|
|
203
|
-
model_name, model_format, model_size_in_billions, quantization
|
|
204
|
+
model_name, model_format, model_size_in_billions, quantization, download_hub
|
|
204
205
|
)
|
|
205
206
|
|
|
206
207
|
if not match_result:
|
|
@@ -25,7 +25,6 @@ from ....types import (
|
|
|
25
25
|
CompletionChunk,
|
|
26
26
|
CompletionUsage,
|
|
27
27
|
CreateCompletionLlamaCpp,
|
|
28
|
-
Embedding,
|
|
29
28
|
LlamaCppGenerateConfig,
|
|
30
29
|
LlamaCppModelConfig,
|
|
31
30
|
)
|
|
@@ -65,7 +64,6 @@ class LlamaCppModel(LLM):
|
|
|
65
64
|
|
|
66
65
|
if self.model_family.context_length:
|
|
67
66
|
llamacpp_model_config.setdefault("n_ctx", self.model_family.context_length)
|
|
68
|
-
llamacpp_model_config.setdefault("embedding", True)
|
|
69
67
|
llamacpp_model_config.setdefault("use_mmap", False)
|
|
70
68
|
llamacpp_model_config.setdefault("use_mlock", True)
|
|
71
69
|
|
|
@@ -185,7 +183,7 @@ class LlamaCppModel(LLM):
|
|
|
185
183
|
) -> bool:
|
|
186
184
|
if llm_spec.model_format not in ["ggmlv3", "ggufv2"]:
|
|
187
185
|
return False
|
|
188
|
-
if "
|
|
186
|
+
if "qwen" in llm_family.model_name:
|
|
189
187
|
return False
|
|
190
188
|
if "generate" not in llm_family.model_ability:
|
|
191
189
|
return False
|
|
@@ -261,11 +259,6 @@ class LlamaCppModel(LLM):
|
|
|
261
259
|
else:
|
|
262
260
|
return generator_wrapper(prompt, generate_config)
|
|
263
261
|
|
|
264
|
-
def create_embedding(self, input: Union[str, List[str]]) -> Embedding:
|
|
265
|
-
assert self._llm is not None
|
|
266
|
-
embedding = self._llm.create_embedding(input)
|
|
267
|
-
return embedding
|
|
268
|
-
|
|
269
262
|
|
|
270
263
|
class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
|
|
271
264
|
def __init__(
|
|
@@ -292,8 +285,6 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
|
|
|
292
285
|
) -> bool:
|
|
293
286
|
if llm_spec.model_format not in ["ggmlv3", "ggufv2"]:
|
|
294
287
|
return False
|
|
295
|
-
if "chatglm" in llm_family.model_name:
|
|
296
|
-
return False
|
|
297
288
|
if "chat" not in llm_family.model_ability:
|
|
298
289
|
return False
|
|
299
290
|
return True
|
|
@@ -574,19 +574,6 @@
|
|
|
574
574
|
],
|
|
575
575
|
"model_description": "ChatGLM is an open-source General Language Model (GLM) based LLM trained on both Chinese and English data.",
|
|
576
576
|
"model_specs": [
|
|
577
|
-
{
|
|
578
|
-
"model_format": "ggmlv3",
|
|
579
|
-
"model_size_in_billions": 6,
|
|
580
|
-
"quantizations": [
|
|
581
|
-
"q4_0",
|
|
582
|
-
"q4_1",
|
|
583
|
-
"q5_0",
|
|
584
|
-
"q5_1",
|
|
585
|
-
"q8_0"
|
|
586
|
-
],
|
|
587
|
-
"model_id": "Xorbits/chatglm-6B-GGML",
|
|
588
|
-
"model_file_name_template": "chatglm-ggml-{quantization}.bin"
|
|
589
|
-
},
|
|
590
577
|
{
|
|
591
578
|
"model_format": "pytorch",
|
|
592
579
|
"model_size_in_billions": 6,
|
|
@@ -622,19 +609,6 @@
|
|
|
622
609
|
],
|
|
623
610
|
"model_description": "ChatGLM2 is the second generation of ChatGLM, still open-source and trained on Chinese and English data.",
|
|
624
611
|
"model_specs": [
|
|
625
|
-
{
|
|
626
|
-
"model_format": "ggmlv3",
|
|
627
|
-
"model_size_in_billions": 6,
|
|
628
|
-
"quantizations": [
|
|
629
|
-
"q4_0",
|
|
630
|
-
"q4_1",
|
|
631
|
-
"q5_0",
|
|
632
|
-
"q5_1",
|
|
633
|
-
"q8_0"
|
|
634
|
-
],
|
|
635
|
-
"model_id": "Xorbits/chatglm2-6B-GGML",
|
|
636
|
-
"model_file_name_template": "chatglm2-ggml-{quantization}.bin"
|
|
637
|
-
},
|
|
638
612
|
{
|
|
639
613
|
"model_format": "pytorch",
|
|
640
614
|
"model_size_in_billions": 6,
|
|
@@ -706,15 +680,6 @@
|
|
|
706
680
|
],
|
|
707
681
|
"model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
|
|
708
682
|
"model_specs": [
|
|
709
|
-
{
|
|
710
|
-
"model_format": "ggmlv3",
|
|
711
|
-
"model_size_in_billions": 6,
|
|
712
|
-
"quantizations": [
|
|
713
|
-
"q4_0"
|
|
714
|
-
],
|
|
715
|
-
"model_id": "Xorbits/chatglm3-6B-GGML",
|
|
716
|
-
"model_file_name_template": "chatglm3-ggml-{quantization}.bin"
|
|
717
|
-
},
|
|
718
683
|
{
|
|
719
684
|
"model_format": "pytorch",
|
|
720
685
|
"model_size_in_billions": 6,
|
|
@@ -855,6 +820,32 @@
|
|
|
855
820
|
],
|
|
856
821
|
"model_id": "THUDM/glm-4-9b-chat",
|
|
857
822
|
"model_revision": "b84dc74294ccd507a3d78bde8aebf628221af9bd"
|
|
823
|
+
},
|
|
824
|
+
{
|
|
825
|
+
"model_format": "ggufv2",
|
|
826
|
+
"model_size_in_billions": 9,
|
|
827
|
+
"quantizations": [
|
|
828
|
+
"Q2_K",
|
|
829
|
+
"IQ3_XS",
|
|
830
|
+
"IQ3_S",
|
|
831
|
+
"IQ3_M",
|
|
832
|
+
"Q3_K_S",
|
|
833
|
+
"Q3_K_L",
|
|
834
|
+
"Q3_K",
|
|
835
|
+
"IQ4_XS",
|
|
836
|
+
"IQ4_NL",
|
|
837
|
+
"Q4_K_S",
|
|
838
|
+
"Q4_K",
|
|
839
|
+
"Q5_K_S",
|
|
840
|
+
"Q5_K",
|
|
841
|
+
"Q6_K",
|
|
842
|
+
"Q8_0",
|
|
843
|
+
"BF16",
|
|
844
|
+
"FP16"
|
|
845
|
+
],
|
|
846
|
+
"model_file_name_template": "glm-4-9b-chat.{quantization}.gguf",
|
|
847
|
+
"model_id": "legraphista/glm-4-9b-chat-GGUF",
|
|
848
|
+
"model_revision": "0155a14edf0176863e9a003cdd78ce599e4d62c0"
|
|
858
849
|
}
|
|
859
850
|
],
|
|
860
851
|
"prompt_style": {
|
|
@@ -900,6 +891,32 @@
|
|
|
900
891
|
],
|
|
901
892
|
"model_id": "THUDM/glm-4-9b-chat-1m",
|
|
902
893
|
"model_revision": "715ddbe91082f976ff6a4ca06d59e5bbff6c3642"
|
|
894
|
+
},
|
|
895
|
+
{
|
|
896
|
+
"model_format": "ggufv2",
|
|
897
|
+
"model_size_in_billions": 9,
|
|
898
|
+
"quantizations": [
|
|
899
|
+
"Q2_K",
|
|
900
|
+
"IQ3_XS",
|
|
901
|
+
"IQ3_S",
|
|
902
|
+
"IQ3_M",
|
|
903
|
+
"Q3_K_S",
|
|
904
|
+
"Q3_K_L",
|
|
905
|
+
"Q3_K",
|
|
906
|
+
"IQ4_XS",
|
|
907
|
+
"IQ4_NL",
|
|
908
|
+
"Q4_K_S",
|
|
909
|
+
"Q4_K",
|
|
910
|
+
"Q5_K_S",
|
|
911
|
+
"Q5_K",
|
|
912
|
+
"Q6_K",
|
|
913
|
+
"Q8_0",
|
|
914
|
+
"BF16",
|
|
915
|
+
"FP16"
|
|
916
|
+
],
|
|
917
|
+
"model_file_name_template": "glm-4-9b-chat-1m.{quantization}.gguf",
|
|
918
|
+
"model_id": "legraphista/glm-4-9b-chat-1m-GGUF",
|
|
919
|
+
"model_revision": "782e28bd5eee3c514c07108da15e0b5e06dcf776"
|
|
903
920
|
}
|
|
904
921
|
],
|
|
905
922
|
"prompt_style": {
|
|
@@ -966,6 +983,65 @@
|
|
|
966
983
|
]
|
|
967
984
|
}
|
|
968
985
|
},
|
|
986
|
+
{
|
|
987
|
+
"version": 1,
|
|
988
|
+
"context_length": 131072,
|
|
989
|
+
"model_name": "codegeex4",
|
|
990
|
+
"model_lang": [
|
|
991
|
+
"en",
|
|
992
|
+
"zh"
|
|
993
|
+
],
|
|
994
|
+
"model_ability": [
|
|
995
|
+
"chat"
|
|
996
|
+
],
|
|
997
|
+
"model_description": "the open-source version of the latest CodeGeeX4 model series",
|
|
998
|
+
"model_specs": [
|
|
999
|
+
{
|
|
1000
|
+
"model_format": "pytorch",
|
|
1001
|
+
"model_size_in_billions": 9,
|
|
1002
|
+
"quantizations": [
|
|
1003
|
+
"4-bit",
|
|
1004
|
+
"8-bit",
|
|
1005
|
+
"none"
|
|
1006
|
+
],
|
|
1007
|
+
"model_id": "THUDM/codegeex4-all-9b",
|
|
1008
|
+
"model_revision": "8c4ec1d2f2888412640825a7aa23355939a8f4c6"
|
|
1009
|
+
},
|
|
1010
|
+
{
|
|
1011
|
+
"model_format": "ggufv2",
|
|
1012
|
+
"model_size_in_billions": 9,
|
|
1013
|
+
"quantizations": [
|
|
1014
|
+
"IQ2_M",
|
|
1015
|
+
"IQ3_M",
|
|
1016
|
+
"Q4_K_M",
|
|
1017
|
+
"Q5_K_M",
|
|
1018
|
+
"Q6_K_L",
|
|
1019
|
+
"Q8_0"
|
|
1020
|
+
],
|
|
1021
|
+
"model_file_name_template": "codegeex4-all-9b-{quantization}.gguf",
|
|
1022
|
+
"model_id": "THUDM/codegeex4-all-9b-GGUF",
|
|
1023
|
+
"model_revision": "6a04071c54c943949826d4815ee00717ed8cf153"
|
|
1024
|
+
}
|
|
1025
|
+
],
|
|
1026
|
+
"prompt_style": {
|
|
1027
|
+
"style_name": "CHATGLM3",
|
|
1028
|
+
"system_prompt": "",
|
|
1029
|
+
"roles": [
|
|
1030
|
+
"user",
|
|
1031
|
+
"assistant"
|
|
1032
|
+
],
|
|
1033
|
+
"stop_token_ids": [
|
|
1034
|
+
151329,
|
|
1035
|
+
151336,
|
|
1036
|
+
151338
|
|
1037
|
+
],
|
|
1038
|
+
"stop": [
|
|
1039
|
+
"<|endoftext|>",
|
|
1040
|
+
"<|user|>",
|
|
1041
|
+
"<|observation|>"
|
|
1042
|
+
]
|
|
1043
|
+
}
|
|
1044
|
+
},
|
|
969
1045
|
{
|
|
970
1046
|
"version": 1,
|
|
971
1047
|
"context_length": 2048,
|
|
@@ -5774,7 +5850,7 @@
|
|
|
5774
5850
|
},
|
|
5775
5851
|
{
|
|
5776
5852
|
"version": 1,
|
|
5777
|
-
"context_length":
|
|
5853
|
+
"context_length": 32768,
|
|
5778
5854
|
"model_name": "internlm2-chat",
|
|
5779
5855
|
"model_lang": [
|
|
5780
5856
|
"en",
|
|
@@ -5822,6 +5898,140 @@
|
|
|
5822
5898
|
]
|
|
5823
5899
|
}
|
|
5824
5900
|
},
|
|
5901
|
+
{
|
|
5902
|
+
"version": 1,
|
|
5903
|
+
"context_length": 32768,
|
|
5904
|
+
"model_name": "internlm2.5-chat",
|
|
5905
|
+
"model_lang": [
|
|
5906
|
+
"en",
|
|
5907
|
+
"zh"
|
|
5908
|
+
],
|
|
5909
|
+
"model_ability": [
|
|
5910
|
+
"chat"
|
|
5911
|
+
],
|
|
5912
|
+
"model_description": "InternLM2.5 series of the InternLM model.",
|
|
5913
|
+
"model_specs": [
|
|
5914
|
+
{
|
|
5915
|
+
"model_format": "pytorch",
|
|
5916
|
+
"model_size_in_billions": 7,
|
|
5917
|
+
"quantizations": [
|
|
5918
|
+
"none"
|
|
5919
|
+
],
|
|
5920
|
+
"model_id": "internlm/internlm2_5-7b-chat",
|
|
5921
|
+
"model_revision": "9dc8536a922ab4954726aad1b37fa199004a291a"
|
|
5922
|
+
},
|
|
5923
|
+
{
|
|
5924
|
+
"model_format": "gptq",
|
|
5925
|
+
"model_size_in_billions": 7,
|
|
5926
|
+
"quantizations": [
|
|
5927
|
+
"Int4"
|
|
5928
|
+
],
|
|
5929
|
+
"model_id": "ModelCloud/internlm-2.5-7b-chat-gptq-4bit",
|
|
5930
|
+
"model_revision": "2e2dda735c326544921a4035bbeb6c6e316a8254"
|
|
5931
|
+
},
|
|
5932
|
+
{
|
|
5933
|
+
"model_format": "ggufv2",
|
|
5934
|
+
"model_size_in_billions": 7,
|
|
5935
|
+
"quantizations": [
|
|
5936
|
+
"q2_k",
|
|
5937
|
+
"q3_k_m",
|
|
5938
|
+
"q4_0",
|
|
5939
|
+
"q4_k_m",
|
|
5940
|
+
"q5_0",
|
|
5941
|
+
"q5_k_m",
|
|
5942
|
+
"q6_k",
|
|
5943
|
+
"q8_0",
|
|
5944
|
+
"fp16"
|
|
5945
|
+
],
|
|
5946
|
+
"model_id": "internlm/internlm2_5-7b-chat-gguf",
|
|
5947
|
+
"model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf"
|
|
5948
|
+
}
|
|
5949
|
+
],
|
|
5950
|
+
"prompt_style": {
|
|
5951
|
+
"style_name": "INTERNLM2",
|
|
5952
|
+
"system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
|
|
5953
|
+
"roles": [
|
|
5954
|
+
"<|im_start|>user",
|
|
5955
|
+
"<|im_start|>assistant"
|
|
5956
|
+
],
|
|
5957
|
+
"intra_message_sep": "<|im_end|>",
|
|
5958
|
+
"stop_token_ids": [
|
|
5959
|
+
2,
|
|
5960
|
+
92542
|
|
5961
|
+
],
|
|
5962
|
+
"stop": [
|
|
5963
|
+
"</s>",
|
|
5964
|
+
"<|im_end|>"
|
|
5965
|
+
]
|
|
5966
|
+
}
|
|
5967
|
+
},
|
|
5968
|
+
{
|
|
5969
|
+
"version": 1,
|
|
5970
|
+
"context_length": 262144,
|
|
5971
|
+
"model_name": "internlm2.5-chat-1m",
|
|
5972
|
+
"model_lang": [
|
|
5973
|
+
"en",
|
|
5974
|
+
"zh"
|
|
5975
|
+
],
|
|
5976
|
+
"model_ability": [
|
|
5977
|
+
"chat"
|
|
5978
|
+
],
|
|
5979
|
+
"model_description": "InternLM2.5 series of the InternLM model supports 1M long-context",
|
|
5980
|
+
"model_specs": [
|
|
5981
|
+
{
|
|
5982
|
+
"model_format": "pytorch",
|
|
5983
|
+
"model_size_in_billions": 7,
|
|
5984
|
+
"quantizations": [
|
|
5985
|
+
"none"
|
|
5986
|
+
],
|
|
5987
|
+
"model_id": "internlm/internlm2_5-7b-chat-1m",
|
|
5988
|
+
"model_revision": "8d1a709a04d71440ef3df6ebbe204672f411c8b6"
|
|
5989
|
+
},
|
|
5990
|
+
{
|
|
5991
|
+
"model_format": "gptq",
|
|
5992
|
+
"model_size_in_billions": 7,
|
|
5993
|
+
"quantizations": [
|
|
5994
|
+
"Int4"
|
|
5995
|
+
],
|
|
5996
|
+
"model_id": "ModelCloud/internlm-2.5-7b-chat-1m-gptq-4bit",
|
|
5997
|
+
"model_revision": "022e59cb30f03b271d56178478acb038b2b9b58c"
|
|
5998
|
+
},
|
|
5999
|
+
{
|
|
6000
|
+
"model_format": "ggufv2",
|
|
6001
|
+
"model_size_in_billions": 7,
|
|
6002
|
+
"quantizations": [
|
|
6003
|
+
"q2_k",
|
|
6004
|
+
"q3_k_m",
|
|
6005
|
+
"q4_0",
|
|
6006
|
+
"q4_k_m",
|
|
6007
|
+
"q5_0",
|
|
6008
|
+
"q5_k_m",
|
|
6009
|
+
"q6_k",
|
|
6010
|
+
"q8_0",
|
|
6011
|
+
"fp16"
|
|
6012
|
+
],
|
|
6013
|
+
"model_id": "internlm/internlm2_5-7b-chat-1m-gguf",
|
|
6014
|
+
"model_file_name_template": "internlm2_5-7b-chat-1m-{quantization}.gguf"
|
|
6015
|
+
}
|
|
6016
|
+
],
|
|
6017
|
+
"prompt_style": {
|
|
6018
|
+
"style_name": "INTERNLM2",
|
|
6019
|
+
"system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
|
|
6020
|
+
"roles": [
|
|
6021
|
+
"<|im_start|>user",
|
|
6022
|
+
"<|im_start|>assistant"
|
|
6023
|
+
],
|
|
6024
|
+
"intra_message_sep": "<|im_end|>",
|
|
6025
|
+
"stop_token_ids": [
|
|
6026
|
+
2,
|
|
6027
|
+
92542
|
|
6028
|
+
],
|
|
6029
|
+
"stop": [
|
|
6030
|
+
"</s>",
|
|
6031
|
+
"<|im_end|>"
|
|
6032
|
+
]
|
|
6033
|
+
}
|
|
6034
|
+
},
|
|
5825
6035
|
{
|
|
5826
6036
|
"version":1,
|
|
5827
6037
|
"context_length":2048,
|
|
@@ -6175,6 +6385,52 @@
|
|
|
6175
6385
|
],
|
|
6176
6386
|
"model_id": "google/gemma-2-27b-it"
|
|
6177
6387
|
},
|
|
6388
|
+
{
|
|
6389
|
+
"model_format": "ggufv2",
|
|
6390
|
+
"model_size_in_billions": 9,
|
|
6391
|
+
"quantizations": [
|
|
6392
|
+
"Q2_K",
|
|
6393
|
+
"Q2_K_L",
|
|
6394
|
+
"Q3_K_L",
|
|
6395
|
+
"Q3_K_M",
|
|
6396
|
+
"Q3_K_S",
|
|
6397
|
+
"Q4_K_L",
|
|
6398
|
+
"Q4_K_M",
|
|
6399
|
+
"Q4_K_S",
|
|
6400
|
+
"Q5_K_L",
|
|
6401
|
+
"Q5_K_M",
|
|
6402
|
+
"Q5_K_S",
|
|
6403
|
+
"Q6_K",
|
|
6404
|
+
"Q6_K_L",
|
|
6405
|
+
"Q8_0",
|
|
6406
|
+
"f32"
|
|
6407
|
+
],
|
|
6408
|
+
"model_id": "bartowski/gemma-2-9b-it-GGUF",
|
|
6409
|
+
"model_file_name_template": "gemma-2-9b-it-{quantization}.gguf"
|
|
6410
|
+
},
|
|
6411
|
+
{
|
|
6412
|
+
"model_format": "ggufv2",
|
|
6413
|
+
"model_size_in_billions": 27,
|
|
6414
|
+
"quantizations": [
|
|
6415
|
+
"Q2_K",
|
|
6416
|
+
"Q2_K_L",
|
|
6417
|
+
"Q3_K_L",
|
|
6418
|
+
"Q3_K_M",
|
|
6419
|
+
"Q3_K_S",
|
|
6420
|
+
"Q4_K_L",
|
|
6421
|
+
"Q4_K_M",
|
|
6422
|
+
"Q4_K_S",
|
|
6423
|
+
"Q5_K_L",
|
|
6424
|
+
"Q5_K_M",
|
|
6425
|
+
"Q5_K_S",
|
|
6426
|
+
"Q6_K",
|
|
6427
|
+
"Q6_K_L",
|
|
6428
|
+
"Q8_0",
|
|
6429
|
+
"f32"
|
|
6430
|
+
],
|
|
6431
|
+
"model_id": "bartowski/gemma-2-27b-it-GGUF",
|
|
6432
|
+
"model_file_name_template": "gemma-2-27b-it-{quantization}.gguf"
|
|
6433
|
+
},
|
|
6178
6434
|
{
|
|
6179
6435
|
"model_format": "mlx",
|
|
6180
6436
|
"model_size_in_billions": 9,
|