xinference 0.12.3__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +6 -6
- xinference/client/restful/restful_client.py +0 -2
- xinference/core/model.py +21 -4
- xinference/core/scheduler.py +2 -0
- xinference/core/worker.py +74 -45
- xinference/deploy/utils.py +33 -2
- xinference/model/llm/__init__.py +5 -0
- xinference/model/llm/llm_family.json +240 -1
- xinference/model/llm/llm_family.py +32 -8
- xinference/model/llm/llm_family_modelscope.json +192 -0
- xinference/model/llm/mlx/__init__.py +13 -0
- xinference/model/llm/mlx/core.py +408 -0
- xinference/model/llm/pytorch/chatglm.py +2 -9
- xinference/model/llm/pytorch/cogvlm2.py +206 -21
- xinference/model/llm/pytorch/core.py +213 -40
- xinference/model/llm/pytorch/glm4v.py +171 -15
- xinference/model/llm/pytorch/qwen_vl.py +168 -7
- xinference/model/llm/pytorch/utils.py +53 -62
- xinference/model/llm/utils.py +24 -5
- xinference/model/rerank/core.py +5 -0
- xinference/thirdparty/deepseek_vl/serve/__init__.py +13 -0
- xinference/thirdparty/deepseek_vl/serve/app_deepseek.py +510 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/__init__.py +13 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/gradio_utils.py +94 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/overwrites.py +81 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/presets.py +96 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/utils.py +229 -0
- xinference/thirdparty/deepseek_vl/serve/inference.py +170 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.0fb6f3ab.js +3 -0
- xinference/web/ui/build/static/js/main.0fb6f3ab.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0f6b391abec76271137faad13a3793fe7acc1024e8cd2269c147b653ecd3a73b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/30a0c79d8025d6441eb75b2df5bc2750a14f30119c869ef02570d294dff65c2f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/40486e655c3c5801f087e2cf206c0b5511aaa0dfdba78046b7181bf9c17e54c5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b5507cd57f16a3a230aa0128e39fe103e928de139ea29e2679e4c64dcbba3b3a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d779b915f83f9c7b5a72515b6932fdd114f1822cef90ae01cc0d12bca59abc2d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d87824cb266194447a9c0c69ebab2d507bfc3e3148976173760d18c035e9dd26.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +1 -0
- {xinference-0.12.3.dist-info → xinference-0.13.0.dist-info}/METADATA +4 -1
- {xinference-0.12.3.dist-info → xinference-0.13.0.dist-info}/RECORD +55 -44
- xinference/web/ui/build/static/js/main.77dd47c3.js +0 -3
- xinference/web/ui/build/static/js/main.77dd47c3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0cd591866aa345566e0b63fb51ff2043e163a770af6fdc2f3bad395d046353e2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/37c1476717199863bbba1530e3513a9368f8f73001b75b4a85c2075956308027.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3da7d55e87882a4af923e187b1351160e34ca102f589086439c15131a227fb6e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/46edc1fe657dfedb2e673148332bb442c6eb98f09f2592c389209e376510afa5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/72bcecc71c5267250edeb89608859d449b586f13ff9923a5e70e7172976ec403.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/82db357f3fd5b32215d747ee593f69ff06c95ad6cde37f71a96c8290aaab64c0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/935efd2867664c58230378fdf2ff1ea85e58d853b7214014e20dfbca8dab7b05.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bc6da27195ec4607bb472bf61f97c928ad4966fa64e4c2247661bedb7400abba.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c2abe75f04ad82fba68f35ed9cbe2e287762c876684fddccccfa73f739489b65.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f118f99c22b713c678c1209c4e1dd43fe86e3f6e801a4c0c35d3bbf41fd05fe6.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f51bf63ddaa7afd125ef2254a105789333eecc1c94fdf5157a9b88ef7ad0a5bd.json +0 -1
- /xinference/web/ui/build/static/js/{main.77dd47c3.js.LICENSE.txt → main.0fb6f3ab.js.LICENSE.txt} +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.0.dist-info}/LICENSE +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.0.dist-info}/WHEEL +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.0.dist-info}/top_level.txt +0 -0
|
@@ -107,6 +107,28 @@ class PytorchLLMSpecV1(BaseModel):
|
|
|
107
107
|
return v
|
|
108
108
|
|
|
109
109
|
|
|
110
|
+
class MLXLLMSpecV1(BaseModel):
    # Spec entry for a model whose `model_format` is "mlx"; the literal value
    # is what the `model_format` discriminator matches on.
    model_format: Literal["mlx"]
    # The Union members must stay ordered as `str` first, then `int`.
    # NOTE(review): presumably because pydantic tries the Union members in
    # declaration order -- confirm against the pydantic version in use.
    model_size_in_billions: Union[str, int]
    quantizations: List[str]
    model_id: Optional[str]
    model_hub: str = "huggingface"
    model_uri: Optional[str]
    model_revision: Optional[str]

    @validator("model_size_in_billions", pre=False)
    def validate_model_size_with_radix(cls, v: object) -> object:
        """Normalize ``model_size_in_billions`` after field parsing.

        Non-string values are returned untouched. A string containing an
        underscore (a fractional size such as "1_8") is returned as-is; any
        other string is converted with ``int``.
        """
        if not isinstance(v, str):
            return v
        if "_" not in v:
            return int(v)
        # Keep e.g. "1_8" as a string: int("1_8") would silently yield 18.
        return v
|
|
130
|
+
|
|
131
|
+
|
|
110
132
|
class PromptStyleV1(BaseModel):
|
|
111
133
|
style_name: str
|
|
112
134
|
system_prompt: str = ""
|
|
@@ -226,7 +248,7 @@ class CustomLLMFamilyV1(LLMFamilyV1):
|
|
|
226
248
|
|
|
227
249
|
|
|
228
250
|
LLMSpecV1 = Annotated[
|
|
229
|
-
Union[GgmlLLMSpecV1, PytorchLLMSpecV1],
|
|
251
|
+
Union[GgmlLLMSpecV1, PytorchLLMSpecV1, MLXLLMSpecV1],
|
|
230
252
|
Field(discriminator="model_format"),
|
|
231
253
|
]
|
|
232
254
|
|
|
@@ -249,6 +271,8 @@ UD_LLM_FAMILIES_LOCK = Lock()
|
|
|
249
271
|
|
|
250
272
|
VLLM_CLASSES: List[Type[LLM]] = []
|
|
251
273
|
|
|
274
|
+
MLX_CLASSES: List[Type[LLM]] = []
|
|
275
|
+
|
|
252
276
|
LLM_ENGINES: Dict[str, Dict[str, List[Dict[str, Any]]]] = {}
|
|
253
277
|
SUPPORTED_ENGINES: Dict[str, List[Type[LLM]]] = {}
|
|
254
278
|
|
|
@@ -549,7 +573,7 @@ def _get_meta_path(
|
|
|
549
573
|
return os.path.join(cache_dir, "__valid_download")
|
|
550
574
|
else:
|
|
551
575
|
return os.path.join(cache_dir, f"__valid_download_{model_hub}")
|
|
552
|
-
elif model_format in ["ggmlv3", "ggufv2", "gptq", "awq"]:
|
|
576
|
+
elif model_format in ["ggmlv3", "ggufv2", "gptq", "awq", "mlx"]:
|
|
553
577
|
assert quantization is not None
|
|
554
578
|
if model_hub == "huggingface":
|
|
555
579
|
return os.path.join(cache_dir, f"__valid_download_{quantization}")
|
|
@@ -588,7 +612,7 @@ def _skip_download(
|
|
|
588
612
|
logger.warning(f"Cache {cache_dir} exists, but it was from {hub}")
|
|
589
613
|
return True
|
|
590
614
|
return False
|
|
591
|
-
elif model_format in ["ggmlv3", "ggufv2", "gptq", "awq"]:
|
|
615
|
+
elif model_format in ["ggmlv3", "ggufv2", "gptq", "awq", "mlx"]:
|
|
592
616
|
assert quantization is not None
|
|
593
617
|
return os.path.exists(
|
|
594
618
|
_get_meta_path(cache_dir, model_format, model_hub, quantization)
|
|
@@ -683,7 +707,7 @@ def cache_from_csghub(
|
|
|
683
707
|
):
|
|
684
708
|
return cache_dir
|
|
685
709
|
|
|
686
|
-
if llm_spec.model_format in ["pytorch", "gptq", "awq"]:
|
|
710
|
+
if llm_spec.model_format in ["pytorch", "gptq", "awq", "mlx"]:
|
|
687
711
|
download_dir = retry_download(
|
|
688
712
|
snapshot_download,
|
|
689
713
|
llm_family.model_name,
|
|
@@ -751,7 +775,7 @@ def cache_from_modelscope(
|
|
|
751
775
|
):
|
|
752
776
|
return cache_dir
|
|
753
777
|
|
|
754
|
-
if llm_spec.model_format in ["pytorch", "gptq", "awq"]:
|
|
778
|
+
if llm_spec.model_format in ["pytorch", "gptq", "awq", "mlx"]:
|
|
755
779
|
download_dir = retry_download(
|
|
756
780
|
snapshot_download,
|
|
757
781
|
llm_family.model_name,
|
|
@@ -820,8 +844,8 @@ def cache_from_huggingface(
|
|
|
820
844
|
if not IS_NEW_HUGGINGFACE_HUB:
|
|
821
845
|
use_symlinks = {"local_dir_use_symlinks": True, "local_dir": cache_dir}
|
|
822
846
|
|
|
823
|
-
if llm_spec.model_format in ["pytorch", "gptq", "awq"]:
|
|
824
|
-
assert isinstance(llm_spec, PytorchLLMSpecV1)
|
|
847
|
+
if llm_spec.model_format in ["pytorch", "gptq", "awq", "mlx"]:
|
|
848
|
+
assert isinstance(llm_spec, (PytorchLLMSpecV1, MLXLLMSpecV1))
|
|
825
849
|
download_dir = retry_download(
|
|
826
850
|
huggingface_hub.snapshot_download,
|
|
827
851
|
llm_family.model_name,
|
|
@@ -910,7 +934,7 @@ def get_cache_status(
|
|
|
910
934
|
]
|
|
911
935
|
return any(revisions)
|
|
912
936
|
# only the meta file is checked for the model formats handled by the branch below
|
|
913
|
-
elif llm_spec.model_format in ["ggmlv3", "ggufv2", "gptq", "awq"]:
|
|
937
|
+
elif llm_spec.model_format in ["ggmlv3", "ggufv2", "gptq", "awq", "mlx"]:
|
|
914
938
|
ret = []
|
|
915
939
|
for q in llm_spec.quantizations:
|
|
916
940
|
assert q is not None
|
|
@@ -2921,6 +2921,33 @@
|
|
|
2921
2921
|
"model_id": "qwen/Qwen2-72B-Instruct-AWQ",
|
|
2922
2922
|
"model_hub": "modelscope"
|
|
2923
2923
|
},
|
|
2924
|
+
{
|
|
2925
|
+
"model_format": "mlx",
|
|
2926
|
+
"model_size_in_billions": "0_5",
|
|
2927
|
+
"quantizations": [
|
|
2928
|
+
"4-bit"
|
|
2929
|
+
],
|
|
2930
|
+
"model_id": "qwen/Qwen2-0.5B-Instruct-MLX",
|
|
2931
|
+
"model_hub": "modelscope"
|
|
2932
|
+
},
|
|
2933
|
+
{
|
|
2934
|
+
"model_format": "mlx",
|
|
2935
|
+
"model_size_in_billions": "1_5",
|
|
2936
|
+
"quantizations": [
|
|
2937
|
+
"4-bit"
|
|
2938
|
+
],
|
|
2939
|
+
"model_id": "qwen/Qwen2-1.5B-Instruct-MLX",
|
|
2940
|
+
"model_hub": "modelscope"
|
|
2941
|
+
},
|
|
2942
|
+
{
|
|
2943
|
+
"model_format": "mlx",
|
|
2944
|
+
"model_size_in_billions": 7,
|
|
2945
|
+
"quantizations": [
|
|
2946
|
+
"4-bit"
|
|
2947
|
+
],
|
|
2948
|
+
"model_id": "qwen/Qwen2-7B-Instruct-MLX",
|
|
2949
|
+
"model_hub": "modelscope"
|
|
2950
|
+
},
|
|
2924
2951
|
{
|
|
2925
2952
|
"model_format": "ggufv2",
|
|
2926
2953
|
"model_size_in_billions": "0_5",
|
|
@@ -2938,6 +2965,85 @@
|
|
|
2938
2965
|
"model_id": "qwen/Qwen2-0.5B-Instruct-GGUF",
|
|
2939
2966
|
"model_file_name_template": "qwen2-0_5b-instruct-{quantization}.gguf",
|
|
2940
2967
|
"model_hub": "modelscope"
|
|
2968
|
+
},
|
|
2969
|
+
{
|
|
2970
|
+
"model_format": "ggufv2",
|
|
2971
|
+
"model_size_in_billions": "1_5",
|
|
2972
|
+
"quantizations": [
|
|
2973
|
+
"q2_k",
|
|
2974
|
+
"q3_k_m",
|
|
2975
|
+
"q4_0",
|
|
2976
|
+
"q4_k_m",
|
|
2977
|
+
"q5_0",
|
|
2978
|
+
"q5_k_m",
|
|
2979
|
+
"q6_k",
|
|
2980
|
+
"q8_0",
|
|
2981
|
+
"fp16"
|
|
2982
|
+
],
|
|
2983
|
+
"model_id": "qwen/Qwen2-1.5B-Instruct-GGUF",
|
|
2984
|
+
"model_file_name_template": "qwen2-1_5b-instruct-{quantization}.gguf",
|
|
2985
|
+
"model_hub": "modelscope"
|
|
2986
|
+
},
|
|
2987
|
+
{
|
|
2988
|
+
"model_format": "ggufv2",
|
|
2989
|
+
"model_size_in_billions": 7,
|
|
2990
|
+
"quantizations": [
|
|
2991
|
+
"q2_k",
|
|
2992
|
+
"q3_k_m",
|
|
2993
|
+
"q4_0",
|
|
2994
|
+
"q4_k_m",
|
|
2995
|
+
"q5_0",
|
|
2996
|
+
"q5_k_m",
|
|
2997
|
+
"q6_k",
|
|
2998
|
+
"q8_0",
|
|
2999
|
+
"fp16"
|
|
3000
|
+
],
|
|
3001
|
+
"model_id": "qwen/Qwen2-7B-Instruct-GGUF",
|
|
3002
|
+
"model_file_name_template": "qwen2-7b-instruct-{quantization}.gguf",
|
|
3003
|
+
"model_hub": "modelscope"
|
|
3004
|
+
},
|
|
3005
|
+
{
|
|
3006
|
+
"model_format": "ggufv2",
|
|
3007
|
+
"model_size_in_billions": 72,
|
|
3008
|
+
"quantizations": [
|
|
3009
|
+
"q2_k",
|
|
3010
|
+
"q3_k_m",
|
|
3011
|
+
"q4_0",
|
|
3012
|
+
"q4_k_m",
|
|
3013
|
+
"q5_0",
|
|
3014
|
+
"q5_k_m",
|
|
3015
|
+
"q6_k",
|
|
3016
|
+
"q8_0",
|
|
3017
|
+
"fp16"
|
|
3018
|
+
],
|
|
3019
|
+
"model_id": "qwen/Qwen2-72B-Instruct-GGUF",
|
|
3020
|
+
"model_hub": "modelscope",
|
|
3021
|
+
"model_file_name_template": "qwen2-72b-instruct-{quantization}.gguf",
|
|
3022
|
+
"model_file_name_split_template": "qwen2-72b-instruct-{quantization}-{part}.gguf",
|
|
3023
|
+
"quantization_parts": {
|
|
3024
|
+
"q5_0": [
|
|
3025
|
+
"00001-of-00002",
|
|
3026
|
+
"00002-of-00002"
|
|
3027
|
+
],
|
|
3028
|
+
"q5_k_m": [
|
|
3029
|
+
"00001-of-00002",
|
|
3030
|
+
"00002-of-00002"
|
|
3031
|
+
],
|
|
3032
|
+
"q6_k": [
|
|
3033
|
+
"00001-of-00002",
|
|
3034
|
+
"00002-of-00002"
|
|
3035
|
+
],
|
|
3036
|
+
"q8_0": [
|
|
3037
|
+
"00001-of-00002",
|
|
3038
|
+
"00002-of-00002"
|
|
3039
|
+
],
|
|
3040
|
+
"fp16": [
|
|
3041
|
+
"00001-of-00004",
|
|
3042
|
+
"00002-of-00004",
|
|
3043
|
+
"00003-of-00004",
|
|
3044
|
+
"00004-of-00004"
|
|
3045
|
+
]
|
|
3046
|
+
}
|
|
2941
3047
|
}
|
|
2942
3048
|
],
|
|
2943
3049
|
"prompt_style": {
|
|
@@ -2993,6 +3099,35 @@
|
|
|
2993
3099
|
],
|
|
2994
3100
|
"model_id": "qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4",
|
|
2995
3101
|
"model_hub": "modelscope"
|
|
3102
|
+
},
|
|
3103
|
+
{
|
|
3104
|
+
"model_format": "ggufv2",
|
|
3105
|
+
"model_size_in_billions": 14,
|
|
3106
|
+
"quantizations": [
|
|
3107
|
+
"q3_k_m",
|
|
3108
|
+
"q4_0",
|
|
3109
|
+
"q4_k_m",
|
|
3110
|
+
"q5_0",
|
|
3111
|
+
"q5_k_m",
|
|
3112
|
+
"q6_k",
|
|
3113
|
+
"q8_0",
|
|
3114
|
+
"fp16"
|
|
3115
|
+
],
|
|
3116
|
+
"model_id": "qwen/Qwen2-57B-A14B-Instruct-GGUF",
|
|
3117
|
+
"model_hub": "modelscope",
|
|
3118
|
+
"model_file_name_template": "qwen2-57b-a14b-instruct-{quantization}.gguf",
|
|
3119
|
+
"model_file_name_split_template": "qwen2-57b-a14b-instruct-{quantization}-{part}.gguf",
|
|
3120
|
+
"quantization_parts": {
|
|
3121
|
+
"q8_0": [
|
|
3122
|
+
"00001-of-00002",
|
|
3123
|
+
"00002-of-00002"
|
|
3124
|
+
],
|
|
3125
|
+
"fp16": [
|
|
3126
|
+
"00001-of-00003",
|
|
3127
|
+
"00002-of-00003",
|
|
3128
|
+
"00003-of-00003"
|
|
3129
|
+
]
|
|
3130
|
+
}
|
|
2996
3131
|
}
|
|
2997
3132
|
],
|
|
2998
3133
|
"prompt_style": {
|
|
@@ -3402,6 +3537,16 @@
|
|
|
3402
3537
|
"roles": [
|
|
3403
3538
|
"user",
|
|
3404
3539
|
"assistant"
|
|
3540
|
+
],
|
|
3541
|
+
"stop_token_ids": [
|
|
3542
|
+
151643,
|
|
3543
|
+
151644,
|
|
3544
|
+
151645
|
|
3545
|
+
],
|
|
3546
|
+
"stop": [
|
|
3547
|
+
"<|endoftext|>",
|
|
3548
|
+
"<|im_start|>",
|
|
3549
|
+
"<|im_end|>"
|
|
3405
3550
|
]
|
|
3406
3551
|
}
|
|
3407
3552
|
},
|
|
@@ -3593,6 +3738,53 @@
|
|
|
3593
3738
|
]
|
|
3594
3739
|
}
|
|
3595
3740
|
},
|
|
3741
|
+
{
|
|
3742
|
+
"version": 1,
|
|
3743
|
+
"context_length": 8192,
|
|
3744
|
+
"model_name": "gemma-2-it",
|
|
3745
|
+
"model_lang": [
|
|
3746
|
+
"en"
|
|
3747
|
+
],
|
|
3748
|
+
"model_ability": [
|
|
3749
|
+
"chat"
|
|
3750
|
+
],
|
|
3751
|
+
"model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
|
|
3752
|
+
"model_specs": [
|
|
3753
|
+
{
|
|
3754
|
+
"model_format": "pytorch",
|
|
3755
|
+
"model_size_in_billions": 9,
|
|
3756
|
+
"quantizations": [
|
|
3757
|
+
"none",
|
|
3758
|
+
"4-bit",
|
|
3759
|
+
"8-bit"
|
|
3760
|
+
],
|
|
3761
|
+
"model_id": "AI-ModelScope/gemma-2-9b-it",
|
|
3762
|
+
"model_hub": "modelscope"
|
|
3763
|
+
},
|
|
3764
|
+
{
|
|
3765
|
+
"model_format": "pytorch",
|
|
3766
|
+
"model_size_in_billions": 27,
|
|
3767
|
+
"quantizations": [
|
|
3768
|
+
"none",
|
|
3769
|
+
"4-bit",
|
|
3770
|
+
"8-bit"
|
|
3771
|
+
],
|
|
3772
|
+
"model_id": "AI-ModelScope/gemma-2-27b-it",
|
|
3773
|
+
"model_hub": "modelscope"
|
|
3774
|
+
}
|
|
3775
|
+
],
|
|
3776
|
+
"prompt_style": {
|
|
3777
|
+
"style_name": "gemma",
|
|
3778
|
+
"roles": [
|
|
3779
|
+
"user",
|
|
3780
|
+
"model"
|
|
3781
|
+
],
|
|
3782
|
+
"stop": [
|
|
3783
|
+
"<end_of_turn>",
|
|
3784
|
+
"<start_of_turn>"
|
|
3785
|
+
]
|
|
3786
|
+
}
|
|
3787
|
+
},
|
|
3596
3788
|
{
|
|
3597
3789
|
"version":1,
|
|
3598
3790
|
"context_length":2048,
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|