xinference 0.12.3__py3-none-any.whl → 0.13.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference has been flagged as potentially problematic by the registry.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +56 -8
- xinference/client/restful/restful_client.py +49 -4
- xinference/core/model.py +36 -4
- xinference/core/scheduler.py +2 -0
- xinference/core/supervisor.py +132 -15
- xinference/core/worker.py +239 -53
- xinference/deploy/cmdline.py +5 -0
- xinference/deploy/utils.py +33 -2
- xinference/model/audio/chattts.py +6 -6
- xinference/model/audio/core.py +23 -15
- xinference/model/core.py +12 -3
- xinference/model/embedding/core.py +25 -16
- xinference/model/flexible/__init__.py +40 -0
- xinference/model/flexible/core.py +228 -0
- xinference/model/flexible/launchers/__init__.py +15 -0
- xinference/model/flexible/launchers/transformers_launcher.py +63 -0
- xinference/model/flexible/utils.py +33 -0
- xinference/model/image/core.py +18 -14
- xinference/model/image/custom.py +1 -1
- xinference/model/llm/__init__.py +5 -2
- xinference/model/llm/core.py +3 -2
- xinference/model/llm/ggml/llamacpp.py +1 -10
- xinference/model/llm/llm_family.json +292 -36
- xinference/model/llm/llm_family.py +102 -53
- xinference/model/llm/llm_family_modelscope.json +247 -27
- xinference/model/llm/mlx/__init__.py +13 -0
- xinference/model/llm/mlx/core.py +408 -0
- xinference/model/llm/pytorch/chatglm.py +2 -9
- xinference/model/llm/pytorch/cogvlm2.py +206 -21
- xinference/model/llm/pytorch/core.py +213 -120
- xinference/model/llm/pytorch/glm4v.py +171 -15
- xinference/model/llm/pytorch/qwen_vl.py +168 -7
- xinference/model/llm/pytorch/utils.py +53 -62
- xinference/model/llm/utils.py +28 -7
- xinference/model/rerank/core.py +29 -25
- xinference/thirdparty/deepseek_vl/serve/__init__.py +13 -0
- xinference/thirdparty/deepseek_vl/serve/app_deepseek.py +510 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/__init__.py +13 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/gradio_utils.py +94 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/overwrites.py +81 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/presets.py +96 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/utils.py +229 -0
- xinference/thirdparty/deepseek_vl/serve/inference.py +170 -0
- xinference/types.py +0 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.95c1d652.js +3 -0
- xinference/web/ui/build/static/js/main.95c1d652.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +1 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/METADATA +10 -11
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/RECORD +71 -69
- xinference/model/llm/ggml/chatglm.py +0 -457
- xinference/thirdparty/ChatTTS/__init__.py +0 -1
- xinference/thirdparty/ChatTTS/core.py +0 -200
- xinference/thirdparty/ChatTTS/experimental/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/experimental/llm.py +0 -40
- xinference/thirdparty/ChatTTS/infer/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/infer/api.py +0 -125
- xinference/thirdparty/ChatTTS/model/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/model/dvae.py +0 -155
- xinference/thirdparty/ChatTTS/model/gpt.py +0 -265
- xinference/thirdparty/ChatTTS/utils/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/utils/gpu_utils.py +0 -23
- xinference/thirdparty/ChatTTS/utils/infer_utils.py +0 -141
- xinference/thirdparty/ChatTTS/utils/io_utils.py +0 -14
- xinference/web/ui/build/static/js/main.77dd47c3.js +0 -3
- xinference/web/ui/build/static/js/main.77dd47c3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0cd591866aa345566e0b63fb51ff2043e163a770af6fdc2f3bad395d046353e2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/37c1476717199863bbba1530e3513a9368f8f73001b75b4a85c2075956308027.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3da7d55e87882a4af923e187b1351160e34ca102f589086439c15131a227fb6e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/46edc1fe657dfedb2e673148332bb442c6eb98f09f2592c389209e376510afa5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/72bcecc71c5267250edeb89608859d449b586f13ff9923a5e70e7172976ec403.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/82db357f3fd5b32215d747ee593f69ff06c95ad6cde37f71a96c8290aaab64c0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/935efd2867664c58230378fdf2ff1ea85e58d853b7214014e20dfbca8dab7b05.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bc6da27195ec4607bb472bf61f97c928ad4966fa64e4c2247661bedb7400abba.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c2abe75f04ad82fba68f35ed9cbe2e287762c876684fddccccfa73f739489b65.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f118f99c22b713c678c1209c4e1dd43fe86e3f6e801a4c0c35d3bbf41fd05fe6.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f51bf63ddaa7afd125ef2254a105789333eecc1c94fdf5157a9b88ef7ad0a5bd.json +0 -1
- /xinference/web/ui/build/static/js/{main.77dd47c3.js.LICENSE.txt → main.95c1d652.js.LICENSE.txt} +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/LICENSE +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/WHEEL +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/top_level.txt +0 -0
The JSON hunks that follow are from xinference/model/llm/llm_family_modelscope.json (the +247/-27 entry in the file list above).

@@ -304,21 +304,6 @@
     ],
     "model_description": "ChatGLM2 is the second generation of ChatGLM, still open-source and trained on Chinese and English data.",
     "model_specs": [
-      {
-        "model_format": "ggmlv3",
-        "model_size_in_billions": 6,
-        "quantizations": [
-          "q4_0",
-          "q4_1",
-          "q5_0",
-          "q5_1",
-          "q8_0"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "Xorbits/chatglm2-6B-GGML",
-        "model_revision": "v1.0.0",
-        "model_file_name_template": "chatglm2-ggml-{quantization}.bin"
-      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 6,
@@ -392,17 +377,6 @@
     ],
     "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
     "model_specs": [
-      {
-        "model_format": "ggmlv3",
-        "model_size_in_billions": 6,
-        "quantizations": [
-          "q4_0"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "Xorbits/chatglm3-ggml",
-        "model_revision": "v1.0.0",
-        "model_file_name_template": "chatglm3-ggml-{quantization}.bin"
-      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 6,
@@ -547,6 +521,33 @@
         "model_hub": "modelscope",
         "model_id": "ZhipuAI/glm-4-9b-chat",
         "model_revision": "master"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "Q2_K",
+          "IQ3_XS",
+          "IQ3_S",
+          "IQ3_M",
+          "Q3_K_S",
+          "Q3_K_L",
+          "Q3_K",
+          "IQ4_XS",
+          "IQ4_NL",
+          "Q4_K_S",
+          "Q4_K",
+          "Q5_K_S",
+          "Q5_K",
+          "Q6_K",
+          "Q8_0",
+          "BF16",
+          "FP16"
+        ],
+        "model_file_name_template": "glm-4-9b-chat.{quantization}.gguf",
+        "model_hub": "modelscope",
+        "model_id": "LLM-Research/glm-4-9b-chat-GGUF",
+        "model_revision": "master"
       }
     ],
     "prompt_style": {
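With the LLM-Research/glm-4-9b-chat-GGUF spec registered, glm-4-chat becomes launchable from ModelScope in ggufv2 form. Below is a minimal, hedged sketch of doing that through the Python client (also touched in this diff, restful_client.py); the endpoint address is a placeholder, and the engine name and keyword arguments should be verified against the installed 0.13.x client documentation.

from xinference.client import Client

# Assumes a local Xinference endpoint is already running at this address.
client = Client("http://127.0.0.1:9997")

# Launch the 9B glm-4-chat spec in the newly registered ggufv2 format.
# Engine/keyword names here are assumptions based on the public client API.
model_uid = client.launch_model(
    model_name="glm-4-chat",
    model_engine="llama.cpp",
    model_format="ggufv2",
    model_size_in_billions=9,
    quantization="Q4_K",  # any value from the "quantizations" list above
)

model = client.get_model(model_uid)
print(model.chat("Summarize what changed for glm-4-chat in this release."))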
@@ -593,6 +594,33 @@
         "model_hub": "modelscope",
         "model_id": "ZhipuAI/glm-4-9b-chat-1m",
         "model_revision": "master"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "Q2_K",
+          "IQ3_XS",
+          "IQ3_S",
+          "IQ3_M",
+          "Q3_K_S",
+          "Q3_K_L",
+          "Q3_K",
+          "IQ4_XS",
+          "IQ4_NL",
+          "Q4_K_S",
+          "Q4_K",
+          "Q5_K_S",
+          "Q5_K",
+          "Q6_K",
+          "Q8_0",
+          "BF16",
+          "FP16"
+        ],
+        "model_file_name_template": "glm-4-9b-chat-1m.{quantization}.gguf",
+        "model_hub": "modelscope",
+        "model_id": "LLM-Research/glm-4-9b-chat-1m-GGUF",
+        "model_revision": "master"
       }
     ],
     "prompt_style": {
@@ -2921,6 +2949,33 @@
         "model_id": "qwen/Qwen2-72B-Instruct-AWQ",
         "model_hub": "modelscope"
       },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "qwen/Qwen2-0.5B-Instruct-MLX",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "qwen/Qwen2-1.5B-Instruct-MLX",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "qwen/Qwen2-7B-Instruct-MLX",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "ggufv2",
         "model_size_in_billions": "0_5",
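These three MLX specs pair with the new MLX backend added in this release (xinference/model/llm/mlx/core.py in the file list) and only apply on Apple Silicon. A hedged launch sketch follows; the engine name, the "1_5" size string, and the endpoint are assumptions to verify against the installed version.

from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # placeholder endpoint

# "mlx" is the model_format these specs register; fractional sizes use the
# "0_5"/"1_5" string convention visible in the JSON above.
model_uid = client.launch_model(
    model_name="qwen2-instruct",
    model_engine="MLX",              # assumed engine name for the new backend
    model_format="mlx",
    model_size_in_billions="1_5",
    quantization="4-bit",
)
print(model_uid)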
@@ -2938,6 +2993,85 @@
         "model_id": "qwen/Qwen2-0.5B-Instruct-GGUF",
         "model_file_name_template": "qwen2-0_5b-instruct-{quantization}.gguf",
         "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "q2_k",
+          "q3_k_m",
+          "q4_0",
+          "q4_k_m",
+          "q5_0",
+          "q5_k_m",
+          "q6_k",
+          "q8_0",
+          "fp16"
+        ],
+        "model_id": "qwen/Qwen2-1.5B-Instruct-GGUF",
+        "model_file_name_template": "qwen2-1_5b-instruct-{quantization}.gguf",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "q2_k",
+          "q3_k_m",
+          "q4_0",
+          "q4_k_m",
+          "q5_0",
+          "q5_k_m",
+          "q6_k",
+          "q8_0",
+          "fp16"
+        ],
+        "model_id": "qwen/Qwen2-7B-Instruct-GGUF",
+        "model_file_name_template": "qwen2-7b-instruct-{quantization}.gguf",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "q2_k",
+          "q3_k_m",
+          "q4_0",
+          "q4_k_m",
+          "q5_0",
+          "q5_k_m",
+          "q6_k",
+          "q8_0",
+          "fp16"
+        ],
+        "model_id": "qwen/Qwen2-72B-Instruct-GGUF",
+        "model_hub": "modelscope",
+        "model_file_name_template": "qwen2-72b-instruct-{quantization}.gguf",
+        "model_file_name_split_template": "qwen2-72b-instruct-{quantization}-{part}.gguf",
+        "quantization_parts": {
+          "q5_0": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "q5_k_m": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "q6_k": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "q8_0": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "fp16": [
+            "00001-of-00004",
+            "00002-of-00004",
+            "00003-of-00004",
+            "00004-of-00004"
+          ]
+        }
       }
     ],
     "prompt_style": {
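The 72B entry is the first Qwen2 GGUF spec here whose files ship as multiple shards, hence the extra model_file_name_split_template and quantization_parts fields. How the two combine into concrete shard names is plain string formatting, illustrated below (no xinference APIs involved):

# Expand the split template for the sharded q8_0 build of Qwen2-72B-Instruct-GGUF.
split_template = "qwen2-72b-instruct-{quantization}-{part}.gguf"
quantization_parts = {
    "q8_0": ["00001-of-00002", "00002-of-00002"],
    "fp16": ["00001-of-00004", "00002-of-00004", "00003-of-00004", "00004-of-00004"],
}

shards = [
    split_template.format(quantization="q8_0", part=part)
    for part in quantization_parts["q8_0"]
]
print(shards)
# ['qwen2-72b-instruct-q8_0-00001-of-00002.gguf',
#  'qwen2-72b-instruct-q8_0-00002-of-00002.gguf']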
@@ -2993,6 +3127,35 @@
         ],
         "model_id": "qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4",
         "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "q3_k_m",
+          "q4_0",
+          "q4_k_m",
+          "q5_0",
+          "q5_k_m",
+          "q6_k",
+          "q8_0",
+          "fp16"
+        ],
+        "model_id": "qwen/Qwen2-57B-A14B-Instruct-GGUF",
+        "model_hub": "modelscope",
+        "model_file_name_template": "qwen2-57b-a14b-instruct-{quantization}.gguf",
+        "model_file_name_split_template": "qwen2-57b-a14b-instruct-{quantization}-{part}.gguf",
+        "quantization_parts": {
+          "q8_0": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "fp16": [
+            "00001-of-00003",
+            "00002-of-00003",
+            "00003-of-00003"
+          ]
+        }
       }
     ],
     "prompt_style": {
@@ -3402,6 +3565,16 @@
       "roles": [
         "user",
         "assistant"
+      ],
+      "stop_token_ids": [
+        151643,
+        151644,
+        151645
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|im_start|>",
+        "<|im_end|>"
       ]
     }
   },
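This hunk bakes Qwen's special tokens into a prompt_style as defaults: ids 151643/151644/151645 are <|endoftext|>, <|im_start|>, and <|im_end|> in the Qwen2 tokenizer. The same strings can also be forwarded per request; below is a hedged sketch using the client's generate_config dict (the field names follow the OpenAI-style options the REST API accepts, and the endpoint and model UID are placeholders):

from xinference.client import Client

client = Client("http://127.0.0.1:9997")   # placeholder endpoint
model = client.get_model("my-qwen2-uid")   # placeholder model UID from launch time

# Forward the same stop strings the spec above registers as defaults.
response = model.chat(
    "Give a one-line summary of the GGUF format.",
    generate_config={
        "max_tokens": 128,
        "stop": ["<|endoftext|>", "<|im_start|>", "<|im_end|>"],
    },
)
print(response["choices"][0]["message"]["content"])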
@@ -3593,6 +3766,53 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "gemma-2-it",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "AI-ModelScope/gemma-2-9b-it",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "AI-ModelScope/gemma-2-27b-it",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "gemma",
+      "roles": [
+        "user",
+        "model"
+      ],
+      "stop": [
+        "<end_of_turn>",
+        "<start_of_turn>"
+      ]
+    }
+  },
   {
     "version":1,
     "context_length":2048,
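The "gemma" style_name and the user/model role pair correspond to Gemma's turn-based chat template, which is also why <start_of_turn>/<end_of_turn> appear in the stop list. A small rendering sketch of that convention follows; the real formatting lives in xinference's prompt-style handling, so this is illustrative only.

def render_gemma_prompt(turns: list[tuple[str, str]]) -> str:
    """Render (role, text) turns with Gemma's markers; roles are "user" and "model"."""
    parts = []
    for role, text in turns:
        parts.append(f"<start_of_turn>{role}\n{text}<end_of_turn>\n")
    # Leave the prompt open so the model produces the next "model" turn.
    parts.append("<start_of_turn>model\n")
    return "".join(parts)


print(render_gemma_prompt([("user", "Hi, which Gemma sizes does this release add?")]))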
@@ -3923,7 +4143,7 @@
       "zh"
     ],
     "model_ability": [
-      "
+      "chat"
     ],
     "model_description": "Aquila2-chat series models are the chat models",
     "model_specs": [
New file (13 lines, Apache license header only; this matches the shape of the new __init__.py entries in the file list above):

@@ -0,0 +1,13 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.