xinference 0.11.0__py3-none-any.whl → 0.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +30 -0
- xinference/client/restful/restful_client.py +29 -0
- xinference/core/cache_tracker.py +12 -1
- xinference/core/chat_interface.py +10 -4
- xinference/core/model.py +2 -2
- xinference/core/supervisor.py +30 -2
- xinference/core/utils.py +12 -0
- xinference/core/worker.py +4 -1
- xinference/deploy/cmdline.py +126 -0
- xinference/deploy/test/test_cmdline.py +24 -0
- xinference/fields.py +3 -1
- xinference/model/llm/__init__.py +2 -0
- xinference/model/llm/ggml/chatglm.py +98 -13
- xinference/model/llm/ggml/llamacpp.py +49 -2
- xinference/model/llm/llm_family.json +633 -9
- xinference/model/llm/llm_family.py +84 -10
- xinference/model/llm/llm_family_modelscope.json +337 -10
- xinference/model/llm/memory.py +332 -0
- xinference/model/llm/pytorch/chatglm.py +48 -0
- xinference/model/llm/pytorch/core.py +25 -6
- xinference/model/llm/pytorch/deepseek_vl.py +35 -9
- xinference/model/llm/pytorch/intern_vl.py +387 -0
- xinference/model/llm/pytorch/internlm2.py +32 -1
- xinference/model/llm/pytorch/qwen_vl.py +38 -11
- xinference/model/llm/pytorch/utils.py +38 -1
- xinference/model/llm/pytorch/yi_vl.py +42 -14
- xinference/model/llm/sglang/core.py +31 -9
- xinference/model/llm/utils.py +38 -5
- xinference/model/llm/vllm/core.py +87 -5
- xinference/model/rerank/core.py +23 -1
- xinference/model/utils.py +17 -7
- xinference/thirdparty/deepseek_vl/models/processing_vlm.py +1 -1
- xinference/thirdparty/deepseek_vl/models/siglip_vit.py +2 -2
- xinference/thirdparty/llava/mm_utils.py +3 -2
- xinference/thirdparty/llava/model/llava_arch.py +1 -1
- xinference/thirdparty/omnilmm/chat.py +6 -5
- xinference/types.py +10 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.8e44da4b.js → main.551aa479.js} +3 -3
- xinference/web/ui/build/static/js/main.551aa479.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1fa824d82b2af519de7700c594e50bde4bbca60d13bd3fabff576802e4070304.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/23caf6f1e52c43e983ca3bfd4189f41dbd645fa78f2dfdcd7f6b69bc41678665.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a6da6bc3d0d2191adebee87fb58ecebe82d071087bd2f7f3a9c7fdd2ada130f2.json +1 -0
- {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/METADATA +10 -8
- {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/RECORD +52 -50
- xinference/web/ui/build/static/js/main.8e44da4b.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1870cd6f7054d04e049e363c0a85526584fe25519378609d2838e28d7492bbf1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ddaec68b88e5eff792df1e39a4b4b8b737bfc832293c015660c3c69334e3cf5c.json +0 -1
- /xinference/web/ui/build/static/js/{main.8e44da4b.js.LICENSE.txt → main.551aa479.js.LICENSE.txt} +0 -0
- {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/LICENSE +0 -0
- {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/WHEEL +0 -0
- {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/entry_points.txt +0 -0
- {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/top_level.txt +0 -0
|
@@ -34,6 +34,8 @@ from ..._compat import (
|
|
|
34
34
|
)
|
|
35
35
|
from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
|
|
36
36
|
from ..utils import (
|
|
37
|
+
IS_NEW_HUGGINGFACE_HUB,
|
|
38
|
+
create_symlink,
|
|
37
39
|
download_from_modelscope,
|
|
38
40
|
is_valid_model_uri,
|
|
39
41
|
parse_uri,
|
|
@@ -447,6 +449,61 @@ def cache_from_uri(
|
|
|
447
449
|
raise ValueError(f"Unsupported URL scheme: {src_scheme}")
|
|
448
450
|
|
|
449
451
|
|
|
452
|
+
def cache_model_config(
    llm_family: LLMFamilyV1,
    llm_spec: "LLMSpecV1",
):
    """Download the model's ``config.json`` into the model-mem cache directory.

    The cache directory is obtained from ``_get_cache_dir_for_model_mem``.
    When ``llm_spec.model_hub`` is ``"huggingface"`` the file is fetched with
    ``huggingface_hub.hf_hub_download`` directly into that directory; for any
    other hub value it is fetched with ModelScope's ``model_file_download``
    and symlinked into the cache directory.

    Nothing is downloaded when a usable ``config.json`` is already present.

    Returns:
        The path of ``config.json`` inside the cache directory.
    """
    cache_dir = _get_cache_dir_for_model_mem(llm_family, llm_spec)
    config_file = os.path.join(cache_dir, "config.json")

    # A dangling symlink (its target has been removed) must not count as a
    # cache hit: drop it so the file is fetched again instead of returning a
    # path that cannot be opened.
    if os.path.islink(config_file) and not os.path.exists(config_file):
        os.unlink(config_file)

    if not os.path.exists(config_file):
        os.makedirs(cache_dir, exist_ok=True)
        if llm_spec.model_hub == "huggingface":
            # Imported lazily so the dependency is only needed on this path.
            from huggingface_hub import hf_hub_download

            hf_hub_download(
                repo_id=llm_spec.model_id, filename="config.json", local_dir=cache_dir
            )
        else:
            # Imported lazily so the dependency is only needed on this path.
            from modelscope.hub.file_download import model_file_download

            download_path = model_file_download(
                model_id=llm_spec.model_id, file_path="config.json"
            )
            os.symlink(download_path, config_file)
    return config_file
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
def _get_cache_dir_for_model_mem(
    llm_family: LLMFamilyV1,
    llm_spec: "LLMSpecV1",
    create_if_not_exist=True,
):
    """
    Return the cache directory used by cal-model-mem for the given model spec.

    For cal-model-mem only (may be called from the supervisor or the CLI).
    Temporarily kept separate from the worker's cache_dir because the two use
    different styles of symlink.

    The directory lives under ``XINFERENCE_CACHE_DIR/model_mem`` and is named
    ``<model_name>-<model_format>-<size>b``, followed by ``-<quantization>``
    when one of the spec's quantizations occurs in ``llm_spec.model_id``.
    The directory is created when ``create_if_not_exist`` is true and the path
    does not exist yet.
    """
    # First quantization that appears verbatim in the model id, "" if none.
    matched_quant = next(
        (
            quant
            for quant in llm_spec.quantizations
            if llm_spec.model_id and quant in llm_spec.model_id
        ),
        "",
    )

    name_parts = [
        f"{llm_family.model_name}-{llm_spec.model_format}",
        f"{llm_spec.model_size_in_billions}b",
    ]
    if matched_quant:
        name_parts.append(matched_quant)
    dir_name = "-".join(name_parts)

    target_dir = os.path.realpath(
        os.path.join(XINFERENCE_CACHE_DIR, "model_mem", dir_name)
    )
    if not create_if_not_exist or os.path.exists(target_dir):
        return target_dir
    os.makedirs(target_dir, exist_ok=True)
    return target_dir
|
|
505
|
+
|
|
506
|
+
|
|
450
507
|
def _get_cache_dir(
|
|
451
508
|
llm_family: LLMFamilyV1,
|
|
452
509
|
llm_spec: "LLMSpecV1",
|
|
@@ -625,10 +682,7 @@ def cache_from_modelscope(
|
|
|
625
682
|
llm_spec.model_id,
|
|
626
683
|
revision=llm_spec.model_revision,
|
|
627
684
|
)
|
|
628
|
-
|
|
629
|
-
for file in files:
|
|
630
|
-
relpath = os.path.relpath(os.path.join(subdir, file), download_dir)
|
|
631
|
-
symlink_local_file(os.path.join(subdir, file), cache_dir, relpath)
|
|
685
|
+
create_symlink(download_dir, cache_dir)
|
|
632
686
|
|
|
633
687
|
elif llm_spec.model_format in ["ggmlv3", "ggufv2"]:
|
|
634
688
|
file_names, final_file_name, need_merge = _generate_model_file_names(
|
|
@@ -682,9 +736,13 @@ def cache_from_huggingface(
|
|
|
682
736
|
):
|
|
683
737
|
return cache_dir
|
|
684
738
|
|
|
739
|
+
use_symlinks = {}
|
|
740
|
+
if not IS_NEW_HUGGINGFACE_HUB:
|
|
741
|
+
use_symlinks = {"local_dir_use_symlinks": True, "local_dir": cache_dir}
|
|
742
|
+
|
|
685
743
|
if llm_spec.model_format in ["pytorch", "gptq", "awq"]:
|
|
686
744
|
assert isinstance(llm_spec, PytorchLLMSpecV1)
|
|
687
|
-
retry_download(
|
|
745
|
+
download_dir = retry_download(
|
|
688
746
|
huggingface_hub.snapshot_download,
|
|
689
747
|
llm_family.model_name,
|
|
690
748
|
{
|
|
@@ -693,9 +751,10 @@ def cache_from_huggingface(
|
|
|
693
751
|
},
|
|
694
752
|
llm_spec.model_id,
|
|
695
753
|
revision=llm_spec.model_revision,
|
|
696
|
-
|
|
697
|
-
local_dir_use_symlinks=True,
|
|
754
|
+
**use_symlinks,
|
|
698
755
|
)
|
|
756
|
+
if IS_NEW_HUGGINGFACE_HUB:
|
|
757
|
+
create_symlink(download_dir, cache_dir)
|
|
699
758
|
|
|
700
759
|
elif llm_spec.model_format in ["ggmlv3", "ggufv2"]:
|
|
701
760
|
assert isinstance(llm_spec, GgmlLLMSpecV1)
|
|
@@ -704,7 +763,7 @@ def cache_from_huggingface(
|
|
|
704
763
|
)
|
|
705
764
|
|
|
706
765
|
for file_name in file_names:
|
|
707
|
-
retry_download(
|
|
766
|
+
download_file_path = retry_download(
|
|
708
767
|
huggingface_hub.hf_hub_download,
|
|
709
768
|
llm_family.model_name,
|
|
710
769
|
{
|
|
@@ -714,9 +773,10 @@ def cache_from_huggingface(
|
|
|
714
773
|
llm_spec.model_id,
|
|
715
774
|
revision=llm_spec.model_revision,
|
|
716
775
|
filename=file_name,
|
|
717
|
-
|
|
718
|
-
local_dir_use_symlinks=True,
|
|
776
|
+
**use_symlinks,
|
|
719
777
|
)
|
|
778
|
+
if IS_NEW_HUGGINGFACE_HUB:
|
|
779
|
+
symlink_local_file(download_file_path, cache_dir, file_name)
|
|
720
780
|
|
|
721
781
|
if need_merge:
|
|
722
782
|
_merge_cached_files(cache_dir, file_names, final_file_name)
|
|
@@ -823,6 +883,20 @@ def match_model_size(
|
|
|
823
883
|
return False
|
|
824
884
|
|
|
825
885
|
|
|
886
|
+
def convert_model_size_to_float(
    model_size_in_billions: Union[float, int, str]
) -> float:
    """Convert a model size (in billions of parameters) to a ``float``.

    Args:
        model_size_in_billions: The size as a number, or as a string in which
            an underscore may stand in for the decimal point (``"1_8"`` means
            ``1.8``).

    Returns:
        The size as a ``float``; integral inputs such as ``7`` or ``"7"`` are
        returned as ``7.0`` so the result always matches the annotation.

    Raises:
        ValueError: If a string input is not a valid number.
    """
    if isinstance(model_size_in_billions, str):
        # "1_8" is the underscore spelling of 1.8; normalise before parsing.
        return float(model_size_in_billions.replace("_", "."))
    return float(model_size_in_billions)
|
|
898
|
+
|
|
899
|
+
|
|
826
900
|
def match_llm(
|
|
827
901
|
model_name: str,
|
|
828
902
|
model_format: Optional[str] = None,
|
|
@@ -1289,7 +1289,7 @@
|
|
|
1289
1289
|
},
|
|
1290
1290
|
{
|
|
1291
1291
|
"version": 1,
|
|
1292
|
-
"context_length":
|
|
1292
|
+
"context_length": 262144,
|
|
1293
1293
|
"model_name": "Yi-200k",
|
|
1294
1294
|
"model_lang": [
|
|
1295
1295
|
"en",
|
|
@@ -1328,7 +1328,7 @@
|
|
|
1328
1328
|
},
|
|
1329
1329
|
{
|
|
1330
1330
|
"version": 1,
|
|
1331
|
-
"context_length":
|
|
1331
|
+
"context_length": 4096,
|
|
1332
1332
|
"model_name": "Yi-chat",
|
|
1333
1333
|
"model_lang": [
|
|
1334
1334
|
"en",
|
|
@@ -1349,6 +1349,18 @@
|
|
|
1349
1349
|
"model_id": "01ai/Yi-34B-Chat-{quantization}",
|
|
1350
1350
|
"model_revision": "master"
|
|
1351
1351
|
},
|
|
1352
|
+
{
|
|
1353
|
+
"model_format": "pytorch",
|
|
1354
|
+
"model_size_in_billions": 6,
|
|
1355
|
+
"quantizations": [
|
|
1356
|
+
"4-bit",
|
|
1357
|
+
"8-bit",
|
|
1358
|
+
"none"
|
|
1359
|
+
],
|
|
1360
|
+
"model_hub": "modelscope",
|
|
1361
|
+
"model_id": "01ai/Yi-6B-Chat",
|
|
1362
|
+
"model_revision": "master"
|
|
1363
|
+
},
|
|
1352
1364
|
{
|
|
1353
1365
|
"model_format": "pytorch",
|
|
1354
1366
|
"model_size_in_billions": 34,
|
|
@@ -1385,6 +1397,130 @@
|
|
|
1385
1397
|
]
|
|
1386
1398
|
}
|
|
1387
1399
|
},
|
|
1400
|
+
{
|
|
1401
|
+
"version": 1,
|
|
1402
|
+
"context_length": 4096,
|
|
1403
|
+
"model_name": "Yi-1.5",
|
|
1404
|
+
"model_lang": [
|
|
1405
|
+
"en",
|
|
1406
|
+
"zh"
|
|
1407
|
+
],
|
|
1408
|
+
"model_ability": [
|
|
1409
|
+
"generate"
|
|
1410
|
+
],
|
|
1411
|
+
"model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
|
|
1412
|
+
"model_specs": [
|
|
1413
|
+
{
|
|
1414
|
+
"model_format": "pytorch",
|
|
1415
|
+
"model_size_in_billions": 6,
|
|
1416
|
+
"quantizations": [
|
|
1417
|
+
"4-bit",
|
|
1418
|
+
"8-bit",
|
|
1419
|
+
"none"
|
|
1420
|
+
],
|
|
1421
|
+
"model_hub": "modelscope",
|
|
1422
|
+
"model_id": "01ai/Yi-1.5-6B",
|
|
1423
|
+
"model_revision": "master"
|
|
1424
|
+
},
|
|
1425
|
+
{
|
|
1426
|
+
"model_format": "pytorch",
|
|
1427
|
+
"model_size_in_billions": 9,
|
|
1428
|
+
"quantizations": [
|
|
1429
|
+
"4-bit",
|
|
1430
|
+
"8-bit",
|
|
1431
|
+
"none"
|
|
1432
|
+
],
|
|
1433
|
+
"model_hub": "modelscope",
|
|
1434
|
+
"model_id": "01ai/Yi-1.5-9B",
|
|
1435
|
+
"model_revision": "master"
|
|
1436
|
+
},
|
|
1437
|
+
{
|
|
1438
|
+
"model_format": "pytorch",
|
|
1439
|
+
"model_size_in_billions": 34,
|
|
1440
|
+
"quantizations": [
|
|
1441
|
+
"4-bit",
|
|
1442
|
+
"8-bit",
|
|
1443
|
+
"none"
|
|
1444
|
+
],
|
|
1445
|
+
"model_hub": "modelscope",
|
|
1446
|
+
"model_id": "01ai/Yi-1.5-34B",
|
|
1447
|
+
"model_revision": "master"
|
|
1448
|
+
}
|
|
1449
|
+
]
|
|
1450
|
+
},
|
|
1451
|
+
{
|
|
1452
|
+
"version": 1,
|
|
1453
|
+
"context_length": 4096,
|
|
1454
|
+
"model_name": "Yi-1.5-chat",
|
|
1455
|
+
"model_lang": [
|
|
1456
|
+
"en",
|
|
1457
|
+
"zh"
|
|
1458
|
+
],
|
|
1459
|
+
"model_ability": [
|
|
1460
|
+
"chat"
|
|
1461
|
+
],
|
|
1462
|
+
"model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
|
|
1463
|
+
"model_specs": [
|
|
1464
|
+
{
|
|
1465
|
+
"model_format": "pytorch",
|
|
1466
|
+
"model_size_in_billions": 6,
|
|
1467
|
+
"quantizations": [
|
|
1468
|
+
"4-bit",
|
|
1469
|
+
"8-bit",
|
|
1470
|
+
"none"
|
|
1471
|
+
],
|
|
1472
|
+
"model_hub": "modelscope",
|
|
1473
|
+
"model_id": "01ai/Yi-1.5-6B-Chat",
|
|
1474
|
+
"model_revision": "master"
|
|
1475
|
+
},
|
|
1476
|
+
{
|
|
1477
|
+
"model_format": "pytorch",
|
|
1478
|
+
"model_size_in_billions": 9,
|
|
1479
|
+
"quantizations": [
|
|
1480
|
+
"4-bit",
|
|
1481
|
+
"8-bit",
|
|
1482
|
+
"none"
|
|
1483
|
+
],
|
|
1484
|
+
"model_hub": "modelscope",
|
|
1485
|
+
"model_id": "01ai/Yi-1.5-9B-Chat",
|
|
1486
|
+
"model_revision": "master"
|
|
1487
|
+
},
|
|
1488
|
+
{
|
|
1489
|
+
"model_format": "pytorch",
|
|
1490
|
+
"model_size_in_billions": 34,
|
|
1491
|
+
"quantizations": [
|
|
1492
|
+
"4-bit",
|
|
1493
|
+
"8-bit",
|
|
1494
|
+
"none"
|
|
1495
|
+
],
|
|
1496
|
+
"model_hub": "modelscope",
|
|
1497
|
+
"model_id": "01ai/Yi-1.5-34B-Chat",
|
|
1498
|
+
"model_revision": "master"
|
|
1499
|
+
}
|
|
1500
|
+
],
|
|
1501
|
+
"prompt_style": {
|
|
1502
|
+
"style_name": "CHATML",
|
|
1503
|
+
"system_prompt": "",
|
|
1504
|
+
"roles": [
|
|
1505
|
+
"<|im_start|>user",
|
|
1506
|
+
"<|im_start|>assistant"
|
|
1507
|
+
],
|
|
1508
|
+
"intra_message_sep": "<|im_end|>",
|
|
1509
|
+
"inter_message_sep": "",
|
|
1510
|
+
"stop_token_ids": [
|
|
1511
|
+
2,
|
|
1512
|
+
6,
|
|
1513
|
+
7,
|
|
1514
|
+
8
|
|
1515
|
+
],
|
|
1516
|
+
"stop": [
|
|
1517
|
+
"<|endoftext|>",
|
|
1518
|
+
"<|im_start|>",
|
|
1519
|
+
"<|im_end|>",
|
|
1520
|
+
"<|im_sep|>"
|
|
1521
|
+
]
|
|
1522
|
+
}
|
|
1523
|
+
},
|
|
1388
1524
|
{
|
|
1389
1525
|
"version": 1,
|
|
1390
1526
|
"context_length": 2048,
|
|
@@ -2294,6 +2430,32 @@
|
|
|
2294
2430
|
]
|
|
2295
2431
|
}
|
|
2296
2432
|
},
|
|
2433
|
+
{
|
|
2434
|
+
"version": 1,
|
|
2435
|
+
"context_length": 65536,
|
|
2436
|
+
"model_name": "codeqwen1.5",
|
|
2437
|
+
"model_lang": [
|
|
2438
|
+
"en",
|
|
2439
|
+
"zh"
|
|
2440
|
+
],
|
|
2441
|
+
"model_ability": [
|
|
2442
|
+
"generate"
|
|
2443
|
+
],
|
|
2444
|
+
"model_description": "CodeQwen1.5 is the Code-Specific version of Qwen1.5. It is a transformer-based decoder-only language model pretrained on a large amount of data of codes.",
|
|
2445
|
+
"model_specs": [
|
|
2446
|
+
{
|
|
2447
|
+
"model_format": "pytorch",
|
|
2448
|
+
"model_size_in_billions": 7,
|
|
2449
|
+
"quantizations": [
|
|
2450
|
+
"4-bit",
|
|
2451
|
+
"8-bit",
|
|
2452
|
+
"none"
|
|
2453
|
+
],
|
|
2454
|
+
"model_id": "qwen/CodeQwen1.5-7B",
|
|
2455
|
+
"model_hub": "modelscope"
|
|
2456
|
+
}
|
|
2457
|
+
]
|
|
2458
|
+
},
|
|
2297
2459
|
{
|
|
2298
2460
|
"version": 1,
|
|
2299
2461
|
"context_length": 65536,
|
|
@@ -2412,6 +2574,43 @@
|
|
|
2412
2574
|
]
|
|
2413
2575
|
}
|
|
2414
2576
|
},
|
|
2577
|
+
{
|
|
2578
|
+
"version": 1,
|
|
2579
|
+
"context_length": 4096,
|
|
2580
|
+
"model_name": "deepseek",
|
|
2581
|
+
"model_lang": [
|
|
2582
|
+
"en",
|
|
2583
|
+
"zh"
|
|
2584
|
+
],
|
|
2585
|
+
"model_ability": [
|
|
2586
|
+
"generate"
|
|
2587
|
+
],
|
|
2588
|
+
"model_description": "DeepSeek LLM, trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese. ",
|
|
2589
|
+
"model_specs": [
|
|
2590
|
+
{
|
|
2591
|
+
"model_format": "pytorch",
|
|
2592
|
+
"model_size_in_billions": 7,
|
|
2593
|
+
"quantizations": [
|
|
2594
|
+
"4-bit",
|
|
2595
|
+
"8-bit",
|
|
2596
|
+
"none"
|
|
2597
|
+
],
|
|
2598
|
+
"model_id": "deepseek-ai/deepseek-llm-7b-base",
|
|
2599
|
+
"model_hub": "modelscope"
|
|
2600
|
+
},
|
|
2601
|
+
{
|
|
2602
|
+
"model_format": "pytorch",
|
|
2603
|
+
"model_size_in_billions": 67,
|
|
2604
|
+
"quantizations": [
|
|
2605
|
+
"4-bit",
|
|
2606
|
+
"8-bit",
|
|
2607
|
+
"none"
|
|
2608
|
+
],
|
|
2609
|
+
"model_id": "deepseek-ai/deepseek-llm-67b-base",
|
|
2610
|
+
"model_hub": "modelscope"
|
|
2611
|
+
}
|
|
2612
|
+
]
|
|
2613
|
+
},
|
|
2415
2614
|
{
|
|
2416
2615
|
"version": 1,
|
|
2417
2616
|
"context_length": 4096,
|
|
@@ -2464,7 +2663,55 @@
|
|
|
2464
2663
|
},
|
|
2465
2664
|
{
|
|
2466
2665
|
"version": 1,
|
|
2467
|
-
"context_length":
|
|
2666
|
+
"context_length": 16384,
|
|
2667
|
+
"model_name": "deepseek-coder",
|
|
2668
|
+
"model_lang": [
|
|
2669
|
+
"en",
|
|
2670
|
+
"zh"
|
|
2671
|
+
],
|
|
2672
|
+
"model_ability": [
|
|
2673
|
+
"generate"
|
|
2674
|
+
],
|
|
2675
|
+
"model_description": "Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese.",
|
|
2676
|
+
"model_specs": [
|
|
2677
|
+
{
|
|
2678
|
+
"model_format": "pytorch",
|
|
2679
|
+
"model_size_in_billions": "1_3",
|
|
2680
|
+
"quantizations": [
|
|
2681
|
+
"4-bit",
|
|
2682
|
+
"8-bit",
|
|
2683
|
+
"none"
|
|
2684
|
+
],
|
|
2685
|
+
"model_id": "deepseek-ai/deepseek-coder-1.3b-base",
|
|
2686
|
+
"model_hub": "modelscope"
|
|
2687
|
+
},
|
|
2688
|
+
{
|
|
2689
|
+
"model_format": "pytorch",
|
|
2690
|
+
"model_size_in_billions": "6_7",
|
|
2691
|
+
"quantizations": [
|
|
2692
|
+
"4-bit",
|
|
2693
|
+
"8-bit",
|
|
2694
|
+
"none"
|
|
2695
|
+
],
|
|
2696
|
+
"model_id": "deepseek-ai/deepseek-coder-6.7b-base",
|
|
2697
|
+
"model_hub": "modelscope"
|
|
2698
|
+
},
|
|
2699
|
+
{
|
|
2700
|
+
"model_format": "pytorch",
|
|
2701
|
+
"model_size_in_billions": 33,
|
|
2702
|
+
"quantizations": [
|
|
2703
|
+
"4-bit",
|
|
2704
|
+
"8-bit",
|
|
2705
|
+
"none"
|
|
2706
|
+
],
|
|
2707
|
+
"model_id": "deepseek-ai/deepseek-coder-33b-base",
|
|
2708
|
+
"model_hub": "modelscope"
|
|
2709
|
+
}
|
|
2710
|
+
]
|
|
2711
|
+
},
|
|
2712
|
+
{
|
|
2713
|
+
"version": 1,
|
|
2714
|
+
"context_length": 16384,
|
|
2468
2715
|
"model_name": "deepseek-coder-instruct",
|
|
2469
2716
|
"model_lang": [
|
|
2470
2717
|
"en",
|
|
@@ -2755,7 +3002,7 @@
|
|
|
2755
3002
|
},
|
|
2756
3003
|
{
|
|
2757
3004
|
"version": 1,
|
|
2758
|
-
"context_length":
|
|
3005
|
+
"context_length": 4096,
|
|
2759
3006
|
"model_name": "yi-vl-chat",
|
|
2760
3007
|
"model_lang": [
|
|
2761
3008
|
"en",
|
|
@@ -3253,7 +3500,7 @@
|
|
|
3253
3500
|
"ar"
|
|
3254
3501
|
],
|
|
3255
3502
|
"model_ability": [
|
|
3256
|
-
"
|
|
3503
|
+
"chat"
|
|
3257
3504
|
],
|
|
3258
3505
|
"model_description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.",
|
|
3259
3506
|
"model_specs": [
|
|
@@ -3272,11 +3519,12 @@
|
|
|
3272
3519
|
"model_size_in_billions": 35,
|
|
3273
3520
|
"quantizations": [
|
|
3274
3521
|
"Q2_K",
|
|
3522
|
+
"Q3_K_M",
|
|
3275
3523
|
"Q4_K_M",
|
|
3276
3524
|
"Q5_K_M"
|
|
3277
3525
|
],
|
|
3278
3526
|
"model_id": "mirror013/C4AI-Command-R-v01-GGUF",
|
|
3279
|
-
"model_file_name_template": "c4ai-command-r-v01
|
|
3527
|
+
"model_file_name_template": "c4ai-command-r-v01-{quantization}.gguf",
|
|
3280
3528
|
"model_hub": "modelscope",
|
|
3281
3529
|
"model_revision": "master"
|
|
3282
3530
|
},
|
|
@@ -3290,7 +3538,21 @@
|
|
|
3290
3538
|
"model_id": "AI-ModelScope/c4ai-command-r-plus",
|
|
3291
3539
|
"model_revision": "master"
|
|
3292
3540
|
}
|
|
3293
|
-
]
|
|
3541
|
+
],
|
|
3542
|
+
"prompt_style": {
|
|
3543
|
+
"style_name": "c4ai-command-r",
|
|
3544
|
+
"system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
|
|
3545
|
+
"roles": [
|
|
3546
|
+
"<|USER_TOKEN|>",
|
|
3547
|
+
"<|CHATBOT_TOKEN|>"
|
|
3548
|
+
],
|
|
3549
|
+
"intra_message_sep": "",
|
|
3550
|
+
"inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
|
|
3551
|
+
"stop_token_ids": [
|
|
3552
|
+
6,
|
|
3553
|
+
255001
|
|
3554
|
+
]
|
|
3555
|
+
}
|
|
3294
3556
|
},
|
|
3295
3557
|
{
|
|
3296
3558
|
"version": 1,
|
|
@@ -3309,7 +3571,7 @@
|
|
|
3309
3571
|
"ar"
|
|
3310
3572
|
],
|
|
3311
3573
|
"model_ability": [
|
|
3312
|
-
"
|
|
3574
|
+
"chat"
|
|
3313
3575
|
],
|
|
3314
3576
|
"model_description": "This model is 4bit quantized version of C4AI Command-R using bitsandbytes.",
|
|
3315
3577
|
"model_specs": [
|
|
@@ -3323,7 +3585,21 @@
|
|
|
3323
3585
|
"model_id": "mirror013/c4ai-command-r-v01-4bit",
|
|
3324
3586
|
"model_revision": "master"
|
|
3325
3587
|
}
|
|
3326
|
-
]
|
|
3588
|
+
],
|
|
3589
|
+
"prompt_style": {
|
|
3590
|
+
"style_name": "c4ai-command-r",
|
|
3591
|
+
"system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
|
|
3592
|
+
"roles": [
|
|
3593
|
+
"<|USER_TOKEN|>",
|
|
3594
|
+
"<|CHATBOT_TOKEN|>"
|
|
3595
|
+
],
|
|
3596
|
+
"intra_message_sep": "",
|
|
3597
|
+
"inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
|
|
3598
|
+
"stop_token_ids": [
|
|
3599
|
+
6,
|
|
3600
|
+
255001
|
|
3601
|
+
]
|
|
3602
|
+
}
|
|
3327
3603
|
},
|
|
3328
3604
|
{
|
|
3329
3605
|
"version": 1,
|
|
@@ -3412,5 +3688,56 @@
|
|
|
3412
3688
|
"<|end|>"
|
|
3413
3689
|
]
|
|
3414
3690
|
}
|
|
3415
|
-
}
|
|
3691
|
+
},
|
|
3692
|
+
{
|
|
3693
|
+
"version": 1,
|
|
3694
|
+
"context_length": 32768,
|
|
3695
|
+
"model_name": "internvl-chat",
|
|
3696
|
+
"model_lang": [
|
|
3697
|
+
"en",
|
|
3698
|
+
"zh"
|
|
3699
|
+
],
|
|
3700
|
+
"model_ability": [
|
|
3701
|
+
"chat",
|
|
3702
|
+
"vision"
|
|
3703
|
+
],
|
|
3704
|
+
"model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
|
|
3705
|
+
"model_specs": [
|
|
3706
|
+
{
|
|
3707
|
+
"model_format": "pytorch",
|
|
3708
|
+
"model_size_in_billions": 26,
|
|
3709
|
+
"quantizations": [
|
|
3710
|
+
"none"
|
|
3711
|
+
],
|
|
3712
|
+
"model_hub": "modelscope",
|
|
3713
|
+
"model_id": "AI-ModelScope/InternVL-Chat-V1-5",
|
|
3714
|
+
"model_revision": "master"
|
|
3715
|
+
},
|
|
3716
|
+
{
|
|
3717
|
+
"model_format": "pytorch",
|
|
3718
|
+
"model_size_in_billions": 26,
|
|
3719
|
+
"quantizations": [
|
|
3720
|
+
"Int8"
|
|
3721
|
+
],
|
|
3722
|
+
"model_hub": "modelscope",
|
|
3723
|
+
"model_id": "AI-ModelScope/InternVL-Chat-V1-5-{quantization}",
|
|
3724
|
+
"model_revision": "master"
|
|
3725
|
+
}
|
|
3726
|
+
],
|
|
3727
|
+
"prompt_style": {
|
|
3728
|
+
"style_name": "INTERNLM2",
|
|
3729
|
+
"system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
|
|
3730
|
+
"roles": [
|
|
3731
|
+
"<|im_start|>user",
|
|
3732
|
+
"<|im_start|>assistant"
|
|
3733
|
+
],
|
|
3734
|
+
"intra_message_sep": "<|im_end|>",
|
|
3735
|
+
"stop_token_ids": [
|
|
3736
|
+
92542
|
|
3737
|
+
],
|
|
3738
|
+
"stop": [
|
|
3739
|
+
"<|im_end|>"
|
|
3740
|
+
]
|
|
3741
|
+
}
|
|
3742
|
+
}
|
|
3416
3743
|
]
|