xinference 0.11.0__py3-none-any.whl → 0.11.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (56)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +30 -0
  3. xinference/client/restful/restful_client.py +29 -0
  4. xinference/core/cache_tracker.py +12 -1
  5. xinference/core/chat_interface.py +10 -4
  6. xinference/core/model.py +2 -2
  7. xinference/core/supervisor.py +30 -2
  8. xinference/core/utils.py +12 -0
  9. xinference/core/worker.py +4 -1
  10. xinference/deploy/cmdline.py +126 -0
  11. xinference/deploy/test/test_cmdline.py +24 -0
  12. xinference/fields.py +3 -1
  13. xinference/model/llm/__init__.py +2 -0
  14. xinference/model/llm/ggml/chatglm.py +98 -13
  15. xinference/model/llm/ggml/llamacpp.py +49 -2
  16. xinference/model/llm/llm_family.json +633 -9
  17. xinference/model/llm/llm_family.py +84 -10
  18. xinference/model/llm/llm_family_modelscope.json +337 -10
  19. xinference/model/llm/memory.py +332 -0
  20. xinference/model/llm/pytorch/chatglm.py +48 -0
  21. xinference/model/llm/pytorch/core.py +25 -6
  22. xinference/model/llm/pytorch/deepseek_vl.py +35 -9
  23. xinference/model/llm/pytorch/intern_vl.py +387 -0
  24. xinference/model/llm/pytorch/internlm2.py +32 -1
  25. xinference/model/llm/pytorch/qwen_vl.py +38 -11
  26. xinference/model/llm/pytorch/utils.py +38 -1
  27. xinference/model/llm/pytorch/yi_vl.py +42 -14
  28. xinference/model/llm/sglang/core.py +31 -9
  29. xinference/model/llm/utils.py +38 -5
  30. xinference/model/llm/vllm/core.py +87 -5
  31. xinference/model/rerank/core.py +23 -1
  32. xinference/model/utils.py +17 -7
  33. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +1 -1
  34. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +2 -2
  35. xinference/thirdparty/llava/mm_utils.py +3 -2
  36. xinference/thirdparty/llava/model/llava_arch.py +1 -1
  37. xinference/thirdparty/omnilmm/chat.py +6 -5
  38. xinference/types.py +10 -1
  39. xinference/web/ui/build/asset-manifest.json +3 -3
  40. xinference/web/ui/build/index.html +1 -1
  41. xinference/web/ui/build/static/js/{main.8e44da4b.js → main.551aa479.js} +3 -3
  42. xinference/web/ui/build/static/js/main.551aa479.js.map +1 -0
  43. xinference/web/ui/node_modules/.cache/babel-loader/1fa824d82b2af519de7700c594e50bde4bbca60d13bd3fabff576802e4070304.json +1 -0
  44. xinference/web/ui/node_modules/.cache/babel-loader/23caf6f1e52c43e983ca3bfd4189f41dbd645fa78f2dfdcd7f6b69bc41678665.json +1 -0
  45. xinference/web/ui/node_modules/.cache/babel-loader/a6da6bc3d0d2191adebee87fb58ecebe82d071087bd2f7f3a9c7fdd2ada130f2.json +1 -0
  46. {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/METADATA +10 -8
  47. {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/RECORD +52 -50
  48. xinference/web/ui/build/static/js/main.8e44da4b.js.map +0 -1
  49. xinference/web/ui/node_modules/.cache/babel-loader/1870cd6f7054d04e049e363c0a85526584fe25519378609d2838e28d7492bbf1.json +0 -1
  50. xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +0 -1
  51. xinference/web/ui/node_modules/.cache/babel-loader/ddaec68b88e5eff792df1e39a4b4b8b737bfc832293c015660c3c69334e3cf5c.json +0 -1
  52. /xinference/web/ui/build/static/js/{main.8e44da4b.js.LICENSE.txt → main.551aa479.js.LICENSE.txt} +0 -0
  53. {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/LICENSE +0 -0
  54. {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/WHEEL +0 -0
  55. {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/entry_points.txt +0 -0
  56. {xinference-0.11.0.dist-info → xinference-0.11.2.dist-info}/top_level.txt +0 -0
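The new xinference/model/llm/memory.py and the cmdline.py additions (items 10 and 19 above) introduce model-memory estimation, which the llm_family.py hunks below support by caching a model's config.json. memory.py itself is not shown in this diff; as a purely illustrative sketch of the kind of estimate such a module can derive from a config.json (generic transformer arithmetic, not xinference's actual code, and it assumes standard Llama-style config keys):

# Illustrative only: a back-of-the-envelope memory estimate from a cached
# config.json; xinference's real memory.py is not shown in this diff.
import json

def rough_model_mem_gb(config_path: str, context_length: int,
                       dtype_bytes: int = 2) -> float:
    with open(config_path) as f:
        cfg = json.load(f)
    hidden = cfg["hidden_size"]
    layers = cfg["num_hidden_layers"]
    vocab = cfg["vocab_size"]
    # Crude decoder-only parameter count: ~12 * hidden^2 per layer,
    # plus the embedding matrix.
    params = layers * 12 * hidden * hidden + vocab * hidden
    weights_bytes = params * dtype_bytes
    # KV cache for batch size 1: K and V, per layer, per position.
    kv_bytes = 2 * layers * context_length * hidden * dtype_bytes
    return (weights_bytes + kv_bytes) / 1024 ** 3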
xinference/model/llm/llm_family.py

@@ -34,6 +34,8 @@ from ..._compat import (
 )
 from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
 from ..utils import (
+    IS_NEW_HUGGINGFACE_HUB,
+    create_symlink,
     download_from_modelscope,
     is_valid_model_uri,
     parse_uri,
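Both new names come from xinference/model/utils.py (item 32 above). IS_NEW_HUGGINGFACE_HUB presumably gates the huggingface_hub release that deprecated local_dir_use_symlinks; a plausible definition, assuming a simple version check (the threshold is a guess, not taken from this diff):

# Assumed shape of the flag; the real definition in xinference/model/utils.py
# may use a different threshold or mechanism.
from packaging import version
import huggingface_hub

IS_NEW_HUGGINGFACE_HUB = version.parse(huggingface_hub.__version__) >= version.parse("0.23.0")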
@@ -447,6 +449,61 @@ def cache_from_uri(
         raise ValueError(f"Unsupported URL scheme: {src_scheme}")


+def cache_model_config(
+    llm_family: LLMFamilyV1,
+    llm_spec: "LLMSpecV1",
+):
+    """Download model config.json into cache_dir,
+    returns local filepath
+    """
+    cache_dir = _get_cache_dir_for_model_mem(llm_family, llm_spec)
+    config_file = os.path.join(cache_dir, "config.json")
+    if not os.path.islink(config_file) and not os.path.exists(config_file):
+        os.makedirs(cache_dir, exist_ok=True)
+        if llm_spec.model_hub == "huggingface":
+            from huggingface_hub import hf_hub_download
+
+            hf_hub_download(
+                repo_id=llm_spec.model_id, filename="config.json", local_dir=cache_dir
+            )
+        else:
+            from modelscope.hub.file_download import model_file_download
+
+            download_path = model_file_download(
+                model_id=llm_spec.model_id, file_path="config.json"
+            )
+            os.symlink(download_path, config_file)
+    return config_file
+
+
+def _get_cache_dir_for_model_mem(
+    llm_family: LLMFamilyV1,
+    llm_spec: "LLMSpecV1",
+    create_if_not_exist=True,
+):
+    """
+    For cal-model-mem only. (might called from supervisor / cli)
+    Temporary use separate dir from worker's cache_dir, due to issue of different style of symlink.
+    """
+    quant_suffix = ""
+    for q in llm_spec.quantizations:
+        if llm_spec.model_id and q in llm_spec.model_id:
+            quant_suffix = q
+            break
+    cache_dir_name = (
+        f"{llm_family.model_name}-{llm_spec.model_format}"
+        f"-{llm_spec.model_size_in_billions}b"
+    )
+    if quant_suffix:
+        cache_dir_name += f"-{quant_suffix}"
+    cache_dir = os.path.realpath(
+        os.path.join(XINFERENCE_CACHE_DIR, "model_mem", cache_dir_name)
+    )
+    if create_if_not_exist and not os.path.exists(cache_dir):
+        os.makedirs(cache_dir, exist_ok=True)
+    return cache_dir
+
+
 def _get_cache_dir(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
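To make the directory naming above concrete: for a hypothetical GGUF spec whose model_id embeds the quantization, the pieces compose as follows (values invented for illustration; XINFERENCE_CACHE_DIR usually resolves to ~/.xinference/cache):

# Hypothetical inputs, reproducing the naming logic of
# _get_cache_dir_for_model_mem above.
model_name = "llama-2"
model_format = "ggufv2"
model_size_in_billions = 7
quant_suffix = "Q4_K_M"  # a quantization found inside llm_spec.model_id

cache_dir_name = f"{model_name}-{model_format}-{model_size_in_billions}b"
if quant_suffix:
    cache_dir_name += f"-{quant_suffix}"
print(cache_dir_name)
# llama-2-ggufv2-7b-Q4_K_M -> <XINFERENCE_CACHE_DIR>/model_mem/llama-2-ggufv2-7b-Q4_K_M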
@@ -625,10 +682,7 @@ def cache_from_modelscope(
             llm_spec.model_id,
             revision=llm_spec.model_revision,
         )
-        for subdir, dirs, files in os.walk(download_dir):
-            for file in files:
-                relpath = os.path.relpath(os.path.join(subdir, file), download_dir)
-                symlink_local_file(os.path.join(subdir, file), cache_dir, relpath)
+        create_symlink(download_dir, cache_dir)

     elif llm_spec.model_format in ["ggmlv3", "ggufv2"]:
         file_names, final_file_name, need_merge = _generate_model_file_names(
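create_symlink, imported at the top of the file, replaces this inlined walk. Assuming it is a straight extraction of the removed loop, an equivalent helper would look like:

import os

def create_symlink(download_dir: str, cache_dir: str) -> None:
    # Mirror every file under download_dir into cache_dir as symlinks,
    # preserving relative paths -- the behavior of the loop removed above.
    for subdir, _dirs, files in os.walk(download_dir):
        for name in files:
            src = os.path.join(subdir, name)
            relpath = os.path.relpath(src, download_dir)
            dst = os.path.join(cache_dir, relpath)
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            if os.path.lexists(dst):
                os.remove(dst)
            os.symlink(src, dst)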
@@ -682,9 +736,13 @@ def cache_from_huggingface(
     ):
         return cache_dir

+    use_symlinks = {}
+    if not IS_NEW_HUGGINGFACE_HUB:
+        use_symlinks = {"local_dir_use_symlinks": True, "local_dir": cache_dir}
+
     if llm_spec.model_format in ["pytorch", "gptq", "awq"]:
         assert isinstance(llm_spec, PytorchLLMSpecV1)
-        retry_download(
+        download_dir = retry_download(
             huggingface_hub.snapshot_download,
             llm_family.model_name,
             {
@@ -693,9 +751,10 @@
             },
             llm_spec.model_id,
             revision=llm_spec.model_revision,
-            local_dir=cache_dir,
-            local_dir_use_symlinks=True,
+            **use_symlinks,
         )
+        if IS_NEW_HUGGINGFACE_HUB:
+            create_symlink(download_dir, cache_dir)

     elif llm_spec.model_format in ["ggmlv3", "ggufv2"]:
         assert isinstance(llm_spec, GgmlLLMSpecV1)
@@ -704,7 +763,7 @@
         )

         for file_name in file_names:
-            retry_download(
+            download_file_path = retry_download(
                 huggingface_hub.hf_hub_download,
                 llm_family.model_name,
                 {
@@ -714,9 +773,10 @@
                 llm_spec.model_id,
                 revision=llm_spec.model_revision,
                 filename=file_name,
-                local_dir=cache_dir,
-                local_dir_use_symlinks=True,
+                **use_symlinks,
             )
+            if IS_NEW_HUGGINGFACE_HUB:
+                symlink_local_file(download_file_path, cache_dir, file_name)

         if need_merge:
             _merge_cached_files(cache_dir, file_names, final_file_name)
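When a GGML/GGUF model ships as multiple parts, need_merge routes through _merge_cached_files. Assuming it concatenates the parts in order (the usual handling for split single-file formats; the actual implementation is not in this diff):

import os
import shutil

def merge_cached_files(cache_dir: str, file_names: list, final_file_name: str) -> None:
    # Assumption: binary-concatenate the downloaded parts, in order,
    # into the final model file.
    with open(os.path.join(cache_dir, final_file_name), "wb") as output:
        for file_name in file_names:
            with open(os.path.join(cache_dir, file_name), "rb") as part:
                shutil.copyfileobj(part, output)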
@@ -823,6 +883,20 @@
     return False


+def convert_model_size_to_float(
+    model_size_in_billions: Union[float, int, str]
+) -> float:
+    if isinstance(model_size_in_billions, str):
+        if "_" in model_size_in_billions:
+            ms = model_size_in_billions.replace("_", ".")
+            return float(ms)
+        elif "." in model_size_in_billions:
+            return float(model_size_in_billions)
+        else:
+            return int(model_size_in_billions)
+    return model_size_in_billions
+
+
 def match_llm(
     model_name: str,
     model_format: Optional[str] = None,
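The underscore convention exists because JSON spec entries encode fractional sizes without a dot, e.g. "model_size_in_billions": "1_3" in the deepseek-coder specs below. Expected behavior as a quick sketch (note the -> float annotation is loose: integer inputs come back as int, which is harmless in later arithmetic):

# Expected behavior per the code above (assumes the function is in scope,
# e.g. imported from xinference.model.llm.llm_family).
assert convert_model_size_to_float("1_3") == 1.3  # "1_3" -> 1.3 billion
assert convert_model_size_to_float("6_7") == 6.7
assert convert_model_size_to_float("34") == 34    # plain integer string -> int
assert convert_model_size_to_float(7) == 7        # non-strings pass through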
xinference/model/llm/llm_family_modelscope.json

@@ -1289,7 +1289,7 @@
   },
   {
     "version": 1,
-    "context_length": 204800,
+    "context_length": 262144,
     "model_name": "Yi-200k",
     "model_lang": [
       "en",
@@ -1328,7 +1328,7 @@
   },
   {
     "version": 1,
-    "context_length": 204800,
+    "context_length": 4096,
     "model_name": "Yi-chat",
     "model_lang": [
       "en",
@@ -1349,6 +1349,18 @@
         "model_id": "01ai/Yi-34B-Chat-{quantization}",
         "model_revision": "master"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "01ai/Yi-6B-Chat",
+        "model_revision": "master"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 34,
@@ -1385,6 +1397,130 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "Yi-1.5",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "01ai/Yi-1.5-6B",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "01ai/Yi-1.5-9B",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "01ai/Yi-1.5-34B",
+        "model_revision": "master"
+      }
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "Yi-1.5-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "01ai/Yi-1.5-6B-Chat",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "01ai/Yi-1.5-9B-Chat",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "01ai/Yi-1.5-34B-Chat",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATML",
+      "system_prompt": "",
+      "roles": [
+        "<|im_start|>user",
+        "<|im_start|>assistant"
+      ],
+      "intra_message_sep": "<|im_end|>",
+      "inter_message_sep": "",
+      "stop_token_ids": [
+        2,
+        6,
+        7,
+        8
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|im_start|>",
+        "<|im_end|>",
+        "<|im_sep|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 2048,
@@ -2294,6 +2430,32 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 65536,
+    "model_name": "codeqwen1.5",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "CodeQwen1.5 is the Code-Specific version of Qwen1.5. It is a transformer-based decoder-only language model pretrained on a large amount of data of codes.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/CodeQwen1.5-7B",
+        "model_hub": "modelscope"
+      }
+    ]
+  },
   {
     "version": 1,
     "context_length": 65536,
@@ -2412,6 +2574,43 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "deepseek",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "DDeepSeek LLM, trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese. ",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-llm-7b-base",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 67,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-llm-67b-base",
+        "model_hub": "modelscope"
+      }
+    ]
+  },
   {
     "version": 1,
     "context_length": 4096,
@@ -2464,7 +2663,55 @@
   },
   {
     "version": 1,
-    "context_length": 4096,
+    "context_length": 16384,
+    "model_name": "deepseek-coder",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "1_3",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-coder-1.3b-base",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "6_7",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-coder-6.7b-base",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 33,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-coder-33b-base",
+        "model_hub": "modelscope"
+      }
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 16384,
     "model_name": "deepseek-coder-instruct",
     "model_lang": [
       "en",
@@ -2755,7 +3002,7 @@
   },
   {
     "version": 1,
-    "context_length": 204800,
+    "context_length": 4096,
     "model_name": "yi-vl-chat",
     "model_lang": [
       "en",
@@ -3253,7 +3500,7 @@
       "ar"
     ],
     "model_ability": [
-      "generate"
+      "chat"
     ],
     "model_description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.",
     "model_specs": [
@@ -3272,11 +3519,12 @@
         "model_size_in_billions": 35,
         "quantizations": [
           "Q2_K",
+          "Q3_K_M",
           "Q4_K_M",
           "Q5_K_M"
         ],
         "model_id": "mirror013/C4AI-Command-R-v01-GGUF",
-        "model_file_name_template": "c4ai-command-r-v01.{quantization}.gguf",
+        "model_file_name_template": "c4ai-command-r-v01-{quantization}.gguf",
         "model_hub": "modelscope",
         "model_revision": "master"
       },
@@ -3290,7 +3538,21 @@
         "model_id": "AI-ModelScope/c4ai-command-r-plus",
         "model_revision": "master"
       }
-    ]
+    ],
+    "prompt_style": {
+      "style_name": "c4ai-command-r",
+      "system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
+      "roles": [
+        "<|USER_TOKEN|>",
+        "<|CHATBOT_TOKEN|>"
+      ],
+      "intra_message_sep": "",
+      "inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
+      "stop_token_ids": [
+        6,
+        255001
+      ]
+    }
   },
   {
     "version": 1,
@@ -3309,7 +3571,7 @@
       "ar"
     ],
     "model_ability": [
-      "generate"
+      "chat"
     ],
     "model_description": "This model is 4bit quantized version of C4AI Command-R using bitsandbytes.",
     "model_specs": [
@@ -3323,7 +3585,21 @@
         "model_id": "mirror013/c4ai-command-r-v01-4bit",
         "model_revision": "master"
       }
-    ]
+    ],
+    "prompt_style": {
+      "style_name": "c4ai-command-r",
+      "system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
+      "roles": [
+        "<|USER_TOKEN|>",
+        "<|CHATBOT_TOKEN|>"
+      ],
+      "intra_message_sep": "",
+      "inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
+      "stop_token_ids": [
+        6,
+        255001
+      ]
+    }
   },
   {
     "version": 1,
@@ -3412,5 +3688,56 @@
         "<|end|>"
       ]
     }
-  }
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "internvl-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 26,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/InternVL-Chat-V1-5",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 26,
+        "quantizations": [
+          "Int8"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/InternVL-Chat-V1-5-{quantization}",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "INTERNLM2",
+      "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
+      "roles": [
+        "<|im_start|>user",
+        "<|im_start|>assistant"
+      ],
+      "intra_message_sep": "<|im_end|>",
+      "stop_token_ids": [
+        92542
+      ],
+      "stop": [
+        "<|im_end|>"
+      ]
+    }
+  }
 ]
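With the internvl-chat entry registered, the model becomes launchable through the standard client. A minimal sketch, assuming a local xinference server on the default port; the image URL is a placeholder, and launch_model keyword arguments (e.g. model_engine) can vary slightly across versions:

from xinference.client import Client

client = Client("http://127.0.0.1:9997")
uid = client.launch_model(model_name="internvl-chat", model_type="LLM")
model = client.get_model(uid)

# Vision-capable chat models accept OpenAI-style content parts; treat the
# exact payload shape as illustrative.
response = model.chat(
    prompt=[
        {"type": "text", "text": "What is in this image?"},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    ],
)
print(response)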