xinference 0.11.2.post1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (36)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +83 -8
  3. xinference/client/restful/restful_client.py +70 -0
  4. xinference/constants.py +8 -0
  5. xinference/core/__init__.py +0 -2
  6. xinference/core/cache_tracker.py +22 -1
  7. xinference/core/chat_interface.py +71 -10
  8. xinference/core/model.py +141 -12
  9. xinference/core/scheduler.py +428 -0
  10. xinference/core/supervisor.py +31 -3
  11. xinference/core/worker.py +8 -3
  12. xinference/isolation.py +9 -2
  13. xinference/model/audio/chattts.py +84 -0
  14. xinference/model/audio/core.py +10 -3
  15. xinference/model/audio/model_spec.json +20 -0
  16. xinference/model/llm/__init__.py +6 -0
  17. xinference/model/llm/llm_family.json +1063 -260
  18. xinference/model/llm/llm_family_modelscope.json +686 -13
  19. xinference/model/llm/pytorch/baichuan.py +2 -1
  20. xinference/model/llm/pytorch/chatglm.py +2 -1
  21. xinference/model/llm/pytorch/cogvlm2.py +316 -0
  22. xinference/model/llm/pytorch/core.py +92 -6
  23. xinference/model/llm/pytorch/glm4v.py +258 -0
  24. xinference/model/llm/pytorch/intern_vl.py +5 -10
  25. xinference/model/llm/pytorch/minicpmv25.py +232 -0
  26. xinference/model/llm/pytorch/utils.py +386 -2
  27. xinference/model/llm/vllm/core.py +7 -1
  28. xinference/thirdparty/ChatTTS/__init__.py +1 -0
  29. xinference/thirdparty/ChatTTS/core.py +200 -0
  30. xinference/types.py +3 -0
  31. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/METADATA +28 -11
  32. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/RECORD +36 -29
  33. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/LICENSE +0 -0
  34. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/WHEEL +0 -0
  35. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/entry_points.txt +0 -0
  36. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/top_level.txt +0 -0
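The llm_family JSON changes below register several new built-in models (glm4-chat, glm4-chat-1m, glm-4v, qwen2-instruct, qwen2-moe-instruct, MiniCPM-Llama3-V-2_5, cogvlm2, telechat) for the ModelScope hub. As a rough sketch of how one of these definitions is consumed, the snippet below launches glm4-chat through the RESTful client shipped in xinference/client/restful/restful_client.py; the endpoint URL, quantization choice, and generation settings are illustrative assumptions, not values taken from this diff.

    # Hedged sketch: launching a model newly registered in this release.
    # Endpoint, quantization and generate_config values are assumptions.
    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")  # assumed local xinference endpoint

    model_uid = client.launch_model(
        model_name="glm4-chat",   # added in llm_family_modelscope.json below
        model_format="pytorch",
        quantization="none",
    )

    model = client.get_model(model_uid)
    print(model.chat("Hello, please introduce yourself.",
                     generate_config={"max_tokens": 256}))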
@@ -522,6 +522,142 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "glm4-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/glm-4-9b-chat",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATGLM3",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        151329,
+        151336,
+        151338
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|user|>",
+        "<|observation|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 1048576,
+    "model_name": "glm4-chat-1m",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/glm-4-9b-chat-1m",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATGLM3",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        151329,
+        151336,
+        151338
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|user|>",
+        "<|observation|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "glm-4v",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/glm-4v-9b",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATGLM3",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        151329,
+        151336,
+        151338
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|user|>",
+        "<|observation|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 2048,
@@ -1496,6 +1632,127 @@
         "model_hub": "modelscope",
         "model_id": "01ai/Yi-1.5-34B-Chat",
         "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "AI-ModelScope/Yi-1.5-6B-Chat-GPTQ",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "AI-ModelScope/Yi-1.5-9B-Chat-GPTQ",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "AI-ModelScope/Yi-1.5-34B-Chat-GPTQ",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "AI-ModelScope/Yi-1.5-6B-Chat-AWQ",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "AI-ModelScope/Yi-1.5-9B-Chat-AWQ",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "AI-ModelScope/Yi-1.5-34B-Chat-AWQ",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATML",
+      "system_prompt": "",
+      "roles": [
+        "<|im_start|>user",
+        "<|im_start|>assistant"
+      ],
+      "intra_message_sep": "<|im_end|>",
+      "inter_message_sep": "",
+      "stop_token_ids": [
+        2,
+        6,
+        7,
+        8
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|im_start|>",
+        "<|im_end|>",
+        "<|im_sep|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 16384,
+    "model_name": "Yi-1.5-chat-16k",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "01ai/Yi-1.5-9B-Chat-16K",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "01ai/Yi-1.5-34B-Chat-16K",
+        "model_revision": "master"
       }
     ],
     "prompt_style": {
@@ -2529,39 +2786,266 @@
   },
   {
     "version": 1,
-    "context_length": 4096,
-    "model_name": "deepseek-vl-chat",
+    "context_length": 32768,
+    "model_name": "qwen2-instruct",
     "model_lang": [
       "en",
       "zh"
     ],
     "model_ability": [
       "chat",
-      "vision"
+      "tools"
     ],
-    "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
+    "model_description": "Qwen2 is the new series of Qwen large language models",
     "model_specs": [
       {
         "model_format": "pytorch",
-        "model_size_in_billions": "1_3",
+        "model_size_in_billions": "0_5",
         "quantizations": [
+          "4-bit",
+          "8-bit",
           "none"
         ],
-        "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
+        "model_id": "qwen/Qwen2-0.5B-Instruct",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2-1.5B-Instruct",
         "model_hub": "modelscope"
       },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
+          "4-bit",
+          "8-bit",
           "none"
         ],
-        "model_id": "deepseek-ai/deepseek-vl-7b-chat",
+        "model_id": "qwen/Qwen2-7B-Instruct",
         "model_hub": "modelscope"
-      }
-    ],
-    "prompt_style": {
-      "style_name": "DEEPSEEK_CHAT",
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2-72B-Instruct",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2-0.5B-Instruct-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2-1.5B-Instruct-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2-7B-Instruct-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2-72B-Instruct-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2-0.5B-Instruct-AWQ",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2-1.5B-Instruct-AWQ",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2-7B-Instruct-AWQ",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2-72B-Instruct-AWQ",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "q2_k",
+          "q3_k_m",
+          "q4_0",
+          "q4_k_m",
+          "q5_0",
+          "q5_k_m",
+          "q6_k",
+          "q8_0",
+          "fp16"
+        ],
+        "model_id": "qwen/Qwen2-0.5B-Instruct-GGUF",
+        "model_file_name_template": "qwen2-0_5b-instruct-{quantization}.gguf",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "QWEN",
+      "system_prompt": "You are a helpful assistant.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n",
+      "stop_token_ids": [
+        151643,
+        151644,
+        151645
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|im_start|>",
+        "<|im_end|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "qwen2-moe-instruct",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Qwen2 is the new series of Qwen large language models. ",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2-57B-A14B-Instruct",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "QWEN",
+      "system_prompt": "You are a helpful assistant.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n",
+      "stop_token_ids": [
+        151643,
+        151644,
+        151645
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|im_start|>",
+        "<|im_end|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "deepseek-vl-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "1_3",
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl-7b-chat",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "DEEPSEEK_CHAT",
       "system_prompt": "<|begin▁of▁sentence|>",
       "roles": [
         "User",
@@ -3115,7 +3599,7 @@
       "chat",
       "vision"
     ],
-    "model_description":"mniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
+    "model_description":"OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
     "model_specs":[
       {
         "model_format":"pytorch",
@@ -3347,6 +3831,50 @@
       ]
     }
   },
+  {
+    "version":1,
+    "context_length":2048,
+    "model_name":"MiniCPM-Llama3-V-2_5",
+    "model_lang":[
+      "en",
+      "zh"
+    ],
+    "model_ability":[
+      "chat",
+      "vision"
+    ],
+    "model_description":"MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters.",
+    "model_specs":[
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id":"OpenBMB/MiniCPM-Llama3-V-2_5",
+        "model_revision":"master"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "int4"
+        ],
+        "model_hub": "modelscope",
+        "model_id":"OpenBMB/MiniCPM-Llama3-V-2_5-{quantization}",
+        "model_revision":"master"
+      }
+    ],
+    "prompt_style":{
+      "style_name":"OmniLMM",
+      "system_prompt":"The role of first msg should be user",
+      "roles":[
+        "user",
+        "assistant"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 2048,
@@ -3739,5 +4267,150 @@
         "<|im_end|>"
       ]
     }
-  }
+  },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "cogvlm2",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "CogVLM2 have achieved good results in many lists compared to the previous generation of CogVLM open source models. Its excellent performance can compete with some non-open source models.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 20,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 20,
+        "quantizations": [
+          "int4"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B-{quantization}",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "LLAMA3",
+      "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n\n",
+      "inter_message_sep": "<|eot_id|>",
+      "stop_token_ids": [
+        128001,
+        128009
+      ],
+      "stop": [
+        "<|end_of_text|>",
+        "<|eot_id|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "telechat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "The TeleChat is a large language model developed and trained by China Telecom Artificial Intelligence Technology Co., LTD. The 7B model base is trained with 1.5 trillion Tokens and 3 trillion Tokens and Chinese high-quality corpus.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "TeleAI/telechat-7B",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "int4",
+          "int8"
+        ],
+        "model_id": "TeleAI/telechat-7B-{quantization}",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "TeleAI/TeleChat-12B",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "int4",
+          "int8"
+        ],
+        "model_id": "TeleAI/TeleChat-12B-{quantization}",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 52,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "TeleAI/TeleChat-52B",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "NO_COLON_TWO",
+      "system_prompt": "You are a helpful assistant.",
+      "roles": [
+        "<_user>",
+        "<_bot>"
+      ],
+      "intra_message_sep": "",
+      "inter_message_sep": "",
+      "stop": [
+        "<_end>",
+        "<_start>"
+      ],
+      "stop_token_ids": [
+        160133,
+        160132
+      ]
+    }
+  }
 ]