xinference 0.11.3__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

@@ -4,6 +4,7 @@
4
4
  "model_family": "whisper",
5
5
  "model_id": "openai/whisper-tiny",
6
6
  "model_revision": "167c219b21f11ef214220b8fdb7536b8a88c2475",
7
+ "ability": "audio-to-text",
7
8
  "multilingual": true
8
9
  },
9
10
  {
@@ -11,6 +12,7 @@
11
12
  "model_family": "whisper",
12
13
  "model_id": "openai/whisper-tiny.en",
13
14
  "model_revision": "87c7102498dcde7456f24cfd30239ca606ed9063",
15
+ "ability": "audio-to-text",
14
16
  "multilingual": false
15
17
  },
16
18
  {
@@ -18,6 +20,7 @@
18
20
  "model_family": "whisper",
19
21
  "model_id": "openai/whisper-base",
20
22
  "model_revision": "8c1db9b51951100007a96a525d83a8ec81b3c237",
23
+ "ability": "audio-to-text",
21
24
  "multilingual": true
22
25
  },
23
26
  {
@@ -25,6 +28,7 @@
25
28
  "model_family": "whisper",
26
29
  "model_id": "openai/whisper-base.en",
27
30
  "model_revision": "911407f4214e0e1d82085af863093ec0b66f9cd6",
31
+ "ability": "audio-to-text",
28
32
  "multilingual": false
29
33
  },
30
34
  {
@@ -32,6 +36,7 @@
32
36
  "model_family": "whisper",
33
37
  "model_id": "openai/whisper-small",
34
38
  "model_revision": "998cb1a777c20db53d6033a61b977ed4c3792cac",
39
+ "ability": "audio-to-text",
35
40
  "multilingual": true
36
41
  },
37
42
  {
@@ -39,6 +44,7 @@
39
44
  "model_family": "whisper",
40
45
  "model_id": "openai/whisper-small.en",
41
46
  "model_revision": "e8727524f962ee844a7319d92be39ac1bd25655a",
47
+ "ability": "audio-to-text",
42
48
  "multilingual": false
43
49
  },
44
50
  {
@@ -46,6 +52,7 @@
46
52
  "model_family": "whisper",
47
53
  "model_id": "openai/whisper-medium",
48
54
  "model_revision": "16688beb1294bedd0a6f5cd86fe7eec57bce41ed",
55
+ "ability": "audio-to-text",
49
56
  "multilingual": true
50
57
  },
51
58
  {
@@ -53,6 +60,7 @@
53
60
  "model_family": "whisper",
54
61
  "model_id": "openai/whisper-medium.en",
55
62
  "model_revision": "2e98eb6279edf5095af0c8dedb36bdec0acd172b",
63
+ "ability": "audio-to-text",
56
64
  "multilingual": false
57
65
  },
58
66
  {
@@ -60,6 +68,7 @@
60
68
  "model_family": "whisper",
61
69
  "model_id": "openai/whisper-large-v3",
62
70
  "model_revision": "6cdf07a7e3ec3806e5d55f787915b85d4cd020b1",
71
+ "ability": "audio-to-text",
63
72
  "multilingual": true
64
73
  },
65
74
  {
@@ -67,6 +76,7 @@
67
76
  "model_family": "whisper",
68
77
  "model_id": "BELLE-2/Belle-distilwhisper-large-v2-zh",
69
78
  "model_revision": "ed25d13498fa5bac758b2fc479435b698532dfe8",
79
+ "ability": "audio-to-text",
70
80
  "multilingual": false
71
81
  },
72
82
  {
@@ -74,6 +84,7 @@
74
84
  "model_family": "whisper",
75
85
  "model_id": "BELLE-2/Belle-whisper-large-v2-zh",
76
86
  "model_revision": "ec5bd5d78598545b7585814edde86dac2002b5b9",
87
+ "ability": "audio-to-text",
77
88
  "multilingual": false
78
89
  },
79
90
  {
@@ -81,6 +92,15 @@
81
92
  "model_family": "whisper",
82
93
  "model_id": "BELLE-2/Belle-whisper-large-v3-zh",
83
94
  "model_revision": "3bebc7247696b39f5ab9ed22db426943ac33f600",
95
+ "ability": "audio-to-text",
84
96
  "multilingual": false
97
+ },
98
+ {
99
+ "model_name": "ChatTTS",
100
+ "model_family": "ChatTTS",
101
+ "model_id": "2Noise/ChatTTS",
102
+ "model_revision": "ce5913842aebd78e4a01a02d47244b8d62ac4ee3",
103
+ "ability": "text-to-audio",
104
+ "multilingual": true
85
105
  }
86
106
  ]
@@ -117,9 +117,11 @@ def _install():
117
117
  from .pytorch.core import PytorchChatModel, PytorchModel
118
118
  from .pytorch.deepseek_vl import DeepSeekVLChatModel
119
119
  from .pytorch.falcon import FalconPytorchChatModel, FalconPytorchModel
120
+ from .pytorch.glm4v import Glm4VModel
120
121
  from .pytorch.intern_vl import InternVLChatModel
121
122
  from .pytorch.internlm2 import Internlm2PytorchChatModel
122
123
  from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
124
+ from .pytorch.minicpmv25 import MiniCPMV25Model
123
125
  from .pytorch.qwen_vl import QwenVLChatModel
124
126
  from .pytorch.vicuna import VicunaPytorchChatModel
125
127
  from .pytorch.yi_vl import YiVLChatModel
@@ -161,6 +163,8 @@ def _install():
161
163
  InternVLChatModel,
162
164
  PytorchModel,
163
165
  CogVLM2Model,
166
+ MiniCPMV25Model,
167
+ Glm4VModel,
164
168
  ]
165
169
  )
166
170
  if OmniLMMModel: # type: ignore
@@ -831,6 +831,139 @@
831
831
  ]
832
832
  }
833
833
  },
834
+ {
835
+ "version": 1,
836
+ "context_length": 131072,
837
+ "model_name": "glm4-chat",
838
+ "model_lang": [
839
+ "en",
840
+ "zh"
841
+ ],
842
+ "model_ability": [
843
+ "chat",
844
+ "tools"
845
+ ],
846
+ "model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
847
+ "model_specs": [
848
+ {
849
+ "model_format": "pytorch",
850
+ "model_size_in_billions": 9,
851
+ "quantizations": [
852
+ "4-bit",
853
+ "8-bit",
854
+ "none"
855
+ ],
856
+ "model_id": "THUDM/glm-4-9b-chat",
857
+ "model_revision": "b84dc74294ccd507a3d78bde8aebf628221af9bd"
858
+ }
859
+ ],
860
+ "prompt_style": {
861
+ "style_name": "CHATGLM3",
862
+ "system_prompt": "",
863
+ "roles": [
864
+ "user",
865
+ "assistant"
866
+ ],
867
+ "stop_token_ids": [
868
+ 151329,
869
+ 151336,
870
+ 151338
871
+ ],
872
+ "stop": [
873
+ "<|endoftext|>",
874
+ "<|user|>",
875
+ "<|observation|>"
876
+ ]
877
+ }
878
+ },
879
+ {
880
+ "version": 1,
881
+ "context_length": 1048576,
882
+ "model_name": "glm4-chat-1m",
883
+ "model_lang": [
884
+ "en",
885
+ "zh"
886
+ ],
887
+ "model_ability": [
888
+ "chat",
889
+ "tools"
890
+ ],
891
+ "model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
892
+ "model_specs": [
893
+ {
894
+ "model_format": "pytorch",
895
+ "model_size_in_billions": 9,
896
+ "quantizations": [
897
+ "4-bit",
898
+ "8-bit",
899
+ "none"
900
+ ],
901
+ "model_id": "THUDM/glm-4-9b-chat-1m",
902
+ "model_revision": "715ddbe91082f976ff6a4ca06d59e5bbff6c3642"
903
+ }
904
+ ],
905
+ "prompt_style": {
906
+ "style_name": "CHATGLM3",
907
+ "system_prompt": "",
908
+ "roles": [
909
+ "user",
910
+ "assistant"
911
+ ],
912
+ "stop_token_ids": [
913
+ 151329,
914
+ 151336,
915
+ 151338
916
+ ],
917
+ "stop": [
918
+ "<|endoftext|>",
919
+ "<|user|>",
920
+ "<|observation|>"
921
+ ]
922
+ }
923
+ },
924
+ {
925
+ "version": 1,
926
+ "context_length": 8192,
927
+ "model_name": "glm-4v",
928
+ "model_lang": [
929
+ "en",
930
+ "zh"
931
+ ],
932
+ "model_ability": [
933
+ "chat",
934
+ "vision"
935
+ ],
936
+ "model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
937
+ "model_specs": [
938
+ {
939
+ "model_format": "pytorch",
940
+ "model_size_in_billions": 9,
941
+ "quantizations": [
942
+ "none"
943
+ ],
944
+ "model_id": "THUDM/glm-4v-9b",
945
+ "model_revision": "e8b84fefc07e58a90c8489337675573fda95e289"
946
+ }
947
+ ],
948
+ "prompt_style": {
949
+ "style_name": "CHATGLM3",
950
+ "system_prompt": "",
951
+ "roles": [
952
+ "user",
953
+ "assistant"
954
+ ],
955
+ "stop_token_ids": [
956
+ 151329,
957
+ 151336,
958
+ 151338
959
+ ],
960
+ "stop": [
961
+ "<|endoftext|>",
962
+ "<|user|>",
963
+ "<|observation|>"
964
+ ]
965
+ }
966
+ },
834
967
  {
835
968
  "version": 1,
836
969
  "context_length": 2048,
@@ -2291,6 +2424,218 @@
2291
2424
  ]
2292
2425
  }
2293
2426
  },
2427
+ {
2428
+ "version": 1,
2429
+ "context_length": 32768,
2430
+ "model_name": "qwen2-instruct",
2431
+ "model_lang": [
2432
+ "en",
2433
+ "zh"
2434
+ ],
2435
+ "model_ability": [
2436
+ "chat",
2437
+ "tools"
2438
+ ],
2439
+ "model_description": "Qwen2 is the new series of Qwen large language models",
2440
+ "model_specs": [
2441
+ {
2442
+ "model_format": "pytorch",
2443
+ "model_size_in_billions": "0_5",
2444
+ "quantizations": [
2445
+ "4-bit",
2446
+ "8-bit",
2447
+ "none"
2448
+ ],
2449
+ "model_id": "Qwen/Qwen2-0.5B-Instruct"
2450
+ },
2451
+ {
2452
+ "model_format": "pytorch",
2453
+ "model_size_in_billions": "1_5",
2454
+ "quantizations": [
2455
+ "4-bit",
2456
+ "8-bit",
2457
+ "none"
2458
+ ],
2459
+ "model_id": "Qwen/Qwen2-1.5B-Instruct"
2460
+ },
2461
+ {
2462
+ "model_format": "pytorch",
2463
+ "model_size_in_billions": 7,
2464
+ "quantizations": [
2465
+ "4-bit",
2466
+ "8-bit",
2467
+ "none"
2468
+ ],
2469
+ "model_id": "Qwen/Qwen2-7B-Instruct"
2470
+ },
2471
+ {
2472
+ "model_format": "pytorch",
2473
+ "model_size_in_billions": 72,
2474
+ "quantizations": [
2475
+ "4-bit",
2476
+ "8-bit",
2477
+ "none"
2478
+ ],
2479
+ "model_id": "Qwen/Qwen2-72B-Instruct"
2480
+ },
2481
+ {
2482
+ "model_format": "gptq",
2483
+ "model_size_in_billions": "0_5",
2484
+ "quantizations": [
2485
+ "Int4",
2486
+ "Int8"
2487
+ ],
2488
+ "model_id": "Qwen/Qwen2-0.5B-Instruct-GPTQ-{quantization}"
2489
+ },
2490
+ {
2491
+ "model_format": "gptq",
2492
+ "model_size_in_billions": "1_5",
2493
+ "quantizations": [
2494
+ "Int4",
2495
+ "Int8"
2496
+ ],
2497
+ "model_id": "Qwen/Qwen2-1.5B-Instruct-GPTQ-{quantization}"
2498
+ },
2499
+ {
2500
+ "model_format": "gptq",
2501
+ "model_size_in_billions": 7,
2502
+ "quantizations": [
2503
+ "Int4",
2504
+ "Int8"
2505
+ ],
2506
+ "model_id": "Qwen/Qwen2-7B-Instruct-GPTQ-{quantization}"
2507
+ },
2508
+ {
2509
+ "model_format": "gptq",
2510
+ "model_size_in_billions": 72,
2511
+ "quantizations": [
2512
+ "Int4",
2513
+ "Int8"
2514
+ ],
2515
+ "model_id": "Qwen/Qwen2-72B-Instruct-GPTQ-{quantization}"
2516
+ },
2517
+ {
2518
+ "model_format": "awq",
2519
+ "model_size_in_billions": "0_5",
2520
+ "quantizations": [
2521
+ "Int4"
2522
+ ],
2523
+ "model_id": "Qwen/Qwen2-0.5B-Instruct-AWQ"
2524
+ },
2525
+ {
2526
+ "model_format": "awq",
2527
+ "model_size_in_billions": "1_5",
2528
+ "quantizations": [
2529
+ "Int4"
2530
+ ],
2531
+ "model_id": "Qwen/Qwen2-1.5B-Instruct-AWQ"
2532
+ },
2533
+ {
2534
+ "model_format": "awq",
2535
+ "model_size_in_billions": 7,
2536
+ "quantizations": [
2537
+ "Int4"
2538
+ ],
2539
+ "model_id": "Qwen/Qwen2-7B-Instruct-AWQ"
2540
+ },
2541
+ {
2542
+ "model_format": "awq",
2543
+ "model_size_in_billions": 72,
2544
+ "quantizations": [
2545
+ "Int4"
2546
+ ],
2547
+ "model_id": "Qwen/Qwen2-72B-Instruct-AWQ"
2548
+ },
2549
+ {
2550
+ "model_format": "ggufv2",
2551
+ "model_size_in_billions": "0_5",
2552
+ "quantizations": [
2553
+ "q2_k",
2554
+ "q3_k_m",
2555
+ "q4_0",
2556
+ "q4_k_m",
2557
+ "q5_0",
2558
+ "q5_k_m",
2559
+ "q6_k",
2560
+ "q8_0",
2561
+ "fp16"
2562
+ ],
2563
+ "model_id": "Qwen/Qwen2-0.5B-Instruct-GGUF",
2564
+ "model_file_name_template": "qwen2-0_5b-instruct-{quantization}.gguf"
2565
+ }
2566
+ ],
2567
+ "prompt_style": {
2568
+ "style_name": "QWEN",
2569
+ "system_prompt": "You are a helpful assistant.",
2570
+ "roles": [
2571
+ "user",
2572
+ "assistant"
2573
+ ],
2574
+ "intra_message_sep": "\n",
2575
+ "stop_token_ids": [
2576
+ 151643,
2577
+ 151644,
2578
+ 151645
2579
+ ],
2580
+ "stop": [
2581
+ "<|endoftext|>",
2582
+ "<|im_start|>",
2583
+ "<|im_end|>"
2584
+ ]
2585
+ }
2586
+ },
2587
+ {
2588
+ "version": 1,
2589
+ "context_length": 32768,
2590
+ "model_name": "qwen2-moe-instruct",
2591
+ "model_lang": [
2592
+ "en",
2593
+ "zh"
2594
+ ],
2595
+ "model_ability": [
2596
+ "chat"
2597
+ ],
2598
+ "model_description": "Qwen2 is the new series of Qwen large language models.",
2599
+ "model_specs": [
2600
+ {
2601
+ "model_format": "pytorch",
2602
+ "model_size_in_billions": 14,
2603
+ "quantizations": [
2604
+ "4-bit",
2605
+ "8-bit",
2606
+ "none"
2607
+ ],
2608
+ "model_id": "Qwen/Qwen2-57B-A14B-Instruct"
2609
+ },
2610
+ {
2611
+ "model_format": "gptq",
2612
+ "model_size_in_billions": 14,
2613
+ "quantizations": [
2614
+ "Int4"
2615
+ ],
2616
+ "model_id": "Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4"
2617
+ }
2618
+ ],
2619
+ "prompt_style": {
2620
+ "style_name": "QWEN",
2621
+ "system_prompt": "You are a helpful assistant.",
2622
+ "roles": [
2623
+ "user",
2624
+ "assistant"
2625
+ ],
2626
+ "intra_message_sep": "\n",
2627
+ "stop_token_ids": [
2628
+ 151643,
2629
+ 151644,
2630
+ 151645
2631
+ ],
2632
+ "stop": [
2633
+ "<|endoftext|>",
2634
+ "<|im_start|>",
2635
+ "<|im_end|>"
2636
+ ]
2637
+ }
2638
+ },
2294
2639
  {
2295
2640
  "version": 1,
2296
2641
  "context_length": 8192,
@@ -3251,6 +3596,125 @@
3251
3596
  ]
3252
3597
  }
3253
3598
  },
3599
+ {
3600
+ "version": 1,
3601
+ "context_length": 32768,
3602
+ "model_name": "mistral-instruct-v0.3",
3603
+ "model_lang": [
3604
+ "en"
3605
+ ],
3606
+ "model_ability": [
3607
+ "chat"
3608
+ ],
3609
+ "model_description": "The Mistral-7B-Instruct-v0.3 Large Language Model (LLM) is an instruct fine-tuned version of Mistral-7B-v0.3.",
3610
+ "model_specs": [
3611
+ {
3612
+ "model_format": "pytorch",
3613
+ "model_size_in_billions": 7,
3614
+ "quantizations": [
3615
+ "4-bit",
3616
+ "8-bit",
3617
+ "none"
3618
+ ],
3619
+ "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
3620
+ "model_revision": "83e9aa141f2e28c82232fea5325f54edf17c43de"
3621
+ },
3622
+ {
3623
+ "model_format": "gptq",
3624
+ "model_size_in_billions": 7,
3625
+ "quantizations": [
3626
+ "Int4"
3627
+ ],
3628
+ "model_id": "neuralmagic/Mistral-7B-Instruct-v0.3-GPTQ-4bit"
3629
+ },
3630
+ {
3631
+ "model_format": "awq",
3632
+ "model_size_in_billions": 7,
3633
+ "quantizations": [
3634
+ "Int4"
3635
+ ],
3636
+ "model_id": "solidrust/Mistral-7B-Instruct-v0.3-AWQ"
3637
+ },
3638
+ {
3639
+ "model_format": "ggufv2",
3640
+ "model_size_in_billions": 7,
3641
+ "quantizations": [
3642
+ "Q2_K",
3643
+ "Q3_K_S",
3644
+ "Q3_K_M",
3645
+ "Q3_K_L",
3646
+ "Q4_K_S",
3647
+ "Q4_K_M",
3648
+ "Q5_K_S",
3649
+ "Q5_K_M",
3650
+ "Q6_K",
3651
+ "Q8_0",
3652
+ "fp16"
3653
+ ],
3654
+ "model_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
3655
+ "model_file_name_template": "Mistral-7B-Instruct-v0.3.{quantization}.gguf"
3656
+ }
3657
+ ],
3658
+ "prompt_style": {
3659
+ "style_name": "LLAMA2",
3660
+ "system_prompt": "[INST] ",
3661
+ "roles": [
3662
+ "[INST]",
3663
+ "[/INST]"
3664
+ ],
3665
+ "intra_message_sep": " ",
3666
+ "inter_message_sep": "<s>",
3667
+ "stop_token_ids": [
3668
+ 2
3669
+ ],
3670
+ "stop": [
3671
+ "</s>"
3672
+ ]
3673
+ }
3674
+ },
3675
+ {
3676
+ "version": 1,
3677
+ "context_length": 32768,
3678
+ "model_name": "codestral-v0.1",
3679
+ "model_lang": [
3680
+ "en"
3681
+ ],
3682
+ "model_ability": [
3683
+ "generate"
3684
+ ],
3685
+ "model_description": "Codestral-22B-v0.1 is trained on a diverse dataset of 80+ programming languages, including the most popular ones, such as Python, Java, C, C++, JavaScript, and Bash",
3686
+ "model_specs": [
3687
+ {
3688
+ "model_format": "pytorch",
3689
+ "model_size_in_billions": 22,
3690
+ "quantizations": [
3691
+ "4-bit",
3692
+ "8-bit",
3693
+ "none"
3694
+ ],
3695
+ "model_id": "mistralai/Mistral-7B-Instruct-v0.2",
3696
+ "model_revision": "9552e7b1d9b2d5bbd87a5aa7221817285dbb6366"
3697
+ },
3698
+ {
3699
+ "model_format": "ggufv2",
3700
+ "model_size_in_billions": 22,
3701
+ "quantizations": [
3702
+ "Q2_K",
3703
+ "Q3_K_S",
3704
+ "Q3_K_M",
3705
+ "Q3_K_L",
3706
+ "Q4_K_S",
3707
+ "Q4_K_M",
3708
+ "Q5_K_S",
3709
+ "Q5_K_M",
3710
+ "Q6_K",
3711
+ "Q8_0"
3712
+ ],
3713
+ "model_id": "bartowski/Codestral-22B-v0.1-GGUF",
3714
+ "model_file_name_template": "Codestral-22B-v0.1-{quantization}.gguf"
3715
+ }
3716
+ ]
3717
+ },
3254
3718
  {
3255
3719
  "version": 1,
3256
3720
  "context_length": 8192,
@@ -5258,6 +5722,48 @@
5258
5722
  ]
5259
5723
  }
5260
5724
  },
5725
+ {
5726
+ "version":1,
5727
+ "context_length":2048,
5728
+ "model_name":"MiniCPM-Llama3-V-2_5",
5729
+ "model_lang":[
5730
+ "en",
5731
+ "zh"
5732
+ ],
5733
+ "model_ability":[
5734
+ "chat",
5735
+ "vision"
5736
+ ],
5737
+ "model_description":"MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters.",
5738
+ "model_specs":[
5739
+ {
5740
+ "model_format":"pytorch",
5741
+ "model_size_in_billions":8,
5742
+ "quantizations":[
5743
+ "none"
5744
+ ],
5745
+ "model_id":"openbmb/MiniCPM-Llama3-V-2_5",
5746
+ "model_revision":"285a637ba8a30a0660dfcccad16f9a864f75abfd"
5747
+ },
5748
+ {
5749
+ "model_format":"pytorch",
5750
+ "model_size_in_billions":8,
5751
+ "quantizations":[
5752
+ "int4"
5753
+ ],
5754
+ "model_id":"openbmb/MiniCPM-Llama3-V-2_5-{quantization}",
5755
+ "model_revision":"f92aff28552de35de3be204e8fe292dd4824e544"
5756
+ }
5757
+ ],
5758
+ "prompt_style":{
5759
+ "style_name":"OmniLMM",
5760
+ "system_prompt":"The role of first msg should be user",
5761
+ "roles":[
5762
+ "user",
5763
+ "assistant"
5764
+ ]
5765
+ }
5766
+ },
5261
5767
  {
5262
5768
  "version": 1,
5263
5769
  "context_length": 4096,
@@ -6277,7 +6783,7 @@
6277
6783
  "quantizations": [
6278
6784
  "int4"
6279
6785
  ],
6280
- "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B-{quantizations}",
6786
+ "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B-{quantization}",
6281
6787
  "model_revision": "7863e362174f4718c2fe9cba4befd0b580a3194f"
6282
6788
  }
6283
6789
  ],