xinference 0.11.2.post1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (36)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +83 -8
  3. xinference/client/restful/restful_client.py +70 -0
  4. xinference/constants.py +8 -0
  5. xinference/core/__init__.py +0 -2
  6. xinference/core/cache_tracker.py +22 -1
  7. xinference/core/chat_interface.py +71 -10
  8. xinference/core/model.py +141 -12
  9. xinference/core/scheduler.py +428 -0
  10. xinference/core/supervisor.py +31 -3
  11. xinference/core/worker.py +8 -3
  12. xinference/isolation.py +9 -2
  13. xinference/model/audio/chattts.py +84 -0
  14. xinference/model/audio/core.py +10 -3
  15. xinference/model/audio/model_spec.json +20 -0
  16. xinference/model/llm/__init__.py +6 -0
  17. xinference/model/llm/llm_family.json +1063 -260
  18. xinference/model/llm/llm_family_modelscope.json +686 -13
  19. xinference/model/llm/pytorch/baichuan.py +2 -1
  20. xinference/model/llm/pytorch/chatglm.py +2 -1
  21. xinference/model/llm/pytorch/cogvlm2.py +316 -0
  22. xinference/model/llm/pytorch/core.py +92 -6
  23. xinference/model/llm/pytorch/glm4v.py +258 -0
  24. xinference/model/llm/pytorch/intern_vl.py +5 -10
  25. xinference/model/llm/pytorch/minicpmv25.py +232 -0
  26. xinference/model/llm/pytorch/utils.py +386 -2
  27. xinference/model/llm/vllm/core.py +7 -1
  28. xinference/thirdparty/ChatTTS/__init__.py +1 -0
  29. xinference/thirdparty/ChatTTS/core.py +200 -0
  30. xinference/types.py +3 -0
  31. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/METADATA +28 -11
  32. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/RECORD +36 -29
  33. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/LICENSE +0 -0
  34. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/WHEEL +0 -0
  35. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/entry_points.txt +0 -0
  36. {xinference-0.11.2.post1.dist-info → xinference-0.12.0.dist-info}/top_level.txt +0 -0
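Beyond the JSON registry churn below, the notable additions in this file list are a new `xinference/core/scheduler.py` (+428 lines), new vision model implementations (`cogvlm2.py`, `glm4v.py`, `minicpmv25.py`), and a vendored `ChatTTS` under `xinference/thirdparty`. For anyone checking which side of this diff they are running, a minimal sketch using only the standard library (nothing from xinference itself is assumed):

    # Confirm the installed wheel version, stdlib only.
    from importlib.metadata import version

    print(version("xinference"))  # "0.11.2.post1" before upgrading, "0.12.0" after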
@@ -831,6 +831,139 @@
  ]
  }
  },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "glm4-chat",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "tools"
+ ],
+ "model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/glm-4-9b-chat",
+ "model_revision": "b84dc74294ccd507a3d78bde8aebf628221af9bd"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "CHATGLM3",
+ "system_prompt": "",
+ "roles": [
+ "user",
+ "assistant"
+ ],
+ "stop_token_ids": [
+ 151329,
+ 151336,
+ 151338
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|user|>",
+ "<|observation|>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 1048576,
+ "model_name": "glm4-chat-1m",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "tools"
+ ],
+ "model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/glm-4-9b-chat-1m",
+ "model_revision": "715ddbe91082f976ff6a4ca06d59e5bbff6c3642"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "CHATGLM3",
+ "system_prompt": "",
+ "roles": [
+ "user",
+ "assistant"
+ ],
+ "stop_token_ids": [
+ 151329,
+ 151336,
+ 151338
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|user|>",
+ "<|observation|>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 8192,
+ "model_name": "glm-4v",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "THUDM/glm-4v-9b",
+ "model_revision": "e8b84fefc07e58a90c8489337675573fda95e289"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "CHATGLM3",
+ "system_prompt": "",
+ "roles": [
+ "user",
+ "assistant"
+ ],
+ "stop_token_ids": [
+ 151329,
+ 151336,
+ 151338
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|user|>",
+ "<|observation|>"
+ ]
+ }
+ },
  {
  "version": 1,
  "context_length": 2048,
@@ -2293,176 +2426,388 @@
  },
  {
  "version": 1,
- "context_length": 8192,
- "model_name": "starcoder",
- "model_lang": [
- "en"
- ],
- "model_ability": [
- "generate"
- ],
- "model_description": "Starcoder is an open-source Transformer based LLM that is trained on permissively licensed data from GitHub.",
- "model_specs": [
- {
- "model_format": "ggmlv3",
- "model_size_in_billions": 16,
- "quantizations": [
- "q4_0",
- "q4_1",
- "q5_0",
- "q5_1",
- "q8_0"
- ],
- "model_id": "TheBloke/starcoder-GGML",
- "model_file_name_template": "starcoder.ggmlv3.{quantization}.bin"
- }
- ]
- },
- {
- "version": 1,
- "context_length": 1024,
- "model_name": "gpt-2",
- "model_lang": [
- "en"
- ],
- "model_ability": [
- "generate"
- ],
- "model_description": "GPT-2 is a Transformer-based LLM that is trained on WebTest, a 40 GB dataset of Reddit posts with 3+ upvotes.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": "1_5",
- "quantizations": [
- "none"
- ],
- "model_id": "openai-community/gpt2",
- "model_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e"
- }
- ]
- },
- {
- "version": 1,
- "context_length": 8192,
- "model_name": "internlm-7b",
+ "context_length": 32768,
+ "model_name": "qwen2-instruct",
  "model_lang": [
  "en",
  "zh"
  ],
  "model_ability": [
- "generate"
+ "chat",
+ "tools"
  ],
- "model_description": "InternLM is a Transformer-based LLM that is trained on both Chinese and English data, focusing on practical scenarios.",
+ "model_description": "Qwen2 is the new series of Qwen large language models",
  "model_specs": [
  {
  "model_format": "pytorch",
- "model_size_in_billions": 7,
+ "model_size_in_billions": "0_5",
  "quantizations": [
  "4-bit",
  "8-bit",
  "none"
  ],
- "model_id": "internlm/internlm-7b",
- "model_revision": "592b0efc83be3eb1cba8990c4caf41ce604b958c"
- }
- ]
- },
- {
- "version": 1,
- "context_length": 4096,
- "model_name": "internlm-chat-7b",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat"
- ],
- "model_description": "Internlm-chat is a fine-tuned version of the Internlm LLM, specializing in chatting.",
- "model_specs": [
+ "model_id": "Qwen/Qwen2-0.5B-Instruct"
+ },
  {
  "model_format": "pytorch",
- "model_size_in_billions": 7,
+ "model_size_in_billions": "1_5",
  "quantizations": [
  "4-bit",
  "8-bit",
  "none"
  ],
- "model_id": "internlm/internlm-chat-7b",
- "model_revision": "d4fa2dbcbd2fa4edfa6735aa2ba0f0577fed6a62"
- }
- ],
- "prompt_style": {
- "style_name": "INTERNLM",
- "system_prompt": "",
- "roles": [
- "<|User|>",
- "<|Bot|>"
- ],
- "intra_message_sep": "<eoh>\n",
- "inter_message_sep": "<eoa>\n",
- "stop_token_ids": [
- 1,
- 103028
- ],
- "stop": [
- "<eoa>"
- ]
- }
- },
- {
- "version": 1,
- "context_length": 16384,
- "model_name": "internlm-20b",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "generate"
- ],
- "model_description": "Pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data.",
- "model_specs": [
+ "model_id": "Qwen/Qwen2-1.5B-Instruct"
+ },
  {
  "model_format": "pytorch",
- "model_size_in_billions": 20,
+ "model_size_in_billions": 7,
  "quantizations": [
  "4-bit",
  "8-bit",
  "none"
  ],
- "model_id": "internlm/internlm-20b",
- "model_revision": "c56a72957239b490ea206ea857e86611b3f65f3a"
- }
- ]
- },
- {
- "version": 1,
- "context_length": 16384,
- "model_name": "internlm-chat-20b",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat"
- ],
- "model_description": "Pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data. The Chat version has undergone SFT and RLHF training.",
- "model_specs": [
+ "model_id": "Qwen/Qwen2-7B-Instruct"
+ },
  {
  "model_format": "pytorch",
- "model_size_in_billions": 20,
+ "model_size_in_billions": 72,
  "quantizations": [
  "4-bit",
  "8-bit",
  "none"
  ],
- "model_id": "internlm/internlm-chat-20b",
- "model_revision": "c67e80e42c4950ebae18a955c9fe138c5ceb5b10"
- }
- ],
- "prompt_style": {
- "style_name": "INTERNLM",
+ "model_id": "Qwen/Qwen2-72B-Instruct"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": "0_5",
+ "quantizations": [
+ "Int4",
+ "Int8"
+ ],
+ "model_id": "Qwen/Qwen2-0.5B-Instruct-GPTQ-{quantization}"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "Int4",
+ "Int8"
+ ],
+ "model_id": "Qwen/Qwen2-1.5B-Instruct-GPTQ-{quantization}"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4",
+ "Int8"
+ ],
+ "model_id": "Qwen/Qwen2-7B-Instruct-GPTQ-{quantization}"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 72,
+ "quantizations": [
+ "Int4",
+ "Int8"
+ ],
+ "model_id": "Qwen/Qwen2-72B-Instruct-GPTQ-{quantization}"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "0_5",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2-0.5B-Instruct-AWQ"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2-1.5B-Instruct-AWQ"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2-7B-Instruct-AWQ"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 72,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2-72B-Instruct-AWQ"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "0_5",
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0",
+ "fp16"
+ ],
+ "model_id": "Qwen/Qwen2-0.5B-Instruct-GGUF",
+ "model_file_name_template": "qwen2-0_5b-instruct-{quantization}.gguf"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "QWEN",
+ "system_prompt": "You are a helpful assistant.",
+ "roles": [
+ "user",
+ "assistant"
+ ],
+ "intra_message_sep": "\n",
+ "stop_token_ids": [
+ 151643,
+ 151644,
+ 151645
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|im_start|>",
+ "<|im_end|>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "qwen2-moe-instruct",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Qwen2 is the new series of Qwen large language models. ",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 14,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "Qwen/Qwen2-57B-A14B-Instruct"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 14,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "QWEN",
+ "system_prompt": "You are a helpful assistant.",
+ "roles": [
+ "user",
+ "assistant"
+ ],
+ "intra_message_sep": "\n",
+ "stop_token_ids": [
+ 151643,
+ 151644,
+ 151645
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|im_start|>",
+ "<|im_end|>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 8192,
+ "model_name": "starcoder",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "Starcoder is an open-source Transformer based LLM that is trained on permissively licensed data from GitHub.",
+ "model_specs": [
+ {
+ "model_format": "ggmlv3",
+ "model_size_in_billions": 16,
+ "quantizations": [
+ "q4_0",
+ "q4_1",
+ "q5_0",
+ "q5_1",
+ "q8_0"
+ ],
+ "model_id": "TheBloke/starcoder-GGML",
+ "model_file_name_template": "starcoder.ggmlv3.{quantization}.bin"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 1024,
+ "model_name": "gpt-2",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "GPT-2 is a Transformer-based LLM that is trained on WebTest, a 40 GB dataset of Reddit posts with 3+ upvotes.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "openai-community/gpt2",
+ "model_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 8192,
+ "model_name": "internlm-7b",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "InternLM is a Transformer-based LLM that is trained on both Chinese and English data, focusing on practical scenarios.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "internlm/internlm-7b",
+ "model_revision": "592b0efc83be3eb1cba8990c4caf41ce604b958c"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 4096,
+ "model_name": "internlm-chat-7b",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Internlm-chat is a fine-tuned version of the Internlm LLM, specializing in chatting.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "internlm/internlm-chat-7b",
+ "model_revision": "d4fa2dbcbd2fa4edfa6735aa2ba0f0577fed6a62"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "INTERNLM",
+ "system_prompt": "",
+ "roles": [
+ "<|User|>",
+ "<|Bot|>"
+ ],
+ "intra_message_sep": "<eoh>\n",
+ "inter_message_sep": "<eoa>\n",
+ "stop_token_ids": [
+ 1,
+ 103028
+ ],
+ "stop": [
+ "<eoa>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 16384,
+ "model_name": "internlm-20b",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "Pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 20,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "internlm/internlm-20b",
+ "model_revision": "c56a72957239b490ea206ea857e86611b3f65f3a"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 16384,
+ "model_name": "internlm-chat-20b",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data. The Chat version has undergone SFT and RLHF training.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 20,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "internlm/internlm-chat-20b",
+ "model_revision": "c67e80e42c4950ebae18a955c9fe138c5ceb5b10"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "INTERNLM",
  "system_prompt": "",
  "roles": [
  "<|User|>",
@@ -3211,7 +3556,84 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "TheBloke/Mistral-7B-Instruct-v0.2-AWQ"
+ "model_id": "TheBloke/Mistral-7B-Instruct-v0.2-AWQ"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_S",
+ "Q3_K_M",
+ "Q3_K_L",
+ "Q4_0",
+ "Q4_K_S",
+ "Q4_K_M",
+ "Q5_0",
+ "Q5_K_S",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
+ "model_file_name_template": "mistral-7b-instruct-v0.2.{quantization}.gguf"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "LLAMA2",
+ "system_prompt": "[INST] ",
+ "roles": [
+ "[INST]",
+ "[/INST]"
+ ],
+ "intra_message_sep": " ",
+ "inter_message_sep": "<s>",
+ "stop_token_ids": [
+ 2
+ ],
+ "stop": [
+ "</s>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "mistral-instruct-v0.3",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
+ "model_revision": "83e9aa141f2e28c82232fea5325f54edf17c43de"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "neuralmagic/Mistral-7B-Instruct-v0.3-GPTQ-4bit"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "solidrust/Mistral-7B-Instruct-v0.3-AWQ"
  },
  {
  "model_format": "ggufv2",
@@ -3221,17 +3643,16 @@
  "Q3_K_S",
  "Q3_K_M",
  "Q3_K_L",
- "Q4_0",
  "Q4_K_S",
  "Q4_K_M",
- "Q5_0",
  "Q5_K_S",
  "Q5_K_M",
  "Q6_K",
- "Q8_0"
+ "Q8_0",
+ "fp16"
  ],
- "model_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
- "model_file_name_template": "mistral-7b-instruct-v0.2.{quantization}.gguf"
+ "model_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
+ "model_file_name_template": "Mistral-7B-Instruct-v0.3.{quantization}.gguf"
  }
  ],
  "prompt_style": {
@@ -3251,6 +3672,49 @@
  ]
  }
  },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "codestral-v0.1",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "Codestrall-22B-v0.1 is trained on a diverse dataset of 80+ programming languages, including the most popular ones, such as Python, Java, C, C++, JavaScript, and Bash",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 22,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "mistralai/Mistral-7B-Instruct-v0.2",
+ "model_revision": "9552e7b1d9b2d5bbd87a5aa7221817285dbb6366"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 22,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_S",
+ "Q3_K_M",
+ "Q3_K_L",
+ "Q4_K_S",
+ "Q4_K_M",
+ "Q5_K_S",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "bartowski/Codestral-22B-v0.1-GGUF",
+ "model_file_name_template": "Codestral-22B-v0.1-{quantization}.gguf"
+ }
+ ]
+ },
  {
  "version": 1,
  "context_length": 8192,
@@ -3740,39 +4204,253 @@
  "8-bit",
  "none"
  ],
- "model_id": "01-ai/Yi-6B-Chat",
- "model_revision": "1c20c960895e4c3877cf478bc2df074221b81d7b"
+ "model_id": "01-ai/Yi-6B-Chat",
+ "model_revision": "1c20c960895e4c3877cf478bc2df074221b81d7b"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 34,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "01-ai/Yi-34B-Chat",
+ "model_revision": "a99ec35331cbfc9da596af7d4538fe2efecff03c"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 34,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "TheBloke/Yi-34B-Chat-GGUF",
+ "model_file_name_template": "yi-34b-chat.{quantization}.gguf"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "CHATML",
+ "system_prompt": "",
+ "roles": [
+ "<|im_start|>user",
+ "<|im_start|>assistant"
+ ],
+ "intra_message_sep": "<|im_end|>",
+ "inter_message_sep": "",
+ "stop_token_ids": [
+ 2,
+ 6,
+ 7,
+ 8
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|im_sep|>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 4096,
+ "model_name": "Yi-1.5",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 6,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "01-ai/Yi-1.5-6B",
+ "model_revision": "741a657c42d2081f777ce4c6c5572090f8b8c886"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "01-ai/Yi-1.5-9B",
+ "model_revision": "9a6839c5b9db3dbb245fb98a072bfabc242621f2"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 34,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "01-ai/Yi-1.5-34B",
+ "model_revision": "4f83007957ec3eec76d87df19ad061eb0f57b5c5"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 4096,
+ "model_name": "Yi-1.5-chat",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 6,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "01-ai/Yi-1.5-6B-Chat",
+ "model_revision": "d68dab90947a3c869e28c9cb2806996af99a6080"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "01-ai/Yi-1.5-9B-Chat",
+ "model_revision": "1dc6e2b8dcfc12b95bede8dec67e6b6332ac64c6"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 34,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "01-ai/Yi-1.5-34B-Chat",
+ "model_revision": "fa695ee438bfcd0ec2b378fa1c7e0dea1b40393e"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 6,
+ "quantizations": [
+ "Q3_K_L",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "f32"
+ ],
+ "model_id": "lmstudio-community/Yi-1.5-6B-Chat-GGUF",
+ "model_file_name_template": "Yi-1.5-6B-Chat-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "Q3_K_L",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "f32"
+ ],
+ "model_id": "lmstudio-community/Yi-1.5-9B-Chat-GGUF",
+ "model_file_name_template": "Yi-1.5-9B-Chat-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 34,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_L",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "lmstudio-community/Yi-1.5-34B-Chat-GGUF",
+ "model_file_name_template": "Yi-1.5-34B-Chat-{quantization}.gguf"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 6,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "modelscope/Yi-1.5-6B-Chat-GPTQ",
+ "model_revision": "2ad3a602e64d1c79e28e6e92beced2935047367c"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "modelscope/Yi-1.5-9B-Chat-GPTQ",
+ "model_revision": "76f47d16982923f7b6674c4e23ddac7c3b1d2e03"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 34,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "modelscope/Yi-1.5-34B-Chat-GPTQ",
+ "model_revision": "173fb4036265b2dac1d6296a8e2fd2f652c19968"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 6,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "modelscope/Yi-1.5-6B-Chat-AWQ",
+ "model_revision": "23bf37f1666874e15e239422de0d3948d8735fa9"
  },
  {
- "model_format": "pytorch",
- "model_size_in_billions": 34,
+ "model_format": "awq",
+ "model_size_in_billions": 9,
  "quantizations": [
- "4-bit",
- "8-bit",
- "none"
+ "Int4"
  ],
- "model_id": "01-ai/Yi-34B-Chat",
- "model_revision": "a99ec35331cbfc9da596af7d4538fe2efecff03c"
+ "model_id": "modelscope/Yi-1.5-9B-Chat-AWQ",
+ "model_revision": "2605f388332672789eae1f422644add2901b433f"
  },
  {
- "model_format": "ggufv2",
+ "model_format": "awq",
  "model_size_in_billions": 34,
  "quantizations": [
- "Q2_K",
- "Q3_K_L",
- "Q3_K_M",
- "Q3_K_S",
- "Q4_0",
- "Q4_K_M",
- "Q4_K_S",
- "Q5_0",
- "Q5_K_M",
- "Q5_K_S",
- "Q6_K",
- "Q8_0"
+ "Int4"
  ],
- "model_id": "TheBloke/Yi-34B-Chat-GGUF",
- "model_file_name_template": "yi-34b-chat.{quantization}.gguf"
+ "model_id": "modelscope/Yi-1.5-34B-Chat-AWQ",
+ "model_revision": "26234fea6ac49d456f32f8017289021fb1087a04"
  }
  ],
  "prompt_style": {
@@ -3800,28 +4478,17 @@
  },
  {
  "version": 1,
- "context_length": 4096,
- "model_name": "Yi-1.5",
+ "context_length": 16384,
+ "model_name": "Yi-1.5-chat-16k",
  "model_lang": [
  "en",
  "zh"
  ],
  "model_ability": [
- "generate"
+ "chat"
  ],
  "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
  "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 6,
- "quantizations": [
- "4-bit",
- "8-bit",
- "none"
- ],
- "model_id": "01-ai/Yi-1.5-6B",
- "model_revision": "741a657c42d2081f777ce4c6c5572090f8b8c886"
- },
  {
  "model_format": "pytorch",
  "model_size_in_billions": 9,
@@ -3830,8 +4497,8 @@
  "8-bit",
  "none"
  ],
- "model_id": "01-ai/Yi-1.5-9B",
- "model_revision": "9a6839c5b9db3dbb245fb98a072bfabc242621f2"
+ "model_id": "01-ai/Yi-1.5-9B-Chat-16K",
+ "model_revision": "551220fb24d69b6bfec5defceeb160395ce5da8d"
  },
  {
  "model_format": "pytorch",
@@ -3841,56 +4508,48 @@
  "8-bit",
  "none"
  ],
- "model_id": "01-ai/Yi-1.5-34B",
- "model_revision": "4f83007957ec3eec76d87df19ad061eb0f57b5c5"
- }
- ]
- },
- {
- "version": 1,
- "context_length": 4096,
- "model_name": "Yi-1.5-chat",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat"
- ],
- "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 6,
- "quantizations": [
- "4-bit",
- "8-bit",
- "none"
- ],
- "model_id": "01-ai/Yi-1.5-6B-Chat",
- "model_revision": "d68dab90947a3c869e28c9cb2806996af99a6080"
+ "model_id": "01-ai/Yi-1.5-34B-Chat-16K",
+ "model_revision": "dfdbc67be750972bfcc1ac7ffd7fe48689c856fd"
  },
  {
- "model_format": "pytorch",
+ "model_format": "ggufv2",
  "model_size_in_billions": 9,
  "quantizations": [
- "4-bit",
- "8-bit",
- "none"
+ "Q2_K",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
  ],
- "model_id": "01-ai/Yi-1.5-9B-Chat",
- "model_revision": "1dc6e2b8dcfc12b95bede8dec67e6b6332ac64c6"
+ "model_id": "QuantFactory/Yi-1.5-9B-Chat-16K-GGUF",
+ "model_file_name_template": "Yi-1.5-9B-Chat-16K.{quantization}.gguf"
  },
  {
- "model_format": "pytorch",
+ "model_format": "ggufv2",
  "model_size_in_billions": 34,
  "quantizations": [
- "4-bit",
- "8-bit",
- "none"
+ "Q2_K",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
  ],
- "model_id": "01-ai/Yi-1.5-34B-Chat",
- "model_revision": "fa695ee438bfcd0ec2b378fa1c7e0dea1b40393e"
+ "model_id": "bartowski/Yi-1.5-34B-Chat-16K-GGUF",
+ "model_file_name_template": "Yi-1.5-34B-Chat-16K-{quantization}.gguf"
  }
  ],
  "prompt_style": {
@@ -5063,6 +5722,48 @@
  ]
  }
  },
+ {
+ "version":1,
+ "context_length":2048,
+ "model_name":"MiniCPM-Llama3-V-2_5",
+ "model_lang":[
+ "en",
+ "zh"
+ ],
+ "model_ability":[
+ "chat",
+ "vision"
+ ],
+ "model_description":"MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters.",
+ "model_specs":[
+ {
+ "model_format":"pytorch",
+ "model_size_in_billions":8,
+ "quantizations":[
+ "none"
+ ],
+ "model_id":"openbmb/MiniCPM-Llama3-V-2_5",
+ "model_revision":"285a637ba8a30a0660dfcccad16f9a864f75abfd"
+ },
+ {
+ "model_format":"pytorch",
+ "model_size_in_billions":8,
+ "quantizations":[
+ "int4"
+ ],
+ "model_id":"openbmb/MiniCPM-Llama3-V-2_5-{quantization}",
+ "model_revision":"f92aff28552de35de3be204e8fe292dd4824e544"
+ }
+ ],
+ "prompt_style":{
+ "style_name":"OmniLMM",
+ "system_prompt":"The role of first msg should be user",
+ "roles":[
+ "user",
+ "assistant"
+ ]
+ }
+ },
  {
  "version": 1,
  "context_length": 4096,
@@ -6009,23 +6710,32 @@
  ],
  "model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
  "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 2,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "OpenGVLab/Mini-InternVL-Chat-2B-V1-5",
+ "model_revision": "ce3f67acff17281bacbf4b156f402a0580fb9605"
+ },
  {
- "model_format": "pytorch",
- "model_size_in_billions": 26,
- "quantizations": [
- "none"
- ],
- "model_id": "OpenGVLab/InternVL-Chat-V1-5",
- "model_revision": "e822119e5806946ce128043023a73d715ecabf8d"
+ "model_format": "pytorch",
+ "model_size_in_billions": 26,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "OpenGVLab/InternVL-Chat-V1-5",
+ "model_revision": "e822119e5806946ce128043023a73d715ecabf8d"
  },
  {
- "model_format": "pytorch",
- "model_size_in_billions": 26,
- "quantizations": [
- "Int8"
- ],
- "model_id": "OpenGVLab/InternVL-Chat-V1-5-{quantization}",
- "model_revision": "acaaed06937c603ab04f084216ecb0268160f538"
+ "model_format": "pytorch",
+ "model_size_in_billions": 26,
+ "quantizations": [
+ "Int8"
+ ],
+ "model_id": "OpenGVLab/InternVL-Chat-V1-5-{quantization}",
+ "model_revision": "acaaed06937c603ab04f084216ecb0268160f538"
  }
  ],
  "prompt_style": {
@@ -6043,11 +6753,11 @@
  "<|im_end|>"
  ]
  }
- },
+ },
  {
  "version": 1,
- "context_length": 32768,
- "model_name": "mini-internvl-chat",
+ "context_length": 8192,
+ "model_name": "cogvlm2",
  "model_lang": [
  "en",
  "zh"
6056
6766
  "chat",
6057
6767
  "vision"
6058
6768
  ],
6059
- "model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
6769
+ "model_description": "CogVLM2 have achieved good results in many lists compared to the previous generation of CogVLM open source models. Its excellent performance can compete with some non-open source models.",
6060
6770
  "model_specs": [
6061
- {
6062
- "model_format": "pytorch",
6063
- "model_size_in_billions": 2,
6064
- "quantizations": [
6065
- "none"
6066
- ],
6067
- "model_id": "OpenGVLab/Mini-InternVL-Chat-2B-V1-5",
6068
- "model_revision": "ce3f67acff17281bacbf4b156f402a0580fb9605"
6069
- }
6771
+ {
6772
+ "model_format": "pytorch",
6773
+ "model_size_in_billions": 20,
6774
+ "quantizations": [
6775
+ "none"
6776
+ ],
6777
+ "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B",
6778
+ "model_revision": "d88b352bce5ee58a289b1ac8328553eb31efa2ef"
6779
+ },
6780
+ {
6781
+ "model_format": "pytorch",
6782
+ "model_size_in_billions": 20,
6783
+ "quantizations": [
6784
+ "int4"
6785
+ ],
6786
+ "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B-{quantization}",
6787
+ "model_revision": "7863e362174f4718c2fe9cba4befd0b580a3194f"
6788
+ }
6070
6789
  ],
6071
6790
  "prompt_style": {
6072
- "style_name": "INTERNLM2",
6073
- "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
6074
- "roles": [
6075
- "<|im_start|>user",
6076
- "<|im_start|>assistant"
6791
+ "style_name": "LLAMA3",
6792
+ "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
6793
+ "roles": [
6794
+ "user",
6795
+ "assistant"
6796
+ ],
6797
+ "intra_message_sep": "\n\n",
6798
+ "inter_message_sep": "<|eot_id|>",
6799
+ "stop_token_ids": [
6800
+ 128001,
6801
+ 128009
6802
+ ],
6803
+ "stop": [
6804
+ "<|end_of_text|>",
6805
+ "<|eot_id|>"
6806
+ ]
6807
+ }
6808
+ },
6809
+ {
6810
+ "version": 1,
6811
+ "context_length": 8192,
6812
+ "model_name": "telechat",
6813
+ "model_lang": [
6814
+ "en",
6815
+ "zh"
6816
+ ],
6817
+ "model_ability": [
6818
+ "chat"
6819
+ ],
6820
+ "model_description": "The TeleChat is a large language model developed and trained by China Telecom Artificial Intelligence Technology Co., LTD. The 7B model base is trained with 1.5 trillion Tokens and 3 trillion Tokens and Chinese high-quality corpus.",
6821
+ "model_specs": [
6822
+ {
6823
+ "model_format": "pytorch",
6824
+ "model_size_in_billions": 7,
6825
+ "quantizations": [
6826
+ "4-bit",
6827
+ "8-bit",
6828
+ "none"
6077
6829
  ],
6078
- "intra_message_sep": "<|im_end|>",
6079
- "stop_token_ids": [
6080
- 92542
6830
+ "model_id": "Tele-AI/telechat-7B"
6831
+ },
6832
+ {
6833
+ "model_format": "gptq",
6834
+ "model_size_in_billions": 7,
6835
+ "quantizations": [
6836
+ "int4",
6837
+ "int8"
6081
6838
  ],
6082
- "stop": [
6083
- "<|im_end|>"
6084
- ]
6839
+ "model_id": "Tele-AI/telechat-7B-{quantization}"
6840
+ },
6841
+ {
6842
+ "model_format": "pytorch",
6843
+ "model_size_in_billions": 12,
6844
+ "quantizations": [
6845
+ "4-bit",
6846
+ "8-bit",
6847
+ "none"
6848
+ ],
6849
+ "model_id": "Tele-AI/TeleChat-12B"
6850
+ },
6851
+ {
6852
+ "model_format": "gptq",
6853
+ "model_size_in_billions": 12,
6854
+ "quantizations": [
6855
+ "int4",
6856
+ "int8"
6857
+ ],
6858
+ "model_id": "Tele-AI/TeleChat-12B-{quantization}"
6859
+ },
6860
+ {
6861
+ "model_format": "pytorch",
6862
+ "model_size_in_billions": 52,
6863
+ "quantizations": [
6864
+ "4-bit",
6865
+ "8-bit",
6866
+ "none"
6867
+ ],
6868
+ "model_id": "Tele-AI/TeleChat-52B"
6869
+ }
6870
+ ],
6871
+ "prompt_style": {
6872
+ "style_name": "NO_COLON_TWO",
6873
+ "system_prompt": "You are a helpful assistant.",
6874
+ "roles": [
6875
+ "<_user>",
6876
+ "<_bot>"
6877
+ ],
6878
+ "intra_message_sep": "",
6879
+ "inter_message_sep": "",
6880
+ "stop": [
6881
+ "<_end>",
6882
+ "<_start>"
6883
+ ],
6884
+ "stop_token_ids": [
6885
+ 160133,
6886
+ 160132
6887
+ ]
6085
6888
  }
6086
- }
6889
+ }
6087
6890
  ]
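The final hunks rename the tail of mini-internvl-chat into the new cogvlm2 entry (backed by the new `cogvlm2.py` in the file list) and add telechat with pytorch and GPTQ specs at 7B, 12B, and 52B. Telechat's quantization labels are lowercase ("int4"/"int8"), unlike the Qwen2 and Yi specs above, and the value passed at launch has to match the registry exactly. A hedged closing sketch, under the same endpoint and engine-name assumptions as the earlier examples:

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    uid = client.launch_model(
        model_name="telechat",
        model_engine="transformers",  # assumption
        model_format="gptq",
        model_size_in_billions=12,
        quantization="int4",          # lowercase, exactly as this entry spells it
    )
    print(client.get_model(uid).chat("Hello"))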