xinference 0.9.3__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (64)
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +47 -18
  3. xinference/api/oauth2/types.py +1 -0
  4. xinference/api/restful_api.py +16 -11
  5. xinference/client/restful/restful_client.py +12 -2
  6. xinference/conftest.py +13 -2
  7. xinference/constants.py +2 -0
  8. xinference/core/supervisor.py +32 -1
  9. xinference/core/worker.py +139 -20
  10. xinference/deploy/cmdline.py +119 -20
  11. xinference/model/llm/__init__.py +6 -0
  12. xinference/model/llm/llm_family.json +711 -10
  13. xinference/model/llm/llm_family_modelscope.json +557 -7
  14. xinference/model/llm/pytorch/chatglm.py +2 -1
  15. xinference/model/llm/pytorch/core.py +2 -0
  16. xinference/model/llm/pytorch/deepseek_vl.py +232 -0
  17. xinference/model/llm/pytorch/internlm2.py +2 -1
  18. xinference/model/llm/pytorch/omnilmm.py +153 -0
  19. xinference/model/llm/sglang/__init__.py +13 -0
  20. xinference/model/llm/sglang/core.py +365 -0
  21. xinference/model/llm/utils.py +46 -13
  22. xinference/model/llm/vllm/core.py +10 -0
  23. xinference/thirdparty/deepseek_vl/__init__.py +31 -0
  24. xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
  25. xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
  26. xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
  27. xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
  28. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
  29. xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
  30. xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
  31. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
  32. xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
  33. xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
  34. xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
  35. xinference/thirdparty/omnilmm/__init__.py +0 -0
  36. xinference/thirdparty/omnilmm/chat.py +216 -0
  37. xinference/thirdparty/omnilmm/constants.py +4 -0
  38. xinference/thirdparty/omnilmm/conversation.py +332 -0
  39. xinference/thirdparty/omnilmm/model/__init__.py +1 -0
  40. xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
  41. xinference/thirdparty/omnilmm/model/resampler.py +166 -0
  42. xinference/thirdparty/omnilmm/model/utils.py +563 -0
  43. xinference/thirdparty/omnilmm/train/__init__.py +13 -0
  44. xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
  45. xinference/thirdparty/omnilmm/utils.py +134 -0
  46. xinference/web/ui/build/asset-manifest.json +3 -3
  47. xinference/web/ui/build/index.html +1 -1
  48. xinference/web/ui/build/static/js/main.98516614.js +3 -0
  49. xinference/web/ui/build/static/js/main.98516614.js.map +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +1 -0
  54. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/METADATA +21 -5
  55. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/RECORD +60 -31
  56. xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
  57. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
  58. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
  59. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
  60. /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.98516614.js.LICENSE.txt} +0 -0
  61. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/LICENSE +0 -0
  62. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/WHEEL +0 -0
  63. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/entry_points.txt +0 -0
  64. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/top_level.txt +0 -0
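
The headline changes in this release are a new SGLang backend (xinference/model/llm/sglang/), vendored DeepSeek-VL and OmniLMM code under xinference/thirdparty/, and a batch of new model registrations in the llm_family JSON files, shown in the hunks below. As a minimal sketch (assuming xinference 0.10.0 with a supervisor already running at the default endpoint; the argument values mirror the new JSON specs), one of the newly registered models can be launched through the Python client:

    # Minimal sketch, assuming a supervisor at the default endpoint; the
    # argument values mirror the new JSON specs shown in the hunks below.
    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    uid = client.launch_model(
        model_name="deepseek-vl-chat",  # registered in this release
        model_format="pytorch",
        model_size_in_billions=7,
        quantization="none",
    )
    model = client.get_model(uid)
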
@@ -338,7 +338,7 @@
         64797,
         2
       ],
-      "stop":[
+      "stop": [
         "<|user|>",
         "<|observation|>"
       ]
@@ -382,13 +382,56 @@
         64797,
         2
       ],
-      "stop":[
+      "stop": [
+        "<|user|>",
+        "<|observation|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "chatglm3-128k",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/chatglm3-6b-128k",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATGLM3",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        64795,
+        64797,
+        2
+      ],
+      "stop": [
         "<|user|>",
         "<|observation|>"
       ]
     }
   },
-
   {
     "version": 1,
     "context_length": 2048,
@@ -728,6 +771,74 @@
       }
     ]
   },
+  {
+    "version": 1,
+    "context_length": 8194,
+    "model_name": "codeshell",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University. ",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "WisdomShell/CodeShell-7B",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      }
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 8194,
+    "model_name": "codeshell-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "WisdomShell/CodeShell-7B-Chat",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CodeShell",
+      "system_prompt": "",
+      "roles": [
+        "## human:",
+        "## assistant: "
+      ],
+      "intra_message_sep": "",
+      "inter_message_sep": "",
+      "stop_token_ids": [
+        70000
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "|||",
+        "|<end>|"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 100000,
@@ -970,7 +1081,11 @@
     "context_length": 32768,
     "model_name": "mixtral-v0.1",
     "model_lang": [
-      "en", "fr", "it", "de", "es"
+      "en",
+      "fr",
+      "it",
+      "de",
+      "es"
     ],
     "model_ability": [
       "generate"
@@ -996,7 +1111,11 @@
     "context_length": 32768,
     "model_name": "mixtral-instruct-v0.1",
     "model_lang": [
-      "en", "fr", "it", "de", "es"
+      "en",
+      "fr",
+      "it",
+      "de",
+      "es"
     ],
     "model_ability": [
       "chat"
@@ -1929,7 +2048,10 @@
         "model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf",
         "model_file_name_split_template": "qwen1_5-72b-chat-{quantization}.gguf.{part}",
         "quantization_parts": {
-          "q4_k_m": ["a", "b"]
+          "q4_k_m": [
+            "a",
+            "b"
+          ]
         }
       }
     ],
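
Beyond the reformatting, this hunk shows how split GGUF downloads are assembled: each part name from quantization_parts is substituted into model_file_name_split_template. Expanding the template deterministically:

    # Expanding the split-file template shown above.
    template = "qwen1_5-72b-chat-{quantization}.gguf.{part}"
    parts = {"q4_k_m": ["a", "b"]}
    files = [template.format(quantization="q4_k_m", part=p) for p in parts["q4_k_m"]]
    # -> ['qwen1_5-72b-chat-q4_k_m.gguf.a', 'qwen1_5-72b-chat-q4_k_m.gguf.b']
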
@@ -1953,6 +2075,53 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "deepseek-vl-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "1_3",
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl-7b-chat",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "DEEPSEEK_CHAT",
+      "system_prompt": "<|begin▁of▁sentence|>",
+      "roles": [
+        "User",
+        "Assistant"
+      ],
+      "intra_message_sep": "\n\n",
+      "inter_message_sep": "<|end▁of▁sentence|>",
+      "stop": [
+        "<|end▁of▁sentence|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 4096,
@@ -2008,7 +2177,8 @@
     "context_length": 4096,
     "model_name": "deepseek-coder-instruct",
     "model_lang": [
-      "en", "zh"
+      "en",
+      "zh"
     ],
     "model_ability": [
       "chat"
@@ -2395,5 +2565,385 @@
         "<start_of_turn>"
       ]
     }
+  },
+  {
+    "version":1,
+    "context_length":2048,
+    "model_name":"OmniLMM",
+    "model_lang":[
+      "en",
+      "zh"
+    ],
+    "model_ability":[
+      "chat",
+      "vision"
+    ],
+    "model_description":"OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
+    "model_specs":[
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":3,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"OpenBMB/MiniCPM-V",
+        "model_hub":"modelscope",
+        "model_revision":"master"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":12,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"OpenBMB/OmniLMM-12B",
+        "model_hub":"modelscope",
+        "model_revision":"master"
+      }
+    ],
+    "prompt_style":{
+      "style_name":"OmniLMM",
+      "system_prompt":"The role of first msg should be user",
+      "roles":[
+        "user",
+        "assistant"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "minicpm-2b-sft-bf16",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "OpenBMB/miniCPM-bf16",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "MINICPM-2B",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        1,
+        2
+      ],
+      "stop": [
+        "<s>",
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "minicpm-2b-sft-fp32",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "OpenBMB/MiniCPM-2B-sft-fp32",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "MINICPM-2B",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        1,
+        2
+      ],
+      "stop": [
+        "<s>",
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "minicpm-2b-dpo-bf16",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "OpenBMB/MiniCPM-2B-dpo-bf16",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "MINICPM-2B",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        1,
+        2
+      ],
+      "stop": [
+        "<s>",
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "minicpm-2b-dpo-fp16",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "OpenBMB/MiniCPM-2B-dpo-fp16",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "MINICPM-2B",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        1,
+        2
+      ],
+      "stop": [
+        "<s>",
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "minicpm-2b-dpo-fp32",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "OpenBMB/MiniCPM-2B-dpo-fp32",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "MINICPM-2B",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        1,
+        2
+      ],
+      "stop": [
+        "<s>",
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 2048,
+    "model_name": "aquila2",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "Aquila2 series models are the base language models",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "BAAI/Aquila2-34B",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "BAAI/Aquila2-70B-Expr",
+        "model_revision": "master"
+      }
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 2048,
+    "model_name": "aquila2-chat",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "Aquila2-chat series models are the chat models",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "BAAI/AquilaChat2-34B",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "BAAI/AquilaChat2-34B-Int4-GPTQ",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "BAAI/AquilaChat2-70B-Expr",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "ADD_COLON_SINGLE",
+      "intra_message_sep": "\n",
+      "system_prompt": "",
+      "roles": [
+        "USER",
+        "ASSISTANT"
+      ],
+      "stop_token_ids": [
+        100006,
+        100007
+      ],
+      "stop": [
+        "[CLS]",
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 16384,
+    "model_name": "aquila2-chat-16k",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "AquilaChat2-16k series models are the long-text chat models",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "BAAI/AquilaChat2-34B-16K",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "ADD_COLON_SINGLE",
+      "intra_message_sep": "\n",
+      "system_prompt": "",
+      "roles": [
+        "USER",
+        "ASSISTANT"
+      ],
+      "stop_token_ids": [
+        100006,
+        100007
+      ],
+      "stop": [
+        "[CLS]",
+        "</s>"
+      ]
+    }
   }
 ]
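
Every entry in this hunk points at ModelScope ("model_hub": "modelscope"), so these specs are resolved when the xinference service is configured to download from ModelScope rather than Hugging Face. A sketch, assuming the XINFERENCE_MODEL_SRC setting is applied where the service runs:

    # Sketch: the download hub is selected in the service's environment, e.g.
    #   XINFERENCE_MODEL_SRC=modelscope xinference-local --host 0.0.0.0 --port 9997
    # after which a client launch resolves the ModelScope specs above.
    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    uid = client.launch_model(
        model_name="minicpm-2b-dpo-bf16",
        model_format="pytorch",
        model_size_in_billions=2,
        quantization="none",
    )
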
@@ -148,6 +148,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):

         def _stream_generator():
             last_chunk_text_length = 0
+            chunk_id = "chat-" + str(uuid.uuid1())
             for chunk_text, _ in self._model.stream_chat(
                 self._tokenizer, prompt, chat_history, **kwargs
             ):
@@ -157,7 +158,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):
                     text=chunk_text, index=0, logprobs=None, finish_reason=None
                 )
                 yield CompletionChunk(
-                    id=str(uuid.uuid1()),
+                    id=chunk_id,
                     object="text_completion",
                     created=int(time.time()),
                     model=self.model_uid,
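
Taken together, the two chatglm.py hunks fix streamed completion ids: previously each CompletionChunk minted a fresh uuid, so the chunks of a single response could not be correlated; now one id is minted per stream, matching OpenAI-style streaming. In outline (illustrative, not the full xinference code):

    # One id per stream, shared by every chunk of that response.
    import time
    import uuid

    def _stream_generator(chunk_texts, model_uid):
        chunk_id = "chat-" + str(uuid.uuid1())  # minted once, before the loop
        for text in chunk_texts:
            yield {
                "id": chunk_id,  # identical across all chunks of this stream
                "object": "text_completion",
                "created": int(time.time()),
                "model": model_uid,
                "choices": [
                    {"text": text, "index": 0, "logprobs": None, "finish_reason": None}
                ],
            }
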
@@ -465,7 +465,9 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
             "llama-2-chat",
             "internlm2-chat",
             "qwen-vl-chat",
+            "OmniLMM",
             "yi-vl-chat",
+            "deepseek-vl-chat",
         ]:
             return False
         if "chat" not in llm_family.model_ability: