xinference 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (59) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +47 -18
  3. xinference/api/oauth2/types.py +1 -0
  4. xinference/api/restful_api.py +9 -1
  5. xinference/client/restful/restful_client.py +12 -2
  6. xinference/conftest.py +13 -2
  7. xinference/core/supervisor.py +32 -1
  8. xinference/core/worker.py +139 -20
  9. xinference/deploy/cmdline.py +119 -20
  10. xinference/model/llm/__init__.py +4 -0
  11. xinference/model/llm/llm_family.json +627 -0
  12. xinference/model/llm/llm_family_modelscope.json +471 -0
  13. xinference/model/llm/pytorch/core.py +2 -0
  14. xinference/model/llm/pytorch/deepseek_vl.py +232 -0
  15. xinference/model/llm/pytorch/omnilmm.py +153 -0
  16. xinference/model/llm/utils.py +11 -1
  17. xinference/model/llm/vllm/core.py +3 -0
  18. xinference/thirdparty/deepseek_vl/__init__.py +31 -0
  19. xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
  20. xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
  21. xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
  22. xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
  23. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
  24. xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
  25. xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
  26. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
  27. xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
  28. xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
  29. xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
  30. xinference/thirdparty/omnilmm/__init__.py +0 -0
  31. xinference/thirdparty/omnilmm/chat.py +216 -0
  32. xinference/thirdparty/omnilmm/constants.py +4 -0
  33. xinference/thirdparty/omnilmm/conversation.py +332 -0
  34. xinference/thirdparty/omnilmm/model/__init__.py +1 -0
  35. xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
  36. xinference/thirdparty/omnilmm/model/resampler.py +166 -0
  37. xinference/thirdparty/omnilmm/model/utils.py +563 -0
  38. xinference/thirdparty/omnilmm/train/__init__.py +13 -0
  39. xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
  40. xinference/thirdparty/omnilmm/utils.py +134 -0
  41. xinference/web/ui/build/asset-manifest.json +3 -3
  42. xinference/web/ui/build/index.html +1 -1
  43. xinference/web/ui/build/static/js/main.98516614.js +3 -0
  44. xinference/web/ui/build/static/js/main.98516614.js.map +1 -0
  45. xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +1 -0
  46. xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +1 -0
  47. xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +1 -0
  49. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/METADATA +18 -5
  50. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/RECORD +55 -28
  51. xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
  52. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
  53. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
  54. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
  55. /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.98516614.js.LICENSE.txt} +0 -0
  56. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/LICENSE +0 -0
  57. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/WHEEL +0 -0
  58. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/entry_points.txt +0 -0
  59. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/top_level.txt +0 -0
@@ -388,6 +388,50 @@
388
388
  ]
389
389
  }
390
390
  },
391
+ {
392
+ "version": 1,
393
+ "context_length": 131072,
394
+ "model_name": "chatglm3-128k",
395
+ "model_lang": [
396
+ "en",
397
+ "zh"
398
+ ],
399
+ "model_ability": [
400
+ "chat"
401
+ ],
402
+ "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
403
+ "model_specs": [
404
+ {
405
+ "model_format": "pytorch",
406
+ "model_size_in_billions": 6,
407
+ "quantizations": [
408
+ "4-bit",
409
+ "8-bit",
410
+ "none"
411
+ ],
412
+ "model_hub": "modelscope",
413
+ "model_id": "ZhipuAI/chatglm3-6b-128k",
414
+ "model_revision": "master"
415
+ }
416
+ ],
417
+ "prompt_style": {
418
+ "style_name": "CHATGLM3",
419
+ "system_prompt": "",
420
+ "roles": [
421
+ "user",
422
+ "assistant"
423
+ ],
424
+ "stop_token_ids": [
425
+ 64795,
426
+ 64797,
427
+ 2
428
+ ],
429
+ "stop": [
430
+ "<|user|>",
431
+ "<|observation|>"
432
+ ]
433
+ }
434
+ },
391
435
  {
392
436
  "version": 1,
393
437
  "context_length": 2048,
@@ -2031,6 +2075,53 @@
2031
2075
  ]
2032
2076
  }
2033
2077
  },
2078
+ {
2079
+ "version": 1,
2080
+ "context_length": 4096,
2081
+ "model_name": "deepseek-vl-chat",
2082
+ "model_lang": [
2083
+ "en",
2084
+ "zh"
2085
+ ],
2086
+ "model_ability": [
2087
+ "chat",
2088
+ "vision"
2089
+ ],
2090
+ "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
2091
+ "model_specs": [
2092
+ {
2093
+ "model_format": "pytorch",
2094
+ "model_size_in_billions": "1_3",
2095
+ "quantizations": [
2096
+ "none"
2097
+ ],
2098
+ "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
2099
+ "model_hub": "modelscope"
2100
+ },
2101
+ {
2102
+ "model_format": "pytorch",
2103
+ "model_size_in_billions": 7,
2104
+ "quantizations": [
2105
+ "none"
2106
+ ],
2107
+ "model_id": "deepseek-ai/deepseek-vl-7b-chat",
2108
+ "model_hub": "modelscope"
2109
+ }
2110
+ ],
2111
+ "prompt_style": {
2112
+ "style_name": "DEEPSEEK_CHAT",
2113
+ "system_prompt": "<|begin▁of▁sentence|>",
2114
+ "roles": [
2115
+ "User",
2116
+ "Assistant"
2117
+ ],
2118
+ "intra_message_sep": "\n\n",
2119
+ "inter_message_sep": "<|end▁of▁sentence|>",
2120
+ "stop": [
2121
+ "<|end▁of▁sentence|>"
2122
+ ]
2123
+ }
2124
+ },
2034
2125
  {
2035
2126
  "version": 1,
2036
2127
  "context_length": 4096,
@@ -2474,5 +2565,385 @@
2474
2565
  "<start_of_turn>"
2475
2566
  ]
2476
2567
  }
2568
+ },
2569
+ {
2570
+ "version":1,
2571
+ "context_length":2048,
2572
+ "model_name":"OmniLMM",
2573
+ "model_lang":[
2574
+ "en",
2575
+ "zh"
2576
+ ],
2577
+ "model_ability":[
2578
+ "chat",
2579
+ "vision"
2580
+ ],
2581
+ "model_description":"OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
2582
+ "model_specs":[
2583
+ {
2584
+ "model_format":"pytorch",
2585
+ "model_size_in_billions":3,
2586
+ "quantizations":[
2587
+ "none"
2588
+ ],
2589
+ "model_id":"OpenBMB/MiniCPM-V",
2590
+ "model_hub":"modelscope",
2591
+ "model_revision":"master"
2592
+ },
2593
+ {
2594
+ "model_format":"pytorch",
2595
+ "model_size_in_billions":12,
2596
+ "quantizations":[
2597
+ "none"
2598
+ ],
2599
+ "model_id":"OpenBMB/OmniLMM-12B",
2600
+ "model_hub":"modelscope",
2601
+ "model_revision":"master"
2602
+ }
2603
+ ],
2604
+ "prompt_style":{
2605
+ "style_name":"OmniLMM",
2606
+ "system_prompt":"The role of first msg should be user",
2607
+ "roles":[
2608
+ "user",
2609
+ "assistant"
2610
+ ]
2611
+ }
2612
+ },
2613
+ {
2614
+ "version": 1,
2615
+ "context_length": 4096,
2616
+ "model_name": "minicpm-2b-sft-bf16",
2617
+ "model_lang": [
2618
+ "zh"
2619
+ ],
2620
+ "model_ability": [
2621
+ "chat"
2622
+ ],
2623
+ "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
2624
+ "model_specs": [
2625
+ {
2626
+ "model_format": "pytorch",
2627
+ "model_size_in_billions": 2,
2628
+ "quantizations": [
2629
+ "none"
2630
+ ],
2631
+ "model_hub": "modelscope",
2632
+ "model_id": "OpenBMB/miniCPM-bf16",
2633
+ "model_revision": "master"
2634
+ }
2635
+ ],
2636
+ "prompt_style": {
2637
+ "style_name": "MINICPM-2B",
2638
+ "system_prompt": "",
2639
+ "roles": [
2640
+ "user",
2641
+ "assistant"
2642
+ ],
2643
+ "stop_token_ids": [
2644
+ 1,
2645
+ 2
2646
+ ],
2647
+ "stop": [
2648
+ "<s>",
2649
+ "</s>"
2650
+ ]
2651
+ }
2652
+ },
2653
+ {
2654
+ "version": 1,
2655
+ "context_length": 4096,
2656
+ "model_name": "minicpm-2b-sft-fp32",
2657
+ "model_lang": [
2658
+ "zh"
2659
+ ],
2660
+ "model_ability": [
2661
+ "chat"
2662
+ ],
2663
+ "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
2664
+ "model_specs": [
2665
+ {
2666
+ "model_format": "pytorch",
2667
+ "model_size_in_billions": 2,
2668
+ "quantizations": [
2669
+ "none"
2670
+ ],
2671
+ "model_hub": "modelscope",
2672
+ "model_id": "OpenBMB/MiniCPM-2B-sft-fp32",
2673
+ "model_revision": "master"
2674
+ }
2675
+ ],
2676
+ "prompt_style": {
2677
+ "style_name": "MINICPM-2B",
2678
+ "system_prompt": "",
2679
+ "roles": [
2680
+ "user",
2681
+ "assistant"
2682
+ ],
2683
+ "stop_token_ids": [
2684
+ 1,
2685
+ 2
2686
+ ],
2687
+ "stop": [
2688
+ "<s>",
2689
+ "</s>"
2690
+ ]
2691
+ }
2692
+ },
2693
+ {
2694
+ "version": 1,
2695
+ "context_length": 4096,
2696
+ "model_name": "minicpm-2b-dpo-bf16",
2697
+ "model_lang": [
2698
+ "zh"
2699
+ ],
2700
+ "model_ability": [
2701
+ "chat"
2702
+ ],
2703
+ "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
2704
+ "model_specs": [
2705
+ {
2706
+ "model_format": "pytorch",
2707
+ "model_size_in_billions": 2,
2708
+ "quantizations": [
2709
+ "none"
2710
+ ],
2711
+ "model_hub": "modelscope",
2712
+ "model_id": "OpenBMB/MiniCPM-2B-dpo-bf16",
2713
+ "model_revision": "master"
2714
+ }
2715
+ ],
2716
+ "prompt_style": {
2717
+ "style_name": "MINICPM-2B",
2718
+ "system_prompt": "",
2719
+ "roles": [
2720
+ "user",
2721
+ "assistant"
2722
+ ],
2723
+ "stop_token_ids": [
2724
+ 1,
2725
+ 2
2726
+ ],
2727
+ "stop": [
2728
+ "<s>",
2729
+ "</s>"
2730
+ ]
2731
+ }
2732
+ },
2733
+ {
2734
+ "version": 1,
2735
+ "context_length": 4096,
2736
+ "model_name": "minicpm-2b-dpo-fp16",
2737
+ "model_lang": [
2738
+ "zh"
2739
+ ],
2740
+ "model_ability": [
2741
+ "chat"
2742
+ ],
2743
+ "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
2744
+ "model_specs": [
2745
+ {
2746
+ "model_format": "pytorch",
2747
+ "model_size_in_billions": 2,
2748
+ "quantizations": [
2749
+ "none"
2750
+ ],
2751
+ "model_hub": "modelscope",
2752
+ "model_id": "OpenBMB/MiniCPM-2B-dpo-fp16",
2753
+ "model_revision": "master"
2754
+ }
2755
+ ],
2756
+ "prompt_style": {
2757
+ "style_name": "MINICPM-2B",
2758
+ "system_prompt": "",
2759
+ "roles": [
2760
+ "user",
2761
+ "assistant"
2762
+ ],
2763
+ "stop_token_ids": [
2764
+ 1,
2765
+ 2
2766
+ ],
2767
+ "stop": [
2768
+ "<s>",
2769
+ "</s>"
2770
+ ]
2771
+ }
2772
+ },
2773
+ {
2774
+ "version": 1,
2775
+ "context_length": 4096,
2776
+ "model_name": "minicpm-2b-dpo-fp32",
2777
+ "model_lang": [
2778
+ "zh"
2779
+ ],
2780
+ "model_ability": [
2781
+ "chat"
2782
+ ],
2783
+ "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
2784
+ "model_specs": [
2785
+ {
2786
+ "model_format": "pytorch",
2787
+ "model_size_in_billions": 2,
2788
+ "quantizations": [
2789
+ "none"
2790
+ ],
2791
+ "model_hub": "modelscope",
2792
+ "model_id": "OpenBMB/MiniCPM-2B-dpo-fp32",
2793
+ "model_revision": "master"
2794
+ }
2795
+ ],
2796
+ "prompt_style": {
2797
+ "style_name": "MINICPM-2B",
2798
+ "system_prompt": "",
2799
+ "roles": [
2800
+ "user",
2801
+ "assistant"
2802
+ ],
2803
+ "stop_token_ids": [
2804
+ 1,
2805
+ 2
2806
+ ],
2807
+ "stop": [
2808
+ "<s>",
2809
+ "</s>"
2810
+ ]
2811
+ }
2812
+ },
2813
+ {
2814
+ "version": 1,
2815
+ "context_length": 2048,
2816
+ "model_name": "aquila2",
2817
+ "model_lang": [
2818
+ "zh"
2819
+ ],
2820
+ "model_ability": [
2821
+ "generate"
2822
+ ],
2823
+ "model_description": "Aquila2 series models are the base language models",
2824
+ "model_specs": [
2825
+ {
2826
+ "model_format": "pytorch",
2827
+ "model_size_in_billions": 34,
2828
+ "quantizations": [
2829
+ "none"
2830
+ ],
2831
+ "model_hub": "modelscope",
2832
+ "model_id": "BAAI/Aquila2-34B",
2833
+ "model_revision": "master"
2834
+ },
2835
+ {
2836
+ "model_format": "pytorch",
2837
+ "model_size_in_billions": 70,
2838
+ "quantizations": [
2839
+ "none"
2840
+ ],
2841
+ "model_hub": "modelscope",
2842
+ "model_id": "BAAI/Aquila2-70B-Expr",
2843
+ "model_revision": "master"
2844
+ }
2845
+ ]
2846
+ },
2847
+ {
2848
+ "version": 1,
2849
+ "context_length": 2048,
2850
+ "model_name": "aquila2-chat",
2851
+ "model_lang": [
2852
+ "zh"
2853
+ ],
2854
+ "model_ability": [
2855
+ "generate"
2856
+ ],
2857
+ "model_description": "Aquila2-chat series models are the chat models",
2858
+ "model_specs": [
2859
+ {
2860
+ "model_format": "pytorch",
2861
+ "model_size_in_billions": 34,
2862
+ "quantizations": [
2863
+ "none"
2864
+ ],
2865
+ "model_hub": "modelscope",
2866
+ "model_id": "BAAI/AquilaChat2-34B",
2867
+ "model_revision": "master"
2868
+ },
2869
+ {
2870
+ "model_format": "gptq",
2871
+ "model_size_in_billions": 34,
2872
+ "quantizations": [
2873
+ "Int4"
2874
+ ],
2875
+ "model_hub": "modelscope",
2876
+ "model_id": "BAAI/AquilaChat2-34B-Int4-GPTQ",
2877
+ "model_revision": "master"
2878
+ },
2879
+ {
2880
+ "model_format": "pytorch",
2881
+ "model_size_in_billions": 70,
2882
+ "quantizations": [
2883
+ "none"
2884
+ ],
2885
+ "model_hub": "modelscope",
2886
+ "model_id": "BAAI/AquilaChat2-70B-Expr",
2887
+ "model_revision": "master"
2888
+ }
2889
+ ],
2890
+ "prompt_style": {
2891
+ "style_name": "ADD_COLON_SINGLE",
2892
+ "intra_message_sep": "\n",
2893
+ "system_prompt": "",
2894
+ "roles": [
2895
+ "USER",
2896
+ "ASSISTANT"
2897
+ ],
2898
+ "stop_token_ids": [
2899
+ 100006,
2900
+ 100007
2901
+ ],
2902
+ "stop": [
2903
+ "[CLS]",
2904
+ "</s>"
2905
+ ]
2906
+ }
2907
+ },
2908
+ {
2909
+ "version": 1,
2910
+ "context_length": 16384,
2911
+ "model_name": "aquila2-chat-16k",
2912
+ "model_lang": [
2913
+ "zh"
2914
+ ],
2915
+ "model_ability": [
2916
+ "generate"
2917
+ ],
2918
+ "model_description": "AquilaChat2-16k series models are the long-text chat models",
2919
+ "model_specs": [
2920
+ {
2921
+ "model_format": "pytorch",
2922
+ "model_size_in_billions": 34,
2923
+ "quantizations": [
2924
+ "none"
2925
+ ],
2926
+ "model_hub": "modelscope",
2927
+ "model_id": "BAAI/AquilaChat2-34B-16K",
2928
+ "model_revision": "master"
2929
+ }
2930
+ ],
2931
+ "prompt_style": {
2932
+ "style_name": "ADD_COLON_SINGLE",
2933
+ "intra_message_sep": "\n",
2934
+ "system_prompt": "",
2935
+ "roles": [
2936
+ "USER",
2937
+ "ASSISTANT"
2938
+ ],
2939
+ "stop_token_ids": [
2940
+ 100006,
2941
+ 100007
2942
+ ],
2943
+ "stop": [
2944
+ "[CLS]",
2945
+ "</s>"
2946
+ ]
2947
+ }
2477
2948
  }
2478
2949
  ]
@@ -465,7 +465,9 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
465
465
  "llama-2-chat",
466
466
  "internlm2-chat",
467
467
  "qwen-vl-chat",
468
+ "OmniLMM",
468
469
  "yi-vl-chat",
470
+ "deepseek-vl-chat",
469
471
  ]:
470
472
  return False
471
473
  if "chat" not in llm_family.model_ability: