xinference 0.12.3__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (101)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +56 -8
  3. xinference/client/restful/restful_client.py +49 -4
  4. xinference/core/model.py +36 -4
  5. xinference/core/scheduler.py +2 -0
  6. xinference/core/supervisor.py +132 -15
  7. xinference/core/worker.py +239 -53
  8. xinference/deploy/cmdline.py +5 -0
  9. xinference/deploy/utils.py +33 -2
  10. xinference/model/audio/chattts.py +6 -6
  11. xinference/model/audio/core.py +23 -15
  12. xinference/model/core.py +12 -3
  13. xinference/model/embedding/core.py +25 -16
  14. xinference/model/flexible/__init__.py +40 -0
  15. xinference/model/flexible/core.py +228 -0
  16. xinference/model/flexible/launchers/__init__.py +15 -0
  17. xinference/model/flexible/launchers/transformers_launcher.py +63 -0
  18. xinference/model/flexible/utils.py +33 -0
  19. xinference/model/image/core.py +18 -14
  20. xinference/model/image/custom.py +1 -1
  21. xinference/model/llm/__init__.py +5 -2
  22. xinference/model/llm/core.py +3 -2
  23. xinference/model/llm/ggml/llamacpp.py +1 -10
  24. xinference/model/llm/llm_family.json +292 -36
  25. xinference/model/llm/llm_family.py +102 -53
  26. xinference/model/llm/llm_family_modelscope.json +247 -27
  27. xinference/model/llm/mlx/__init__.py +13 -0
  28. xinference/model/llm/mlx/core.py +408 -0
  29. xinference/model/llm/pytorch/chatglm.py +2 -9
  30. xinference/model/llm/pytorch/cogvlm2.py +206 -21
  31. xinference/model/llm/pytorch/core.py +213 -120
  32. xinference/model/llm/pytorch/glm4v.py +171 -15
  33. xinference/model/llm/pytorch/qwen_vl.py +168 -7
  34. xinference/model/llm/pytorch/utils.py +53 -62
  35. xinference/model/llm/utils.py +28 -7
  36. xinference/model/rerank/core.py +29 -25
  37. xinference/thirdparty/deepseek_vl/serve/__init__.py +13 -0
  38. xinference/thirdparty/deepseek_vl/serve/app_deepseek.py +510 -0
  39. xinference/thirdparty/deepseek_vl/serve/app_modules/__init__.py +13 -0
  40. xinference/thirdparty/deepseek_vl/serve/app_modules/gradio_utils.py +94 -0
  41. xinference/thirdparty/deepseek_vl/serve/app_modules/overwrites.py +81 -0
  42. xinference/thirdparty/deepseek_vl/serve/app_modules/presets.py +96 -0
  43. xinference/thirdparty/deepseek_vl/serve/app_modules/utils.py +229 -0
  44. xinference/thirdparty/deepseek_vl/serve/inference.py +170 -0
  45. xinference/types.py +0 -1
  46. xinference/web/ui/build/asset-manifest.json +3 -3
  47. xinference/web/ui/build/index.html +1 -1
  48. xinference/web/ui/build/static/js/main.95c1d652.js +3 -0
  49. xinference/web/ui/build/static/js/main.95c1d652.js.map +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +1 -0
  65. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/METADATA +10 -11
  66. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/RECORD +71 -69
  67. xinference/model/llm/ggml/chatglm.py +0 -457
  68. xinference/thirdparty/ChatTTS/__init__.py +0 -1
  69. xinference/thirdparty/ChatTTS/core.py +0 -200
  70. xinference/thirdparty/ChatTTS/experimental/__init__.py +0 -0
  71. xinference/thirdparty/ChatTTS/experimental/llm.py +0 -40
  72. xinference/thirdparty/ChatTTS/infer/__init__.py +0 -0
  73. xinference/thirdparty/ChatTTS/infer/api.py +0 -125
  74. xinference/thirdparty/ChatTTS/model/__init__.py +0 -0
  75. xinference/thirdparty/ChatTTS/model/dvae.py +0 -155
  76. xinference/thirdparty/ChatTTS/model/gpt.py +0 -265
  77. xinference/thirdparty/ChatTTS/utils/__init__.py +0 -0
  78. xinference/thirdparty/ChatTTS/utils/gpu_utils.py +0 -23
  79. xinference/thirdparty/ChatTTS/utils/infer_utils.py +0 -141
  80. xinference/thirdparty/ChatTTS/utils/io_utils.py +0 -14
  81. xinference/web/ui/build/static/js/main.77dd47c3.js +0 -3
  82. xinference/web/ui/build/static/js/main.77dd47c3.js.map +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/0cd591866aa345566e0b63fb51ff2043e163a770af6fdc2f3bad395d046353e2.json +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/37c1476717199863bbba1530e3513a9368f8f73001b75b4a85c2075956308027.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/3da7d55e87882a4af923e187b1351160e34ca102f589086439c15131a227fb6e.json +0 -1
  86. xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +0 -1
  87. xinference/web/ui/node_modules/.cache/babel-loader/46edc1fe657dfedb2e673148332bb442c6eb98f09f2592c389209e376510afa5.json +0 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +0 -1
  89. xinference/web/ui/node_modules/.cache/babel-loader/72bcecc71c5267250edeb89608859d449b586f13ff9923a5e70e7172976ec403.json +0 -1
  90. xinference/web/ui/node_modules/.cache/babel-loader/82db357f3fd5b32215d747ee593f69ff06c95ad6cde37f71a96c8290aaab64c0.json +0 -1
  91. xinference/web/ui/node_modules/.cache/babel-loader/935efd2867664c58230378fdf2ff1ea85e58d853b7214014e20dfbca8dab7b05.json +0 -1
  92. xinference/web/ui/node_modules/.cache/babel-loader/bc6da27195ec4607bb472bf61f97c928ad4966fa64e4c2247661bedb7400abba.json +0 -1
  93. xinference/web/ui/node_modules/.cache/babel-loader/c2abe75f04ad82fba68f35ed9cbe2e287762c876684fddccccfa73f739489b65.json +0 -1
  94. xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +0 -1
  95. xinference/web/ui/node_modules/.cache/babel-loader/f118f99c22b713c678c1209c4e1dd43fe86e3f6e801a4c0c35d3bbf41fd05fe6.json +0 -1
  96. xinference/web/ui/node_modules/.cache/babel-loader/f51bf63ddaa7afd125ef2254a105789333eecc1c94fdf5157a9b88ef7ad0a5bd.json +0 -1
  97. /xinference/web/ui/build/static/js/{main.77dd47c3.js.LICENSE.txt → main.95c1d652.js.LICENSE.txt} +0 -0
  98. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/LICENSE +0 -0
  99. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/WHEEL +0 -0
  100. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/entry_points.txt +0 -0
  101. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/top_level.txt +0 -0
@@ -304,21 +304,6 @@
  ],
  "model_description": "ChatGLM2 is the second generation of ChatGLM, still open-source and trained on Chinese and English data.",
  "model_specs": [
- {
- "model_format": "ggmlv3",
- "model_size_in_billions": 6,
- "quantizations": [
- "q4_0",
- "q4_1",
- "q5_0",
- "q5_1",
- "q8_0"
- ],
- "model_hub": "modelscope",
- "model_id": "Xorbits/chatglm2-6B-GGML",
- "model_revision": "v1.0.0",
- "model_file_name_template": "chatglm2-ggml-{quantization}.bin"
- },
  {
  "model_format": "pytorch",
  "model_size_in_billions": 6,
@@ -392,17 +377,6 @@
  ],
  "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
  "model_specs": [
- {
- "model_format": "ggmlv3",
- "model_size_in_billions": 6,
- "quantizations": [
- "q4_0"
- ],
- "model_hub": "modelscope",
- "model_id": "Xorbits/chatglm3-ggml",
- "model_revision": "v1.0.0",
- "model_file_name_template": "chatglm3-ggml-{quantization}.bin"
- },
  {
  "model_format": "pytorch",
  "model_size_in_billions": 6,
@@ -547,6 +521,33 @@
  "model_hub": "modelscope",
  "model_id": "ZhipuAI/glm-4-9b-chat",
  "model_revision": "master"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "Q2_K",
+ "IQ3_XS",
+ "IQ3_S",
+ "IQ3_M",
+ "Q3_K_S",
+ "Q3_K_L",
+ "Q3_K",
+ "IQ4_XS",
+ "IQ4_NL",
+ "Q4_K_S",
+ "Q4_K",
+ "Q5_K_S",
+ "Q5_K",
+ "Q6_K",
+ "Q8_0",
+ "BF16",
+ "FP16"
+ ],
+ "model_file_name_template": "glm-4-9b-chat.{quantization}.gguf",
+ "model_hub": "modelscope",
+ "model_id": "LLM-Research/glm-4-9b-chat-GGUF",
+ "model_revision": "master"
  }
  ],
  "prompt_style": {
@@ -593,6 +594,33 @@
  "model_hub": "modelscope",
  "model_id": "ZhipuAI/glm-4-9b-chat-1m",
  "model_revision": "master"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "Q2_K",
+ "IQ3_XS",
+ "IQ3_S",
+ "IQ3_M",
+ "Q3_K_S",
+ "Q3_K_L",
+ "Q3_K",
+ "IQ4_XS",
+ "IQ4_NL",
+ "Q4_K_S",
+ "Q4_K",
+ "Q5_K_S",
+ "Q5_K",
+ "Q6_K",
+ "Q8_0",
+ "BF16",
+ "FP16"
+ ],
+ "model_file_name_template": "glm-4-9b-chat-1m.{quantization}.gguf",
+ "model_hub": "modelscope",
+ "model_id": "LLM-Research/glm-4-9b-chat-1m-GGUF",
+ "model_revision": "master"
  }
  ],
  "prompt_style": {
@@ -2921,6 +2949,33 @@
  "model_id": "qwen/Qwen2-72B-Instruct-AWQ",
  "model_hub": "modelscope"
  },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": "0_5",
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "qwen/Qwen2-0.5B-Instruct-MLX",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "qwen/Qwen2-1.5B-Instruct-MLX",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "qwen/Qwen2-7B-Instruct-MLX",
+ "model_hub": "modelscope"
+ },
  {
  "model_format": "ggufv2",
  "model_size_in_billions": "0_5",
@@ -2938,6 +2993,85 @@
  "model_id": "qwen/Qwen2-0.5B-Instruct-GGUF",
  "model_file_name_template": "qwen2-0_5b-instruct-{quantization}.gguf",
  "model_hub": "modelscope"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0",
+ "fp16"
+ ],
+ "model_id": "qwen/Qwen2-1.5B-Instruct-GGUF",
+ "model_file_name_template": "qwen2-1_5b-instruct-{quantization}.gguf",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0",
+ "fp16"
+ ],
+ "model_id": "qwen/Qwen2-7B-Instruct-GGUF",
+ "model_file_name_template": "qwen2-7b-instruct-{quantization}.gguf",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 72,
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0",
+ "fp16"
+ ],
+ "model_id": "qwen/Qwen2-72B-Instruct-GGUF",
+ "model_hub": "modelscope",
+ "model_file_name_template": "qwen2-72b-instruct-{quantization}.gguf",
+ "model_file_name_split_template": "qwen2-72b-instruct-{quantization}-{part}.gguf",
+ "quantization_parts": {
+ "q5_0": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "q5_k_m": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "q6_k": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "q8_0": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "fp16": [
+ "00001-of-00004",
+ "00002-of-00004",
+ "00003-of-00004",
+ "00004-of-00004"
+ ]
+ }
  }
  ],
  "prompt_style": {
@@ -2993,6 +3127,35 @@
  ],
  "model_id": "qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4",
  "model_hub": "modelscope"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 14,
+ "quantizations": [
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0",
+ "fp16"
+ ],
+ "model_id": "qwen/Qwen2-57B-A14B-Instruct-GGUF",
+ "model_hub": "modelscope",
+ "model_file_name_template": "qwen2-57b-a14b-instruct-{quantization}.gguf",
+ "model_file_name_split_template": "qwen2-57b-a14b-instruct-{quantization}-{part}.gguf",
+ "quantization_parts": {
+ "q8_0": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "fp16": [
+ "00001-of-00003",
+ "00002-of-00003",
+ "00003-of-00003"
+ ]
+ }
  }
  ],
  "prompt_style": {
@@ -3402,6 +3565,16 @@
  "roles": [
  "user",
  "assistant"
+ ],
+ "stop_token_ids": [
+ 151643,
+ 151644,
+ 151645
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|im_start|>",
+ "<|im_end|>"
  ]
  }
  },
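The hunk above adds numeric stop_token_ids alongside their textual stop strings. A rough sketch of why a backend benefits from both, assuming a generic token-by-token loop (next_token and detokenize are hypothetical stand-ins, not xinference APIs):

STOP_TOKEN_IDS = {151643, 151644, 151645}          # ids from the hunk above
STOP_STRINGS = ("<|endoftext|>", "<|im_start|>", "<|im_end|>")

def generate(next_token, detokenize, max_new_tokens=256):
    # next_token(ids) -> int and detokenize(ids) -> str are assumed callables.
    ids, text = [], ""
    for _ in range(max_new_tokens):
        tok = next_token(ids)
        if tok in STOP_TOKEN_IDS:        # cheap check on the raw token id
            break
        ids.append(tok)
        text = detokenize(ids)
        hit = next((s for s in STOP_STRINGS if s in text), None)
        if hit is not None:              # fallback check on the decoded text
            text = text.split(hit, 1)[0]
            break
    return text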
@@ -3593,6 +3766,53 @@
  ]
  }
  },
+ {
+ "version": 1,
+ "context_length": 8192,
+ "model_name": "gemma-2-it",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "AI-ModelScope/gemma-2-9b-it",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "AI-ModelScope/gemma-2-27b-it",
+ "model_hub": "modelscope"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "gemma",
+ "roles": [
+ "user",
+ "model"
+ ],
+ "stop": [
+ "<end_of_turn>",
+ "<start_of_turn>"
+ ]
+ }
+ },
  {
  "version":1,
  "context_length":2048,
@@ -3923,7 +4143,7 @@
  "zh"
  ],
  "model_ability": [
- "generate"
+ "chat"
  ],
  "model_description": "Aquila2-chat series models are the chat models",
  "model_specs": [
@@ -0,0 +1,13 @@
+ # Copyright 2022-2023 XProbe Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.