xinference 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (136) hide show
  1. xinference/_version.py +3 -3
  2. xinference/client/restful/async_restful_client.py +8 -13
  3. xinference/client/restful/restful_client.py +6 -2
  4. xinference/core/chat_interface.py +6 -4
  5. xinference/core/media_interface.py +5 -0
  6. xinference/core/model.py +1 -5
  7. xinference/core/supervisor.py +117 -68
  8. xinference/core/worker.py +49 -37
  9. xinference/deploy/test/test_cmdline.py +2 -6
  10. xinference/model/audio/__init__.py +26 -23
  11. xinference/model/audio/chattts.py +3 -2
  12. xinference/model/audio/core.py +49 -98
  13. xinference/model/audio/cosyvoice.py +3 -2
  14. xinference/model/audio/custom.py +28 -73
  15. xinference/model/audio/f5tts.py +3 -2
  16. xinference/model/audio/f5tts_mlx.py +3 -2
  17. xinference/model/audio/fish_speech.py +3 -2
  18. xinference/model/audio/funasr.py +17 -4
  19. xinference/model/audio/kokoro.py +3 -2
  20. xinference/model/audio/megatts.py +3 -2
  21. xinference/model/audio/melotts.py +3 -2
  22. xinference/model/audio/model_spec.json +572 -171
  23. xinference/model/audio/utils.py +0 -6
  24. xinference/model/audio/whisper.py +3 -2
  25. xinference/model/audio/whisper_mlx.py +3 -2
  26. xinference/model/cache_manager.py +141 -0
  27. xinference/model/core.py +6 -49
  28. xinference/model/custom.py +174 -0
  29. xinference/model/embedding/__init__.py +67 -56
  30. xinference/model/embedding/cache_manager.py +35 -0
  31. xinference/model/embedding/core.py +104 -84
  32. xinference/model/embedding/custom.py +55 -78
  33. xinference/model/embedding/embed_family.py +80 -31
  34. xinference/model/embedding/flag/core.py +21 -5
  35. xinference/model/embedding/llama_cpp/__init__.py +0 -0
  36. xinference/model/embedding/llama_cpp/core.py +234 -0
  37. xinference/model/embedding/model_spec.json +968 -103
  38. xinference/model/embedding/sentence_transformers/core.py +30 -20
  39. xinference/model/embedding/vllm/core.py +11 -5
  40. xinference/model/flexible/__init__.py +8 -2
  41. xinference/model/flexible/core.py +26 -119
  42. xinference/model/flexible/custom.py +69 -0
  43. xinference/model/flexible/launchers/image_process_launcher.py +1 -0
  44. xinference/model/flexible/launchers/modelscope_launcher.py +5 -1
  45. xinference/model/flexible/launchers/transformers_launcher.py +15 -3
  46. xinference/model/flexible/launchers/yolo_launcher.py +5 -1
  47. xinference/model/image/__init__.py +20 -20
  48. xinference/model/image/cache_manager.py +62 -0
  49. xinference/model/image/core.py +70 -182
  50. xinference/model/image/custom.py +28 -72
  51. xinference/model/image/model_spec.json +402 -119
  52. xinference/model/image/ocr/got_ocr2.py +3 -2
  53. xinference/model/image/stable_diffusion/core.py +22 -7
  54. xinference/model/image/stable_diffusion/mlx.py +6 -6
  55. xinference/model/image/utils.py +2 -2
  56. xinference/model/llm/__init__.py +71 -94
  57. xinference/model/llm/cache_manager.py +292 -0
  58. xinference/model/llm/core.py +37 -111
  59. xinference/model/llm/custom.py +88 -0
  60. xinference/model/llm/llama_cpp/core.py +5 -7
  61. xinference/model/llm/llm_family.json +16260 -8151
  62. xinference/model/llm/llm_family.py +138 -839
  63. xinference/model/llm/lmdeploy/core.py +5 -7
  64. xinference/model/llm/memory.py +3 -4
  65. xinference/model/llm/mlx/core.py +6 -8
  66. xinference/model/llm/reasoning_parser.py +3 -1
  67. xinference/model/llm/sglang/core.py +32 -14
  68. xinference/model/llm/transformers/chatglm.py +3 -7
  69. xinference/model/llm/transformers/core.py +49 -27
  70. xinference/model/llm/transformers/deepseek_v2.py +2 -2
  71. xinference/model/llm/transformers/gemma3.py +2 -2
  72. xinference/model/llm/transformers/multimodal/cogagent.py +2 -2
  73. xinference/model/llm/transformers/multimodal/deepseek_vl2.py +2 -2
  74. xinference/model/llm/transformers/multimodal/gemma3.py +2 -2
  75. xinference/model/llm/transformers/multimodal/glm4_1v.py +167 -0
  76. xinference/model/llm/transformers/multimodal/glm4v.py +2 -2
  77. xinference/model/llm/transformers/multimodal/intern_vl.py +2 -2
  78. xinference/model/llm/transformers/multimodal/minicpmv26.py +3 -3
  79. xinference/model/llm/transformers/multimodal/ovis2.py +2 -2
  80. xinference/model/llm/transformers/multimodal/qwen-omni.py +2 -2
  81. xinference/model/llm/transformers/multimodal/qwen2_audio.py +2 -2
  82. xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
  83. xinference/model/llm/transformers/opt.py +3 -7
  84. xinference/model/llm/utils.py +34 -49
  85. xinference/model/llm/vllm/core.py +77 -27
  86. xinference/model/llm/vllm/xavier/engine.py +5 -3
  87. xinference/model/llm/vllm/xavier/scheduler.py +10 -6
  88. xinference/model/llm/vllm/xavier/transfer.py +1 -1
  89. xinference/model/rerank/__init__.py +26 -25
  90. xinference/model/rerank/core.py +47 -87
  91. xinference/model/rerank/custom.py +25 -71
  92. xinference/model/rerank/model_spec.json +158 -33
  93. xinference/model/rerank/utils.py +2 -2
  94. xinference/model/utils.py +115 -54
  95. xinference/model/video/__init__.py +13 -17
  96. xinference/model/video/core.py +44 -102
  97. xinference/model/video/diffusers.py +4 -3
  98. xinference/model/video/model_spec.json +90 -21
  99. xinference/types.py +5 -3
  100. xinference/web/ui/build/asset-manifest.json +3 -3
  101. xinference/web/ui/build/index.html +1 -1
  102. xinference/web/ui/build/static/js/main.7d24df53.js +3 -0
  103. xinference/web/ui/build/static/js/main.7d24df53.js.map +1 -0
  104. xinference/web/ui/node_modules/.cache/babel-loader/2704ff66a5f73ca78b341eb3edec60154369df9d87fbc8c6dd60121abc5e1b0a.json +1 -0
  105. xinference/web/ui/node_modules/.cache/babel-loader/607dfef23d33e6b594518c0c6434567639f24f356b877c80c60575184ec50ed0.json +1 -0
  106. xinference/web/ui/node_modules/.cache/babel-loader/9be3d56173aacc3efd0b497bcb13c4f6365de30069176ee9403b40e717542326.json +1 -0
  107. xinference/web/ui/node_modules/.cache/babel-loader/9f9dd6c32c78a222d07da5987ae902effe16bcf20aac00774acdccc4de3c9ff2.json +1 -0
  108. xinference/web/ui/node_modules/.cache/babel-loader/b2ab5ee972c60d15eb9abf5845705f8ab7e1d125d324d9a9b1bcae5d6fd7ffb2.json +1 -0
  109. xinference/web/ui/src/locales/en.json +0 -1
  110. xinference/web/ui/src/locales/ja.json +0 -1
  111. xinference/web/ui/src/locales/ko.json +0 -1
  112. xinference/web/ui/src/locales/zh.json +0 -1
  113. {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/METADATA +9 -11
  114. {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/RECORD +119 -119
  115. xinference/model/audio/model_spec_modelscope.json +0 -231
  116. xinference/model/embedding/model_spec_modelscope.json +0 -293
  117. xinference/model/embedding/utils.py +0 -18
  118. xinference/model/image/model_spec_modelscope.json +0 -375
  119. xinference/model/llm/llama_cpp/memory.py +0 -457
  120. xinference/model/llm/llm_family_csghub.json +0 -56
  121. xinference/model/llm/llm_family_modelscope.json +0 -8700
  122. xinference/model/llm/llm_family_openmind_hub.json +0 -1019
  123. xinference/model/rerank/model_spec_modelscope.json +0 -85
  124. xinference/model/video/model_spec_modelscope.json +0 -184
  125. xinference/web/ui/build/static/js/main.9b12b7f9.js +0 -3
  126. xinference/web/ui/build/static/js/main.9b12b7f9.js.map +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/1460361af6975e63576708039f1cb732faf9c672d97c494d4055fc6331460be0.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/4efd8dda58fda83ed9546bf2f587df67f8d98e639117bee2d9326a9a1d9bebb2.json +0 -1
  129. xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +0 -1
  130. xinference/web/ui/node_modules/.cache/babel-loader/5b2dafe5aa9e1105e0244a2b6751807342fa86aa0144b4e84d947a1686102715.json +0 -1
  131. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +0 -1
  132. /xinference/web/ui/build/static/js/{main.9b12b7f9.js.LICENSE.txt → main.7d24df53.js.LICENSE.txt} +0 -0
  133. {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/WHEEL +0 -0
  134. {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/entry_points.txt +0 -0
  135. {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/licenses/LICENSE +0 -0
  136. {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/top_level.txt +0 -0
@@ -1,375 +0,0 @@
1
- [
2
- {
3
- "model_name": "FLUX.1-schnell",
4
- "model_family": "stable_diffusion",
5
- "model_hub": "modelscope",
6
- "model_id": "AI-ModelScope/FLUX.1-schnell",
7
- "model_revision": "master",
8
- "model_ability": [
9
- "text2image",
10
- "image2image",
11
- "inpainting"
12
- ],
13
- "default_model_config": {
14
- "quantize": true,
15
- "quantize_text_encoder": "text_encoder_2",
16
- "torch_dtype": "bfloat16"
17
- },
18
- "gguf_model_id": "Xorbits/FLUX.1-schnell-gguf",
19
- "gguf_quantizations": [
20
- "F16",
21
- "Q2_K",
22
- "Q3_K_S",
23
- "Q4_0",
24
- "Q4_1",
25
- "Q4_K_S",
26
- "Q5_0",
27
- "Q5_1",
28
- "Q5_K_S",
29
- "Q6_K",
30
- "Q8_0"
31
- ],
32
- "gguf_model_file_name_template": "flux1-schnell-{quantization}.gguf"
33
- },
34
- {
35
- "model_name": "FLUX.1-dev",
36
- "model_family": "stable_diffusion",
37
- "model_hub": "modelscope",
38
- "model_id": "AI-ModelScope/FLUX.1-dev",
39
- "model_revision": "master",
40
- "model_ability": [
41
- "text2image",
42
- "image2image",
43
- "inpainting"
44
- ],
45
- "default_model_config": {
46
- "quantize": true,
47
- "quantize_text_encoder": "text_encoder_2",
48
- "torch_dtype": "bfloat16"
49
- },
50
- "gguf_model_id": "AI-ModelScope/FLUX.1-dev-gguf",
51
- "gguf_quantizations": [
52
- "F16",
53
- "Q2_K",
54
- "Q3_K_S",
55
- "Q4_0",
56
- "Q4_1",
57
- "Q4_K_S",
58
- "Q5_0",
59
- "Q5_1",
60
- "Q5_K_S",
61
- "Q6_K",
62
- "Q8_0"
63
- ],
64
- "gguf_model_file_name_template": "flux1-dev-{quantization}.gguf"
65
- },
66
- {
67
- "model_name": "sd3-medium",
68
- "model_family": "stable_diffusion",
69
- "model_hub": "modelscope",
70
- "model_id": "AI-ModelScope/stable-diffusion-3-medium-diffusers",
71
- "model_revision": "master",
72
- "model_ability": [
73
- "text2image",
74
- "image2image",
75
- "inpainting"
76
- ],
77
- "default_model_config": {
78
- "quantize": true,
79
- "quantize_text_encoder": "text_encoder_3"
80
- }
81
- },
82
- {
83
- "model_name": "sd3.5-medium",
84
- "model_family": "stable_diffusion",
85
- "model_hub": "modelscope",
86
- "model_id": "AI-ModelScope/stable-diffusion-3.5-medium",
87
- "model_revision": "master",
88
- "model_ability": [
89
- "text2image",
90
- "image2image",
91
- "inpainting"
92
- ],
93
- "default_model_config": {
94
- "quantize": true,
95
- "quantize_text_encoder": "text_encoder_3",
96
- "torch_dtype": "bfloat16"
97
- },
98
- "gguf_model_id": "Xorbits/stable-diffusion-3.5-medium-gguf",
99
- "gguf_quantizations": [
100
- "F16",
101
- "Q3_K_M",
102
- "Q3_K_S",
103
- "Q4_0",
104
- "Q4_1",
105
- "Q4_K_M",
106
- "Q4_K_S",
107
- "Q5_0",
108
- "Q5_1",
109
- "Q5_K_M",
110
- "Q5_K_S",
111
- "Q6_K",
112
- "Q8_0"
113
- ],
114
- "gguf_model_file_name_template": "sd3.5_medium-{quantization}.gguf"
115
- },
116
- {
117
- "model_name": "sd3.5-large",
118
- "model_family": "stable_diffusion",
119
- "model_hub": "modelscope",
120
- "model_id": "AI-ModelScope/stable-diffusion-3.5-large",
121
- "model_revision": "master",
122
- "model_ability": [
123
- "text2image",
124
- "image2image",
125
- "inpainting"
126
- ],
127
- "default_model_config": {
128
- "quantize": true,
129
- "quantize_text_encoder": "text_encoder_3",
130
- "torch_dtype": "bfloat16",
131
- "transformer_quantization": "nf4"
132
- },
133
- "gguf_model_id": "Xorbits/stable-diffusion-3.5-large-gguf",
134
- "gguf_quantizations": [
135
- "F16",
136
- "Q4_0",
137
- "Q4_1",
138
- "Q5_0",
139
- "Q5_1",
140
- "Q8_0"
141
- ],
142
- "gguf_model_file_name_template": "sd3.5_large-{quantization}.gguf"
143
- },
144
- {
145
- "model_name": "sd3.5-large-turbo",
146
- "model_family": "stable_diffusion",
147
- "model_hub": "modelscope",
148
- "model_id": "AI-ModelScope/stable-diffusion-3.5-large-turbo",
149
- "model_revision": "master",
150
- "model_ability": [
151
- "text2image",
152
- "image2image",
153
- "inpainting"
154
- ],
155
- "default_model_config": {
156
- "quantize": true,
157
- "quantize_text_encoder": "text_encoder_3",
158
- "torch_dtype": "bfloat16",
159
- "transformer_quantization": "nf4"
160
- },
161
- "default_generate_config": {
162
- "guidance_scale": 1.0,
163
- "num_inference_steps": 4
164
- },
165
- "gguf_model_id": "Xorbits/stable-diffusion-3.5-large-turbo-gguf",
166
- "gguf_quantizations": [
167
- "F16",
168
- "Q4_0",
169
- "Q4_1",
170
- "Q5_0",
171
- "Q5_1",
172
- "Q8_0"
173
- ],
174
- "gguf_model_file_name_template": "sd3.5_large_turbo-{quantization}.gguf"
175
- },
176
- {
177
- "model_name": "HunyuanDiT-v1.2",
178
- "model_family": "stable_diffusion",
179
- "model_hub": "modelscope",
180
- "model_id": "Xorbits/HunyuanDiT-v1.2-Diffusers",
181
- "model_revision": "master",
182
- "model_ability": [
183
- "text2image"
184
- ]
185
- },
186
- {
187
- "model_name": "HunyuanDiT-v1.2-Distilled",
188
- "model_family": "stable_diffusion",
189
- "model_hub": "modelscope",
190
- "model_id": "Xorbits/HunyuanDiT-v1.2-Diffusers-Distilled",
191
- "model_revision": "master",
192
- "model_ability": [
193
- "text2image"
194
- ]
195
- },
196
- {
197
- "model_name": "sd-turbo",
198
- "model_family": "stable_diffusion",
199
- "model_hub": "modelscope",
200
- "model_id": "AI-ModelScope/sd-turbo",
201
- "model_revision": "master",
202
- "model_ability": [
203
- "text2image"
204
- ],
205
- "default_generate_config": {
206
- "guidance_scale": 0.0,
207
- "num_inference_steps": 1
208
- }
209
- },
210
- {
211
- "model_name": "sdxl-turbo",
212
- "model_family": "stable_diffusion",
213
- "model_hub": "modelscope",
214
- "model_id": "AI-ModelScope/sdxl-turbo",
215
- "model_revision": "master",
216
- "model_ability": [
217
- "text2image"
218
- ],
219
- "default_generate_config": {
220
- "guidance_scale": 0.0,
221
- "num_inference_steps": 1
222
- }
223
- },
224
- {
225
- "model_name": "stable-diffusion-v1.5",
226
- "model_family": "stable_diffusion",
227
- "model_hub": "modelscope",
228
- "model_id": "AI-ModelScope/stable-diffusion-v1-5",
229
- "model_revision": "master",
230
- "model_ability": [
231
- "text2image",
232
- "image2image"
233
- ],
234
- "controlnet": [
235
- {
236
- "model_name":"canny",
237
- "model_family": "controlnet",
238
- "model_id": "lllyasviel/sd-controlnet-canny",
239
- "model_revision": "7f2f69197050967007f6bbd23ab5e52f0384162a"
240
- },
241
- {
242
- "model_name":"mlsd",
243
- "model_family": "controlnet",
244
- "model_id": "lllyasviel/sd-controlnet-mlsd",
245
- "model_revision": "456fc1af4dc8f7d5fa6c949c5040d3828a24e3ad"
246
- },
247
- {
248
- "model_name":"hed",
249
- "model_family": "controlnet",
250
- "model_id": "lllyasviel/sd-controlnet-hed",
251
- "model_revision": "04473d9334ab44908daa66107bbfb6f710aa056d"
252
- },
253
- {
254
- "model_name":"scribble",
255
- "model_family": "controlnet",
256
- "model_id": "lllyasviel/sd-controlnet-scribble",
257
- "model_revision": "864edcd5ccc6ee2695eeebea5b4512100c83e7b3"
258
- },
259
- {
260
- "model_name":"openpose",
261
- "model_family": "controlnet",
262
- "model_id": "lllyasviel/sd-controlnet-openpose",
263
- "model_revision": "df796456519d1ac5dbe674caa0653fcc9673bca8"
264
- },
265
- {
266
- "model_name":"normal",
267
- "model_family": "controlnet",
268
- "model_id": "lllyasviel/sd-controlnet-normal",
269
- "model_revision": "1cbed9b3ca84422e4a2f23c14b9f5a114742b31d"
270
- },
271
- {
272
- "model_name":"seg",
273
- "model_family": "controlnet",
274
- "model_id": "lllyasviel/sd-controlnet-seg",
275
- "model_revision": "ecdcb5645b5099c9a7500a504fb9ab3f743c4d96"
276
- }
277
- ]
278
- },
279
- {
280
- "model_name": "stable-diffusion-xl-base-1.0",
281
- "model_family": "stable_diffusion",
282
- "model_hub": "modelscope",
283
- "model_id": "AI-ModelScope/stable-diffusion-xl-base-1.0",
284
- "model_revision": "master",
285
- "model_ability": [
286
- "text2image",
287
- "image2image"
288
- ],
289
- "controlnet": [
290
- {
291
- "model_name":"canny",
292
- "model_family": "controlnet",
293
- "model_id": "diffusers/controlnet-canny-sdxl-1.0",
294
- "model_revision": "eb115a19a10d14909256db740ed109532ab1483c"
295
- },
296
- {
297
- "model_name":"depth",
298
- "model_family": "controlnet",
299
- "model_id": "diffusers/controlnet-depth-sdxl-1.0",
300
- "model_revision": "cb2e660019b26f7d2379e26922b5b2c87253dcf7"
301
- },
302
- {
303
- "model_name":"zoe-depth",
304
- "model_family": "controlnet",
305
- "model_id": "diffusers/controlnet-zoe-depth-sdxl-1.0",
306
- "model_revision": "62134b9d8e703b5d6f74f1534457287a8bba77ef"
307
- }
308
- ]
309
- },
310
- {
311
- "model_name": "kolors",
312
- "model_family": "stable_diffusion",
313
- "model_hub": "modelscope",
314
- "model_id": "JunHowie/Kolors-diffusers",
315
- "model_revision": "master",
316
- "model_ability": [
317
- "text2image",
318
- "image2image"
319
- ],
320
- "default_model_config": {
321
- "variant": "fp16"
322
- },
323
- "virtualenv": {
324
- "packages": [
325
- "diffusers>=0.30.0",
326
- "#system_numpy#"
327
- ]
328
- }
329
- },
330
- {
331
- "model_name": "cogview4",
332
- "model_family": "stable_diffusion",
333
- "model_hub": "modelscope",
334
- "model_id": "ZhipuAI/CogView4-6B",
335
- "model_revision": "master",
336
- "model_ability": [
337
- "text2image"
338
- ],
339
- "default_model_config": {
340
- "torch_dtype": "bfloat16"
341
- },
342
- "virtualenv": {
343
- "packages": [
344
- "diffusers>=0.33.0",
345
- "#system_numpy#"
346
- ]
347
- }
348
- },
349
- {
350
- "model_name": "GOT-OCR2_0",
351
- "model_family": "ocr",
352
- "model_id": "stepfun-ai/GOT-OCR2_0",
353
- "model_revision": "master",
354
- "model_hub": "modelscope",
355
- "model_ability": [
356
- "ocr"
357
- ],
358
- "virtualenv": {
359
- "packages": [
360
- "transformers==4.37.2",
361
- "httpx==0.24.0",
362
- "deepspeed==0.12.3",
363
- "peft==0.4.0",
364
- "tiktoken==0.6.0",
365
- "bitsandbytes==0.41.0",
366
- "scikit-learn==1.2.2",
367
- "sentencepiece==0.1.99",
368
- "einops==0.6.1",
369
- "einops-exts==0.0.4",
370
- "timm==0.6.13",
371
- "numpy==1.26.4"
372
- ]
373
- }
374
- }
375
- ]