xinference 1.8.1rc1__py3-none-any.whl → 1.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic; see the package registry page for more details.

Files changed (108)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +2 -1
  3. xinference/core/model.py +8 -4
  4. xinference/core/supervisor.py +2 -3
  5. xinference/core/worker.py +7 -5
  6. xinference/deploy/cmdline.py +2 -0
  7. xinference/deploy/local.py +5 -0
  8. xinference/deploy/test/test_cmdline.py +1 -1
  9. xinference/deploy/worker.py +6 -0
  10. xinference/model/audio/cosyvoice.py +0 -1
  11. xinference/model/audio/model_spec.json +44 -20
  12. xinference/model/core.py +3 -0
  13. xinference/model/embedding/flag/core.py +5 -0
  14. xinference/model/embedding/llama_cpp/core.py +22 -19
  15. xinference/model/embedding/sentence_transformers/core.py +18 -4
  16. xinference/model/embedding/vllm/core.py +36 -9
  17. xinference/model/image/cache_manager.py +56 -0
  18. xinference/model/image/core.py +9 -0
  19. xinference/model/image/model_spec.json +178 -1
  20. xinference/model/image/stable_diffusion/core.py +155 -23
  21. xinference/model/llm/cache_manager.py +17 -3
  22. xinference/model/llm/harmony.py +245 -0
  23. xinference/model/llm/llama_cpp/core.py +41 -40
  24. xinference/model/llm/llm_family.json +688 -11
  25. xinference/model/llm/llm_family.py +1 -1
  26. xinference/model/llm/sglang/core.py +108 -5
  27. xinference/model/llm/transformers/core.py +20 -18
  28. xinference/model/llm/transformers/gemma3.py +1 -1
  29. xinference/model/llm/transformers/gpt_oss.py +91 -0
  30. xinference/model/llm/transformers/multimodal/core.py +1 -1
  31. xinference/model/llm/transformers/multimodal/gemma3.py +1 -1
  32. xinference/model/llm/transformers/multimodal/glm4_1v.py +2 -2
  33. xinference/model/llm/transformers/multimodal/ovis2.py +1 -1
  34. xinference/model/llm/transformers/multimodal/qwen-omni.py +7 -8
  35. xinference/model/llm/transformers/multimodal/qwen2_vl.py +9 -6
  36. xinference/model/llm/transformers/utils.py +1 -33
  37. xinference/model/llm/utils.py +61 -7
  38. xinference/model/llm/vllm/core.py +44 -8
  39. xinference/model/rerank/__init__.py +66 -23
  40. xinference/model/rerank/cache_manager.py +35 -0
  41. xinference/model/rerank/core.py +87 -339
  42. xinference/model/rerank/custom.py +33 -8
  43. xinference/model/rerank/model_spec.json +251 -212
  44. xinference/model/rerank/rerank_family.py +137 -0
  45. xinference/model/rerank/sentence_transformers/__init__.py +13 -0
  46. xinference/model/rerank/sentence_transformers/core.py +337 -0
  47. xinference/model/rerank/vllm/__init__.py +13 -0
  48. xinference/model/rerank/vllm/core.py +156 -0
  49. xinference/model/utils.py +108 -0
  50. xinference/model/video/model_spec.json +95 -1
  51. xinference/thirdparty/cosyvoice/bin/export_jit.py +3 -4
  52. xinference/thirdparty/cosyvoice/bin/export_onnx.py +49 -126
  53. xinference/thirdparty/cosyvoice/bin/{inference.py → inference_deprecated.py} +1 -0
  54. xinference/thirdparty/cosyvoice/bin/train.py +23 -3
  55. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +8 -4
  56. xinference/thirdparty/cosyvoice/cli/frontend.py +4 -4
  57. xinference/thirdparty/cosyvoice/cli/model.py +53 -75
  58. xinference/thirdparty/cosyvoice/dataset/dataset.py +5 -18
  59. xinference/thirdparty/cosyvoice/dataset/processor.py +24 -25
  60. xinference/thirdparty/cosyvoice/flow/decoder.py +24 -433
  61. xinference/thirdparty/cosyvoice/flow/flow.py +6 -14
  62. xinference/thirdparty/cosyvoice/flow/flow_matching.py +33 -145
  63. xinference/thirdparty/cosyvoice/hifigan/generator.py +169 -1
  64. xinference/thirdparty/cosyvoice/llm/llm.py +108 -17
  65. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +14 -115
  66. xinference/thirdparty/cosyvoice/utils/common.py +20 -0
  67. xinference/thirdparty/cosyvoice/utils/executor.py +8 -4
  68. xinference/thirdparty/cosyvoice/utils/file_utils.py +45 -1
  69. xinference/thirdparty/cosyvoice/utils/losses.py +37 -0
  70. xinference/thirdparty/cosyvoice/utils/mask.py +35 -1
  71. xinference/thirdparty/cosyvoice/utils/train_utils.py +24 -6
  72. xinference/thirdparty/cosyvoice/vllm/cosyvoice2.py +103 -0
  73. xinference/types.py +2 -0
  74. xinference/ui/gradio/chat_interface.py +2 -0
  75. xinference/ui/gradio/media_interface.py +353 -7
  76. xinference/ui/web/ui/build/asset-manifest.json +3 -3
  77. xinference/ui/web/ui/build/index.html +1 -1
  78. xinference/ui/web/ui/build/static/js/main.1086c759.js +3 -0
  79. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +1 -0
  80. xinference/ui/web/ui/node_modules/.cache/babel-loader/28012da921a51f1082549956d3ae82acd769a754b22afda9acddd98a4daf9ea4.json +1 -0
  81. xinference/ui/web/ui/node_modules/.cache/babel-loader/3c5758bd12fa334294b1de0ff6b1a4bac8d963c45472eab9dc3e530d82aa6b3f.json +1 -0
  82. xinference/ui/web/ui/node_modules/.cache/babel-loader/475936ebe725eca62a6f52ce182c06a19b2cef4df9545a05ed0591ee0c539d43.json +1 -0
  83. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +1 -0
  84. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +1 -0
  85. xinference/ui/web/ui/node_modules/.cache/babel-loader/aee5aaba26f2b1e816a3ea9efa68bad8b95695a3d80adcfd8dd57a7bb17ac71a.json +1 -0
  86. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +1 -0
  87. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +1 -0
  88. xinference/ui/web/ui/src/locales/en.json +2 -0
  89. xinference/ui/web/ui/src/locales/ja.json +2 -0
  90. xinference/ui/web/ui/src/locales/ko.json +2 -0
  91. xinference/ui/web/ui/src/locales/zh.json +2 -0
  92. {xinference-1.8.1rc1.dist-info → xinference-1.9.1.dist-info}/METADATA +15 -10
  93. {xinference-1.8.1rc1.dist-info → xinference-1.9.1.dist-info}/RECORD +98 -89
  94. xinference/ui/web/ui/build/static/js/main.b969199a.js +0 -3
  95. xinference/ui/web/ui/build/static/js/main.b969199a.js.map +0 -1
  96. xinference/ui/web/ui/node_modules/.cache/babel-loader/1409a96b9f9f9f5de99a89ab0f738f6da62b449521b0a8d3e4efcf7f5c23534d.json +0 -1
  97. xinference/ui/web/ui/node_modules/.cache/babel-loader/3d2a89f0eccc1f90fc5036c9a1d587c2120e6a6b128aae31d1db7d6bad52722b.json +0 -1
  98. xinference/ui/web/ui/node_modules/.cache/babel-loader/43b889c3a8e2634092ade463d52481c7c5581c72ded8f23bc5f012ea0ef8cea5.json +0 -1
  99. xinference/ui/web/ui/node_modules/.cache/babel-loader/5d47532fb42128280d87f57c8a0b02bc1930f7ef764aa7e90579247df18bba83.json +0 -1
  100. xinference/ui/web/ui/node_modules/.cache/babel-loader/830882bb275468a969614824a9ab8983f874b4581f2eb625e9c66426cdc65e5b.json +0 -1
  101. xinference/ui/web/ui/node_modules/.cache/babel-loader/8e5cb82c2ff3299c6a44563fe6b1c5515c9750613c51bb63abee0b1d70fc5019.json +0 -1
  102. xinference/ui/web/ui/node_modules/.cache/babel-loader/9df08abcb5a7c1e48a4eb25c5d5f5d7253ea6854a4397e6d74d1fd75a14acda1.json +0 -1
  103. xinference/ui/web/ui/node_modules/.cache/babel-loader/b99034986a06445701accc7a4914bb9320947435e8d4e15793392ca4f679316c.json +0 -1
  104. /xinference/ui/web/ui/build/static/js/{main.b969199a.js.LICENSE.txt → main.1086c759.js.LICENSE.txt} +0 -0
  105. {xinference-1.8.1rc1.dist-info → xinference-1.9.1.dist-info}/WHEEL +0 -0
  106. {xinference-1.8.1rc1.dist-info → xinference-1.9.1.dist-info}/entry_points.txt +0 -0
  107. {xinference-1.8.1rc1.dist-info → xinference-1.9.1.dist-info}/licenses/LICENSE +0 -0
  108. {xinference-1.8.1rc1.dist-info → xinference-1.9.1.dist-info}/top_level.txt +0 -0
@@ -1,215 +1,254 @@
1
1
  [
2
- {
3
- "version": 2,
4
- "model_name": "bge-reranker-large",
5
- "type": "normal",
6
- "language": [
7
- "en",
8
- "zh"
9
- ],
10
- "max_tokens": 512,
11
- "model_src": {
12
- "huggingface": {
13
- "model_id": "BAAI/bge-reranker-large",
14
- "model_revision": "27c9168d479987529781de8474dff94d69beca11"
15
- },
16
- "modelscope": {
17
- "model_id": "Xorbits/bge-reranker-large",
18
- "model_revision": "v0.0.1"
19
- }
2
+ {
3
+ "version": 2,
4
+ "model_name": "bge-reranker-large",
5
+ "type": "normal",
6
+ "language": ["en", "zh"],
7
+ "max_tokens": 512,
8
+ "model_specs": [
9
+ {
10
+ "model_format": "pytorch",
11
+ "model_src": {
12
+ "huggingface": {
13
+ "model_id": "BAAI/bge-reranker-large",
14
+ "model_revision": "27c9168d479987529781de8474dff94d69beca11",
15
+ "quantizations": ["none"]
16
+ },
17
+ "modelscope": {
18
+ "model_id": "Xorbits/bge-reranker-large",
19
+ "model_revision": "v0.0.1",
20
+ "quantizations": ["none"]
21
+ }
22
+ }
23
+ }
24
+ ]
25
+ },
26
+ {
27
+ "version": 2,
28
+ "model_name": "bge-reranker-base",
29
+ "type": "normal",
30
+ "language": ["en", "zh"],
31
+ "max_tokens": 512,
32
+ "model_specs": [
33
+ {
34
+ "model_format": "pytorch",
35
+ "model_src": {
36
+ "huggingface": {
37
+ "model_id": "BAAI/bge-reranker-base",
38
+ "model_revision": "465b4b7ddf2be0a020c8ad6e525b9bb1dbb708ae",
39
+ "quantizations": ["none"]
40
+ },
41
+ "modelscope": {
42
+ "model_id": "Xorbits/bge-reranker-base",
43
+ "model_revision": "v0.0.1",
44
+ "quantizations": ["none"]
45
+ }
46
+ }
47
+ }
48
+ ]
49
+ },
50
+ {
51
+ "version": 2,
52
+ "model_name": "bce-reranker-base_v1",
53
+ "type": "normal",
54
+ "language": ["en", "zh"],
55
+ "max_tokens": 512,
56
+ "model_specs": [
57
+ {
58
+ "model_format": "pytorch",
59
+ "model_src": {
60
+ "huggingface": {
61
+ "model_id": "maidalun1020/bce-reranker-base_v1",
62
+ "model_revision": "eaa31a577a0574e87a08959bd229ca14ce1b5496",
63
+ "quantizations": ["none"]
64
+ },
65
+ "modelscope": {
66
+ "model_id": "maidalun/bce-reranker-base_v1",
67
+ "model_revision": "v0.0.1",
68
+ "quantizations": ["none"]
69
+ }
70
+ }
71
+ }
72
+ ]
73
+ },
74
+ {
75
+ "version": 2,
76
+ "model_name": "bge-reranker-v2-m3",
77
+ "type": "normal",
78
+ "language": ["en", "zh", "multilingual"],
79
+ "max_tokens": 8192,
80
+ "model_specs": [
81
+ {
82
+ "model_format": "pytorch",
83
+ "model_src": {
84
+ "huggingface": {
85
+ "model_id": "BAAI/bge-reranker-v2-m3",
86
+ "model_revision": "12e974610ba9083ed95f3edf08d7e899581f4de4",
87
+ "quantizations": ["none"]
88
+ },
89
+ "modelscope": {
90
+ "model_id": "AI-ModelScope/bge-reranker-v2-m3",
91
+ "quantizations": ["none"]
92
+ }
93
+ }
94
+ }
95
+ ]
96
+ },
97
+ {
98
+ "version": 2,
99
+ "model_name": "bge-reranker-v2-gemma",
100
+ "type": "LLM-based",
101
+ "language": ["en", "zh", "multilingual"],
102
+ "max_tokens": 8192,
103
+ "model_specs": [
104
+ {
105
+ "model_format": "pytorch",
106
+ "model_src": {
107
+ "huggingface": {
108
+ "model_id": "BAAI/bge-reranker-v2-gemma",
109
+ "model_revision": "1787044f8b6fb740a9de4557c3a12377f84d9e17",
110
+ "quantizations": ["none"]
111
+ },
112
+ "modelscope": {
113
+ "model_id": "AI-ModelScope/bge-reranker-v2-gemma",
114
+ "quantizations": ["none"]
115
+ }
116
+ }
117
+ }
118
+ ]
119
+ },
120
+ {
121
+ "version": 2,
122
+ "model_name": "bge-reranker-v2-minicpm-layerwise",
123
+ "type": "LLM-based layerwise",
124
+ "language": ["en", "zh", "multilingual"],
125
+ "max_tokens": 2048,
126
+ "model_specs": [
127
+ {
128
+ "model_format": "pytorch",
129
+ "model_src": {
130
+ "huggingface": {
131
+ "model_id": "BAAI/bge-reranker-v2-minicpm-layerwise",
132
+ "model_revision": "47b5332b296c4d8cb6ee2c60502cc62a0d708881",
133
+ "quantizations": ["none"]
134
+ },
135
+ "modelscope": {
136
+ "model_id": "mirror013/bge-reranker-v2-minicpm-layerwise",
137
+ "quantizations": ["none"]
138
+ }
139
+ }
140
+ }
141
+ ]
142
+ },
143
+ {
144
+ "version": 2,
145
+ "model_name": "jina-reranker-v2",
146
+ "type": "normal",
147
+ "language": ["en", "zh", "multilingual"],
148
+ "max_tokens": 1024,
149
+ "model_specs": [
150
+ {
151
+ "model_format": "pytorch",
152
+ "model_src": {
153
+ "huggingface": {
154
+ "model_id": "jinaai/jina-reranker-v2-base-multilingual",
155
+ "model_revision": "298e48cada4a9318650d7fbd795f63827f884087",
156
+ "quantizations": ["none"]
157
+ }
158
+ }
159
+ }
160
+ ]
161
+ },
162
+ {
163
+ "version": 2,
164
+ "model_name": "minicpm-reranker",
165
+ "type": "normal",
166
+ "language": ["en", "zh"],
167
+ "max_tokens": 1024,
168
+ "model_specs": [
169
+ {
170
+ "model_format": "pytorch",
171
+ "model_src": {
172
+ "huggingface": {
173
+ "model_id": "openbmb/MiniCPM-Reranker",
174
+ "model_revision": "5d2fd7345b6444c89d4c0fa59c92272888f3f2d0",
175
+ "quantizations": ["none"]
176
+ },
177
+ "modelscope": {
178
+ "model_id": "OpenBMB/MiniCPM-Reranker",
179
+ "quantizations": ["none"]
180
+ }
181
+ }
182
+ }
183
+ ]
184
+ },
185
+ {
186
+ "version": 2,
187
+ "model_name": "Qwen3-Reranker-0.6B",
188
+ "type": "normal",
189
+ "language": ["en", "zh"],
190
+ "max_tokens": 32768,
191
+ "model_specs": [
192
+ {
193
+ "model_format": "pytorch",
194
+ "model_src": {
195
+ "huggingface": {
196
+ "model_id": "Qwen/Qwen3-Reranker-0.6B",
197
+ "model_revision": "6e9e69830b95c52b5fd889b7690dda3329508de3",
198
+ "quantizations": ["none"]
199
+ },
200
+ "modelscope": {
201
+ "model_id": "Qwen/Qwen3-Reranker-0.6B",
202
+ "quantizations": ["none"]
203
+ }
204
+ }
205
+ }
206
+ ]
207
+ },
208
+ {
209
+ "version": 2,
210
+ "model_name": "Qwen3-Reranker-4B",
211
+ "type": "normal",
212
+ "language": ["en", "zh"],
213
+ "max_tokens": 32768,
214
+ "model_specs": [
215
+ {
216
+ "model_format": "pytorch",
217
+ "model_src": {
218
+ "huggingface": {
219
+ "model_id": "Qwen/Qwen3-Reranker-4B",
220
+ "model_revision": "f16fc5d5d2b9b1d0db8280929242745d79794ef5",
221
+ "quantizations": ["none"]
222
+ },
223
+ "modelscope": {
224
+ "model_id": "Qwen/Qwen3-Reranker-4B",
225
+ "quantizations": ["none"]
226
+ }
227
+ }
228
+ }
229
+ ]
230
+ },
231
+ {
232
+ "version": 2,
233
+ "model_name": "Qwen3-Reranker-8B",
234
+ "type": "normal",
235
+ "language": ["en", "zh"],
236
+ "max_tokens": 32768,
237
+ "model_specs": [
238
+ {
239
+ "model_format": "pytorch",
240
+ "model_src": {
241
+ "huggingface": {
242
+ "model_id": "Qwen/Qwen3-Reranker-8B",
243
+ "model_revision": "5fa94080caafeaa45a15d11f969d7978e087a3db",
244
+ "quantizations": ["none"]
245
+ },
246
+ "modelscope": {
247
+ "model_id": "Qwen/Qwen3-Reranker-8B",
248
+ "quantizations": ["none"]
249
+ }
250
+ }
251
+ }
252
+ ]
20
253
  }
21
- },
22
- {
23
- "version": 2,
24
- "model_name": "bge-reranker-base",
25
- "type": "normal",
26
- "language": [
27
- "en",
28
- "zh"
29
- ],
30
- "max_tokens": 512,
31
- "model_src": {
32
- "huggingface": {
33
- "model_id": "BAAI/bge-reranker-base",
34
- "model_revision": "465b4b7ddf2be0a020c8ad6e525b9bb1dbb708ae"
35
- },
36
- "modelscope": {
37
- "model_id": "Xorbits/bge-reranker-base",
38
- "model_revision": "v0.0.1"
39
- }
40
- }
41
- },
42
- {
43
- "version": 2,
44
- "model_name": "bce-reranker-base_v1",
45
- "type": "normal",
46
- "language": [
47
- "en",
48
- "zh"
49
- ],
50
- "max_tokens": 512,
51
- "model_src": {
52
- "huggingface": {
53
- "model_id": "maidalun1020/bce-reranker-base_v1",
54
- "model_revision": "eaa31a577a0574e87a08959bd229ca14ce1b5496"
55
- },
56
- "modelscope": {
57
- "model_id": "maidalun/bce-reranker-base_v1",
58
- "model_revision": "v0.0.1"
59
- }
60
- }
61
- },
62
- {
63
- "version": 2,
64
- "model_name": "bge-reranker-v2-m3",
65
- "type": "normal",
66
- "language": [
67
- "en",
68
- "zh",
69
- "multilingual"
70
- ],
71
- "max_tokens": 8192,
72
- "model_src": {
73
- "huggingface": {
74
- "model_id": "BAAI/bge-reranker-v2-m3",
75
- "model_revision": "12e974610ba9083ed95f3edf08d7e899581f4de4"
76
- },
77
- "modelscope": {
78
- "model_id": "AI-ModelScope/bge-reranker-v2-m3"
79
- }
80
- }
81
- },
82
- {
83
- "version": 2,
84
- "model_name": "bge-reranker-v2-gemma",
85
- "type": "LLM-based",
86
- "language": [
87
- "en",
88
- "zh",
89
- "multilingual"
90
- ],
91
- "max_tokens": 8192,
92
- "model_src": {
93
- "huggingface": {
94
- "model_id": "BAAI/bge-reranker-v2-gemma",
95
- "model_revision": "1787044f8b6fb740a9de4557c3a12377f84d9e17"
96
- },
97
- "modelscope": {
98
- "model_id": "AI-ModelScope/bge-reranker-v2-gemma"
99
- }
100
- }
101
- },
102
- {
103
- "version": 2,
104
- "model_name": "bge-reranker-v2-minicpm-layerwise",
105
- "type": "LLM-based layerwise",
106
- "language": [
107
- "en",
108
- "zh",
109
- "multilingual"
110
- ],
111
- "max_tokens": 2048,
112
- "model_src": {
113
- "huggingface": {
114
- "model_id": "BAAI/bge-reranker-v2-minicpm-layerwise",
115
- "model_revision": "47b5332b296c4d8cb6ee2c60502cc62a0d708881"
116
- },
117
- "modelscope": {
118
- "model_id": "mirror013/bge-reranker-v2-minicpm-layerwise"
119
- }
120
- }
121
- },
122
- {
123
- "version": 2,
124
- "model_name": "jina-reranker-v2",
125
- "type": "normal",
126
- "language": [
127
- "en",
128
- "zh",
129
- "multilingual"
130
- ],
131
- "max_tokens": 1024,
132
- "model_src": {
133
- "huggingface": {
134
- "model_id": "jinaai/jina-reranker-v2-base-multilingual",
135
- "model_revision": "298e48cada4a9318650d7fbd795f63827f884087"
136
- }
137
- }
138
- },
139
- {
140
- "version": 2,
141
- "model_name": "minicpm-reranker",
142
- "type": "normal",
143
- "language": [
144
- "en",
145
- "zh"
146
- ],
147
- "max_tokens": 1024,
148
- "model_src": {
149
- "huggingface": {
150
- "model_id": "openbmb/MiniCPM-Reranker",
151
- "model_revision": "5d2fd7345b6444c89d4c0fa59c92272888f3f2d0"
152
- },
153
- "modelscope": {
154
- "model_id": "OpenBMB/MiniCPM-Reranker"
155
- }
156
- }
157
- },
158
- {
159
- "version": 2,
160
- "model_name": "Qwen3-Reranker-0.6B",
161
- "type": "normal",
162
- "language": [
163
- "en",
164
- "zh"
165
- ],
166
- "max_tokens": 32768,
167
- "model_src": {
168
- "huggingface": {
169
- "model_id": "Qwen/Qwen3-Reranker-0.6B",
170
- "model_revision": "6e9e69830b95c52b5fd889b7690dda3329508de3"
171
- },
172
- "modelscope": {
173
- "model_id": "Qwen/Qwen3-Reranker-0.6B"
174
- }
175
- }
176
- },
177
- {
178
- "version": 2,
179
- "model_name": "Qwen3-Reranker-4B",
180
- "type": "normal",
181
- "language": [
182
- "en",
183
- "zh"
184
- ],
185
- "max_tokens": 32768,
186
- "model_src": {
187
- "huggingface": {
188
- "model_id": "Qwen/Qwen3-Reranker-4B",
189
- "model_revision": "f16fc5d5d2b9b1d0db8280929242745d79794ef5"
190
- },
191
- "modelscope": {
192
- "model_id": "Qwen/Qwen3-Reranker-4B"
193
- }
194
- }
195
- },
196
- {
197
- "version": 2,
198
- "model_name": "Qwen3-Reranker-8B",
199
- "type": "normal",
200
- "language": [
201
- "en",
202
- "zh"
203
- ],
204
- "max_tokens": 32768,
205
- "model_src": {
206
- "huggingface": {
207
- "model_id": "Qwen/Qwen3-Reranker-8B",
208
- "model_revision": "5fa94080caafeaa45a15d11f969d7978e087a3db"
209
- },
210
- "modelscope": {
211
- "model_id": "Qwen/Qwen3-Reranker-8B"
212
- }
213
- }
214
- }
215
254
  ]
@@ -0,0 +1,137 @@
1
+ # Copyright 2022-2025 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Type, Union
17
+
18
+ if TYPE_CHECKING:
19
+ from .core import RerankModel, RerankModelFamilyV2, RerankSpecV1
20
+
21
+ FLAG_RERANKER_CLASSES: List[Type["RerankModel"]] = []
22
+ SENTENCE_TRANSFORMER_CLASSES: List[Type["RerankModel"]] = []
23
+ VLLM_CLASSES: List[Type["RerankModel"]] = []
24
+
25
+ BUILTIN_RERANK_MODELS: Dict[str, "RerankModelFamilyV2"] = {}
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ def match_rerank(
31
+ model_name: str,
32
+ model_format: Optional[str] = None,
33
+ quantization: Optional[str] = None,
34
+ download_hub: Optional[
35
+ Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
36
+ ] = None,
37
+ ) -> "RerankModelFamilyV2":
38
+ from ..utils import download_from_modelscope
39
+ from .custom import get_user_defined_reranks
40
+
41
+ target_family = None
42
+
43
+ if model_name in BUILTIN_RERANK_MODELS:
44
+ target_family = BUILTIN_RERANK_MODELS[model_name]
45
+ else:
46
+ for model_family in get_user_defined_reranks():
47
+ if model_name == model_family.model_name:
48
+ target_family = model_family
49
+ break
50
+
51
+ if target_family is None:
52
+ raise ValueError(
53
+ f"Rerank model {model_name} not found, available models: {BUILTIN_RERANK_MODELS.keys()}"
54
+ )
55
+
56
+ if download_hub == "modelscope" or download_from_modelscope():
57
+ specs = [
58
+ x for x in target_family.model_specs if x.model_hub == "modelscope"
59
+ ] + [x for x in target_family.model_specs if x.model_hub == "huggingface"]
60
+ else:
61
+ specs = [x for x in target_family.model_specs if x.model_hub == "huggingface"]
62
+
63
+ def _match_quantization(q: Union[str, None], _quantization: str):
64
+ # Currently, the quantization name could include both uppercase and lowercase letters,
65
+ # so it is necessary to ensure that the case sensitivity does not
66
+ # affect the matching results.
67
+ if q is None:
68
+ return None
69
+ return _quantization if q.lower() == _quantization.lower() else None
70
+
71
+ def _apply_format_to_model_id(_spec: "RerankSpecV1", q: str) -> "RerankSpecV1":
72
+ # Different quantized versions of some models use different model ids,
73
+ # Here we check the `{}` in the model id to format the id.
74
+ if _spec.model_id and "{" in _spec.model_id:
75
+ _spec.model_id = _spec.model_id.format(quantization=q)
76
+ return _spec
77
+
78
+ for spec in specs:
79
+ matched_quantization = _match_quantization(quantization, spec.quantization)
80
+ if (
81
+ model_format
82
+ and model_format != spec.model_format
83
+ or quantization
84
+ and matched_quantization is None
85
+ ):
86
+ continue
87
+ # Copy spec to avoid _apply_format_to_model_id modify the original spec.
88
+ spec = spec.copy()
89
+ _family = target_family.copy()
90
+ if quantization:
91
+ _family.model_specs = [
92
+ _apply_format_to_model_id(spec, matched_quantization)
93
+ ]
94
+ return _family
95
+ else:
96
+ # TODO: If user does not specify quantization, just use the first one
97
+ _q = "none" if spec.model_format == "pytorch" else spec.quantization
98
+ _family.model_specs = [_apply_format_to_model_id(spec, _q)]
99
+ return _family
100
+
101
+ raise ValueError(
102
+ f"Rerank model {model_name} with format {model_format} and quantization {quantization} not found."
103
+ )
104
+
105
+
106
+ # { rerank model name -> { engine name -> engine params } }
107
+ RERANK_ENGINES: Dict[str, Dict[str, List[Dict[str, Type["RerankModel"]]]]] = {}
108
+ SUPPORTED_ENGINES: Dict[str, List[Type["RerankModel"]]] = {}
109
+
110
+
111
+ def check_engine_by_model_name_and_engine(
112
+ model_engine: str,
113
+ model_name: str,
114
+ model_format: Optional[str],
115
+ quantization: Optional[str],
116
+ ) -> Type["RerankModel"]:
117
+ def get_model_engine_from_spell(engine_str: str) -> str:
118
+ for engine in RERANK_ENGINES[model_name].keys():
119
+ if engine.lower() == engine_str.lower():
120
+ return engine
121
+ return engine_str
122
+
123
+ if model_name not in RERANK_ENGINES:
124
+ raise ValueError(f"Model {model_name} not found.")
125
+ model_engine = get_model_engine_from_spell(model_engine)
126
+ if model_engine not in RERANK_ENGINES[model_name]:
127
+ raise ValueError(f"Model {model_name} cannot be run on engine {model_engine}.")
128
+ match_params = RERANK_ENGINES[model_name][model_engine]
129
+ for param in match_params:
130
+ if model_name != param["model_name"]:
131
+ continue
132
+ if (model_format and model_format != param["model_format"]) or (
133
+ quantization and quantization != param["quantization"]
134
+ ):
135
+ continue
136
+ return param["rerank_class"]
137
+ raise ValueError(f"Model {model_name} cannot be run on engine {model_engine}.")
@@ -0,0 +1,13 @@
1
+ # Copyright 2022-2025 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.