xinference 1.8.1rc1__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (64)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +2 -1
  3. xinference/core/model.py +5 -0
  4. xinference/core/supervisor.py +2 -3
  5. xinference/core/worker.py +3 -4
  6. xinference/deploy/local.py +5 -0
  7. xinference/deploy/worker.py +6 -0
  8. xinference/model/core.py +3 -0
  9. xinference/model/embedding/sentence_transformers/core.py +3 -4
  10. xinference/model/embedding/vllm/core.py +4 -3
  11. xinference/model/image/model_spec.json +69 -0
  12. xinference/model/image/stable_diffusion/core.py +22 -0
  13. xinference/model/llm/cache_manager.py +17 -3
  14. xinference/model/llm/harmony.py +245 -0
  15. xinference/model/llm/llm_family.json +293 -8
  16. xinference/model/llm/llm_family.py +1 -1
  17. xinference/model/llm/sglang/core.py +108 -5
  18. xinference/model/llm/transformers/core.py +15 -7
  19. xinference/model/llm/transformers/gemma3.py +1 -1
  20. xinference/model/llm/transformers/gpt_oss.py +91 -0
  21. xinference/model/llm/transformers/multimodal/core.py +1 -1
  22. xinference/model/llm/transformers/multimodal/gemma3.py +1 -1
  23. xinference/model/llm/transformers/multimodal/glm4_1v.py +2 -2
  24. xinference/model/llm/transformers/multimodal/ovis2.py +1 -1
  25. xinference/model/llm/transformers/multimodal/qwen-omni.py +7 -8
  26. xinference/model/llm/transformers/multimodal/qwen2_vl.py +9 -6
  27. xinference/model/llm/transformers/utils.py +1 -33
  28. xinference/model/llm/utils.py +61 -7
  29. xinference/model/llm/vllm/core.py +38 -8
  30. xinference/model/rerank/__init__.py +66 -23
  31. xinference/model/rerank/cache_manager.py +35 -0
  32. xinference/model/rerank/core.py +84 -339
  33. xinference/model/rerank/custom.py +33 -8
  34. xinference/model/rerank/model_spec.json +251 -212
  35. xinference/model/rerank/rerank_family.py +137 -0
  36. xinference/model/rerank/sentence_transformers/__init__.py +13 -0
  37. xinference/model/rerank/sentence_transformers/core.py +337 -0
  38. xinference/model/rerank/vllm/__init__.py +13 -0
  39. xinference/model/rerank/vllm/core.py +106 -0
  40. xinference/model/utils.py +109 -0
  41. xinference/types.py +2 -0
  42. xinference/ui/web/ui/build/asset-manifest.json +3 -3
  43. xinference/ui/web/ui/build/index.html +1 -1
  44. xinference/ui/web/ui/build/static/js/{main.b969199a.js → main.4918643a.js} +3 -3
  45. xinference/ui/web/ui/build/static/js/{main.b969199a.js.map → main.4918643a.js.map} +1 -1
  46. xinference/ui/web/ui/node_modules/.cache/babel-loader/28012da921a51f1082549956d3ae82acd769a754b22afda9acddd98a4daf9ea4.json +1 -0
  47. xinference/ui/web/ui/node_modules/.cache/babel-loader/475936ebe725eca62a6f52ce182c06a19b2cef4df9545a05ed0591ee0c539d43.json +1 -0
  48. xinference/ui/web/ui/node_modules/.cache/babel-loader/89179f8f51887b9167721860a12412549ff04f78162e921a7b6aa6532646deb2.json +1 -0
  49. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +1 -0
  50. xinference/ui/web/ui/node_modules/.cache/babel-loader/9dc5cfc67dd0617b0272aeef8651f1589b2155a4ff1fd72ad3166b217089b619.json +1 -0
  51. xinference/ui/web/ui/node_modules/.cache/babel-loader/aee5aaba26f2b1e816a3ea9efa68bad8b95695a3d80adcfd8dd57a7bb17ac71a.json +1 -0
  52. {xinference-1.8.1rc1.dist-info → xinference-1.9.0.dist-info}/METADATA +6 -1
  53. {xinference-1.8.1rc1.dist-info → xinference-1.9.0.dist-info}/RECORD +58 -50
  54. xinference/ui/web/ui/node_modules/.cache/babel-loader/1409a96b9f9f9f5de99a89ab0f738f6da62b449521b0a8d3e4efcf7f5c23534d.json +0 -1
  55. xinference/ui/web/ui/node_modules/.cache/babel-loader/43b889c3a8e2634092ade463d52481c7c5581c72ded8f23bc5f012ea0ef8cea5.json +0 -1
  56. xinference/ui/web/ui/node_modules/.cache/babel-loader/5d47532fb42128280d87f57c8a0b02bc1930f7ef764aa7e90579247df18bba83.json +0 -1
  57. xinference/ui/web/ui/node_modules/.cache/babel-loader/830882bb275468a969614824a9ab8983f874b4581f2eb625e9c66426cdc65e5b.json +0 -1
  58. xinference/ui/web/ui/node_modules/.cache/babel-loader/9df08abcb5a7c1e48a4eb25c5d5f5d7253ea6854a4397e6d74d1fd75a14acda1.json +0 -1
  59. xinference/ui/web/ui/node_modules/.cache/babel-loader/b99034986a06445701accc7a4914bb9320947435e8d4e15793392ca4f679316c.json +0 -1
  60. /xinference/ui/web/ui/build/static/js/{main.b969199a.js.LICENSE.txt → main.4918643a.js.LICENSE.txt} +0 -0
  61. {xinference-1.8.1rc1.dist-info → xinference-1.9.0.dist-info}/WHEEL +0 -0
  62. {xinference-1.8.1rc1.dist-info → xinference-1.9.0.dist-info}/entry_points.txt +0 -0
  63. {xinference-1.8.1rc1.dist-info → xinference-1.9.0.dist-info}/licenses/LICENSE +0 -0
  64. {xinference-1.8.1rc1.dist-info → xinference-1.9.0.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  import logging
15
- from typing import List, Literal, Optional
15
+ from typing import List, Literal
16
16
 
17
17
  from ..custom import ModelRegistry
18
18
  from .core import RerankModelFamilyV2
@@ -22,10 +22,6 @@ logger = logging.getLogger(__name__)
22
22
 
23
23
  class CustomRerankModelFamilyV2(RerankModelFamilyV2):
24
24
  version: Literal[2] = 2
25
- model_id: Optional[str] # type: ignore
26
- model_revision: Optional[str] # type: ignore
27
- model_uri: Optional[str]
28
- model_type: Literal["rerank"] = "rerank" # for frontend
29
25
 
30
26
 
31
27
  UD_RERANKS: List[CustomRerankModelFamilyV2] = []
@@ -35,12 +31,41 @@ class RerankModelRegistry(ModelRegistry):
35
31
  model_type = "rerank"
36
32
 
37
33
  def __init__(self):
38
- from . import BUILTIN_RERANK_MODELS
34
+ from .rerank_family import BUILTIN_RERANK_MODELS
39
35
 
40
36
  super().__init__()
41
37
  self.models = UD_RERANKS
42
38
  self.builtin_models = list(BUILTIN_RERANK_MODELS.keys())
43
39
 
40
def add_ud_model(self, model_spec):
    """Record a user-defined rerank model and build its engine config.

    ``model_spec`` is appended to the module-level ``UD_RERANKS`` list and
    then handed to ``generate_engine_config_by_model_name``.
    """
    # Function-scope import, as in the original code (presumably to avoid
    # a circular import with the package ``__init__`` -- confirm).
    from . import generate_engine_config_by_model_name as _build_engine_config

    UD_RERANKS.append(model_spec)
    _build_engine_config(model_spec)
45
+
46
def check_model_uri(self, model_family: "RerankModelFamilyV2"):
    """Validate the ``model_uri`` of every spec in ``model_family``.

    Specs whose ``model_uri`` is falsy are skipped.

    Raises:
        ValueError: for the first spec whose non-empty URI is rejected by
            ``is_valid_model_uri``.
    """
    from ..utils import is_valid_model_uri

    for spec in model_family.model_specs:
        model_uri = spec.model_uri
        if not model_uri:
            # Nothing to validate for this spec.
            continue
        if is_valid_model_uri(model_uri):
            continue
        raise ValueError(f"Invalid model URI {model_uri}.")
53
+
54
def remove_ud_model(self, model_family: "CustomRerankModelFamilyV2"):
    """Drop a user-defined rerank model from the in-memory registries.

    The family is removed from ``UD_RERANKS`` and its entry (if any) is
    removed from ``RERANK_ENGINES``.

    Raises:
        ValueError: if ``model_family`` is not present in ``UD_RERANKS``
            (raised by ``list.remove``).
    """
    from .rerank_family import RERANK_ENGINES

    UD_RERANKS.remove(model_family)
    # ``pop`` with a default instead of ``del``: a family that never got an
    # engine entry must not raise KeyError here, after it has already been
    # removed from UD_RERANKS, and leave unregistration half-finished.
    RERANK_ENGINES.pop(model_family.model_name, None)
59
+
60
def remove_ud_model_files(self, model_family: "CustomRerankModelFamilyV2"):
    """Unregister each spec of a user-defined rerank model from the cache.

    One copy of ``model_family`` is made; for every spec of the original
    family that copy is narrowed to the single spec and passed to a
    ``RerankCacheManager``, whose ``unregister_custom_model`` is called
    with this registry's ``model_type``.
    """
    from .cache_manager import RerankCacheManager

    # A single copy is reused across iterations; only its ``model_specs``
    # is replaced each time, so the caller's family is never mutated.
    single_spec_family = model_family.copy()
    for one_spec in model_family.model_specs:
        single_spec_family.model_specs = [one_spec]
        RerankCacheManager(single_spec_family).unregister_custom_model(
            self.model_type
        )
68
+
44
69
 
45
70
  def get_user_defined_reranks() -> List[CustomRerankModelFamilyV2]:
46
71
  from ..custom import RegistryManager
@@ -49,11 +74,11 @@ def get_user_defined_reranks() -> List[CustomRerankModelFamilyV2]:
49
74
  return registry.get_custom_models()
50
75
 
51
76
 
52
- def register_rerank(model_spec: CustomRerankModelFamilyV2, persist: bool):
77
+ def register_rerank(model_family: CustomRerankModelFamilyV2, persist: bool):
53
78
  from ..custom import RegistryManager
54
79
 
55
80
  registry = RegistryManager.get_registry("rerank")
56
- registry.register(model_spec, persist)
81
+ registry.register(model_family, persist)
57
82
 
58
83
 
59
84
  def unregister_rerank(model_name: str, raise_error: bool = True):
@@ -1,215 +1,254 @@
1
1
  [
2
- {
3
- "version": 2,
4
- "model_name": "bge-reranker-large",
5
- "type": "normal",
6
- "language": [
7
- "en",
8
- "zh"
9
- ],
10
- "max_tokens": 512,
11
- "model_src": {
12
- "huggingface": {
13
- "model_id": "BAAI/bge-reranker-large",
14
- "model_revision": "27c9168d479987529781de8474dff94d69beca11"
15
- },
16
- "modelscope": {
17
- "model_id": "Xorbits/bge-reranker-large",
18
- "model_revision": "v0.0.1"
19
- }
2
+ {
3
+ "version": 2,
4
+ "model_name": "bge-reranker-large",
5
+ "type": "normal",
6
+ "language": ["en", "zh"],
7
+ "max_tokens": 512,
8
+ "model_specs": [
9
+ {
10
+ "model_format": "pytorch",
11
+ "model_src": {
12
+ "huggingface": {
13
+ "model_id": "BAAI/bge-reranker-large",
14
+ "model_revision": "27c9168d479987529781de8474dff94d69beca11",
15
+ "quantizations": ["none"]
16
+ },
17
+ "modelscope": {
18
+ "model_id": "Xorbits/bge-reranker-large",
19
+ "model_revision": "v0.0.1",
20
+ "quantizations": ["none"]
21
+ }
22
+ }
23
+ }
24
+ ]
25
+ },
26
+ {
27
+ "version": 2,
28
+ "model_name": "bge-reranker-base",
29
+ "type": "normal",
30
+ "language": ["en", "zh"],
31
+ "max_tokens": 512,
32
+ "model_specs": [
33
+ {
34
+ "model_format": "pytorch",
35
+ "model_src": {
36
+ "huggingface": {
37
+ "model_id": "BAAI/bge-reranker-base",
38
+ "model_revision": "465b4b7ddf2be0a020c8ad6e525b9bb1dbb708ae",
39
+ "quantizations": ["none"]
40
+ },
41
+ "modelscope": {
42
+ "model_id": "Xorbits/bge-reranker-base",
43
+ "model_revision": "v0.0.1",
44
+ "quantizations": ["none"]
45
+ }
46
+ }
47
+ }
48
+ ]
49
+ },
50
+ {
51
+ "version": 2,
52
+ "model_name": "bce-reranker-base_v1",
53
+ "type": "normal",
54
+ "language": ["en", "zh"],
55
+ "max_tokens": 512,
56
+ "model_specs": [
57
+ {
58
+ "model_format": "pytorch",
59
+ "model_src": {
60
+ "huggingface": {
61
+ "model_id": "maidalun1020/bce-reranker-base_v1",
62
+ "model_revision": "eaa31a577a0574e87a08959bd229ca14ce1b5496",
63
+ "quantizations": ["none"]
64
+ },
65
+ "modelscope": {
66
+ "model_id": "maidalun/bce-reranker-base_v1",
67
+ "model_revision": "v0.0.1",
68
+ "quantizations": ["none"]
69
+ }
70
+ }
71
+ }
72
+ ]
73
+ },
74
+ {
75
+ "version": 2,
76
+ "model_name": "bge-reranker-v2-m3",
77
+ "type": "normal",
78
+ "language": ["en", "zh", "multilingual"],
79
+ "max_tokens": 8192,
80
+ "model_specs": [
81
+ {
82
+ "model_format": "pytorch",
83
+ "model_src": {
84
+ "huggingface": {
85
+ "model_id": "BAAI/bge-reranker-v2-m3",
86
+ "model_revision": "12e974610ba9083ed95f3edf08d7e899581f4de4",
87
+ "quantizations": ["none"]
88
+ },
89
+ "modelscope": {
90
+ "model_id": "AI-ModelScope/bge-reranker-v2-m3",
91
+ "quantizations": ["none"]
92
+ }
93
+ }
94
+ }
95
+ ]
96
+ },
97
+ {
98
+ "version": 2,
99
+ "model_name": "bge-reranker-v2-gemma",
100
+ "type": "LLM-based",
101
+ "language": ["en", "zh", "multilingual"],
102
+ "max_tokens": 8192,
103
+ "model_specs": [
104
+ {
105
+ "model_format": "pytorch",
106
+ "model_src": {
107
+ "huggingface": {
108
+ "model_id": "BAAI/bge-reranker-v2-gemma",
109
+ "model_revision": "1787044f8b6fb740a9de4557c3a12377f84d9e17",
110
+ "quantizations": ["none"]
111
+ },
112
+ "modelscope": {
113
+ "model_id": "AI-ModelScope/bge-reranker-v2-gemma",
114
+ "quantizations": ["none"]
115
+ }
116
+ }
117
+ }
118
+ ]
119
+ },
120
+ {
121
+ "version": 2,
122
+ "model_name": "bge-reranker-v2-minicpm-layerwise",
123
+ "type": "LLM-based layerwise",
124
+ "language": ["en", "zh", "multilingual"],
125
+ "max_tokens": 2048,
126
+ "model_specs": [
127
+ {
128
+ "model_format": "pytorch",
129
+ "model_src": {
130
+ "huggingface": {
131
+ "model_id": "BAAI/bge-reranker-v2-minicpm-layerwise",
132
+ "model_revision": "47b5332b296c4d8cb6ee2c60502cc62a0d708881",
133
+ "quantizations": ["none"]
134
+ },
135
+ "modelscope": {
136
+ "model_id": "mirror013/bge-reranker-v2-minicpm-layerwise",
137
+ "quantizations": ["none"]
138
+ }
139
+ }
140
+ }
141
+ ]
142
+ },
143
+ {
144
+ "version": 2,
145
+ "model_name": "jina-reranker-v2",
146
+ "type": "normal",
147
+ "language": ["en", "zh", "multilingual"],
148
+ "max_tokens": 1024,
149
+ "model_specs": [
150
+ {
151
+ "model_format": "pytorch",
152
+ "model_src": {
153
+ "huggingface": {
154
+ "model_id": "jinaai/jina-reranker-v2-base-multilingual",
155
+ "model_revision": "298e48cada4a9318650d7fbd795f63827f884087",
156
+ "quantizations": ["none"]
157
+ }
158
+ }
159
+ }
160
+ ]
161
+ },
162
+ {
163
+ "version": 2,
164
+ "model_name": "minicpm-reranker",
165
+ "type": "normal",
166
+ "language": ["en", "zh"],
167
+ "max_tokens": 1024,
168
+ "model_specs": [
169
+ {
170
+ "model_format": "pytorch",
171
+ "model_src": {
172
+ "huggingface": {
173
+ "model_id": "openbmb/MiniCPM-Reranker",
174
+ "model_revision": "5d2fd7345b6444c89d4c0fa59c92272888f3f2d0",
175
+ "quantizations": ["none"]
176
+ },
177
+ "modelscope": {
178
+ "model_id": "OpenBMB/MiniCPM-Reranker",
179
+ "quantizations": ["none"]
180
+ }
181
+ }
182
+ }
183
+ ]
184
+ },
185
+ {
186
+ "version": 2,
187
+ "model_name": "Qwen3-Reranker-0.6B",
188
+ "type": "normal",
189
+ "language": ["en", "zh"],
190
+ "max_tokens": 32768,
191
+ "model_specs": [
192
+ {
193
+ "model_format": "pytorch",
194
+ "model_src": {
195
+ "huggingface": {
196
+ "model_id": "Qwen/Qwen3-Reranker-0.6B",
197
+ "model_revision": "6e9e69830b95c52b5fd889b7690dda3329508de3",
198
+ "quantizations": ["none"]
199
+ },
200
+ "modelscope": {
201
+ "model_id": "Qwen/Qwen3-Reranker-0.6B",
202
+ "quantizations": ["none"]
203
+ }
204
+ }
205
+ }
206
+ ]
207
+ },
208
+ {
209
+ "version": 2,
210
+ "model_name": "Qwen3-Reranker-4B",
211
+ "type": "normal",
212
+ "language": ["en", "zh"],
213
+ "max_tokens": 32768,
214
+ "model_specs": [
215
+ {
216
+ "model_format": "pytorch",
217
+ "model_src": {
218
+ "huggingface": {
219
+ "model_id": "Qwen/Qwen3-Reranker-4B",
220
+ "model_revision": "f16fc5d5d2b9b1d0db8280929242745d79794ef5",
221
+ "quantizations": ["none"]
222
+ },
223
+ "modelscope": {
224
+ "model_id": "Qwen/Qwen3-Reranker-4B",
225
+ "quantizations": ["none"]
226
+ }
227
+ }
228
+ }
229
+ ]
230
+ },
231
+ {
232
+ "version": 2,
233
+ "model_name": "Qwen3-Reranker-8B",
234
+ "type": "normal",
235
+ "language": ["en", "zh"],
236
+ "max_tokens": 32768,
237
+ "model_specs": [
238
+ {
239
+ "model_format": "pytorch",
240
+ "model_src": {
241
+ "huggingface": {
242
+ "model_id": "Qwen/Qwen3-Reranker-8B",
243
+ "model_revision": "5fa94080caafeaa45a15d11f969d7978e087a3db",
244
+ "quantizations": ["none"]
245
+ },
246
+ "modelscope": {
247
+ "model_id": "Qwen/Qwen3-Reranker-8B",
248
+ "quantizations": ["none"]
249
+ }
250
+ }
251
+ }
252
+ ]
20
253
  }
21
- },
22
- {
23
- "version": 2,
24
- "model_name": "bge-reranker-base",
25
- "type": "normal",
26
- "language": [
27
- "en",
28
- "zh"
29
- ],
30
- "max_tokens": 512,
31
- "model_src": {
32
- "huggingface": {
33
- "model_id": "BAAI/bge-reranker-base",
34
- "model_revision": "465b4b7ddf2be0a020c8ad6e525b9bb1dbb708ae"
35
- },
36
- "modelscope": {
37
- "model_id": "Xorbits/bge-reranker-base",
38
- "model_revision": "v0.0.1"
39
- }
40
- }
41
- },
42
- {
43
- "version": 2,
44
- "model_name": "bce-reranker-base_v1",
45
- "type": "normal",
46
- "language": [
47
- "en",
48
- "zh"
49
- ],
50
- "max_tokens": 512,
51
- "model_src": {
52
- "huggingface": {
53
- "model_id": "maidalun1020/bce-reranker-base_v1",
54
- "model_revision": "eaa31a577a0574e87a08959bd229ca14ce1b5496"
55
- },
56
- "modelscope": {
57
- "model_id": "maidalun/bce-reranker-base_v1",
58
- "model_revision": "v0.0.1"
59
- }
60
- }
61
- },
62
- {
63
- "version": 2,
64
- "model_name": "bge-reranker-v2-m3",
65
- "type": "normal",
66
- "language": [
67
- "en",
68
- "zh",
69
- "multilingual"
70
- ],
71
- "max_tokens": 8192,
72
- "model_src": {
73
- "huggingface": {
74
- "model_id": "BAAI/bge-reranker-v2-m3",
75
- "model_revision": "12e974610ba9083ed95f3edf08d7e899581f4de4"
76
- },
77
- "modelscope": {
78
- "model_id": "AI-ModelScope/bge-reranker-v2-m3"
79
- }
80
- }
81
- },
82
- {
83
- "version": 2,
84
- "model_name": "bge-reranker-v2-gemma",
85
- "type": "LLM-based",
86
- "language": [
87
- "en",
88
- "zh",
89
- "multilingual"
90
- ],
91
- "max_tokens": 8192,
92
- "model_src": {
93
- "huggingface": {
94
- "model_id": "BAAI/bge-reranker-v2-gemma",
95
- "model_revision": "1787044f8b6fb740a9de4557c3a12377f84d9e17"
96
- },
97
- "modelscope": {
98
- "model_id": "AI-ModelScope/bge-reranker-v2-gemma"
99
- }
100
- }
101
- },
102
- {
103
- "version": 2,
104
- "model_name": "bge-reranker-v2-minicpm-layerwise",
105
- "type": "LLM-based layerwise",
106
- "language": [
107
- "en",
108
- "zh",
109
- "multilingual"
110
- ],
111
- "max_tokens": 2048,
112
- "model_src": {
113
- "huggingface": {
114
- "model_id": "BAAI/bge-reranker-v2-minicpm-layerwise",
115
- "model_revision": "47b5332b296c4d8cb6ee2c60502cc62a0d708881"
116
- },
117
- "modelscope": {
118
- "model_id": "mirror013/bge-reranker-v2-minicpm-layerwise"
119
- }
120
- }
121
- },
122
- {
123
- "version": 2,
124
- "model_name": "jina-reranker-v2",
125
- "type": "normal",
126
- "language": [
127
- "en",
128
- "zh",
129
- "multilingual"
130
- ],
131
- "max_tokens": 1024,
132
- "model_src": {
133
- "huggingface": {
134
- "model_id": "jinaai/jina-reranker-v2-base-multilingual",
135
- "model_revision": "298e48cada4a9318650d7fbd795f63827f884087"
136
- }
137
- }
138
- },
139
- {
140
- "version": 2,
141
- "model_name": "minicpm-reranker",
142
- "type": "normal",
143
- "language": [
144
- "en",
145
- "zh"
146
- ],
147
- "max_tokens": 1024,
148
- "model_src": {
149
- "huggingface": {
150
- "model_id": "openbmb/MiniCPM-Reranker",
151
- "model_revision": "5d2fd7345b6444c89d4c0fa59c92272888f3f2d0"
152
- },
153
- "modelscope": {
154
- "model_id": "OpenBMB/MiniCPM-Reranker"
155
- }
156
- }
157
- },
158
- {
159
- "version": 2,
160
- "model_name": "Qwen3-Reranker-0.6B",
161
- "type": "normal",
162
- "language": [
163
- "en",
164
- "zh"
165
- ],
166
- "max_tokens": 32768,
167
- "model_src": {
168
- "huggingface": {
169
- "model_id": "Qwen/Qwen3-Reranker-0.6B",
170
- "model_revision": "6e9e69830b95c52b5fd889b7690dda3329508de3"
171
- },
172
- "modelscope": {
173
- "model_id": "Qwen/Qwen3-Reranker-0.6B"
174
- }
175
- }
176
- },
177
- {
178
- "version": 2,
179
- "model_name": "Qwen3-Reranker-4B",
180
- "type": "normal",
181
- "language": [
182
- "en",
183
- "zh"
184
- ],
185
- "max_tokens": 32768,
186
- "model_src": {
187
- "huggingface": {
188
- "model_id": "Qwen/Qwen3-Reranker-4B",
189
- "model_revision": "f16fc5d5d2b9b1d0db8280929242745d79794ef5"
190
- },
191
- "modelscope": {
192
- "model_id": "Qwen/Qwen3-Reranker-4B"
193
- }
194
- }
195
- },
196
- {
197
- "version": 2,
198
- "model_name": "Qwen3-Reranker-8B",
199
- "type": "normal",
200
- "language": [
201
- "en",
202
- "zh"
203
- ],
204
- "max_tokens": 32768,
205
- "model_src": {
206
- "huggingface": {
207
- "model_id": "Qwen/Qwen3-Reranker-8B",
208
- "model_revision": "5fa94080caafeaa45a15d11f969d7978e087a3db"
209
- },
210
- "modelscope": {
211
- "model_id": "Qwen/Qwen3-Reranker-8B"
212
- }
213
- }
214
- }
215
254
  ]
@@ -0,0 +1,137 @@
1
+ # Copyright 2022-2025 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Type, Union
17
+
18
+ if TYPE_CHECKING:
19
+ from .core import RerankModel, RerankModelFamilyV2, RerankSpecV1
20
+
21
+ FLAG_RERANKER_CLASSES: List[Type["RerankModel"]] = []
22
+ SENTENCE_TRANSFORMER_CLASSES: List[Type["RerankModel"]] = []
23
+ VLLM_CLASSES: List[Type["RerankModel"]] = []
24
+
25
+ BUILTIN_RERANK_MODELS: Dict[str, "RerankModelFamilyV2"] = {}
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
def match_rerank(
    model_name: str,
    model_format: Optional[str] = None,
    quantization: Optional[str] = None,
    download_hub: Optional[
        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
    ] = None,
) -> "RerankModelFamilyV2":
    """Resolve a rerank model name to a family holding exactly one spec.

    Built-in models are looked up first, then user-defined ones. The
    candidate specs are filtered by hub, format and quantization, and the
    first match is returned inside a copy of the family.

    Args:
        model_name: Name of a built-in or user-defined rerank model.
        model_format: If given, only specs with this exact format match.
        quantization: If given, only specs whose quantization equals it
            (case-insensitively) match.
        download_hub: Hub preference. ``"modelscope"`` puts modelscope
            specs ahead of huggingface ones; ``"huggingface"`` restricts
            the candidates to huggingface specs. Any other value (including
            ``None``) defers to ``download_from_modelscope()``.

    Returns:
        A copy of the matched family whose ``model_specs`` contains only
        the selected (copied) spec.

    Raises:
        ValueError: if the model name is unknown, or no spec satisfies the
            requested format/quantization.
    """
    from ..utils import download_from_modelscope
    from .custom import get_user_defined_reranks

    target_family = None

    if model_name in BUILTIN_RERANK_MODELS:
        target_family = BUILTIN_RERANK_MODELS[model_name]
    else:
        for model_family in get_user_defined_reranks():
            if model_name == model_family.model_name:
                target_family = model_family
                break

    if target_family is None:
        raise ValueError(
            f"Rerank model {model_name} not found, available models: {BUILTIN_RERANK_MODELS.keys()}"
        )

    # An explicit ``download_hub="huggingface"`` must win over the
    # environment-derived default; previously the environment check could
    # silently override the caller's explicit choice.
    prefer_modelscope = download_hub == "modelscope" or (
        download_hub != "huggingface" and download_from_modelscope()
    )
    if prefer_modelscope:
        # Modelscope specs first, huggingface specs as fallback.
        specs = [
            x for x in target_family.model_specs if x.model_hub == "modelscope"
        ] + [x for x in target_family.model_specs if x.model_hub == "huggingface"]
    else:
        specs = [x for x in target_family.model_specs if x.model_hub == "huggingface"]

    def _match_quantization(q: Optional[str], _quantization: str) -> Optional[str]:
        # Quantization names may mix upper and lower case, so compare
        # case-insensitively and return the spec's own spelling.
        if q is None:
            return None
        return _quantization if q.lower() == _quantization.lower() else None

    def _apply_format_to_model_id(_spec: "RerankSpecV1", q: str) -> "RerankSpecV1":
        # Some models use a different model id per quantization; a `{` in
        # the id marks a template to be filled with the quantization.
        if _spec.model_id and "{" in _spec.model_id:
            _spec.model_id = _spec.model_id.format(quantization=q)
        return _spec

    for spec in specs:
        matched_quantization = _match_quantization(quantization, spec.quantization)
        format_mismatch = bool(model_format) and model_format != spec.model_format
        quantization_mismatch = bool(quantization) and matched_quantization is None
        if format_mismatch or quantization_mismatch:
            continue
        # Copy the spec so that formatting the model id never modifies the
        # registered original.
        spec = spec.copy()
        _family = target_family.copy()
        if quantization:
            _q = matched_quantization
        else:
            # TODO: If user does not specify quantization, just use the first one
            _q = "none" if spec.model_format == "pytorch" else spec.quantization
        _family.model_specs = [_apply_format_to_model_id(spec, _q)]
        return _family

    raise ValueError(
        f"Rerank model {model_name} with format {model_format} and quantization {quantization} not found."
    )
104
+
105
+
106
# { rerank model name -> { engine name -> engine params } }
# Each engine-params dict read below carries the keys "model_name",
# "model_format", "quantization" and "rerank_class".
RERANK_ENGINES: Dict[str, Dict[str, List[Dict[str, Type["RerankModel"]]]]] = {}
SUPPORTED_ENGINES: Dict[str, List[Type["RerankModel"]]] = {}


def check_engine_by_model_name_and_engine(
    model_engine: str,
    model_name: str,
    model_format: Optional[str],
    quantization: Optional[str],
) -> Type["RerankModel"]:
    """Return the rerank class that serves ``model_name`` on ``model_engine``.

    Args:
        model_engine: Engine name; matched case-insensitively against the
            engines registered for the model.
        model_name: Key into ``RERANK_ENGINES``.
        model_format: If truthy, must equal the registered format exactly.
        quantization: If truthy, must equal the registered quantization,
            ignoring case (the same rule ``match_rerank`` applies).

    Returns:
        The ``rerank_class`` of the first matching engine-params entry.

    Raises:
        ValueError: if the model is unknown, the engine is not registered
            for it, or no entry matches the format/quantization.
    """

    def _same_quantization(requested, registered) -> bool:
        # Quantization names may differ only by letter case; treat those as
        # equal. Non-string values fall back to plain equality.
        if requested == registered:
            return True
        if isinstance(requested, str) and isinstance(registered, str):
            return requested.lower() == registered.lower()
        return False

    if model_name not in RERANK_ENGINES:
        raise ValueError(f"Model {model_name} not found.")
    engines = RERANK_ENGINES[model_name]

    # Resolve the caller's spelling to the registered engine name.
    wanted = model_engine.lower()
    model_engine = next(
        (engine for engine in engines if engine.lower() == wanted), model_engine
    )
    if model_engine not in engines:
        raise ValueError(f"Model {model_name} cannot be run on engine {model_engine}.")

    for param in engines[model_engine]:
        if model_name != param["model_name"]:
            continue
        if model_format and model_format != param["model_format"]:
            continue
        if quantization and not _same_quantization(quantization, param["quantization"]):
            continue
        return param["rerank_class"]
    raise ValueError(f"Model {model_name} cannot be run on engine {model_engine}.")
@@ -0,0 +1,13 @@
1
+ # Copyright 2022-2025 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.