xinference 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff shows the content changes between two package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.


Files changed (136)
  1. xinference/_version.py +3 -3
  2. xinference/client/restful/async_restful_client.py +8 -13
  3. xinference/client/restful/restful_client.py +6 -2
  4. xinference/core/chat_interface.py +6 -4
  5. xinference/core/media_interface.py +5 -0
  6. xinference/core/model.py +1 -5
  7. xinference/core/supervisor.py +117 -68
  8. xinference/core/worker.py +49 -37
  9. xinference/deploy/test/test_cmdline.py +2 -6
  10. xinference/model/audio/__init__.py +26 -23
  11. xinference/model/audio/chattts.py +3 -2
  12. xinference/model/audio/core.py +49 -98
  13. xinference/model/audio/cosyvoice.py +3 -2
  14. xinference/model/audio/custom.py +28 -73
  15. xinference/model/audio/f5tts.py +3 -2
  16. xinference/model/audio/f5tts_mlx.py +3 -2
  17. xinference/model/audio/fish_speech.py +3 -2
  18. xinference/model/audio/funasr.py +17 -4
  19. xinference/model/audio/kokoro.py +3 -2
  20. xinference/model/audio/megatts.py +3 -2
  21. xinference/model/audio/melotts.py +3 -2
  22. xinference/model/audio/model_spec.json +572 -171
  23. xinference/model/audio/utils.py +0 -6
  24. xinference/model/audio/whisper.py +3 -2
  25. xinference/model/audio/whisper_mlx.py +3 -2
  26. xinference/model/cache_manager.py +141 -0
  27. xinference/model/core.py +6 -49
  28. xinference/model/custom.py +174 -0
  29. xinference/model/embedding/__init__.py +67 -56
  30. xinference/model/embedding/cache_manager.py +35 -0
  31. xinference/model/embedding/core.py +104 -84
  32. xinference/model/embedding/custom.py +55 -78
  33. xinference/model/embedding/embed_family.py +80 -31
  34. xinference/model/embedding/flag/core.py +21 -5
  35. xinference/model/embedding/llama_cpp/__init__.py +0 -0
  36. xinference/model/embedding/llama_cpp/core.py +234 -0
  37. xinference/model/embedding/model_spec.json +968 -103
  38. xinference/model/embedding/sentence_transformers/core.py +30 -20
  39. xinference/model/embedding/vllm/core.py +11 -5
  40. xinference/model/flexible/__init__.py +8 -2
  41. xinference/model/flexible/core.py +26 -119
  42. xinference/model/flexible/custom.py +69 -0
  43. xinference/model/flexible/launchers/image_process_launcher.py +1 -0
  44. xinference/model/flexible/launchers/modelscope_launcher.py +5 -1
  45. xinference/model/flexible/launchers/transformers_launcher.py +15 -3
  46. xinference/model/flexible/launchers/yolo_launcher.py +5 -1
  47. xinference/model/image/__init__.py +20 -20
  48. xinference/model/image/cache_manager.py +62 -0
  49. xinference/model/image/core.py +70 -182
  50. xinference/model/image/custom.py +28 -72
  51. xinference/model/image/model_spec.json +402 -119
  52. xinference/model/image/ocr/got_ocr2.py +3 -2
  53. xinference/model/image/stable_diffusion/core.py +22 -7
  54. xinference/model/image/stable_diffusion/mlx.py +6 -6
  55. xinference/model/image/utils.py +2 -2
  56. xinference/model/llm/__init__.py +71 -94
  57. xinference/model/llm/cache_manager.py +292 -0
  58. xinference/model/llm/core.py +37 -111
  59. xinference/model/llm/custom.py +88 -0
  60. xinference/model/llm/llama_cpp/core.py +5 -7
  61. xinference/model/llm/llm_family.json +16260 -8151
  62. xinference/model/llm/llm_family.py +138 -839
  63. xinference/model/llm/lmdeploy/core.py +5 -7
  64. xinference/model/llm/memory.py +3 -4
  65. xinference/model/llm/mlx/core.py +6 -8
  66. xinference/model/llm/reasoning_parser.py +3 -1
  67. xinference/model/llm/sglang/core.py +32 -14
  68. xinference/model/llm/transformers/chatglm.py +3 -7
  69. xinference/model/llm/transformers/core.py +49 -27
  70. xinference/model/llm/transformers/deepseek_v2.py +2 -2
  71. xinference/model/llm/transformers/gemma3.py +2 -2
  72. xinference/model/llm/transformers/multimodal/cogagent.py +2 -2
  73. xinference/model/llm/transformers/multimodal/deepseek_vl2.py +2 -2
  74. xinference/model/llm/transformers/multimodal/gemma3.py +2 -2
  75. xinference/model/llm/transformers/multimodal/glm4_1v.py +167 -0
  76. xinference/model/llm/transformers/multimodal/glm4v.py +2 -2
  77. xinference/model/llm/transformers/multimodal/intern_vl.py +2 -2
  78. xinference/model/llm/transformers/multimodal/minicpmv26.py +3 -3
  79. xinference/model/llm/transformers/multimodal/ovis2.py +2 -2
  80. xinference/model/llm/transformers/multimodal/qwen-omni.py +2 -2
  81. xinference/model/llm/transformers/multimodal/qwen2_audio.py +2 -2
  82. xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
  83. xinference/model/llm/transformers/opt.py +3 -7
  84. xinference/model/llm/utils.py +34 -49
  85. xinference/model/llm/vllm/core.py +77 -27
  86. xinference/model/llm/vllm/xavier/engine.py +5 -3
  87. xinference/model/llm/vllm/xavier/scheduler.py +10 -6
  88. xinference/model/llm/vllm/xavier/transfer.py +1 -1
  89. xinference/model/rerank/__init__.py +26 -25
  90. xinference/model/rerank/core.py +47 -87
  91. xinference/model/rerank/custom.py +25 -71
  92. xinference/model/rerank/model_spec.json +158 -33
  93. xinference/model/rerank/utils.py +2 -2
  94. xinference/model/utils.py +115 -54
  95. xinference/model/video/__init__.py +13 -17
  96. xinference/model/video/core.py +44 -102
  97. xinference/model/video/diffusers.py +4 -3
  98. xinference/model/video/model_spec.json +90 -21
  99. xinference/types.py +5 -3
  100. xinference/web/ui/build/asset-manifest.json +3 -3
  101. xinference/web/ui/build/index.html +1 -1
  102. xinference/web/ui/build/static/js/main.7d24df53.js +3 -0
  103. xinference/web/ui/build/static/js/main.7d24df53.js.map +1 -0
  104. xinference/web/ui/node_modules/.cache/babel-loader/2704ff66a5f73ca78b341eb3edec60154369df9d87fbc8c6dd60121abc5e1b0a.json +1 -0
  105. xinference/web/ui/node_modules/.cache/babel-loader/607dfef23d33e6b594518c0c6434567639f24f356b877c80c60575184ec50ed0.json +1 -0
  106. xinference/web/ui/node_modules/.cache/babel-loader/9be3d56173aacc3efd0b497bcb13c4f6365de30069176ee9403b40e717542326.json +1 -0
  107. xinference/web/ui/node_modules/.cache/babel-loader/9f9dd6c32c78a222d07da5987ae902effe16bcf20aac00774acdccc4de3c9ff2.json +1 -0
  108. xinference/web/ui/node_modules/.cache/babel-loader/b2ab5ee972c60d15eb9abf5845705f8ab7e1d125d324d9a9b1bcae5d6fd7ffb2.json +1 -0
  109. xinference/web/ui/src/locales/en.json +0 -1
  110. xinference/web/ui/src/locales/ja.json +0 -1
  111. xinference/web/ui/src/locales/ko.json +0 -1
  112. xinference/web/ui/src/locales/zh.json +0 -1
  113. {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/METADATA +9 -11
  114. {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/RECORD +119 -119
  115. xinference/model/audio/model_spec_modelscope.json +0 -231
  116. xinference/model/embedding/model_spec_modelscope.json +0 -293
  117. xinference/model/embedding/utils.py +0 -18
  118. xinference/model/image/model_spec_modelscope.json +0 -375
  119. xinference/model/llm/llama_cpp/memory.py +0 -457
  120. xinference/model/llm/llm_family_csghub.json +0 -56
  121. xinference/model/llm/llm_family_modelscope.json +0 -8700
  122. xinference/model/llm/llm_family_openmind_hub.json +0 -1019
  123. xinference/model/rerank/model_spec_modelscope.json +0 -85
  124. xinference/model/video/model_spec_modelscope.json +0 -184
  125. xinference/web/ui/build/static/js/main.9b12b7f9.js +0 -3
  126. xinference/web/ui/build/static/js/main.9b12b7f9.js.map +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/1460361af6975e63576708039f1cb732faf9c672d97c494d4055fc6331460be0.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/4efd8dda58fda83ed9546bf2f587df67f8d98e639117bee2d9326a9a1d9bebb2.json +0 -1
  129. xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +0 -1
  130. xinference/web/ui/node_modules/.cache/babel-loader/5b2dafe5aa9e1105e0244a2b6751807342fa86aa0144b4e84d947a1686102715.json +0 -1
  131. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +0 -1
  132. /xinference/web/ui/build/static/js/{main.9b12b7f9.js.LICENSE.txt → main.7d24df53.js.LICENSE.txt} +0 -0
  133. {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/WHEEL +0 -0
  134. {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/entry_points.txt +0 -0
  135. {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/licenses/LICENSE +0 -0
  136. {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/top_level.txt +0 -0
xinference/model/rerank/custom.py CHANGED
@@ -12,98 +12,52 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  import logging
- import os
- from threading import Lock
  from typing import List, Literal, Optional

- from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
- from .core import RerankModelSpec
+ from ..custom import ModelRegistry
+ from .core import RerankModelFamilyV2

  logger = logging.getLogger(__name__)


- UD_RERANK_LOCK = Lock()
-
-
- class CustomRerankModelSpec(RerankModelSpec):
+ class CustomRerankModelFamilyV2(RerankModelFamilyV2):
+     version: Literal[2] = 2
      model_id: Optional[str]  # type: ignore
      model_revision: Optional[str]  # type: ignore
      model_uri: Optional[str]
      model_type: Literal["rerank"] = "rerank"  # for frontend


- UD_RERANKS: List[CustomRerankModelSpec] = []
+ UD_RERANKS: List[CustomRerankModelFamilyV2] = []


- def get_user_defined_reranks() -> List[CustomRerankModelSpec]:
-     with UD_RERANK_LOCK:
-         return UD_RERANKS.copy()
+ class RerankModelRegistry(ModelRegistry):
+     model_type = "rerank"

+     def __init__(self):
+         from . import BUILTIN_RERANK_MODELS

- def register_rerank(model_spec: CustomRerankModelSpec, persist: bool):
-     from ...constants import XINFERENCE_MODEL_DIR
-     from ..utils import is_valid_model_name, is_valid_model_uri
-     from . import BUILTIN_RERANK_MODELS, MODELSCOPE_RERANK_MODELS
+         super().__init__()
+         self.models = UD_RERANKS
+         self.builtin_models = list(BUILTIN_RERANK_MODELS.keys())

-     if not is_valid_model_name(model_spec.model_name):
-         raise ValueError(f"Invalid model name {model_spec.model_name}.")

-     model_uri = model_spec.model_uri
-     if model_uri and not is_valid_model_uri(model_uri):
-         raise ValueError(f"Invalid model URI {model_uri}.")
+ def get_user_defined_reranks() -> List[CustomRerankModelFamilyV2]:
+     from ..custom import RegistryManager

-     with UD_RERANK_LOCK:
-         for model_name in (
-             list(BUILTIN_RERANK_MODELS.keys())
-             + list(MODELSCOPE_RERANK_MODELS.keys())
-             + [spec.model_name for spec in UD_RERANKS]
-         ):
-             if model_spec.model_name == model_name:
-                 raise ValueError(
-                     f"Model name conflicts with existing model {model_spec.model_name}"
-                 )
+     registry = RegistryManager.get_registry("rerank")
+     return registry.get_custom_models()

-         UD_RERANKS.append(model_spec)

-     if persist:
-         persist_path = os.path.join(
-             XINFERENCE_MODEL_DIR, "rerank", f"{model_spec.model_name}.json"
-         )
-         os.makedirs(os.path.dirname(persist_path), exist_ok=True)
-         with open(persist_path, mode="w") as fd:
-             fd.write(model_spec.json())
+ def register_rerank(model_spec: CustomRerankModelFamilyV2, persist: bool):
+     from ..custom import RegistryManager
+
+     registry = RegistryManager.get_registry("rerank")
+     registry.register(model_spec, persist)


  def unregister_rerank(model_name: str, raise_error: bool = True):
-     with UD_RERANK_LOCK:
-         model_spec = None
-         for i, f in enumerate(UD_RERANKS):
-             if f.model_name == model_name:
-                 model_spec = f
-                 break
-         if model_spec:
-             UD_RERANKS.remove(model_spec)
-
-             persist_path = os.path.join(
-                 XINFERENCE_MODEL_DIR, "rerank", f"{model_spec.model_name}.json"
-             )
-             if os.path.exists(persist_path):
-                 os.remove(persist_path)
-
-             cache_dir = os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-             if os.path.exists(cache_dir):
-                 logger.warning(
-                     f"Remove the cache of user-defined model {model_spec.model_name}. "
-                     f"Cache directory: {cache_dir}"
-                 )
-                 if os.path.islink(cache_dir):
-                     os.remove(cache_dir)
-                 else:
-                     logger.warning(
-                         f"Cache directory is not a soft link, please remove it manually."
-                     )
-         else:
-             if raise_error:
-                 raise ValueError(f"Model {model_name} not found")
-             else:
-                 logger.warning(f"Custom rerank model {model_name} not found")
+     from ..custom import RegistryManager
+
+     registry = RegistryManager.get_registry("rerank")
+     registry.unregister(model_name, raise_error)
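
Note: for context, a minimal sketch of registering a user-defined rerank model through the new registry path. The field set is assumed from the CustomRerankModelFamilyV2 definition above and the builtin model_spec.json entries; the model name and URI are hypothetical, and RerankModelFamilyV2 may require further fields.

    from xinference.model.rerank.custom import (
        CustomRerankModelFamilyV2,
        register_rerank,
        unregister_rerank,
    )

    # Hypothetical spec; "type"/"language"/"max_tokens" mirror the builtin
    # entries in model_spec.json, and model_uri points at a local copy.
    spec = CustomRerankModelFamilyV2(
        model_name="my-reranker",
        type="normal",
        language=["en", "zh"],
        max_tokens=512,
        model_id=None,
        model_revision=None,
        model_uri="file:///data/models/my-reranker",
    )

    # persist=True writes the spec JSON under XINFERENCE_MODEL_DIR so it
    # survives restarts; validation and duplicate-name checks now live in
    # the shared RegistryManager instead of module-level locks here.
    register_rerank(spec, persist=True)
    unregister_rerank("my-reranker")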
xinference/model/rerank/model_spec.json CHANGED
@@ -1,90 +1,215 @@
  [
      {
+         "version": 2,
          "model_name": "bge-reranker-large",
          "type": "normal",
-         "language": ["en", "zh"],
+         "language": [
+             "en",
+             "zh"
+         ],
          "max_tokens": 512,
-         "model_id": "BAAI/bge-reranker-large",
-         "model_revision": "27c9168d479987529781de8474dff94d69beca11"
+         "model_src": {
+             "huggingface": {
+                 "model_id": "BAAI/bge-reranker-large",
+                 "model_revision": "27c9168d479987529781de8474dff94d69beca11"
+             },
+             "modelscope": {
+                 "model_id": "Xorbits/bge-reranker-large",
+                 "model_revision": "v0.0.1"
+             }
+         }
      },
      {
+         "version": 2,
          "model_name": "bge-reranker-base",
          "type": "normal",
-         "language": ["en", "zh"],
+         "language": [
+             "en",
+             "zh"
+         ],
          "max_tokens": 512,
-         "model_id": "BAAI/bge-reranker-base",
-         "model_revision": "465b4b7ddf2be0a020c8ad6e525b9bb1dbb708ae"
+         "model_src": {
+             "huggingface": {
+                 "model_id": "BAAI/bge-reranker-base",
+                 "model_revision": "465b4b7ddf2be0a020c8ad6e525b9bb1dbb708ae"
+             },
+             "modelscope": {
+                 "model_id": "Xorbits/bge-reranker-base",
+                 "model_revision": "v0.0.1"
+             }
+         }
      },
      {
+         "version": 2,
          "model_name": "bce-reranker-base_v1",
          "type": "normal",
-         "language": ["en", "zh"],
+         "language": [
+             "en",
+             "zh"
+         ],
          "max_tokens": 512,
-         "model_id": "maidalun1020/bce-reranker-base_v1",
-         "model_revision": "eaa31a577a0574e87a08959bd229ca14ce1b5496"
+         "model_src": {
+             "huggingface": {
+                 "model_id": "maidalun1020/bce-reranker-base_v1",
+                 "model_revision": "eaa31a577a0574e87a08959bd229ca14ce1b5496"
+             },
+             "modelscope": {
+                 "model_id": "maidalun/bce-reranker-base_v1",
+                 "model_revision": "v0.0.1"
+             }
+         }
      },
      {
+         "version": 2,
          "model_name": "bge-reranker-v2-m3",
          "type": "normal",
-         "language": ["en", "zh", "multilingual"],
+         "language": [
+             "en",
+             "zh",
+             "multilingual"
+         ],
          "max_tokens": 8192,
-         "model_id": "BAAI/bge-reranker-v2-m3",
-         "model_revision": "12e974610ba9083ed95f3edf08d7e899581f4de4"
+         "model_src": {
+             "huggingface": {
+                 "model_id": "BAAI/bge-reranker-v2-m3",
+                 "model_revision": "12e974610ba9083ed95f3edf08d7e899581f4de4"
+             },
+             "modelscope": {
+                 "model_id": "AI-ModelScope/bge-reranker-v2-m3"
+             }
+         }
      },
      {
+         "version": 2,
          "model_name": "bge-reranker-v2-gemma",
          "type": "LLM-based",
-         "language": ["en", "zh", "multilingual"],
+         "language": [
+             "en",
+             "zh",
+             "multilingual"
+         ],
          "max_tokens": 8192,
-         "model_id": "BAAI/bge-reranker-v2-gemma",
-         "model_revision": "1787044f8b6fb740a9de4557c3a12377f84d9e17"
+         "model_src": {
+             "huggingface": {
+                 "model_id": "BAAI/bge-reranker-v2-gemma",
+                 "model_revision": "1787044f8b6fb740a9de4557c3a12377f84d9e17"
+             },
+             "modelscope": {
+                 "model_id": "AI-ModelScope/bge-reranker-v2-gemma"
+             }
+         }
      },
      {
+         "version": 2,
          "model_name": "bge-reranker-v2-minicpm-layerwise",
          "type": "LLM-based layerwise",
-         "language": ["en", "zh", "multilingual"],
+         "language": [
+             "en",
+             "zh",
+             "multilingual"
+         ],
          "max_tokens": 2048,
-         "model_id": "BAAI/bge-reranker-v2-minicpm-layerwise",
-         "model_revision": "47b5332b296c4d8cb6ee2c60502cc62a0d708881"
+         "model_src": {
+             "huggingface": {
+                 "model_id": "BAAI/bge-reranker-v2-minicpm-layerwise",
+                 "model_revision": "47b5332b296c4d8cb6ee2c60502cc62a0d708881"
+             },
+             "modelscope": {
+                 "model_id": "mirror013/bge-reranker-v2-minicpm-layerwise"
+             }
+         }
      },
      {
+         "version": 2,
          "model_name": "jina-reranker-v2",
          "type": "normal",
-         "language": ["en", "zh", "multilingual"],
+         "language": [
+             "en",
+             "zh",
+             "multilingual"
+         ],
          "max_tokens": 1024,
-         "model_id": "jinaai/jina-reranker-v2-base-multilingual",
-         "model_revision": "298e48cada4a9318650d7fbd795f63827f884087"
+         "model_src": {
+             "huggingface": {
+                 "model_id": "jinaai/jina-reranker-v2-base-multilingual",
+                 "model_revision": "298e48cada4a9318650d7fbd795f63827f884087"
+             }
+         }
      },
      {
+         "version": 2,
          "model_name": "minicpm-reranker",
          "type": "normal",
-         "language": ["en", "zh"],
+         "language": [
+             "en",
+             "zh"
+         ],
          "max_tokens": 1024,
-         "model_id": "openbmb/MiniCPM-Reranker",
-         "model_revision": "5d2fd7345b6444c89d4c0fa59c92272888f3f2d0"
+         "model_src": {
+             "huggingface": {
+                 "model_id": "openbmb/MiniCPM-Reranker",
+                 "model_revision": "5d2fd7345b6444c89d4c0fa59c92272888f3f2d0"
+             },
+             "modelscope": {
+                 "model_id": "OpenBMB/MiniCPM-Reranker"
+             }
+         }
      },
      {
+         "version": 2,
          "model_name": "Qwen3-Reranker-0.6B",
          "type": "normal",
-         "language": ["en", "zh"],
+         "language": [
+             "en",
+             "zh"
+         ],
          "max_tokens": 32768,
-         "model_id": "Qwen/Qwen3-Reranker-0.6B",
-         "model_revision": "6e9e69830b95c52b5fd889b7690dda3329508de3"
+         "model_src": {
+             "huggingface": {
+                 "model_id": "Qwen/Qwen3-Reranker-0.6B",
+                 "model_revision": "6e9e69830b95c52b5fd889b7690dda3329508de3"
+             },
+             "modelscope": {
+                 "model_id": "Qwen/Qwen3-Reranker-0.6B"
+             }
+         }
      },
      {
+         "version": 2,
          "model_name": "Qwen3-Reranker-4B",
          "type": "normal",
-         "language": ["en", "zh"],
+         "language": [
+             "en",
+             "zh"
+         ],
          "max_tokens": 32768,
-         "model_id": "Qwen/Qwen3-Reranker-4B",
-         "model_revision": "f16fc5d5d2b9b1d0db8280929242745d79794ef5"
+         "model_src": {
+             "huggingface": {
+                 "model_id": "Qwen/Qwen3-Reranker-4B",
+                 "model_revision": "f16fc5d5d2b9b1d0db8280929242745d79794ef5"
+             },
+             "modelscope": {
+                 "model_id": "Qwen/Qwen3-Reranker-4B"
+             }
+         }
      },
      {
+         "version": 2,
          "model_name": "Qwen3-Reranker-8B",
          "type": "normal",
-         "language": ["en", "zh"],
+         "language": [
+             "en",
+             "zh"
+         ],
          "max_tokens": 32768,
-         "model_id": "Qwen/Qwen3-Reranker-8B",
-         "model_revision": "5fa94080caafeaa45a15d11f969d7978e087a3db"
+         "model_src": {
+             "huggingface": {
+                 "model_id": "Qwen/Qwen3-Reranker-8B",
+                 "model_revision": "5fa94080caafeaa45a15d11f969d7978e087a3db"
+             },
+             "modelscope": {
+                 "model_id": "Qwen/Qwen3-Reranker-8B"
+             }
+         }
      }
  ]
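
Note: the per-hub "model_src" layout replaces the separate *_modelscope.json files. At load time each entry is flattened back into one record per hub by flatten_model_src(), added in xinference/model/utils.py below. A minimal sketch of that round trip, using the first entry above:

    from xinference.model.utils import flatten_model_src

    entry = {
        "version": 2,
        "model_name": "bge-reranker-large",
        "type": "normal",
        "language": ["en", "zh"],
        "max_tokens": 512,
        "model_src": {
            "huggingface": {
                "model_id": "BAAI/bge-reranker-large",
                "model_revision": "27c9168d479987529781de8474dff94d69beca11",
            },
            "modelscope": {
                "model_id": "Xorbits/bge-reranker-large",
                "model_revision": "v0.0.1",
            },
        },
    }

    for record in flatten_model_src(entry):
        # Each record keeps the shared fields and gains one hub's
        # model_id/model_revision plus a "model_hub" tag.
        print(record["model_hub"], record["model_id"])
    # huggingface BAAI/bge-reranker-large
    # modelscope Xorbits/bge-reranker-large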
xinference/model/rerank/utils.py CHANGED
@@ -14,10 +14,10 @@
  from typing import TYPE_CHECKING, Any

  if TYPE_CHECKING:
-     from .core import RerankModelSpec
+     from .core import RerankModelFamilyV2


- def get_model_version(rerank_model: "RerankModelSpec") -> str:
+ def get_model_version(rerank_model: "RerankModelFamilyV2") -> str:
      return rerank_model.model_name

xinference/model/utils.py CHANGED
@@ -18,10 +18,22 @@ import logging
  import os
  import random
  import threading
+ from abc import ABC, abstractmethod
  from copy import deepcopy
  from json import JSONDecodeError
  from pathlib import Path
- from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
+ from typing import (
+     TYPE_CHECKING,
+     Any,
+     Callable,
+     Dict,
+     List,
+     Optional,
+     Set,
+     Tuple,
+     Type,
+     Union,
+ )

  import huggingface_hub
  import numpy as np
@@ -36,6 +48,10 @@ from ..constants import (
  from ..device_utils import get_available_device, is_device_available
  from .core import CacheableModelSpec

+ if TYPE_CHECKING:
+     from .embedding.core import LlamaCppEmbeddingSpecV1
+     from .llm.llm_family import LlamaCppLLMSpecV2
+
  logger = logging.getLogger(__name__)
  IS_NEW_HUGGINGFACE_HUB: bool = huggingface_hub.__version__ >= "0.23.0"

@@ -262,59 +278,6 @@ def cache_from_uri(model_spec: CacheableModelSpec) -> str:
      raise ValueError(f"Unsupported URL scheme: {src_scheme}")


- def cache(model_spec: CacheableModelSpec, model_description_type: type):
-     if (
-         hasattr(model_spec, "model_uri")
-         and getattr(model_spec, "model_uri", None) is not None
-     ):
-         logger.info(f"Model caching from URI: {model_spec.model_uri}")
-         return cache_from_uri(model_spec=model_spec)
-
-     cache_dir = os.path.realpath(
-         os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-     )
-     if not os.path.exists(cache_dir):
-         os.makedirs(cache_dir, exist_ok=True)
-     meta_path = os.path.join(cache_dir, "__valid_download")
-     if valid_model_revision(meta_path, model_spec.model_revision, model_spec.model_hub):
-         return cache_dir
-
-     from_modelscope: bool = model_spec.model_hub == "modelscope"
-     if from_modelscope:
-         from modelscope.hub.snapshot_download import snapshot_download as ms_download
-
-         download_dir = retry_download(
-             ms_download,
-             model_spec.model_name,
-             None,
-             model_spec.model_id,
-             revision=model_spec.model_revision,
-         )
-         create_symlink(download_dir, cache_dir)
-     else:
-         from huggingface_hub import snapshot_download as hf_download
-
-         use_symlinks = {}
-         if not IS_NEW_HUGGINGFACE_HUB:
-             use_symlinks = {"local_dir_use_symlinks": True, "local_dir": cache_dir}
-         download_dir = retry_download(
-             hf_download,
-             model_spec.model_name,
-             None,
-             model_spec.model_id,
-             revision=model_spec.model_revision,
-             **use_symlinks,
-         )
-         if IS_NEW_HUGGINGFACE_HUB:
-             create_symlink(download_dir, cache_dir)
-     with open(meta_path, "w") as f:
-         import json
-
-         desc = model_description_type(None, None, model_spec)
-         json.dump(desc.to_dict(), f)
-     return cache_dir
-
-
  def select_device(device):
      try:
          import torch  # noqa: F401
@@ -497,3 +460,101 @@ def get_engine_params_by_name(
          f"Cannot support model_engine for {model_type}, "
          f"only available for LLM, embedding"
      )
+
+
+ def generate_model_file_names_with_quantization_parts(
+     model_spec: Union["LlamaCppLLMSpecV2", "LlamaCppEmbeddingSpecV1"],
+     multimodal_projector: Optional[str] = None,
+ ) -> Tuple[List[str], str, bool]:
+     file_names = []
+     final_file_name = model_spec.model_file_name_template.format(
+         quantization=model_spec.quantization
+     )
+     need_merge = False
+
+     if (
+         model_spec.quantization_parts is None
+         or model_spec.quantization not in model_spec.quantization_parts
+     ):
+         file_names.append(final_file_name)
+     elif (
+         model_spec.quantization is not None
+         and model_spec.quantization in model_spec.quantization_parts
+     ):
+         parts = model_spec.quantization_parts[model_spec.quantization]
+         need_merge = True
+
+         logger.info(
+             f"Model {model_spec.model_id} {model_spec.model_format} {model_spec.quantization} has {len(parts)} parts."
+         )
+
+         if model_spec.model_file_name_split_template is None:
+             raise ValueError(
+                 f"No model_file_name_split_template for model spec {model_spec.model_id}"
+             )
+
+         for part in parts:
+             file_name = model_spec.model_file_name_split_template.format(
+                 quantization=model_spec.quantization, part=part
+             )
+             file_names.append(file_name)
+     if multimodal_projector:
+         file_names.append(multimodal_projector)
+
+     return file_names, final_file_name, need_merge
+
+
+ def merge_cached_files(
+     cache_dir: str, input_file_names: List[str], output_file_name: str
+ ):
+     # now llama.cpp can find the gguf parts automatically
+     # we only need to provide the first part
+     # thus we create the symlink to the first part
+     symlink_local_file(
+         os.path.join(cache_dir, input_file_names[0]), cache_dir, output_file_name
+     )
+
+     logger.info(f"Merge complete.")
+
+
+ def flatten_model_src(input_json: dict):
+     flattened = []
+     base_info = {key: value for key, value in input_json.items() if key != "model_src"}
+     for model_hub, hub_info in input_json["model_src"].items():
+         record = base_info.copy()
+         hub_info.pop("model_hub", None)
+         record.update(hub_info)
+         record["model_hub"] = model_hub
+         flattened.append(record)
+     return flattened
+
+
+ def flatten_quantizations(input_json: dict):
+     flattened = []
+
+     base_info = {key: value for key, value in input_json.items() if key != "model_src"}
+
+     for model_hub, hub_info in input_json["model_src"].items():
+         quantizations = hub_info["quantizations"]
+
+         for quant in quantizations:
+             record = base_info.copy()
+             record["model_hub"] = model_hub
+             record["quantization"] = quant
+
+             for key, value in hub_info.items():
+                 if key != "quantizations":
+                     record[key] = value
+
+             flattened.append(record)
+     return flattened
+
+
+ class ModelInstanceInfoMixin(ABC):
+     @abstractmethod
+     def to_description(self):
+         """"""
+
+     @abstractmethod
+     def to_version_info(self):
+         """"""
xinference/model/video/__init__.py CHANGED
@@ -15,40 +15,36 @@
  import codecs
  import json
  import os
- from itertools import chain

+ from ..utils import flatten_model_src
  from .core import (
      BUILTIN_VIDEO_MODELS,
-     MODEL_NAME_TO_REVISION,
-     MODELSCOPE_VIDEO_MODELS,
      VIDEO_MODEL_DESCRIPTIONS,
-     VideoModelFamilyV1,
+     VideoModelFamilyV2,
      generate_video_description,
-     get_cache_status,
      get_video_model_descriptions,
  )


  def _install():
      load_model_family_from_json("model_spec.json", BUILTIN_VIDEO_MODELS)
-     load_model_family_from_json("model_spec_modelscope.json", MODELSCOPE_VIDEO_MODELS)

      # register model description
-     for model_name, model_spec in chain(
-         MODELSCOPE_VIDEO_MODELS.items(), BUILTIN_VIDEO_MODELS.items()
-     ):
+     for model_name, model_specs in BUILTIN_VIDEO_MODELS.items():
+         model_spec = [x for x in model_specs if x.model_hub == "huggingface"][0]
          VIDEO_MODEL_DESCRIPTIONS.update(generate_video_description(model_spec))


  def load_model_family_from_json(json_filename, target_families):
      json_path = os.path.join(os.path.dirname(__file__), json_filename)
-     target_families.update(
-         dict(
-             (spec["model_name"], VideoModelFamilyV1(**spec))
-             for spec in json.load(codecs.open(json_path, "r", encoding="utf-8"))
-         )
-     )
-     for model_name, model_spec in target_families.items():
-         MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
+     flattened_model_specs = []
+     for spec in json.load(codecs.open(json_path, "r", encoding="utf-8")):
+         flattened_model_specs.extend(flatten_model_src(spec))
+
+     for spec in flattened_model_specs:
+         if spec["model_name"] not in target_families:
+             target_families[spec["model_name"]] = [VideoModelFamilyV2(**spec)]
+         else:
+             target_families[spec["model_name"]].append(VideoModelFamilyV2(**spec))

      del json_path
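
Note: the net effect is that BUILTIN_VIDEO_MODELS now maps each model name to a list of VideoModelFamilyV2 specs, one per hosting hub, instead of keeping a parallel MODELSCOPE_VIDEO_MODELS dict. A minimal sketch (model names printed are illustrative):

    from xinference.model.video import BUILTIN_VIDEO_MODELS

    for name, specs in BUILTIN_VIDEO_MODELS.items():
        # e.g. "CogVideoX-5b -> ['huggingface', 'modelscope']"
        print(name, "->", [spec.model_hub for spec in specs])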