xinference 1.6.0__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their public registries, and is provided for informational purposes only.
Potentially problematic release.
- xinference/_version.py +3 -3
- xinference/client/restful/restful_client.py +1 -1
- xinference/conftest.py +0 -7
- xinference/core/media_interface.py +9 -8
- xinference/core/model.py +13 -6
- xinference/core/scheduler.py +1 -10
- xinference/core/worker.py +0 -10
- xinference/model/audio/model_spec.json +53 -1
- xinference/model/audio/model_spec_modelscope.json +57 -1
- xinference/model/embedding/core.py +19 -11
- xinference/model/image/model_spec.json +10 -1
- xinference/model/image/model_spec_modelscope.json +20 -0
- xinference/model/llm/__init__.py +6 -54
- xinference/model/llm/core.py +19 -5
- xinference/model/llm/llama_cpp/core.py +59 -3
- xinference/model/llm/llama_cpp/memory.py +455 -0
- xinference/model/llm/llm_family.json +185 -397
- xinference/model/llm/llm_family.py +88 -16
- xinference/model/llm/llm_family_modelscope.json +199 -421
- xinference/model/llm/llm_family_openmind_hub.json +0 -34
- xinference/model/llm/sglang/core.py +4 -0
- xinference/model/llm/transformers/__init__.py +27 -6
- xinference/model/llm/transformers/chatglm.py +4 -2
- xinference/model/llm/transformers/core.py +49 -28
- xinference/model/llm/transformers/deepseek_v2.py +6 -49
- xinference/model/llm/transformers/gemma3.py +119 -164
- xinference/{thirdparty/omnilmm/train → model/llm/transformers/multimodal}/__init__.py +1 -1
- xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
- xinference/model/llm/transformers/multimodal/core.py +205 -0
- xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
- xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
- xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
- xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
- xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
- xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
- xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
- xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
- xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
- xinference/model/llm/transformers/opt.py +4 -2
- xinference/model/llm/transformers/utils.py +6 -37
- xinference/model/llm/vllm/core.py +4 -0
- xinference/model/rerank/core.py +7 -1
- xinference/model/rerank/utils.py +17 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.ddf9eaee.js +3 -0
- xinference/web/ui/build/static/js/main.ddf9eaee.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e637ed5fa9ca6491b03892b6949c03afd4960fe36ac25744488e7e1982aa19.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/77ac2665a784e99501ae95d32ef5937837a0439a47e965d291b38e99cb619f5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d4ed4e82bfe69915999ec83f5feaa4301c75ecc6bdf1c78f2d03e4671ecbefc8.json +1 -0
- xinference/web/ui/src/locales/en.json +3 -1
- xinference/web/ui/src/locales/zh.json +3 -1
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/METADATA +16 -14
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/RECORD +60 -76
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/WHEEL +1 -1
- xinference/model/llm/transformers/cogvlm2.py +0 -442
- xinference/model/llm/transformers/cogvlm2_video.py +0 -333
- xinference/model/llm/transformers/deepseek_vl.py +0 -280
- xinference/model/llm/transformers/glm_edge_v.py +0 -213
- xinference/model/llm/transformers/intern_vl.py +0 -526
- xinference/model/llm/transformers/internlm2.py +0 -94
- xinference/model/llm/transformers/minicpmv25.py +0 -193
- xinference/model/llm/transformers/omnilmm.py +0 -132
- xinference/model/llm/transformers/qwen2_audio.py +0 -179
- xinference/model/llm/transformers/qwen_vl.py +0 -360
- xinference/thirdparty/omnilmm/LICENSE +0 -201
- xinference/thirdparty/omnilmm/__init__.py +0 -0
- xinference/thirdparty/omnilmm/chat.py +0 -218
- xinference/thirdparty/omnilmm/constants.py +0 -4
- xinference/thirdparty/omnilmm/conversation.py +0 -332
- xinference/thirdparty/omnilmm/model/__init__.py +0 -1
- xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
- xinference/thirdparty/omnilmm/model/resampler.py +0 -166
- xinference/thirdparty/omnilmm/model/utils.py +0 -578
- xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
- xinference/thirdparty/omnilmm/utils.py +0 -134
- xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
- xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
- /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.ddf9eaee.js.LICENSE.txt} +0 -0
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/transformers/utils.py
CHANGED
```diff
@@ -191,42 +191,13 @@ def _get_pad_param(seq_len_idx: int, pad_len: int) -> Tuple:
     return tuple(dimensions)
 
 
-def _merge_kv_cache(
-    xinf_model_obj: "PytorchModel",
-    past_cache: DynamicCache,
-    new_cache: DynamicCache,
-) -> DynamicCache:
-    from torch.nn.functional import pad
-
-    _, seq_len_idx = xinf_model_obj.get_batch_size_and_seq_len_indexes_from_kv()
-    past_seq_len = past_cache[0][0].shape[seq_len_idx]
-    new_seq_len = new_cache[0][0].shape[seq_len_idx]
-    if past_seq_len != new_seq_len:
-        padding_target = new_cache if past_seq_len > new_seq_len else past_cache
-        padding_len = abs(past_seq_len - new_seq_len)
-        pad_param = _get_pad_param(seq_len_idx, padding_len)
-        for idx in range(len(padding_target)):
-            k = padding_target.key_cache[idx]
-            v = padding_target.value_cache[idx]
-            _k = pad(k, pad_param)
-            _v = pad(v, pad_param)
-            padding_target.key_cache[idx] = _k
-            padding_target.value_cache[idx] = _v
-
-    ret_kv = DynamicCache()
-    for idx in range(len(past_cache)):
-        k1, k2 = new_cache.key_cache[idx], past_cache.key_cache[idx]
-        v1, v2 = new_cache.value_cache[idx], past_cache.value_cache[idx]
-        ret_kv.update(
-            torch.cat((k1, k2), 0).contiguous(),
-            torch.cat((v1, v2), 0).contiguous(),
-            idx,
-        )
-    return ret_kv
-
-
 def get_batch_size_and_seq_len_from_kv_cache(kv, xinf_model_obj: "PytorchModel"):
+    from transformers import HybridCache
+
     bs_idx, seq_len_idx = xinf_model_obj.get_batch_size_and_seq_len_indexes_from_kv()
+
+    if isinstance(kv, HybridCache):
+        return kv.key_cache[0].shape[bs_idx], kv.get_seq_length()
     return kv[0][0].shape[bs_idx], kv[0][0].shape[seq_len_idx] + 1
@@ -304,9 +275,7 @@ def _batch_inference_one_step_internal(
     if decode_reqs:
         decode_kv = decode_reqs[0].kv_cache
         # prefill and decode kv cache need to be merged at `batch_size` and `seq_len` dimensions.
-        merged_kv_cache = _merge_kv_cache(
-            xinf_model_obj, decode_kv, past_key_values
-        )
+        merged_kv_cache = xinf_model_obj.merge_kv_cache(decode_kv, past_key_values)
         for r in valid_req_list:
             r.kv_cache = merged_kv_cache
         empty_cache()
```
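For context, the deleted `_merge_kv_cache` helper equalized the two caches' sequence lengths by right-padding the shorter one, then stacked them along the batch dimension; in 1.6.1 that merge moves behind the model object's `merge_kv_cache` method, and `get_batch_size_and_seq_len_from_kv_cache` additionally reads lengths from a `HybridCache` via `get_seq_length()`. A minimal self-contained sketch of the pad-and-concatenate idea, using made-up per-layer key tensors in the common `(batch, heads, seq_len, head_dim)` layout, not the actual 1.6.1 implementation:

```python
import torch
from torch.nn.functional import pad

# Hypothetical per-layer key states for two request groups.
past_k = torch.randn(2, 8, 10, 64)  # two decoding requests, seq_len 10
new_k = torch.randn(1, 8, 7, 64)    # freshly prefilled request, seq_len 7

seq_len_idx = 2  # sequence dimension in this layout
pad_len = past_k.shape[seq_len_idx] - new_k.shape[seq_len_idx]

# F.pad consumes (left, right) pairs starting from the LAST dimension,
# which is the tuple the deleted _get_pad_param built: pad head_dim by
# (0, 0) and seq_len by (0, pad_len).
new_k_padded = pad(new_k, (0, 0, 0, pad_len))

# With equal seq_len, the groups can be stacked on the batch dimension.
merged_k = torch.cat((new_k_padded, past_k), dim=0).contiguous()
assert merged_k.shape == (3, 8, 10, 64)
```

The removed helper applied the same padding tuple to every layer's key and value tensors, so a single `DynamicCache` ended up holding all in-flight requests.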
xinference/model/llm/vllm/core.py
CHANGED
```diff
@@ -199,7 +199,11 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.5.1":
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2-chat-0628")
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2.5")
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v3")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v3-0324")
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-r1")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-r1-0528")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-prover-v2")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-r1-0528-qwen3")
 
 if VLLM_INSTALLED and vllm.__version__ >= "0.5.3":
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-2-it")
```
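The surrounding code (visible in the hunk's context lines) gates each registration on the installed vLLM version. A small sketch of that pattern; the try/except import guard is an assumption about how `VLLM_INSTALLED` is typically derived, and the comparison caveat is worth noting:

```python
from typing import List

VLLM_SUPPORTED_CHAT_MODELS: List[str] = []

try:
    import vllm  # optional dependency

    VLLM_INSTALLED = True
except ImportError:
    vllm = None
    VLLM_INSTALLED = False

# Same gate shape as in the hunk above. Note that `>=` on __version__
# compares strings lexicographically: fine for "0.5.3" vs "0.5.1", but
# it would rank "0.10.0" below "0.5.3"; packaging.version.parse is the
# robust comparator once versions reach double digits.
if VLLM_INSTALLED and vllm.__version__ >= "0.5.1":
    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-r1-0528")
```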
xinference/model/rerank/core.py
CHANGED
```diff
@@ -265,7 +265,13 @@ class RerankModel:
         if max_chunks_per_doc is not None:
             raise ValueError("rerank hasn't support `max_chunks_per_doc` parameter.")
         logger.info("Rerank with kwargs: %s, model: %s", kwargs, self._model)
-        sentence_combinations = [[query, doc] for doc in documents]
+
+        from .utils import preprocess_sentence
+
+        pre_query = preprocess_sentence(
+            query, kwargs.get("instruction", None), self._model_spec.model_name
+        )
+        sentence_combinations = [[pre_query, doc] for doc in documents]
         # reset n tokens
         self._model.model.n_tokens = 0
         if self._model_spec.type == "normal":
```
xinference/model/rerank/utils.py
CHANGED
```diff
@@ -11,8 +11,25 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Any
+
 from .core import RerankModelSpec
 
 
 def get_model_version(rerank_model: RerankModelSpec) -> str:
     return rerank_model.model_name
+
+
+instruction_cfg = {
+    "minicpm-reranker": "Query: ",
+}
+
+
+def preprocess_sentence(query: str, instruction: Any, model_name: str) -> str:
+    if instruction and isinstance(instruction, str):
+        return f"{instruction}{query}"
+    if instruction is None:
+        for k, v in instruction_cfg.items():
+            if k.lower() in model_name.lower():
+                return f"{v}{query}"
+    return query
```
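Combined with the `core.py` hunk above, an `instruction` passed through the rerank kwargs is prefixed to the query before the query/document pairs are built, and models matching an entry in `instruction_cfg` get a default prefix even without one. Expected behaviour of the new helper on illustrative inputs (the queries and model names are placeholders, except that the `minicpm-reranker` substring match comes from the table above):

```python
from xinference.model.rerank.utils import preprocess_sentence

# An explicit string instruction always wins.
preprocess_sentence("best gpu for llm inference", "Query: ", "bge-reranker-v2-m3")
# -> "Query: best gpu for llm inference"

# No instruction: the default table is consulted by case-insensitive
# substring match on the model name.
preprocess_sentence("best gpu for llm inference", None, "MiniCPM-Reranker")
# -> "Query: best gpu for llm inference"

# Unknown model and no instruction: the query passes through unchanged.
preprocess_sentence("best gpu for llm inference", None, "bge-reranker-base")
# -> "best gpu for llm inference"
```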
xinference/web/ui/build/asset-manifest.json
CHANGED
```diff
@@ -1,14 +1,14 @@
 {
   "files": {
     "main.css": "./static/css/main.337afe76.css",
-    "main.js": "./static/js/main.ae579a97.js",
+    "main.js": "./static/js/main.ddf9eaee.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
     "main.337afe76.css.map": "./static/css/main.337afe76.css.map",
-    "main.ae579a97.js.map": "./static/js/main.ae579a97.js.map"
+    "main.ddf9eaee.js.map": "./static/js/main.ddf9eaee.js.map"
   },
   "entrypoints": [
     "static/css/main.337afe76.css",
-    "static/js/main.ae579a97.js"
+    "static/js/main.ddf9eaee.js"
   ]
 }
```
xinference/web/ui/build/index.html
CHANGED
```diff
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.ae579a97.js"></script><link href="./static/css/main.337afe76.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.ddf9eaee.js"></script><link href="./static/css/main.337afe76.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
```