xinference 1.6.0.post1__py3-none-any.whl → 1.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (87)
  1. xinference/_version.py +3 -3
  2. xinference/client/restful/restful_client.py +1 -1
  3. xinference/conftest.py +0 -7
  4. xinference/core/media_interface.py +9 -8
  5. xinference/core/model.py +13 -6
  6. xinference/core/scheduler.py +1 -10
  7. xinference/core/worker.py +0 -10
  8. xinference/model/audio/model_spec.json +53 -1
  9. xinference/model/audio/model_spec_modelscope.json +57 -1
  10. xinference/model/embedding/core.py +19 -11
  11. xinference/model/image/model_spec.json +10 -1
  12. xinference/model/image/model_spec_modelscope.json +20 -0
  13. xinference/model/llm/__init__.py +6 -54
  14. xinference/model/llm/core.py +19 -5
  15. xinference/model/llm/llama_cpp/core.py +59 -3
  16. xinference/model/llm/llama_cpp/memory.py +455 -0
  17. xinference/model/llm/llm_family.json +185 -397
  18. xinference/model/llm/llm_family.py +88 -16
  19. xinference/model/llm/llm_family_modelscope.json +199 -421
  20. xinference/model/llm/llm_family_openmind_hub.json +0 -34
  21. xinference/model/llm/sglang/core.py +4 -0
  22. xinference/model/llm/transformers/__init__.py +27 -6
  23. xinference/model/llm/transformers/chatglm.py +4 -2
  24. xinference/model/llm/transformers/core.py +49 -28
  25. xinference/model/llm/transformers/deepseek_v2.py +6 -49
  26. xinference/model/llm/transformers/gemma3.py +119 -164
  27. xinference/{thirdparty/omnilmm/train → model/llm/transformers/multimodal}/__init__.py +1 -1
  28. xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
  29. xinference/model/llm/transformers/multimodal/core.py +205 -0
  30. xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
  31. xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
  32. xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
  33. xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
  34. xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
  35. xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
  36. xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
  37. xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
  38. xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
  39. xinference/model/llm/transformers/opt.py +4 -2
  40. xinference/model/llm/transformers/utils.py +6 -37
  41. xinference/model/llm/vllm/core.py +4 -0
  42. xinference/model/rerank/core.py +7 -1
  43. xinference/model/rerank/utils.py +17 -0
  44. xinference/web/ui/build/asset-manifest.json +3 -3
  45. xinference/web/ui/build/index.html +1 -1
  46. xinference/web/ui/build/static/js/main.ddf9eaee.js +3 -0
  47. xinference/web/ui/build/static/js/main.ddf9eaee.js.map +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/12e637ed5fa9ca6491b03892b6949c03afd4960fe36ac25744488e7e1982aa19.json +1 -0
  49. xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/77ac2665a784e99501ae95d32ef5937837a0439a47e965d291b38e99cb619f5b.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/d4ed4e82bfe69915999ec83f5feaa4301c75ecc6bdf1c78f2d03e4671ecbefc8.json +1 -0
  52. xinference/web/ui/src/locales/en.json +3 -1
  53. xinference/web/ui/src/locales/zh.json +3 -1
  54. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/METADATA +6 -4
  55. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/RECORD +60 -76
  56. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/WHEEL +1 -1
  57. xinference/model/llm/transformers/cogvlm2.py +0 -442
  58. xinference/model/llm/transformers/cogvlm2_video.py +0 -333
  59. xinference/model/llm/transformers/deepseek_vl.py +0 -280
  60. xinference/model/llm/transformers/glm_edge_v.py +0 -213
  61. xinference/model/llm/transformers/intern_vl.py +0 -526
  62. xinference/model/llm/transformers/internlm2.py +0 -94
  63. xinference/model/llm/transformers/minicpmv25.py +0 -193
  64. xinference/model/llm/transformers/omnilmm.py +0 -132
  65. xinference/model/llm/transformers/qwen2_audio.py +0 -179
  66. xinference/model/llm/transformers/qwen_vl.py +0 -360
  67. xinference/thirdparty/omnilmm/LICENSE +0 -201
  68. xinference/thirdparty/omnilmm/__init__.py +0 -0
  69. xinference/thirdparty/omnilmm/chat.py +0 -218
  70. xinference/thirdparty/omnilmm/constants.py +0 -4
  71. xinference/thirdparty/omnilmm/conversation.py +0 -332
  72. xinference/thirdparty/omnilmm/model/__init__.py +0 -1
  73. xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
  74. xinference/thirdparty/omnilmm/model/resampler.py +0 -166
  75. xinference/thirdparty/omnilmm/model/utils.py +0 -578
  76. xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
  77. xinference/thirdparty/omnilmm/utils.py +0 -134
  78. xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
  79. xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
  80. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
  81. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
  82. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
  84. /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.ddf9eaee.js.LICENSE.txt} +0 -0
  85. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/entry_points.txt +0 -0
  86. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/licenses/LICENSE +0 -0
  87. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/transformers/utils.py
@@ -191,42 +191,13 @@ def _get_pad_param(seq_len_idx: int, pad_len: int) -> Tuple:
     return tuple(dimensions)
 
 
-def _merge_kv_cache(
-    xinf_model_obj: "PytorchModel",
-    past_cache: DynamicCache,
-    new_cache: DynamicCache,
-) -> DynamicCache:
-    from torch.nn.functional import pad
-
-    _, seq_len_idx = xinf_model_obj.get_batch_size_and_seq_len_indexes_from_kv()
-    past_seq_len = past_cache[0][0].shape[seq_len_idx]
-    new_seq_len = new_cache[0][0].shape[seq_len_idx]
-    if past_seq_len != new_seq_len:
-        padding_target = new_cache if past_seq_len > new_seq_len else past_cache
-        padding_len = abs(past_seq_len - new_seq_len)
-        pad_param = _get_pad_param(seq_len_idx, padding_len)
-        for idx in range(len(padding_target)):
-            k = padding_target.key_cache[idx]
-            v = padding_target.value_cache[idx]
-            _k = pad(k, pad_param)
-            _v = pad(v, pad_param)
-            padding_target.key_cache[idx] = _k
-            padding_target.value_cache[idx] = _v
-
-    ret_kv = DynamicCache()
-    for idx in range(len(past_cache)):
-        k1, k2 = new_cache.key_cache[idx], past_cache.key_cache[idx]
-        v1, v2 = new_cache.value_cache[idx], past_cache.value_cache[idx]
-        ret_kv.update(
-            torch.cat((k1, k2), 0).contiguous(),
-            torch.cat((v1, v2), 0).contiguous(),
-            idx,
-        )
-    return ret_kv
-
-
 def get_batch_size_and_seq_len_from_kv_cache(kv, xinf_model_obj: "PytorchModel"):
+    from transformers import HybridCache
+
     bs_idx, seq_len_idx = xinf_model_obj.get_batch_size_and_seq_len_indexes_from_kv()
+
+    if isinstance(kv, HybridCache):
+        return kv.key_cache[0].shape[bs_idx], kv.get_seq_length()
     return kv[0][0].shape[bs_idx], kv[0][0].shape[seq_len_idx] + 1
 
 
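This hunk removes the module-level _merge_kv_cache helper (the merge moves onto the model object, see the next hunk) and teaches the sequence-length probe about transformers' HybridCache, whose length comes from get_seq_length() rather than a tensor shape. As a rough illustration of what the removed helper did per cache layer — pad the shorter cache along the sequence dimension, then concatenate along the batch dimension — here is a minimal standalone sketch; the layout, shapes, and function name are hypothetical, not the xinference API:

import torch
from torch.nn.functional import pad

def merge_layer(k1: torch.Tensor, k2: torch.Tensor) -> torch.Tensor:
    # Assume a [batch, heads, seq, head_dim] layout, i.e. seq is dim 2.
    diff = k1.shape[2] - k2.shape[2]
    if diff > 0:
        # pad() takes (left, right) pairs starting from the LAST dim,
        # so (0, 0, 0, n) pads the right side of dim 2 by n.
        k2 = pad(k2, (0, 0, 0, diff))
    elif diff < 0:
        k1 = pad(k1, (0, 0, 0, -diff))
    # With seq lengths equalized, stacking on the batch dim is valid.
    return torch.cat((k1, k2), dim=0).contiguous()

k_decode = torch.zeros(1, 8, 12, 64)   # one in-flight decode request
k_prefill = torch.zeros(2, 8, 7, 64)   # two freshly prefilled requests
print(merge_layer(k_decode, k_prefill).shape)  # torch.Size([3, 8, 12, 64])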
xinference/model/llm/transformers/utils.py
@@ -304,9 +275,7 @@ def _batch_inference_one_step_internal(
     if decode_reqs:
         decode_kv = decode_reqs[0].kv_cache
         # prefill and decode kv cache need to be merged at `batch_size` and `seq_len` dimensions.
-        merged_kv_cache = _merge_kv_cache(
-            xinf_model_obj, decode_kv, past_key_values
-        )
+        merged_kv_cache = xinf_model_obj.merge_kv_cache(decode_kv, past_key_values)
         for r in valid_req_list:
             r.kv_cache = merged_kv_cache
         empty_cache()
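This is the call-site counterpart of the previous hunk: the merge now goes through a method on the model object, presumably so that models with non-standard cache types (such as the HybridCache path added above) can override it instead of being forced through the old DynamicCache-only helper.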
xinference/model/llm/vllm/core.py
@@ -199,7 +199,11 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.5.1":
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2-chat-0628")
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2.5")
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v3")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v3-0324")
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-r1")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-r1-0528")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-prover-v2")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-r1-0528-qwen3")
 
 if VLLM_INSTALLED and vllm.__version__ >= "0.5.3":
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-2-it")
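Once whitelisted here, these models can be served through the vLLM engine via the usual client call. A hedged usage sketch, assuming a running local xinference server at the default endpoint (the endpoint and model choice are illustrative):

from xinference.client import Client

client = Client("http://localhost:9997")  # assumes a local server is up
model_uid = client.launch_model(
    model_name="deepseek-r1-0528-qwen3",  # one of the newly whitelisted names
    model_engine="vllm",
)
model = client.get_model(model_uid)
reply = model.chat(messages=[{"role": "user", "content": "Hello"}])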
xinference/model/rerank/core.py
@@ -265,7 +265,13 @@ class RerankModel:
         if max_chunks_per_doc is not None:
             raise ValueError("rerank hasn't support `max_chunks_per_doc` parameter.")
         logger.info("Rerank with kwargs: %s, model: %s", kwargs, self._model)
-        sentence_combinations = [[query, doc] for doc in documents]
+
+        from .utils import preprocess_sentence
+
+        pre_query = preprocess_sentence(
+            query, kwargs.get("instruction", None), self._model_spec.model_name
+        )
+        sentence_combinations = [[pre_query, doc] for doc in documents]
         # reset n tokens
         self._model.model.n_tokens = 0
         if self._model_spec.type == "normal":
xinference/model/rerank/utils.py
@@ -11,8 +11,25 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Any
+
 from .core import RerankModelSpec
 
 
 def get_model_version(rerank_model: RerankModelSpec) -> str:
     return rerank_model.model_name
+
+
+instruction_cfg = {
+    "minicpm-reranker": "Query: ",
+}
+
+
+def preprocess_sentence(query: str, instruction: Any, model_name: str) -> str:
+    if instruction and isinstance(instruction, str):
+        return f"{instruction}{query}"
+    if instruction is None:
+        for k, v in instruction_cfg.items():
+            if k.lower() in model_name.lower():
+                return f"{v}{query}"
+    return query
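The new helper has three branches, demonstrated below directly from the code above (the model names are illustrative; only minicpm-reranker actually appears in instruction_cfg):

from xinference.model.rerank.utils import preprocess_sentence

# 1. An explicit string instruction is prepended verbatim.
preprocess_sentence("what is a panda?", "Represent this query: ", "bge-reranker-v2-m3")
# -> 'Represent this query: what is a panda?'

# 2. No instruction: a case-insensitive model-name match applies the default prefix.
preprocess_sentence("what is a panda?", None, "MiniCPM-Reranker")
# -> 'Query: what is a panda?'

# 3. Otherwise the query passes through unchanged.
preprocess_sentence("what is a panda?", None, "bge-reranker-base")
# -> 'what is a panda?'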
xinference/web/ui/build/asset-manifest.json
@@ -1,14 +1,14 @@
 {
   "files": {
     "main.css": "./static/css/main.337afe76.css",
-    "main.js": "./static/js/main.ae579a97.js",
+    "main.js": "./static/js/main.ddf9eaee.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
     "main.337afe76.css.map": "./static/css/main.337afe76.css.map",
-    "main.ae579a97.js.map": "./static/js/main.ae579a97.js.map"
+    "main.ddf9eaee.js.map": "./static/js/main.ddf9eaee.js.map"
   },
   "entrypoints": [
     "static/css/main.337afe76.css",
-    "static/js/main.ae579a97.js"
+    "static/js/main.ddf9eaee.js"
   ]
 }
xinference/web/ui/build/index.html
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.ae579a97.js"></script><link href="./static/css/main.337afe76.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.ddf9eaee.js"></script><link href="./static/css/main.337afe76.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>