xinference 1.7.0__py3-none-any.whl → 1.7.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-06-13T18:51:07+0800",
11
+ "date": "2025-06-14T01:13:55+0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "a362dba7334ef08c758bbc4a3d4904fe53cefe78",
15
- "version": "1.7.0"
14
+ "full-revisionid": "da2040e54c18c80ae88c64608de0081fa6df54c4",
15
+ "version": "1.7.0.post1"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -6160,7 +6160,7 @@
6160
6160
  "quantizations": [
6161
6161
  "none"
6162
6162
  ],
6163
- "model_id": "openbmb/MiniCPM4-0.5B"
6163
+ "model_id": "JunHowie/MiniCPM4-0.5B"
6164
6164
  },
6165
6165
  {
6166
6166
  "model_format": "pytorch",
@@ -6168,7 +6168,7 @@
6168
6168
  "quantizations": [
6169
6169
  "none"
6170
6170
  ],
6171
- "model_id": "openbmb/MiniCPM4-8B"
6171
+ "model_id": "JunHowie/MiniCPM4-8B"
6172
6172
  },
6173
6173
  {
6174
6174
  "model_format": "mlx",
@@ -4295,7 +4295,7 @@
4295
4295
  "quantizations": [
4296
4296
  "none"
4297
4297
  ],
4298
- "model_id": "OpenBMB/MiniCPM4-0.5B",
4298
+ "model_id": "JunHowie/MiniCPM4-0.5B",
4299
4299
  "model_hub": "modelscope"
4300
4300
  },
4301
4301
  {
@@ -4304,7 +4304,7 @@
4304
4304
  "quantizations": [
4305
4305
  "none"
4306
4306
  ],
4307
- "model_id": "OpenBMB/MiniCPM4-8B",
4307
+ "model_id": "JunHowie/MiniCPM4-8B",
4308
4308
  "model_hub": "modelscope"
4309
4309
  },
4310
4310
  {
@@ -14,6 +14,7 @@
14
14
 
15
15
  import gc
16
16
  import importlib
17
+ import importlib.util
17
18
  import logging
18
19
  import os
19
20
  import threading
@@ -251,12 +252,21 @@ class RerankModel:
251
252
  tokenizer = AutoTokenizer.from_pretrained(
252
253
  self._model_path, padding_side="left"
253
254
  )
255
+ flash_attn_installed = importlib.util.find_spec("flash_attn") is not None
256
+ model_kwargs = {"device_map": "auto"}
257
+ if flash_attn_installed:
258
+ model_kwargs["attn_implementation"] = "flash_attention_2"
259
+ model_kwargs["torch_dtype"] = torch.float16
254
260
  model = self._model = AutoModelForCausalLM.from_pretrained(
255
- self._model_path
261
+ self._model_path, **model_kwargs
256
262
  ).eval()
257
263
  max_length = getattr(self._model_spec, "max_tokens")
258
264
 
259
- prefix = '<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>\n<|im_start|>user\n'
265
+ prefix = (
266
+ "<|im_start|>system\nJudge whether the Document meets the requirements based on the Query "
267
+ 'and the Instruct provided. Note that the answer can only be "yes" or "no".'
268
+ "<|im_end|>\n<|im_start|>user\n"
269
+ )
260
270
  suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
261
271
  prefix_tokens = tokenizer.encode(prefix, add_special_tokens=False)
262
272
  suffix_tokens = tokenizer.encode(suffix, add_special_tokens=False)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xinference
3
- Version: 1.7.0
3
+ Version: 1.7.0.post1
4
4
  Summary: Model Serving Made Easy
5
5
  Home-page: https://github.com/xorbitsai/inference
6
6
  Author: Qin Xuye
@@ -1,6 +1,6 @@
1
1
  xinference/__init__.py,sha256=nmTTrYbIpj964ZF6ojtgOM7E85JBOj1EyQbmYjbj1jw,915
2
2
  xinference/_compat.py,sha256=YF-lS6tX06zkFi2oFS0yq_LBn4hX_8u0Ft0vKxGALwA,4238
3
- xinference/_version.py,sha256=cTB6jB2VXLWZ_Heth-iG167tHE-bv24BvW3eaShba2k,497
3
+ xinference/_version.py,sha256=FLTvXc3gsm40inNtWJwpd9lC2ODsO7OetP5N4-FyECo,503
4
4
  xinference/conftest.py,sha256=vETDpRBVIlWbWi7OTwf7og89U25KyYGyI7yPIB3O8N8,9564
5
5
  xinference/constants.py,sha256=-QqMRJo1MvTgnLmRZLF3DVj3PMXUzg1rV-XIsuN_nGI,4319
6
6
  xinference/device_utils.py,sha256=ELsqvnjvz9wYthTyQFzKSV4mZsaASz6hj_IsfMmfMWc,4447
@@ -96,10 +96,10 @@ xinference/model/image/stable_diffusion/core.py,sha256=sTceL97g4qXz9ukPGIBmPkRGg
96
96
  xinference/model/image/stable_diffusion/mlx.py,sha256=GZsozzGB04NfHAdU9MI6gwWE1t_A-s_Ddn_ic8DlkKQ,7476
97
97
  xinference/model/llm/__init__.py,sha256=Pm8hxP3E1m9-H-JaicIsUCvZ77uD0ClL0KvqcreO1aY,9137
98
98
  xinference/model/llm/core.py,sha256=S8Ys69u6NWRjjHksBlGG50RPwHIDSQsUBiK-wRunfj0,10236
99
- xinference/model/llm/llm_family.json,sha256=VU3khmH9X8_aSDhJ5Nfr7u_ZgEd0wUZvMCjXaCBvnWE,412320
99
+ xinference/model/llm/llm_family.json,sha256=oL4aja5uZHaDetGKpEt8v0ZYG_GPQnpnMeGnA9Zyx10,412322
100
100
  xinference/model/llm/llm_family.py,sha256=Nex1zvegUH1TvGjy8yILYiehY2QFOziRASGBlAa7wPg,44510
101
101
  xinference/model/llm/llm_family_csghub.json,sha256=OYve65CgLVHdyTC3UMe7vqhFwC09OR7XhzDsVEpkJhY,6908
102
- xinference/model/llm/llm_family_modelscope.json,sha256=rOu1ouDLRMUNpuRQkB4QPRjaQdp5ScneXJAaUbVY24Y,357639
102
+ xinference/model/llm/llm_family_modelscope.json,sha256=7Zs4rW07xpvWfT_3bE6cjDsn_xu0wiXGE7JJD6yUjws,357641
103
103
  xinference/model/llm/llm_family_openmind_hub.json,sha256=sAWzttUasckGzNdcBHhdxvXB893tBhnm__w_1hr6JXw,58243
104
104
  xinference/model/llm/memory.py,sha256=GLNmXBI-AtMbuaJfEf50fnhN4rdbOZjLyT6L_Vjqa5g,10206
105
105
  xinference/model/llm/reasoning_parser.py,sha256=L9BGzsQiTw8QY6BtQfdEGJmCL_RpnYR1MXBkczOS6SU,17184
@@ -151,7 +151,7 @@ xinference/model/llm/vllm/xavier/transfer.py,sha256=2IbaiAhRnJMwa9qsMa5bowngqfjx
151
151
  xinference/model/llm/vllm/xavier/test/__init__.py,sha256=CyLLkbImZouAk4lePIgKXT4WQoqyauIEwdqea5IOUVU,581
152
152
  xinference/model/llm/vllm/xavier/test/test_xavier.py,sha256=lSfo4caaLdWRh0diJ1jd_sY85GV-_9BT40ySs_nirJQ,4695
153
153
  xinference/model/rerank/__init__.py,sha256=yxxv4OOHCl3UUOLENP45Eghf_dEsRaCkUUcW5bth_NE,3141
154
- xinference/model/rerank/core.py,sha256=j5AP4f1KFkDUUPLcjywKIHe4jfysJFlgpW1N1fEckNk,18996
154
+ xinference/model/rerank/core.py,sha256=mHgPvnhkD9CO8Lp_k5rUI9OmmrTKe2NPeKL1pHDOk9A,19411
155
155
  xinference/model/rerank/custom.py,sha256=wPKF3bHbGap9dHz9yYvXMXhozh4hRzS78RQijqvaRq8,3846
156
156
  xinference/model/rerank/model_spec.json,sha256=GC0aD3vcMMBaPoYn74AL4FlkgDugX3z56eWNBfqqRhk,2705
157
157
  xinference/model/rerank/model_spec_modelscope.json,sha256=lQ48VuQwokc564yKjYqCEPspW5EJryIJXtuo6crXKlQ,2211
@@ -15778,9 +15778,9 @@ xinference/web/ui/src/locales/en.json,sha256=R7JpAE_lXcF4j1ryOghE0_B8Y2Q0zIrax_v
15778
15778
  xinference/web/ui/src/locales/ja.json,sha256=nh-gFRtTpZAmTJNBa00cYfv6ftpR2FcI4ZmQZ0Y-ezc,11454
15779
15779
  xinference/web/ui/src/locales/ko.json,sha256=MJOPJFagJdjgLDb5g3kaSBEWmH5zF03QeLWhayGo_iQ,10333
15780
15780
  xinference/web/ui/src/locales/zh.json,sha256=7NRqta9ipMdoe19Zdp0-MGqbiiTxDoRopM1o2EOWb54,9424
15781
- xinference-1.7.0.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
15782
- xinference-1.7.0.dist-info/METADATA,sha256=iIVl2mTLWs8_vkEP5uDGS96ulOZEU2UdjW7uZBSoI8k,26050
15783
- xinference-1.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15784
- xinference-1.7.0.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
15785
- xinference-1.7.0.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
15786
- xinference-1.7.0.dist-info/RECORD,,
15781
+ xinference-1.7.0.post1.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
15782
+ xinference-1.7.0.post1.dist-info/METADATA,sha256=DAz2hgr8ToiguCmFIiTH13uFar15QjhyyQ7LCj9FFxo,26056
15783
+ xinference-1.7.0.post1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15784
+ xinference-1.7.0.post1.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
15785
+ xinference-1.7.0.post1.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
15786
+ xinference-1.7.0.post1.dist-info/RECORD,,