xinference 1.7.0.post1__py3-none-any.whl → 1.7.1.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +3 -4
- xinference/client/__init__.py +2 -0
- xinference/client/common.py +49 -2
- xinference/client/handlers.py +18 -0
- xinference/client/restful/async_restful_client.py +1760 -0
- xinference/client/restful/restful_client.py +74 -78
- xinference/core/media_interface.py +3 -1
- xinference/core/model.py +5 -4
- xinference/core/supervisor.py +10 -5
- xinference/core/worker.py +15 -14
- xinference/deploy/local.py +51 -9
- xinference/deploy/worker.py +5 -3
- xinference/device_utils.py +22 -3
- xinference/model/audio/fish_speech.py +23 -34
- xinference/model/audio/model_spec.json +4 -2
- xinference/model/audio/model_spec_modelscope.json +4 -2
- xinference/model/audio/utils.py +2 -2
- xinference/model/core.py +1 -0
- xinference/model/embedding/__init__.py +8 -8
- xinference/model/embedding/custom.py +6 -1
- xinference/model/embedding/embed_family.py +0 -41
- xinference/model/embedding/model_spec.json +10 -1
- xinference/model/embedding/model_spec_modelscope.json +10 -1
- xinference/model/embedding/sentence_transformers/core.py +30 -15
- xinference/model/flexible/core.py +1 -1
- xinference/model/flexible/launchers/__init__.py +2 -0
- xinference/model/flexible/launchers/image_process_launcher.py +1 -1
- xinference/model/flexible/launchers/modelscope_launcher.py +47 -0
- xinference/model/flexible/launchers/transformers_launcher.py +5 -5
- xinference/model/flexible/launchers/yolo_launcher.py +62 -0
- xinference/model/llm/__init__.py +7 -0
- xinference/model/llm/core.py +18 -1
- xinference/model/llm/llama_cpp/core.py +1 -1
- xinference/model/llm/llm_family.json +41 -1
- xinference/model/llm/llm_family.py +6 -0
- xinference/model/llm/llm_family_modelscope.json +43 -1
- xinference/model/llm/mlx/core.py +271 -18
- xinference/model/llm/mlx/distributed_models/__init__.py +13 -0
- xinference/model/llm/mlx/distributed_models/core.py +164 -0
- xinference/model/llm/mlx/distributed_models/deepseek_v3.py +75 -0
- xinference/model/llm/mlx/distributed_models/qwen2.py +82 -0
- xinference/model/llm/mlx/distributed_models/qwen3.py +82 -0
- xinference/model/llm/mlx/distributed_models/qwen3_moe.py +76 -0
- xinference/model/llm/reasoning_parser.py +12 -6
- xinference/model/llm/sglang/core.py +8 -4
- xinference/model/llm/transformers/chatglm.py +4 -1
- xinference/model/llm/transformers/core.py +4 -2
- xinference/model/llm/transformers/multimodal/cogagent.py +10 -4
- xinference/model/llm/transformers/multimodal/intern_vl.py +1 -1
- xinference/model/llm/utils.py +36 -17
- xinference/model/llm/vllm/core.py +142 -34
- xinference/model/llm/vllm/distributed_executor.py +96 -21
- xinference/model/llm/vllm/xavier/transfer.py +2 -2
- xinference/model/rerank/core.py +16 -9
- xinference/model/rerank/model_spec.json +3 -3
- xinference/model/rerank/model_spec_modelscope.json +3 -3
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.9b12b7f9.js +3 -0
- xinference/web/ui/build/static/js/main.9b12b7f9.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0fd4820d93f99509e80d8702dc3f6f8272424acab5608fa7c0e82cb1d3250a87.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1460361af6975e63576708039f1cb732faf9c672d97c494d4055fc6331460be0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4efd8dda58fda83ed9546bf2f587df67f8d98e639117bee2d9326a9a1d9bebb2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5b2dafe5aa9e1105e0244a2b6751807342fa86aa0144b4e84d947a1686102715.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f75545479c17fdfe2a00235fa4a0e9da1ae95e6b3caafba87ded92de6b0240e4.json +1 -0
- xinference/web/ui/src/locales/en.json +3 -0
- xinference/web/ui/src/locales/ja.json +3 -0
- xinference/web/ui/src/locales/ko.json +3 -0
- xinference/web/ui/src/locales/zh.json +3 -0
- {xinference-1.7.0.post1.dist-info → xinference-1.7.1.post1.dist-info}/METADATA +4 -3
- {xinference-1.7.0.post1.dist-info → xinference-1.7.1.post1.dist-info}/RECORD +77 -67
- xinference/web/ui/build/static/js/main.8a9e3ba0.js +0 -3
- xinference/web/ui/build/static/js/main.8a9e3ba0.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/34cfbfb7836e136ba3261cfd411cc554bf99ba24b35dcceebeaa4f008cb3c9dc.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/cc97b49285d7717c63374766c789141a4329a04582ab32756d7e0e614d4c5c7f.json +0 -1
- /xinference/web/ui/build/static/js/{main.8a9e3ba0.js.LICENSE.txt → main.9b12b7f9.js.LICENSE.txt} +0 -0
- {xinference-1.7.0.post1.dist-info → xinference-1.7.1.post1.dist-info}/WHEEL +0 -0
- {xinference-1.7.0.post1.dist-info → xinference-1.7.1.post1.dist-info}/entry_points.txt +0 -0
- {xinference-1.7.0.post1.dist-info → xinference-1.7.1.post1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.7.0.post1.dist-info → xinference-1.7.1.post1.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-06-
|
|
11
|
+
"date": "2025-06-30T19:23:32+0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "1.7.
|
|
14
|
+
"full-revisionid": "84f10dcf287df295396d5988016452ad8db98dfa",
|
|
15
|
+
"version": "1.7.1.post1"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
xinference/api/restful_api.py
CHANGED
|
@@ -1994,10 +1994,9 @@ class RESTfulAPI(CancelMixin):
|
|
|
1994
1994
|
payload = await request.json()
|
|
1995
1995
|
|
|
1996
1996
|
model_uid = payload.get("model")
|
|
1997
|
+
args = payload.get("args")
|
|
1997
1998
|
|
|
1998
|
-
exclude = {
|
|
1999
|
-
"model",
|
|
2000
|
-
}
|
|
1999
|
+
exclude = {"model", "args"}
|
|
2001
2000
|
kwargs = {key: value for key, value in payload.items() if key not in exclude}
|
|
2002
2001
|
|
|
2003
2002
|
try:
|
|
@@ -2012,7 +2011,7 @@ class RESTfulAPI(CancelMixin):
|
|
|
2012
2011
|
raise HTTPException(status_code=500, detail=str(e))
|
|
2013
2012
|
|
|
2014
2013
|
try:
|
|
2015
|
-
result = await model.infer(**kwargs)
|
|
2014
|
+
result = await model.infer(*args, **kwargs)
|
|
2016
2015
|
return Response(result, media_type="application/json")
|
|
2017
2016
|
except Exception as e:
|
|
2018
2017
|
e = await self._get_model_last_error(model.uid, e)
|
xinference/client/__init__.py
CHANGED
|
@@ -12,7 +12,9 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from .restful.async_restful_client import AsyncClient
|
|
15
16
|
from .restful.restful_client import Client
|
|
16
17
|
|
|
17
18
|
# For compatibility
|
|
18
19
|
RESTfulClient = Client
|
|
20
|
+
AsyncRESTfulClient = AsyncClient
|
xinference/client/common.py
CHANGED
|
@@ -13,7 +13,18 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import json
|
|
16
|
-
from typing import Any, Iterator
|
|
16
|
+
from typing import Any, AsyncIterator, Iterator, Union
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def convert_float_to_int_or_str(model_size: float) -> Union[int, str]:
|
|
20
|
+
"""convert float to int or string
|
|
21
|
+
|
|
22
|
+
if float can be presented as int, convert it to int, otherwise convert it to string
|
|
23
|
+
"""
|
|
24
|
+
if int(model_size) == model_size:
|
|
25
|
+
return int(model_size)
|
|
26
|
+
else:
|
|
27
|
+
return str(model_size)
|
|
17
28
|
|
|
18
29
|
|
|
19
30
|
def streaming_response_iterator(
|
|
@@ -46,7 +57,43 @@ def streaming_response_iterator(
|
|
|
46
57
|
if json_str == b"[DONE]":
|
|
47
58
|
continue
|
|
48
59
|
data = json.loads(json_str.decode("utf-8"))
|
|
49
|
-
error = data.get("error")
|
|
60
|
+
error = data.get("error", None)
|
|
61
|
+
if error is not None:
|
|
62
|
+
raise Exception(str(error))
|
|
63
|
+
yield data
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
async def async_streaming_response_iterator(
|
|
67
|
+
response_lines: AsyncIterator[bytes],
|
|
68
|
+
) -> AsyncIterator[Any]:
|
|
69
|
+
"""
|
|
70
|
+
Create an AsyncIterator to handle the streaming type of generation.
|
|
71
|
+
|
|
72
|
+
Note
|
|
73
|
+
----------
|
|
74
|
+
This method is for compatible with openai. Please refer to:
|
|
75
|
+
https://github.com/openai/openai-python/blob/v0.28.1/openai/api_requestor.py#L99
|
|
76
|
+
|
|
77
|
+
Parameters
|
|
78
|
+
----------
|
|
79
|
+
response_lines: AsyncIterator[bytes]
|
|
80
|
+
Generated lines by the Model Generator.
|
|
81
|
+
|
|
82
|
+
Returns
|
|
83
|
+
-------
|
|
84
|
+
AsyncIterator["CompletionChunk"]
|
|
85
|
+
AsyncIterator of CompletionChunks generated by models.
|
|
86
|
+
|
|
87
|
+
"""
|
|
88
|
+
|
|
89
|
+
async for line in response_lines:
|
|
90
|
+
line = line.strip()
|
|
91
|
+
if line.startswith(b"data:"):
|
|
92
|
+
json_str = line[len(b"data:") :].strip()
|
|
93
|
+
if json_str == b"[DONE]":
|
|
94
|
+
continue
|
|
95
|
+
data = json.loads(json_str.decode("utf-8"))
|
|
96
|
+
error = data.get("error", None)
|
|
50
97
|
if error is not None:
|
|
51
98
|
raise Exception(str(error))
|
|
52
99
|
yield data
|
xinference/client/handlers.py
CHANGED
|
@@ -1,3 +1,21 @@
|
|
|
1
|
+
from .restful.async_restful_client import ( # noqa: F401
|
|
2
|
+
AsyncRESTfulAudioModelHandle as AsyncAudioModelHandle,
|
|
3
|
+
)
|
|
4
|
+
from .restful.async_restful_client import ( # noqa: F401
|
|
5
|
+
AsyncRESTfulChatModelHandle as AsyncChatModelHandle,
|
|
6
|
+
)
|
|
7
|
+
from .restful.async_restful_client import ( # noqa: F401
|
|
8
|
+
AsyncRESTfulEmbeddingModelHandle as AsyncEmbeddingModelHandle,
|
|
9
|
+
)
|
|
10
|
+
from .restful.async_restful_client import ( # noqa: F401
|
|
11
|
+
AsyncRESTfulGenerateModelHandle as AsyncGenerateModelHandle,
|
|
12
|
+
)
|
|
13
|
+
from .restful.async_restful_client import ( # noqa: F401
|
|
14
|
+
AsyncRESTfulImageModelHandle as AsyncImageModelHandle,
|
|
15
|
+
)
|
|
16
|
+
from .restful.async_restful_client import ( # noqa: F401
|
|
17
|
+
AsyncRESTfulVideoModelHandle as AsyncVideoModelHandle,
|
|
18
|
+
)
|
|
1
19
|
from .restful.restful_client import ( # noqa: F401
|
|
2
20
|
RESTfulAudioModelHandle as AudioModelHandle,
|
|
3
21
|
)
|