xinference 1.7.0__py3-none-any.whl → 1.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (83)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +3 -4
  3. xinference/client/__init__.py +2 -0
  4. xinference/client/common.py +49 -2
  5. xinference/client/handlers.py +18 -0
  6. xinference/client/restful/async_restful_client.py +1760 -0
  7. xinference/client/restful/restful_client.py +74 -78
  8. xinference/core/media_interface.py +3 -1
  9. xinference/core/model.py +5 -4
  10. xinference/core/supervisor.py +10 -5
  11. xinference/core/worker.py +15 -14
  12. xinference/deploy/local.py +51 -9
  13. xinference/deploy/worker.py +5 -3
  14. xinference/device_utils.py +22 -3
  15. xinference/model/audio/fish_speech.py +23 -34
  16. xinference/model/audio/model_spec.json +4 -2
  17. xinference/model/audio/model_spec_modelscope.json +4 -2
  18. xinference/model/audio/utils.py +2 -2
  19. xinference/model/core.py +1 -0
  20. xinference/model/embedding/__init__.py +8 -8
  21. xinference/model/embedding/custom.py +6 -1
  22. xinference/model/embedding/embed_family.py +0 -41
  23. xinference/model/embedding/model_spec.json +10 -1
  24. xinference/model/embedding/model_spec_modelscope.json +10 -1
  25. xinference/model/embedding/sentence_transformers/core.py +30 -15
  26. xinference/model/flexible/core.py +1 -1
  27. xinference/model/flexible/launchers/__init__.py +2 -0
  28. xinference/model/flexible/launchers/image_process_launcher.py +1 -1
  29. xinference/model/flexible/launchers/modelscope_launcher.py +47 -0
  30. xinference/model/flexible/launchers/transformers_launcher.py +5 -5
  31. xinference/model/flexible/launchers/yolo_launcher.py +62 -0
  32. xinference/model/llm/__init__.py +7 -0
  33. xinference/model/llm/core.py +18 -1
  34. xinference/model/llm/llama_cpp/core.py +1 -1
  35. xinference/model/llm/llm_family.json +43 -3
  36. xinference/model/llm/llm_family.py +6 -0
  37. xinference/model/llm/llm_family_modelscope.json +45 -3
  38. xinference/model/llm/mlx/core.py +271 -18
  39. xinference/model/llm/mlx/distributed_models/__init__.py +13 -0
  40. xinference/model/llm/mlx/distributed_models/core.py +164 -0
  41. xinference/model/llm/mlx/distributed_models/deepseek_v3.py +75 -0
  42. xinference/model/llm/mlx/distributed_models/qwen2.py +82 -0
  43. xinference/model/llm/mlx/distributed_models/qwen3.py +82 -0
  44. xinference/model/llm/mlx/distributed_models/qwen3_moe.py +76 -0
  45. xinference/model/llm/reasoning_parser.py +12 -6
  46. xinference/model/llm/sglang/core.py +8 -4
  47. xinference/model/llm/transformers/chatglm.py +4 -1
  48. xinference/model/llm/transformers/core.py +4 -2
  49. xinference/model/llm/transformers/multimodal/cogagent.py +10 -4
  50. xinference/model/llm/transformers/multimodal/intern_vl.py +1 -1
  51. xinference/model/llm/utils.py +36 -17
  52. xinference/model/llm/vllm/core.py +142 -34
  53. xinference/model/llm/vllm/distributed_executor.py +96 -21
  54. xinference/model/llm/vllm/xavier/transfer.py +2 -2
  55. xinference/model/rerank/core.py +26 -9
  56. xinference/model/rerank/model_spec.json +3 -3
  57. xinference/model/rerank/model_spec_modelscope.json +3 -3
  58. xinference/web/ui/build/asset-manifest.json +3 -3
  59. xinference/web/ui/build/index.html +1 -1
  60. xinference/web/ui/build/static/js/main.9b12b7f9.js +3 -0
  61. xinference/web/ui/build/static/js/main.9b12b7f9.js.map +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/0fd4820d93f99509e80d8702dc3f6f8272424acab5608fa7c0e82cb1d3250a87.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/1460361af6975e63576708039f1cb732faf9c672d97c494d4055fc6331460be0.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/4efd8dda58fda83ed9546bf2f587df67f8d98e639117bee2d9326a9a1d9bebb2.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/5b2dafe5aa9e1105e0244a2b6751807342fa86aa0144b4e84d947a1686102715.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/f75545479c17fdfe2a00235fa4a0e9da1ae95e6b3caafba87ded92de6b0240e4.json +1 -0
  67. xinference/web/ui/src/locales/en.json +3 -0
  68. xinference/web/ui/src/locales/ja.json +3 -0
  69. xinference/web/ui/src/locales/ko.json +3 -0
  70. xinference/web/ui/src/locales/zh.json +3 -0
  71. {xinference-1.7.0.dist-info → xinference-1.7.1.dist-info}/METADATA +4 -3
  72. {xinference-1.7.0.dist-info → xinference-1.7.1.dist-info}/RECORD +77 -67
  73. xinference/web/ui/build/static/js/main.8a9e3ba0.js +0 -3
  74. xinference/web/ui/build/static/js/main.8a9e3ba0.js.map +0 -1
  75. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +0 -1
  76. xinference/web/ui/node_modules/.cache/babel-loader/34cfbfb7836e136ba3261cfd411cc554bf99ba24b35dcceebeaa4f008cb3c9dc.json +0 -1
  77. xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +0 -1
  78. xinference/web/ui/node_modules/.cache/babel-loader/cc97b49285d7717c63374766c789141a4329a04582ab32756d7e0e614d4c5c7f.json +0 -1
  79. /xinference/web/ui/build/static/js/{main.8a9e3ba0.js.LICENSE.txt → main.9b12b7f9.js.LICENSE.txt} +0 -0
  80. {xinference-1.7.0.dist-info → xinference-1.7.1.dist-info}/WHEEL +0 -0
  81. {xinference-1.7.0.dist-info → xinference-1.7.1.dist-info}/entry_points.txt +0 -0
  82. {xinference-1.7.0.dist-info → xinference-1.7.1.dist-info}/licenses/LICENSE +0 -0
  83. {xinference-1.7.0.dist-info → xinference-1.7.1.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-06-13T18:51:07+0800",
11
+ "date": "2025-06-27T20:03:38+0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "a362dba7334ef08c758bbc4a3d4904fe53cefe78",
15
- "version": "1.7.0"
14
+ "full-revisionid": "cf64a869f69ace1f575eca74e1ae86bc53356acf",
15
+ "version": "1.7.1"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
xinference/api/restful_api.py CHANGED
@@ -1994,10 +1994,9 @@ class RESTfulAPI(CancelMixin):
1994
1994
  payload = await request.json()
1995
1995
 
1996
1996
  model_uid = payload.get("model")
1997
+ args = payload.get("args")
1997
1998
 
1998
- exclude = {
1999
- "model",
2000
- }
1999
+ exclude = {"model", "args"}
2001
2000
  kwargs = {key: value for key, value in payload.items() if key not in exclude}
2002
2001
 
2003
2002
  try:
@@ -2012,7 +2011,7 @@ class RESTfulAPI(CancelMixin):
2012
2011
  raise HTTPException(status_code=500, detail=str(e))
2013
2012
 
2014
2013
  try:
2015
- result = await model.infer(**kwargs)
2014
+ result = await model.infer(*args, **kwargs)
2016
2015
  return Response(result, media_type="application/json")
2017
2016
  except Exception as e:
2018
2017
  e = await self._get_model_last_error(model.uid, e)
xinference/client/__init__.py CHANGED
@@ -12,7 +12,9 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from .restful.async_restful_client import AsyncClient
15
16
  from .restful.restful_client import Client
16
17
 
17
18
  # For compatibility
18
19
  RESTfulClient = Client
20
+ AsyncRESTfulClient = AsyncClient
xinference/client/common.py CHANGED
@@ -13,7 +13,18 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import json
16
- from typing import Any, Iterator
16
+ from typing import Any, AsyncIterator, Iterator, Union
17
+
18
+
19
+ def convert_float_to_int_or_str(model_size: float) -> Union[int, str]:
20
+ """convert float to int or string
21
+
22
+ if float can be presented as int, convert it to int, otherwise convert it to string
23
+ """
24
+ if int(model_size) == model_size:
25
+ return int(model_size)
26
+ else:
27
+ return str(model_size)
17
28
 
18
29
 
19
30
  def streaming_response_iterator(
@@ -46,7 +57,43 @@ def streaming_response_iterator(
46
57
  if json_str == b"[DONE]":
47
58
  continue
48
59
  data = json.loads(json_str.decode("utf-8"))
49
- error = data.get("error")
60
+ error = data.get("error", None)
61
+ if error is not None:
62
+ raise Exception(str(error))
63
+ yield data
64
+
65
+
66
+ async def async_streaming_response_iterator(
67
+ response_lines: AsyncIterator[bytes],
68
+ ) -> AsyncIterator[Any]:
69
+ """
70
+ Create an AsyncIterator to handle the streaming type of generation.
71
+
72
+ Note
73
+ ----------
74
+ This method is for compatible with openai. Please refer to:
75
+ https://github.com/openai/openai-python/blob/v0.28.1/openai/api_requestor.py#L99
76
+
77
+ Parameters
78
+ ----------
79
+ response_lines: AsyncIterator[bytes]
80
+ Generated lines by the Model Generator.
81
+
82
+ Returns
83
+ -------
84
+ AsyncIterator["CompletionChunk"]
85
+ AsyncIterator of CompletionChunks generated by models.
86
+
87
+ """
88
+
89
+ async for line in response_lines:
90
+ line = line.strip()
91
+ if line.startswith(b"data:"):
92
+ json_str = line[len(b"data:") :].strip()
93
+ if json_str == b"[DONE]":
94
+ continue
95
+ data = json.loads(json_str.decode("utf-8"))
96
+ error = data.get("error", None)
50
97
  if error is not None:
51
98
  raise Exception(str(error))
52
99
  yield data
xinference/client/handlers.py CHANGED
@@ -1,3 +1,21 @@
1
+ from .restful.async_restful_client import ( # noqa: F401
2
+ AsyncRESTfulAudioModelHandle as AsyncAudioModelHandle,
3
+ )
4
+ from .restful.async_restful_client import ( # noqa: F401
5
+ AsyncRESTfulChatModelHandle as AsyncChatModelHandle,
6
+ )
7
+ from .restful.async_restful_client import ( # noqa: F401
8
+ AsyncRESTfulEmbeddingModelHandle as AsyncEmbeddingModelHandle,
9
+ )
10
+ from .restful.async_restful_client import ( # noqa: F401
11
+ AsyncRESTfulGenerateModelHandle as AsyncGenerateModelHandle,
12
+ )
13
+ from .restful.async_restful_client import ( # noqa: F401
14
+ AsyncRESTfulImageModelHandle as AsyncImageModelHandle,
15
+ )
16
+ from .restful.async_restful_client import ( # noqa: F401
17
+ AsyncRESTfulVideoModelHandle as AsyncVideoModelHandle,
18
+ )
1
19
  from .restful.restful_client import ( # noqa: F401
2
20
  RESTfulAudioModelHandle as AudioModelHandle,
3
21
  )