xinference 1.5.1__py3-none-any.whl → 1.6.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of xinference has been flagged as potentially problematic by the registry scanner.

Files changed (96)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +97 -8
  3. xinference/client/restful/restful_client.py +51 -11
  4. xinference/core/media_interface.py +758 -0
  5. xinference/core/model.py +49 -9
  6. xinference/core/worker.py +31 -37
  7. xinference/deploy/utils.py +0 -3
  8. xinference/model/audio/__init__.py +16 -27
  9. xinference/model/audio/core.py +1 -0
  10. xinference/model/audio/cosyvoice.py +4 -2
  11. xinference/model/audio/model_spec.json +20 -3
  12. xinference/model/audio/model_spec_modelscope.json +18 -1
  13. xinference/model/embedding/__init__.py +16 -24
  14. xinference/model/image/__init__.py +15 -25
  15. xinference/model/llm/__init__.py +37 -110
  16. xinference/model/llm/core.py +15 -6
  17. xinference/model/llm/llama_cpp/core.py +25 -353
  18. xinference/model/llm/llm_family.json +613 -89
  19. xinference/model/llm/llm_family.py +9 -1
  20. xinference/model/llm/llm_family_modelscope.json +540 -90
  21. xinference/model/llm/mlx/core.py +6 -3
  22. xinference/model/llm/reasoning_parser.py +281 -5
  23. xinference/model/llm/sglang/core.py +16 -3
  24. xinference/model/llm/transformers/chatglm.py +2 -2
  25. xinference/model/llm/transformers/cogagent.py +1 -1
  26. xinference/model/llm/transformers/cogvlm2.py +1 -1
  27. xinference/model/llm/transformers/core.py +9 -3
  28. xinference/model/llm/transformers/glm4v.py +1 -1
  29. xinference/model/llm/transformers/minicpmv26.py +1 -1
  30. xinference/model/llm/transformers/qwen-omni.py +6 -0
  31. xinference/model/llm/transformers/qwen_vl.py +1 -1
  32. xinference/model/llm/utils.py +68 -45
  33. xinference/model/llm/vllm/core.py +38 -18
  34. xinference/model/llm/vllm/xavier/test/test_xavier.py +1 -10
  35. xinference/model/rerank/__init__.py +13 -24
  36. xinference/model/video/__init__.py +15 -25
  37. xinference/model/video/core.py +3 -3
  38. xinference/model/video/diffusers.py +133 -16
  39. xinference/model/video/model_spec.json +54 -0
  40. xinference/model/video/model_spec_modelscope.json +56 -0
  41. xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
  42. xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
  43. xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
  44. xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
  45. xinference/thirdparty/cosyvoice/bin/train.py +7 -2
  46. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
  47. xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
  48. xinference/thirdparty/cosyvoice/cli/model.py +140 -155
  49. xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
  50. xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
  51. xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
  52. xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
  53. xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
  54. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
  55. xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
  56. xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
  57. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
  58. xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
  59. xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
  60. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
  61. xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
  62. xinference/thirdparty/cosyvoice/utils/common.py +1 -1
  63. xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
  64. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
  65. xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
  66. xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
  67. xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
  68. xinference/types.py +0 -71
  69. xinference/web/ui/build/asset-manifest.json +3 -3
  70. xinference/web/ui/build/index.html +1 -1
  71. xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
  72. xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
  73. xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
  74. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
  75. xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
  79. xinference/web/ui/src/locales/en.json +6 -4
  80. xinference/web/ui/src/locales/zh.json +6 -4
  81. {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/METADATA +59 -39
  82. {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/RECORD +87 -87
  83. {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/WHEEL +1 -1
  84. xinference/core/image_interface.py +0 -377
  85. xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
  86. xinference/web/ui/build/static/js/main.91e77b5c.js +0 -3
  87. xinference/web/ui/build/static/js/main.91e77b5c.js.map +0 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
  89. xinference/web/ui/node_modules/.cache/babel-loader/5e6edb0fb87e3798f142e9abf8dd2dc46bab33a60d31dff525797c0c99887097.json +0 -1
  90. xinference/web/ui/node_modules/.cache/babel-loader/6087820be1bd5c02c42dff797e7df365448ef35ab26dd5d6bd33e967e05cbfd4.json +0 -1
  91. xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
  92. xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
  93. /xinference/web/ui/build/static/js/{main.91e77b5c.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
  94. {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/entry_points.txt +0 -0
  95. {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/licenses/LICENSE +0 -0
  96. {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-04-30T21:28:49+0800",
+ "date": "2025-05-17T15:09:06+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "1c11c609971e5a5095ce8be73f0e1bba04a3132f",
- "version": "1.5.1"
+ "full-revisionid": "1adc5d3e5cffb2752cd3e05ca782c4cfe3c0ce57",
+ "version": "1.6.0.post1"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py CHANGED
@@ -202,13 +202,13 @@ class BuildGradioInterfaceRequest(BaseModel):
     model_lang: List[str]
 
 
-class BuildGradioImageInterfaceRequest(BaseModel):
+class BuildGradioMediaInterfaceRequest(BaseModel):
     model_type: str
     model_name: str
     model_family: str
     model_id: str
     controlnet: Union[None, List[Dict[str, Union[str, dict, None]]]]
-    model_revision: str
+    model_revision: Optional[str]
     model_ability: List[str]
 
 
@@ -353,7 +353,27 @@ class RESTfulAPI(CancelMixin):
         )
         self._router.add_api_route(
             "/v1/ui/images/{model_uid}",
-            self.build_gradio_images_interface,
+            self.build_gradio_media_interface,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/v1/ui/audios/{model_uid}",
+            self.build_gradio_media_interface,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/v1/ui/videos/{model_uid}",
+            self.build_gradio_media_interface,
             methods=["POST"],
             dependencies=(
                 [Security(self._auth_service, scopes=["models:read"])]
@@ -677,6 +697,17 @@ class RESTfulAPI(CancelMixin):
                 else None
             ),
         )
+        self._router.add_api_route(
+            "/v1/video/generations/image",
+            self.create_videos_from_images,
+            methods=["POST"],
+            response_model=VideoList,
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
         self._router.add_api_route(
             "/v1/chat/completions",
             self.create_chat_completion,
@@ -1184,16 +1215,16 @@ class RESTfulAPI(CancelMixin):
 
         return JSONResponse(content={"model_uid": model_uid})
 
-    async def build_gradio_images_interface(
+    async def build_gradio_media_interface(
         self, model_uid: str, request: Request
     ) -> JSONResponse:
         """
         Build a Gradio interface for image processing models.
         """
         payload = await request.json()
-        body = BuildGradioImageInterfaceRequest.parse_obj(payload)
+        body = BuildGradioMediaInterfaceRequest.parse_obj(payload)
         assert self._app is not None
-        assert body.model_type == "image"
+        assert body.model_type in ("image", "video", "audio")
 
         # asyncio.Lock() behaves differently in 3.9 than 3.10+
         # A event loop is required in 3.9 but not 3.10+
@@ -1207,12 +1238,12 @@
         )
         asyncio.set_event_loop(asyncio.new_event_loop())
 
-        from ..core.image_interface import ImageInterface
+        from ..core.media_interface import MediaInterface
 
         try:
             access_token = request.headers.get("Authorization")
             internal_host = "localhost" if self._host == "0.0.0.0" else self._host
-            interface = ImageInterface(
+            interface = MediaInterface(
                 endpoint=f"http://{internal_host}:{self._port}",
                 model_uid=model_uid,
                 model_family=body.model_family,
@@ -1222,6 +1253,7 @@
                 controlnet=body.controlnet,
                 access_token=access_token,
                 model_ability=body.model_ability,
+                model_type=body.model_type,
             ).build()
 
             gr.mount_gradio_app(self._app, interface, f"/{model_uid}")
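
Note: the three /v1/ui/... routes above all dispatch to the same build_gradio_media_interface handler; requests differ only in the model_type they declare. A minimal sketch of exercising the new video route, assuming a server on localhost:9997; the model uid and body values are illustrative placeholders, not taken from a real deployment:

import requests

# Body mirrors BuildGradioMediaInterfaceRequest; every value here is a
# hypothetical example.
payload = {
    "model_type": "video",               # "image" and "audio" are also accepted now
    "model_name": "demo-video-model",
    "model_family": "demo-family",
    "model_id": "org/demo-video-model",
    "controlnet": None,
    "model_revision": None,              # Optional[str] as of this release
    "model_ability": ["text2video"],
}
resp = requests.post(
    "http://localhost:9997/v1/ui/videos/demo-model-uid",
    json=payload,
)
resp.raise_for_status()
# On success, the server mounts a Gradio app under /demo-model-uid.
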
@@ -1980,14 +2012,22 @@
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
 
+        request_id = None
         try:
             kwargs = json.loads(body.kwargs) if body.kwargs else {}
+            request_id = kwargs.get("request_id")
+            self._add_running_task(request_id)
             video_list = await model.text_to_video(
                 prompt=body.prompt,
                 n=body.n,
                 **kwargs,
             )
             return Response(content=video_list, media_type="application/json")
+        except asyncio.CancelledError:
+            err_str = f"The request has been cancelled: {request_id}"
+            logger.error(err_str)
+            await self._report_error_event(model_uid, err_str)
+            raise HTTPException(status_code=409, detail=err_str)
         except Exception as e:
             e = await self._get_model_last_error(model.uid, e)
             logger.error(e, exc_info=True)
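
The request_id plumbing above is what makes text-to-video requests cancellable: the caller passes a request_id through kwargs, the handler registers it via _add_running_task, and a cancelled task surfaces as HTTP 409. A client-side sketch; the abort path shown is an assumption based on the existing CancelMixin machinery, and the uid is a placeholder:

import json
import uuid

import requests

request_id = str(uuid.uuid4())
# Kick off generation with a caller-chosen request_id inside kwargs.
resp = requests.post(
    "http://localhost:9997/v1/video/generations",
    json={
        "model": "demo-video-model-uid",
        "prompt": "a cat surfing a wave",
        "n": 1,
        "kwargs": json.dumps({"request_id": request_id}),
    },
)

# From another thread or process: aborting the registered task makes the
# pending call above fail with status 409.
requests.post(
    f"http://localhost:9997/v1/models/demo-video-model-uid/requests/{request_id}/abort"
)
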
@@ -1995,6 +2035,55 @@
             self.handle_request_limit_error(e)
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def create_videos_from_images(
+        self,
+        model: str = Form(...),
+        image: UploadFile = File(media_type="application/octet-stream"),
+        prompt: Optional[Union[str, List[str]]] = Form(None),
+        negative_prompt: Optional[Union[str, List[str]]] = Form(None),
+        n: Optional[int] = Form(1),
+        kwargs: Optional[str] = Form(None),
+    ) -> Response:
+        model_uid = model
+        try:
+            model_ref = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        request_id = None
+        try:
+            if kwargs is not None:
+                parsed_kwargs = json.loads(kwargs)
+            else:
+                parsed_kwargs = {}
+            request_id = parsed_kwargs.get("request_id")
+            self._add_running_task(request_id)
+            video_list = await model_ref.image_to_video(
+                image=Image.open(image.file),
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                n=n,
+                **parsed_kwargs,
+            )
+            return Response(content=video_list, media_type="application/json")
+        except asyncio.CancelledError:
+            err_str = f"The request has been cancelled: {request_id}"
+            logger.error(err_str)
+            await self._report_error_event(model_uid, err_str)
+            raise HTTPException(status_code=409, detail=err_str)
+        except Exception as e:
+            e = await self._get_model_last_error(model_ref.uid, e)
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            self.handle_request_limit_error(e)
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def create_chat_completion(self, request: Request) -> Response:
         raw_body = await request.json()
         body = CreateChatCompletion.parse_obj(raw_body)
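
The new handler accepts multipart form data: plain fields arrive via Form(...) and the conditioning image as an UploadFile. A raw-HTTP sketch against the new endpoint; server address, model uid, and file name are placeholders:

import json

import requests

with open("first_frame.png", "rb") as f:
    resp = requests.post(
        "http://localhost:9997/v1/video/generations/image",
        data={
            "model": "demo-video-model-uid",
            "prompt": "the scene slowly comes alive",
            "n": "1",
            "kwargs": json.dumps({}),
        },
        files={"image": ("image", f, "application/octet-stream")},
    )
resp.raise_for_status()
video_list = resp.json()  # a VideoList payload, e.g. {"created": ..., "data": [...]}
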
xinference/client/restful/restful_client.py CHANGED
@@ -28,7 +28,6 @@ if TYPE_CHECKING:
         CompletionChunk,
         Embedding,
         ImageList,
-        LlamaCppGenerateConfig,
         PytorchGenerateConfig,
         VideoList,
     )
@@ -464,14 +463,59 @@ class RESTfulVideoModelHandle(RESTfulModelHandle):
         response_data = response.json()
         return response_data
 
+    def image_to_video(
+        self,
+        image: Union[str, bytes],
+        prompt: str,
+        negative_prompt: Optional[str] = None,
+        n: int = 1,
+        **kwargs,
+    ) -> "VideoList":
+        """
+        Creates a video by the input image and text.
+
+        Parameters
+        ----------
+        image: `Union[str, bytes]`
+            The input image to condition the generation on.
+        prompt: `str` or `List[str]`
+            The prompt or prompts to guide video generation. If not defined, you need to pass `prompt_embeds`.
+        negative_prompt (`str` or `List[str]`, *optional*):
+            The prompt or prompts not to guide the image generation.
+        n: `int`, defaults to 1
+            The number of videos to generate per prompt. Must be between 1 and 10.
+        Returns
+        -------
+        VideoList
+            A list of video objects.
+        """
+        url = f"{self._base_url}/v1/video/generations/image"
+        params = {
+            "model": self._model_uid,
+            "prompt": prompt,
+            "negative_prompt": negative_prompt,
+            "n": n,
+            "kwargs": json.dumps(kwargs),
+        }
+        files: List[Any] = []
+        for key, value in params.items():
+            files.append((key, (None, value)))
+        files.append(("image", ("image", image, "application/octet-stream")))
+        response = requests.post(url, files=files, headers=self.auth_headers)
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Failed to create the video from image, detail: {_get_error_string(response)}"
+            )
+
+        response_data = response.json()
+        return response_data
+
 
 class RESTfulGenerateModelHandle(RESTfulModelHandle):
     def generate(
         self,
         prompt: str,
-        generate_config: Optional[
-            Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]
-        ] = None,
+        generate_config: Optional["PytorchGenerateConfig"] = None,
     ) -> Union["Completion", Iterator["CompletionChunk"]]:
         """
         Creates a completion for the provided prompt and parameters via RESTful APIs.
@@ -480,9 +524,8 @@ class RESTfulGenerateModelHandle(RESTfulModelHandle):
         ----------
         prompt: str
             The user's message or user's input.
-        generate_config: Optional[Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]]
+        generate_config: Optional["PytorchGenerateConfig"]
             Additional configuration for the chat generation.
-            "LlamaCppGenerateConfig" -> Configuration for llama-cpp-python model
             "PytorchGenerateConfig" -> Configuration for pytorch model
 
         Returns
@@ -528,9 +571,7 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
         self,
         messages: List[Dict],
         tools: Optional[List[Dict]] = None,
-        generate_config: Optional[
-            Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]
-        ] = None,
+        generate_config: Optional["PytorchGenerateConfig"] = None,
     ) -> Union["ChatCompletion", Iterator["ChatCompletionChunk"]]:
         """
         Given a list of messages comprising a conversation, the model will return a response via RESTful APIs.
@@ -541,9 +582,8 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
             A list of messages comprising the conversation so far.
         tools: Optional[List[Dict]]
             A tool list.
-        generate_config: Optional[Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]]
+        generate_config: Optional["PytorchGenerateConfig"]
             Additional configuration for the chat generation.
-            "LlamaCppGenerateConfig" -> configuration for llama-cpp-python model
             "PytorchGenerateConfig" -> configuration for pytorch model
 
         Returns
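
On the client side, the new image_to_video method is reachable through the usual handle returned by get_model. A usage sketch, assuming a running server with an already-launched video model; the uid and file path are placeholders:

from xinference.client import RESTfulClient

client = RESTfulClient("http://localhost:9997")
# For video models, get_model returns a RESTfulVideoModelHandle.
model = client.get_model("demo-video-model-uid")
with open("first_frame.png", "rb") as f:
    video_list = model.image_to_video(
        image=f.read(),  # bytes, per the new Union[str, bytes] signature
        prompt="the scene slowly comes alive",
        n=1,
    )
# Each entry in "data" is a video object; the exact fields follow the
# server's VideoList schema (commonly a url or base64 payload).
print(video_list["data"][0])
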