xinference 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (137)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +107 -11
  3. xinference/client/restful/restful_client.py +51 -11
  4. xinference/constants.py +5 -1
  5. xinference/core/media_interface.py +758 -0
  6. xinference/core/model.py +49 -9
  7. xinference/core/supervisor.py +1 -1
  8. xinference/core/utils.py +1 -1
  9. xinference/core/worker.py +33 -39
  10. xinference/deploy/cmdline.py +17 -0
  11. xinference/deploy/utils.py +0 -3
  12. xinference/model/audio/__init__.py +16 -27
  13. xinference/model/audio/core.py +2 -1
  14. xinference/model/audio/cosyvoice.py +4 -2
  15. xinference/model/audio/model_spec.json +63 -46
  16. xinference/model/audio/model_spec_modelscope.json +31 -14
  17. xinference/model/embedding/__init__.py +16 -24
  18. xinference/model/image/__init__.py +15 -25
  19. xinference/model/llm/__init__.py +40 -115
  20. xinference/model/llm/core.py +29 -6
  21. xinference/model/llm/llama_cpp/core.py +30 -347
  22. xinference/model/llm/llm_family.json +1674 -2203
  23. xinference/model/llm/llm_family.py +71 -7
  24. xinference/model/llm/llm_family_csghub.json +0 -32
  25. xinference/model/llm/llm_family_modelscope.json +1838 -2016
  26. xinference/model/llm/llm_family_openmind_hub.json +19 -325
  27. xinference/model/llm/lmdeploy/core.py +7 -2
  28. xinference/model/llm/mlx/core.py +23 -7
  29. xinference/model/llm/reasoning_parser.py +281 -5
  30. xinference/model/llm/sglang/core.py +39 -11
  31. xinference/model/llm/transformers/chatglm.py +9 -2
  32. xinference/model/llm/transformers/cogagent.py +10 -12
  33. xinference/model/llm/transformers/cogvlm2.py +6 -3
  34. xinference/model/llm/transformers/cogvlm2_video.py +3 -6
  35. xinference/model/llm/transformers/core.py +58 -60
  36. xinference/model/llm/transformers/deepseek_v2.py +4 -2
  37. xinference/model/llm/transformers/deepseek_vl.py +10 -4
  38. xinference/model/llm/transformers/deepseek_vl2.py +9 -4
  39. xinference/model/llm/transformers/gemma3.py +4 -5
  40. xinference/model/llm/transformers/glm4v.py +3 -21
  41. xinference/model/llm/transformers/glm_edge_v.py +3 -20
  42. xinference/model/llm/transformers/intern_vl.py +3 -6
  43. xinference/model/llm/transformers/internlm2.py +1 -1
  44. xinference/model/llm/transformers/minicpmv25.py +4 -2
  45. xinference/model/llm/transformers/minicpmv26.py +5 -3
  46. xinference/model/llm/transformers/omnilmm.py +1 -1
  47. xinference/model/llm/transformers/opt.py +1 -1
  48. xinference/model/llm/transformers/ovis2.py +302 -0
  49. xinference/model/llm/transformers/qwen-omni.py +8 -1
  50. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  51. xinference/model/llm/transformers/qwen2_vl.py +5 -1
  52. xinference/model/llm/transformers/qwen_vl.py +5 -2
  53. xinference/model/llm/utils.py +96 -45
  54. xinference/model/llm/vllm/core.py +108 -24
  55. xinference/model/llm/vllm/distributed_executor.py +8 -7
  56. xinference/model/llm/vllm/xavier/allocator.py +1 -1
  57. xinference/model/llm/vllm/xavier/block_manager.py +1 -1
  58. xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
  59. xinference/model/llm/vllm/xavier/executor.py +1 -1
  60. xinference/model/llm/vllm/xavier/test/test_xavier.py +2 -11
  61. xinference/model/rerank/__init__.py +13 -24
  62. xinference/model/video/__init__.py +15 -25
  63. xinference/model/video/core.py +3 -3
  64. xinference/model/video/diffusers.py +157 -13
  65. xinference/model/video/model_spec.json +100 -0
  66. xinference/model/video/model_spec_modelscope.json +104 -0
  67. xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
  68. xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
  69. xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
  70. xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
  71. xinference/thirdparty/cosyvoice/bin/train.py +7 -2
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
  74. xinference/thirdparty/cosyvoice/cli/model.py +140 -155
  75. xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
  76. xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
  77. xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
  78. xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
  79. xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
  80. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
  81. xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
  84. xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
  85. xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
  86. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
  87. xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
  88. xinference/thirdparty/cosyvoice/utils/common.py +1 -1
  89. xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
  90. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
  91. xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
  92. xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
  93. xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
  94. xinference/types.py +2 -71
  95. xinference/web/ui/build/asset-manifest.json +6 -6
  96. xinference/web/ui/build/index.html +1 -1
  97. xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
  98. xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
  99. xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
  100. xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
  101. xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
  102. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
  103. xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
  104. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
  105. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
  106. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
  107. xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
  108. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
  109. xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
  112. xinference/web/ui/src/locales/en.json +7 -4
  113. xinference/web/ui/src/locales/zh.json +7 -4
  114. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
  115. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/RECORD +120 -121
  116. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
  117. xinference/core/image_interface.py +0 -377
  118. xinference/model/llm/transformers/compression.py +0 -258
  119. xinference/model/llm/transformers/yi_vl.py +0 -239
  120. xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
  121. xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
  122. xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
  123. xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
  124. xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
  125. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
  126. xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
  129. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
  130. xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
  131. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
  132. xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
  133. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
  134. /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
  135. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
  136. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
  137. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-04-21T17:53:44+0800",
+ "date": "2025-05-16T20:05:54+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "a5d4be9f970137bde1d402420f71961826392224",
- "version": "1.5.0.post2"
+ "full-revisionid": "81a24f4646ace8f41c85a810237491d9c0ad5282",
+ "version": "1.6.0"
 }
 ''' # END VERSION_JSON
 
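The block above is the versioneer-style version stamp: the JSON is embedded as a string and parsed at import time. A minimal sketch of how such a stamp is read back (assuming the standard get_versions() helper that versioneer generates in _version.py; it is not part of this hunk):

import json

version_json = '''
{
 "date": "2025-05-16T20:05:54+0800",
 "dirty": false,
 "error": null,
 "full-revisionid": "81a24f4646ace8f41c85a810237491d9c0ad5282",
 "version": "1.6.0"
}
'''  # END VERSION_JSON

def get_versions():
    # Parse the embedded JSON stamp; this mirrors the versioneer helper.
    return json.loads(version_json)

assert get_versions()["version"] == "1.6.0"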
xinference/api/restful_api.py CHANGED
@@ -56,6 +56,7 @@ from ..constants import (
     XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
     XINFERENCE_DEFAULT_ENDPOINT_PORT,
     XINFERENCE_DISABLE_METRICS,
+    XINFERENCE_SSE_PING_ATTEMPTS_SECONDS,
 )
 from ..core.event import Event, EventCollectorActor, EventType
 from ..core.supervisor import SupervisorActor
@@ -201,13 +202,13 @@ class BuildGradioInterfaceRequest(BaseModel):
     model_lang: List[str]
 
 
-class BuildGradioImageInterfaceRequest(BaseModel):
+class BuildGradioMediaInterfaceRequest(BaseModel):
     model_type: str
     model_name: str
     model_family: str
     model_id: str
     controlnet: Union[None, List[Dict[str, Union[str, dict, None]]]]
-    model_revision: str
+    model_revision: Optional[str]
     model_ability: List[str]
 
 
@@ -352,7 +353,27 @@ class RESTfulAPI(CancelMixin):
         )
         self._router.add_api_route(
             "/v1/ui/images/{model_uid}",
-            self.build_gradio_images_interface,
+            self.build_gradio_media_interface,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/v1/ui/audios/{model_uid}",
+            self.build_gradio_media_interface,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/v1/ui/videos/{model_uid}",
+            self.build_gradio_media_interface,
             methods=["POST"],
             dependencies=(
                 [Security(self._auth_service, scopes=["models:read"])]
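All three routes above dispatch to the same build_gradio_media_interface handler; only the URL prefix differs by modality. A hedged sketch of a request against the new video route (host, port, model uid, and field values are illustrative; the fields mirror BuildGradioMediaInterfaceRequest):

import requests

payload = {
    "model_type": "video",             # "image", "audio", or "video"
    "model_name": "CogVideoX-5b",      # illustrative
    "model_family": "CogVideoX",       # illustrative
    "model_id": "THUDM/CogVideoX-5b",  # illustrative
    "controlnet": None,
    "model_revision": None,            # now Optional[str], per the request model above
    "model_ability": ["text2video"],
}
resp = requests.post(
    "http://localhost:9997/v1/ui/videos/my-video-model",  # assumed endpoint and uid
    json=payload,
)
resp.raise_for_status()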
@@ -676,6 +697,17 @@ class RESTfulAPI(CancelMixin):
                 else None
             ),
         )
+        self._router.add_api_route(
+            "/v1/video/generations/image",
+            self.create_videos_from_images,
+            methods=["POST"],
+            response_model=VideoList,
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
         self._router.add_api_route(
             "/v1/chat/completions",
             self.create_chat_completion,
@@ -1183,16 +1215,16 @@ class RESTfulAPI(CancelMixin):
 
         return JSONResponse(content={"model_uid": model_uid})
 
-    async def build_gradio_images_interface(
+    async def build_gradio_media_interface(
         self, model_uid: str, request: Request
     ) -> JSONResponse:
         """
         Build a Gradio interface for image processing models.
         """
         payload = await request.json()
-        body = BuildGradioImageInterfaceRequest.parse_obj(payload)
+        body = BuildGradioMediaInterfaceRequest.parse_obj(payload)
         assert self._app is not None
-        assert body.model_type == "image"
+        assert body.model_type in ("image", "video", "audio")
 
         # asyncio.Lock() behaves differently in 3.9 than 3.10+
         # A event loop is required in 3.9 but not 3.10+
@@ -1206,12 +1238,12 @@ class RESTfulAPI(CancelMixin):
             )
             asyncio.set_event_loop(asyncio.new_event_loop())
 
-        from ..core.image_interface import ImageInterface
+        from ..core.media_interface import MediaInterface
 
         try:
             access_token = request.headers.get("Authorization")
             internal_host = "localhost" if self._host == "0.0.0.0" else self._host
-            interface = ImageInterface(
+            interface = MediaInterface(
                 endpoint=f"http://{internal_host}:{self._port}",
                 model_uid=model_uid,
                 model_family=body.model_family,
@@ -1221,6 +1253,7 @@ class RESTfulAPI(CancelMixin):
                 controlnet=body.controlnet,
                 access_token=access_token,
                 model_ability=body.model_ability,
+                model_type=body.model_type,
             ).build()
 
             gr.mount_gradio_app(self._app, interface, f"/{model_uid}")
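gr.mount_gradio_app is the stock Gradio/FastAPI integration point used here; a self-contained sketch of the pattern (a toy interface standing in for the MediaInterface built above):

import gradio as gr
from fastapi import FastAPI

app = FastAPI()
# Trivial stand-in for MediaInterface(...).build().
demo = gr.Interface(fn=lambda text: text, inputs="text", outputs="text")
# Serve the Gradio UI under a sub-path of the running FastAPI app,
# as restful_api.py does with f"/{model_uid}".
gr.mount_gradio_app(app, demo, "/demo")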
@@ -1338,7 +1371,9 @@ class RESTfulAPI(CancelMixin):
             finally:
                 await model.decrease_serve_count()
 
-            return EventSourceResponse(stream_results())
+            return EventSourceResponse(
+                stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
+            )
         else:
             try:
                 data = await model.generate(body.prompt, kwargs, raw_params=raw_kwargs)
@@ -1606,7 +1641,9 @@ class RESTfulAPI(CancelMixin):
                 await model.decrease_serve_count()
 
             return EventSourceResponse(
-                media_type="application/octet-stream", content=stream_results()
+                media_type="application/octet-stream",
+                content=stream_results(),
+                ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS,
             )
         else:
             return Response(media_type="application/octet-stream", content=out)
@@ -1975,14 +2012,22 @@ class RESTfulAPI(CancelMixin):
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
 
+        request_id = None
         try:
             kwargs = json.loads(body.kwargs) if body.kwargs else {}
+            request_id = kwargs.get("request_id")
+            self._add_running_task(request_id)
             video_list = await model.text_to_video(
                 prompt=body.prompt,
                 n=body.n,
                 **kwargs,
             )
             return Response(content=video_list, media_type="application/json")
+        except asyncio.CancelledError:
+            err_str = f"The request has been cancelled: {request_id}"
+            logger.error(err_str)
+            await self._report_error_event(model_uid, err_str)
+            raise HTTPException(status_code=409, detail=err_str)
         except Exception as e:
             e = await self._get_model_last_error(model.uid, e)
             logger.error(e, exc_info=True)
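The request_id read from kwargs above is what makes the new 409 path reachable: a tagged text_to_video request is registered via _add_running_task and, if cancelled mid-flight, returns a 409 instead of a generic 500. A hedged client-side sketch (endpoint and uid are assumptions; the abort call refers to xinference's existing cancellation API, whose exact name may vary by version):

from xinference.client import RESTfulClient

client = RESTfulClient("http://localhost:9997")  # assumed endpoint
model = client.get_model("my-video-model")       # assumed model uid

# Tag the request so the server registers it as a cancellable task.
video_list = model.text_to_video(
    prompt="a boat drifting on a misty lake",
    n=1,
    request_id="t2v-0001",  # illustrative id, forwarded via kwargs
)

# From elsewhere, the tagged request could then be aborted, e.g.
# client.abort_request("my-video-model", "t2v-0001"), after which the
# server answers the original call with HTTP 409.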
@@ -1990,6 +2035,55 @@ class RESTfulAPI(CancelMixin):
             self.handle_request_limit_error(e)
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def create_videos_from_images(
+        self,
+        model: str = Form(...),
+        image: UploadFile = File(media_type="application/octet-stream"),
+        prompt: Optional[Union[str, List[str]]] = Form(None),
+        negative_prompt: Optional[Union[str, List[str]]] = Form(None),
+        n: Optional[int] = Form(1),
+        kwargs: Optional[str] = Form(None),
+    ) -> Response:
+        model_uid = model
+        try:
+            model_ref = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        request_id = None
+        try:
+            if kwargs is not None:
+                parsed_kwargs = json.loads(kwargs)
+            else:
+                parsed_kwargs = {}
+            request_id = parsed_kwargs.get("request_id")
+            self._add_running_task(request_id)
+            video_list = await model_ref.image_to_video(
+                image=Image.open(image.file),
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                n=n,
+                **parsed_kwargs,
+            )
+            return Response(content=video_list, media_type="application/json")
+        except asyncio.CancelledError:
+            err_str = f"The request has been cancelled: {request_id}"
+            logger.error(err_str)
+            await self._report_error_event(model_uid, err_str)
+            raise HTTPException(status_code=409, detail=err_str)
+        except Exception as e:
+            e = await self._get_model_last_error(model_ref.uid, e)
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            self.handle_request_limit_error(e)
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def create_chat_completion(self, request: Request) -> Response:
         raw_body = await request.json()
         body = CreateChatCompletion.parse_obj(raw_body)
@@ -2122,7 +2216,9 @@ class RESTfulAPI(CancelMixin):
             finally:
                 await model.decrease_serve_count()
 
-            return EventSourceResponse(stream_results())
+            return EventSourceResponse(
+                stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
+            )
         else:
             try:
                 data = await model.chat(
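create_videos_from_images accepts a multipart form whose fields match the handler signature above. A hedged sketch of a raw HTTP call (host, port, and model uid are illustrative; the client handle added in restful_client.py below wraps this same request):

import json
import requests

with open("first_frame.png", "rb") as f:  # illustrative input image
    response = requests.post(
        "http://localhost:9997/v1/video/generations/image",  # assumed endpoint
        files={"image": ("image", f, "application/octet-stream")},
        data={
            "model": "my-video-model",  # assumed model uid
            "prompt": "the camera slowly zooms out",
            "n": 1,
            "kwargs": json.dumps({"request_id": "i2v-0001"}),  # optional; enables cancellation
        },
    )
response.raise_for_status()
video_list = response.json()  # VideoList payload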
xinference/client/restful/restful_client.py CHANGED
@@ -28,7 +28,6 @@ if TYPE_CHECKING:
     CompletionChunk,
     Embedding,
     ImageList,
-    LlamaCppGenerateConfig,
     PytorchGenerateConfig,
     VideoList,
 )
@@ -464,14 +463,59 @@ class RESTfulVideoModelHandle(RESTfulModelHandle):
         response_data = response.json()
         return response_data
 
+    def image_to_video(
+        self,
+        image: Union[str, bytes],
+        prompt: str,
+        negative_prompt: Optional[str] = None,
+        n: int = 1,
+        **kwargs,
+    ) -> "VideoList":
+        """
+        Creates a video by the input image and text.
+
+        Parameters
+        ----------
+        image: `Union[str, bytes]`
+            The input image to condition the generation on.
+        prompt: `str` or `List[str]`
+            The prompt or prompts to guide video generation. If not defined, you need to pass `prompt_embeds`.
+        negative_prompt (`str` or `List[str]`, *optional*):
+            The prompt or prompts not to guide the image generation.
+        n: `int`, defaults to 1
+            The number of videos to generate per prompt. Must be between 1 and 10.
+        Returns
+        -------
+        VideoList
+            A list of video objects.
+        """
+        url = f"{self._base_url}/v1/video/generations/image"
+        params = {
+            "model": self._model_uid,
+            "prompt": prompt,
+            "negative_prompt": negative_prompt,
+            "n": n,
+            "kwargs": json.dumps(kwargs),
+        }
+        files: List[Any] = []
+        for key, value in params.items():
+            files.append((key, (None, value)))
+        files.append(("image", ("image", image, "application/octet-stream")))
+        response = requests.post(url, files=files, headers=self.auth_headers)
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Failed to create the video from image, detail: {_get_error_string(response)}"
+            )
+
+        response_data = response.json()
+        return response_data
+
 
 class RESTfulGenerateModelHandle(RESTfulModelHandle):
     def generate(
         self,
         prompt: str,
-        generate_config: Optional[
-            Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]
-        ] = None,
+        generate_config: Optional["PytorchGenerateConfig"] = None,
     ) -> Union["Completion", Iterator["CompletionChunk"]]:
         """
         Creates a completion for the provided prompt and parameters via RESTful APIs.
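A usage sketch for the RESTfulVideoModelHandle.image_to_video method added above (endpoint and model uid are assumptions; any launched video model with image-to-video ability applies):

from xinference.client import RESTfulClient

client = RESTfulClient("http://localhost:9997")  # assumed endpoint
model = client.get_model("my-video-model")       # assumed uid

with open("first_frame.png", "rb") as f:
    video_list = model.image_to_video(
        image=f.read(),
        prompt="the camera slowly zooms out",
        negative_prompt="blurry, low quality",
        n=1,
    )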
@@ -480,9 +524,8 @@ class RESTfulGenerateModelHandle(RESTfulModelHandle):
         ----------
         prompt: str
             The user's message or user's input.
-        generate_config: Optional[Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]]
+        generate_config: Optional["PytorchGenerateConfig"]
             Additional configuration for the chat generation.
-            "LlamaCppGenerateConfig" -> Configuration for llama-cpp-python model
             "PytorchGenerateConfig" -> Configuration for pytorch model
 
         Returns
@@ -528,9 +571,7 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
         self,
         messages: List[Dict],
         tools: Optional[List[Dict]] = None,
-        generate_config: Optional[
-            Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]
-        ] = None,
+        generate_config: Optional["PytorchGenerateConfig"] = None,
     ) -> Union["ChatCompletion", Iterator["ChatCompletionChunk"]]:
         """
         Given a list of messages comprising a conversation, the model will return a response via RESTful APIs.
@@ -541,9 +582,8 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
             A list of messages comprising the conversation so far.
         tools: Optional[List[Dict]]
             A tool list.
-        generate_config: Optional[Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]]
+        generate_config: Optional["PytorchGenerateConfig"]
             Additional configuration for the chat generation.
-            "LlamaCppGenerateConfig" -> configuration for llama-cpp-python model
             "PytorchGenerateConfig" -> configuration for pytorch model
 
         Returns
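With LlamaCppGenerateConfig removed from the public types, generate and chat now document a single config type. A hedged usage sketch (endpoint and uid are assumptions; the dict keys are typical PytorchGenerateConfig fields):

from xinference.client import RESTfulClient

client = RESTfulClient("http://localhost:9997")  # assumed endpoint
model = client.get_model("my-chat-model")        # assumed uid of a chat model

response = model.chat(
    messages=[{"role": "user", "content": "What is Xinference?"}],
    generate_config={"max_tokens": 256, "temperature": 0.7},  # PytorchGenerateConfig-style dict
)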
xinference/constants.py CHANGED
@@ -29,7 +29,8 @@ XINFERENCE_ENV_DISABLE_HEALTH_CHECK = "XINFERENCE_DISABLE_HEALTH_CHECK"
 XINFERENCE_ENV_DISABLE_METRICS = "XINFERENCE_DISABLE_METRICS"
 XINFERENCE_ENV_DOWNLOAD_MAX_ATTEMPTS = "XINFERENCE_DOWNLOAD_MAX_ATTEMPTS"
 XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE = "XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE"
-XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_EANBLE_VIRTUAL_ENV"
+XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_ENABLE_VIRTUAL_ENV"
+XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS = "XINFERENCE_SSE_PING_ATTEMPTS_SECONDS"
 
 
 def get_xinference_home() -> str:
@@ -89,6 +90,9 @@ XINFERENCE_DOWNLOAD_MAX_ATTEMPTS = int(
 XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE = os.environ.get(
     XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE, None
 )
+XINFERENCE_SSE_PING_ATTEMPTS_SECONDS = int(
+    os.environ.get(XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS, 600)
+)
 XINFERENCE_LAUNCH_MODEL_RETRY = 3
 XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION = 30
 XINFERENCE_ENABLE_VIRTUAL_ENV = bool(int(os.getenv(XINFERENCE_ENV_VIRTUAL_ENV, "0")))
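XINFERENCE_SSE_PING_ATTEMPTS_SECONDS is read once at import time, defaults to 600 seconds, and is passed as the ping interval to each EventSourceResponse shown earlier, keeping long-idle streaming connections alive. A minimal override sketch (the variable would normally be exported in the shell before starting xinference):

import os

# Equivalent to: export XINFERENCE_SSE_PING_ATTEMPTS_SECONDS=30
os.environ["XINFERENCE_SSE_PING_ATTEMPTS_SECONDS"] = "30"

from xinference.constants import XINFERENCE_SSE_PING_ATTEMPTS_SECONDS

# Streaming completion/chat responses now emit an SSE ping every 30 seconds.
print(XINFERENCE_SSE_PING_ATTEMPTS_SECONDS)  # 30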