xinference 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective registries, and is provided for informational purposes only.

This version of xinference might be problematic.
Files changed (132)
  1. xinference/_compat.py +1 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +54 -1
  4. xinference/client/restful/restful_client.py +82 -2
  5. xinference/constants.py +3 -0
  6. xinference/core/chat_interface.py +297 -83
  7. xinference/core/model.py +24 -3
  8. xinference/core/progress_tracker.py +16 -8
  9. xinference/core/supervisor.py +51 -1
  10. xinference/core/worker.py +315 -47
  11. xinference/deploy/cmdline.py +33 -1
  12. xinference/model/audio/core.py +11 -1
  13. xinference/model/audio/megatts.py +105 -0
  14. xinference/model/audio/model_spec.json +24 -1
  15. xinference/model/audio/model_spec_modelscope.json +26 -1
  16. xinference/model/core.py +14 -0
  17. xinference/model/embedding/core.py +6 -1
  18. xinference/model/flexible/core.py +6 -1
  19. xinference/model/image/core.py +6 -1
  20. xinference/model/image/model_spec.json +17 -1
  21. xinference/model/image/model_spec_modelscope.json +17 -1
  22. xinference/model/llm/__init__.py +4 -6
  23. xinference/model/llm/core.py +5 -0
  24. xinference/model/llm/llama_cpp/core.py +46 -17
  25. xinference/model/llm/llm_family.json +530 -85
  26. xinference/model/llm/llm_family.py +24 -1
  27. xinference/model/llm/llm_family_modelscope.json +572 -1
  28. xinference/model/llm/mlx/core.py +16 -2
  29. xinference/model/llm/reasoning_parser.py +3 -3
  30. xinference/model/llm/sglang/core.py +111 -13
  31. xinference/model/llm/transformers/__init__.py +14 -0
  32. xinference/model/llm/transformers/core.py +31 -6
  33. xinference/model/llm/transformers/deepseek_vl.py +1 -1
  34. xinference/model/llm/transformers/deepseek_vl2.py +287 -0
  35. xinference/model/llm/transformers/gemma3.py +17 -2
  36. xinference/model/llm/transformers/intern_vl.py +28 -18
  37. xinference/model/llm/transformers/minicpmv26.py +21 -2
  38. xinference/model/llm/transformers/qwen-omni.py +308 -0
  39. xinference/model/llm/transformers/qwen2_audio.py +1 -1
  40. xinference/model/llm/transformers/qwen2_vl.py +20 -4
  41. xinference/model/llm/utils.py +37 -15
  42. xinference/model/llm/vllm/core.py +184 -8
  43. xinference/model/llm/vllm/distributed_executor.py +320 -0
  44. xinference/model/rerank/core.py +22 -12
  45. xinference/model/utils.py +118 -1
  46. xinference/model/video/core.py +6 -1
  47. xinference/thirdparty/deepseek_vl2/__init__.py +31 -0
  48. xinference/thirdparty/deepseek_vl2/models/__init__.py +26 -0
  49. xinference/thirdparty/deepseek_vl2/models/configuration_deepseek.py +210 -0
  50. xinference/thirdparty/deepseek_vl2/models/conversation.py +310 -0
  51. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek.py +1975 -0
  52. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek_vl_v2.py +697 -0
  53. xinference/thirdparty/deepseek_vl2/models/processing_deepseek_vl_v2.py +675 -0
  54. xinference/thirdparty/deepseek_vl2/models/siglip_vit.py +661 -0
  55. xinference/thirdparty/deepseek_vl2/serve/__init__.py +0 -0
  56. xinference/thirdparty/deepseek_vl2/serve/app_modules/__init__.py +0 -0
  57. xinference/thirdparty/deepseek_vl2/serve/app_modules/gradio_utils.py +83 -0
  58. xinference/thirdparty/deepseek_vl2/serve/app_modules/overwrites.py +81 -0
  59. xinference/thirdparty/deepseek_vl2/serve/app_modules/presets.py +115 -0
  60. xinference/thirdparty/deepseek_vl2/serve/app_modules/utils.py +333 -0
  61. xinference/thirdparty/deepseek_vl2/serve/assets/Kelpy-Codos.js +100 -0
  62. xinference/thirdparty/deepseek_vl2/serve/assets/avatar.png +0 -0
  63. xinference/thirdparty/deepseek_vl2/serve/assets/custom.css +355 -0
  64. xinference/thirdparty/deepseek_vl2/serve/assets/custom.js +22 -0
  65. xinference/thirdparty/deepseek_vl2/serve/assets/favicon.ico +0 -0
  66. xinference/thirdparty/deepseek_vl2/serve/assets/simsun.ttc +0 -0
  67. xinference/thirdparty/deepseek_vl2/serve/inference.py +197 -0
  68. xinference/thirdparty/deepseek_vl2/utils/__init__.py +18 -0
  69. xinference/thirdparty/deepseek_vl2/utils/io.py +80 -0
  70. xinference/thirdparty/megatts3/__init__.py +0 -0
  71. xinference/thirdparty/megatts3/tts/frontend_function.py +175 -0
  72. xinference/thirdparty/megatts3/tts/gradio_api.py +93 -0
  73. xinference/thirdparty/megatts3/tts/infer_cli.py +277 -0
  74. xinference/thirdparty/megatts3/tts/modules/aligner/whisper_small.py +318 -0
  75. xinference/thirdparty/megatts3/tts/modules/ar_dur/ar_dur_predictor.py +362 -0
  76. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/layers.py +64 -0
  77. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/nar_tts_modules.py +73 -0
  78. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rel_transformer.py +403 -0
  79. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rot_transformer.py +649 -0
  80. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/seq_utils.py +342 -0
  81. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/transformer.py +767 -0
  82. xinference/thirdparty/megatts3/tts/modules/llm_dit/cfm.py +309 -0
  83. xinference/thirdparty/megatts3/tts/modules/llm_dit/dit.py +180 -0
  84. xinference/thirdparty/megatts3/tts/modules/llm_dit/time_embedding.py +44 -0
  85. xinference/thirdparty/megatts3/tts/modules/llm_dit/transformer.py +230 -0
  86. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/diag_gaussian.py +67 -0
  87. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/hifigan_modules.py +283 -0
  88. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/seanet_encoder.py +38 -0
  89. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/wavvae_v3.py +60 -0
  90. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/conv.py +154 -0
  91. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/lstm.py +51 -0
  92. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/seanet.py +126 -0
  93. xinference/thirdparty/megatts3/tts/utils/audio_utils/align.py +36 -0
  94. xinference/thirdparty/megatts3/tts/utils/audio_utils/io.py +95 -0
  95. xinference/thirdparty/megatts3/tts/utils/audio_utils/plot.py +90 -0
  96. xinference/thirdparty/megatts3/tts/utils/commons/ckpt_utils.py +171 -0
  97. xinference/thirdparty/megatts3/tts/utils/commons/hparams.py +215 -0
  98. xinference/thirdparty/megatts3/tts/utils/text_utils/dict.json +1 -0
  99. xinference/thirdparty/megatts3/tts/utils/text_utils/ph_tone_convert.py +94 -0
  100. xinference/thirdparty/megatts3/tts/utils/text_utils/split_text.py +90 -0
  101. xinference/thirdparty/megatts3/tts/utils/text_utils/text_encoder.py +280 -0
  102. xinference/types.py +10 -0
  103. xinference/utils.py +54 -0
  104. xinference/web/ui/build/asset-manifest.json +6 -6
  105. xinference/web/ui/build/index.html +1 -1
  106. xinference/web/ui/build/static/css/main.0f6523be.css +2 -0
  107. xinference/web/ui/build/static/css/main.0f6523be.css.map +1 -0
  108. xinference/web/ui/build/static/js/main.58bd483c.js +3 -0
  109. xinference/web/ui/build/static/js/main.58bd483c.js.map +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/3bff8cbe9141f937f4d98879a9771b0f48e0e4e0dbee8e647adbfe23859e7048.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/4500b1a622a031011f0a291701e306b87e08cbc749c50e285103536b85b6a914.json +1 -0
  112. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +1 -0
  113. xinference/web/ui/node_modules/.cache/babel-loader/69081049f0c7447544b7cfd73dd13d8846c02fe5febe4d81587e95c89a412d5b.json +1 -0
  114. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/bf2b211b0d1b6465eff512d64c869d748f803c5651a7c24e48de6ea3484a7bfe.json +1 -0
  116. xinference/web/ui/src/locales/en.json +2 -1
  117. xinference/web/ui/src/locales/zh.json +2 -1
  118. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/METADATA +128 -115
  119. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/RECORD +124 -63
  120. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/WHEEL +1 -1
  121. xinference/web/ui/build/static/css/main.b494ae7e.css +0 -2
  122. xinference/web/ui/build/static/css/main.b494ae7e.css.map +0 -1
  123. xinference/web/ui/build/static/js/main.3cea968e.js +0 -3
  124. xinference/web/ui/build/static/js/main.3cea968e.js.map +0 -1
  125. xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +0 -1
  126. xinference/web/ui/node_modules/.cache/babel-loader/7f59e45e3f268ab8a4788b6fb024cf8dab088736dff22f5a3a39c122a83ab930.json +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/dcd60488509450bfff37bfff56de2c096d51de17dd00ec60d4db49c8b483ada1.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/e547bbb18abb4a474b675a8d5782d25617566bea0af8caa9b836ce5649e2250a.json +0 -1
  129. /xinference/web/ui/build/static/js/{main.3cea968e.js.LICENSE.txt → main.58bd483c.js.LICENSE.txt} +0 -0
  130. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/entry_points.txt +0 -0
  131. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info/licenses}/LICENSE +0 -0
  132. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/top_level.txt +0 -0
xinference/_compat.py CHANGED
@@ -102,6 +102,7 @@ class CreateChatCompletionOpenAI(BaseModel):
     frequency_penalty: Optional[float]
     logit_bias: Optional[Dict[str, int]]
     logprobs: Optional[bool]
+    max_completion_tokens: Optional[int]
     max_tokens: Optional[int]
     n: Optional[int]
     parallel_tool_calls: Optional[bool]
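The new `max_completion_tokens` field brings the Pydantic model in line with OpenAI's current chat-completions API; a companion change in restful_api.py below maps it onto the existing `max_tokens` handling. A minimal sketch of exercising it through an OpenAI-compatible client (the endpoint URL and model UID are placeholders, and this assumes an openai-python version recent enough to accept the parameter):

```python
# Hypothetical usage; endpoint URL and model UID are placeholders.
import openai

client = openai.OpenAI(base_url="http://127.0.0.1:9997/v1", api_key="not-used")
resp = client.chat.completions.create(
    model="my-llm",  # placeholder model UID
    messages=[{"role": "user", "content": "Hello!"}],
    # 1.4.0 only honored max_tokens; 1.5.0 also accepts max_completion_tokens
    # and maps it onto max_tokens server-side (see restful_api.py below).
    max_completion_tokens=128,
)
print(resp.choices[0].message.content)
```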
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-03-21T14:33:52+0800",
+ "date": "2025-04-19T20:32:22+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "ac88d425e3d5fc12166e22c4032286327871f5f2",
- "version": "1.4.0"
+ "full-revisionid": "ee8d025e1c046b22b3b148e5e97c0e107c979ee3",
+ "version": "1.5.0"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py CHANGED
@@ -468,6 +468,26 @@ class RESTfulAPI(CancelMixin):
                 else None
             ),
         )
+        self._router.add_api_route(
+            "/v1/models/{model_uid}/progress",
+            self.get_launch_model_progress,
+            methods=["GET"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/v1/models/{model_uid}/cancel",
+            self.cancel_launch_model,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:stop"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
         self._router.add_api_route(
             "/v1/completions",
             self.create_completion,
@@ -1024,6 +1044,10 @@ class RESTfulAPI(CancelMixin):
         except RuntimeError as re:
             logger.error(str(re), exc_info=True)
             raise HTTPException(status_code=503, detail=str(re))
+        except asyncio.CancelledError as ce:
+            # cancelled by user
+            logger.error(str(ce), exc_info=True)
+            raise HTTPException(status_code=499, detail=str(ce))
         except Exception as e:
             logger.error(str(e), exc_info=True)
             raise HTTPException(status_code=500, detail=str(e))
@@ -1044,6 +1068,26 @@ class RESTfulAPI(CancelMixin):
             raise HTTPException(status_code=500, detail=str(e))
         return JSONResponse(content=infos)
 
+    async def get_launch_model_progress(self, model_uid: str) -> JSONResponse:
+        try:
+            progress = await (
+                await self._get_supervisor_ref()
+            ).get_launch_builtin_model_progress(model_uid)
+        except Exception as e:
+            logger.error(str(e), exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+        return JSONResponse(content={"progress": progress})
+
+    async def cancel_launch_model(self, model_uid: str) -> JSONResponse:
+        try:
+            await (await self._get_supervisor_ref()).cancel_launch_builtin_model(
+                model_uid
+            )
+        except Exception as e:
+            logger.error(str(e), exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+        return JSONResponse(content=None)
+
     async def launch_model_by_version(
         self, request: Request, wait_ready: bool = Query(True)
     ) -> JSONResponse:
@@ -1514,8 +1558,11 @@ class RESTfulAPI(CancelMixin):
         prompt_speech: Optional[UploadFile] = File(
             None, media_type="application/octet-stream"
         ),
+        prompt_latent: Optional[UploadFile] = File(
+            None, media_type="application/octet-stream"
+        ),
     ) -> Response:
-        if prompt_speech:
+        if prompt_speech or prompt_latent:
            f = await request.form()
         else:
            f = await request.json()
@@ -1539,6 +1586,8 @@ class RESTfulAPI(CancelMixin):
         parsed_kwargs = {}
         if prompt_speech is not None:
             parsed_kwargs["prompt_speech"] = await prompt_speech.read()
+        if prompt_latent is not None:
+            parsed_kwargs["prompt_latent"] = await prompt_latent.read()
         out = await model.speech(
             input=body.input,
             voice=body.voice,
@@ -1952,6 +2001,7 @@ class RESTfulAPI(CancelMixin):
             "logit_bias",
             "logit_bias_type",
             "user",
+            "max_completion_tokens",
         }
 
         raw_kwargs = {k: v for k, v in raw_body.items() if k not in exclude}
@@ -1964,6 +2014,9 @@ class RESTfulAPI(CancelMixin):
         if body.max_tokens is None:
             kwargs["max_tokens"] = max_tokens_field.default
 
+        if body.max_completion_tokens is not None:
+            kwargs["max_tokens"] = body.max_completion_tokens
+
         if body.logit_bias is not None:
             raise HTTPException(status_code=501, detail="Not implemented")
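Together, the new `/progress` and `/cancel` routes make model launches observable and interruptible over plain HTTP. A hypothetical polling loop against them (host, port, and model UID are placeholders; the progress payload is assumed to be a float in [0, 1]):

```python
# Hypothetical polling loop against the two new routes; host, port and
# model_uid are placeholders, not values from the diff.
import time

import requests

base = "http://127.0.0.1:9997"
model_uid = "my-model"

for _ in range(60):
    r = requests.get(f"{base}/v1/models/{model_uid}/progress")
    r.raise_for_status()
    if r.json()["progress"] >= 1.0:  # progress assumed to be a float in [0, 1]
        break
    time.sleep(2)
else:
    # Launch took too long: abort it through the new cancel route.
    requests.post(f"{base}/v1/models/{model_uid}/cancel")
```

Note the 499 status code above: a launch aborted through `/cancel` surfaces to the original launch request as `asyncio.CancelledError`, which the new handler translates into HTTP 499 (client closed request).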
xinference/client/restful/restful_client.py CHANGED
@@ -723,6 +723,7 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
         speed: float = 1.0,
         stream: bool = False,
         prompt_speech: Optional[bytes] = None,
+        prompt_latent: Optional[bytes] = None,
         **kwargs,
     ):
         """
@@ -743,6 +744,8 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
             Use stream or not.
         prompt_speech: bytes
             The audio bytes to be provided to the model.
+        prompt_latent: bytes
+            The latent bytes to be provided to the model.
 
         Returns
         -------
@@ -759,14 +762,22 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
             "stream": stream,
             "kwargs": json.dumps(kwargs),
         }
+        files: List[Any] = []
         if prompt_speech:
-            files: List[Any] = []
             files.append(
                 (
                     "prompt_speech",
                     ("prompt_speech", prompt_speech, "application/octet-stream"),
                 )
             )
+        if prompt_latent:
+            files.append(
+                (
+                    "prompt_latent",
+                    ("prompt_latent", prompt_latent, "application/octet-stream"),
+                )
+            )
+        if files:
             response = requests.post(
                 url, data=params, files=files, headers=self.auth_headers, stream=stream
             )
@@ -999,10 +1010,17 @@ class Client:
             "model_path": model_path,
         }
 
+        wait_ready = kwargs.pop("wait_ready", True)
+
         for key, value in kwargs.items():
             payload[str(key)] = value
 
-        response = requests.post(url, json=payload, headers=self._headers)
+        if wait_ready:
+            response = requests.post(url, json=payload, headers=self._headers)
+        else:
+            response = requests.post(
+                url, json=payload, headers=self._headers, params={"wait_ready": False}
+            )
         if response.status_code != 200:
             raise RuntimeError(
                 f"Failed to launch model, detail: {_get_error_string(response)}"
@@ -1035,6 +1053,68 @@ class Client:
                 f"Failed to terminate model, detail: {_get_error_string(response)}"
             )
 
+    def get_launch_model_progress(self, model_uid: str) -> dict:
+        """
+        Get progress of the specific model.
+
+        Parameters
+        ----------
+        model_uid: str
+            The unique id that identify the model we want.
+
+        Returns
+        -------
+        result: dict
+            Result that contains progress.
+
+        Raises
+        ------
+        RuntimeError
+            Report failure to get the wanted model with given model_uid. Provide details of failure through error message.
+        """
+        url = f"{self.base_url}/v1/models/{model_uid}/progress"
+
+        response = requests.get(url, headers=self._headers)
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Fail to get model launching progress, detail: {_get_error_string(response)}"
+            )
+        return response.json()
+
+    def cancel_launch_model(self, model_uid: str):
+        """
+        Cancel launching model.
+
+        Parameters
+        ----------
+        model_uid: str
+            The unique id that identify the model we want.
+
+        Raises
+        ------
+        RuntimeError
+            Report failure to get the wanted model with given model_uid. Provide details of failure through error message.
+        """
+        url = f"{self.base_url}/v1/models/{model_uid}/cancel"
+
+        response = requests.post(url, headers=self._headers)
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Fail to cancel launching model, detail: {_get_error_string(response)}"
+            )
+
+    def get_instance_info(self, model_name: str, model_uid: str):
+        url = f"{self.base_url}/v1/models/instances"
+        response = requests.get(
+            url,
+            headers=self._headers,
+            params={"model_name": model_name, "model_uid": model_uid},
+        )
+        if response.status_code != 200:
+            raise RuntimeError("Failed to get instance info")
+        response_data = response.json()
+        return response_data
+
     def _get_supervisor_internal_address(self):
         url = f"{self.base_url}/v1/address"
         response = requests.get(url, headers=self._headers)
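On the client side, the `wait_ready` keyword (popped from `kwargs` above) enables a non-blocking launch flow when combined with the two new methods. A sketch, assuming `launch_model` still returns the model UID when `wait_ready=False` and that the progress value is a float in [0, 1] (the model name is illustrative):

```python
# Sketch of the new non-blocking launch flow; the model name is illustrative.
import time

from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")

# New in 1.5.0: wait_ready=False is forwarded as a query parameter, so the
# call returns before the model has finished loading.
model_uid = client.launch_model(
    model_name="qwen2.5-instruct",  # placeholder
    model_type="LLM",
    wait_ready=False,
)

# Poll until loading completes...
while client.get_launch_model_progress(model_uid)["progress"] < 1.0:
    time.sleep(2)

# ...or abort instead:
# client.cancel_launch_model(model_uid)
```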
xinference/constants.py CHANGED
@@ -29,6 +29,7 @@ XINFERENCE_ENV_DISABLE_HEALTH_CHECK = "XINFERENCE_DISABLE_HEALTH_CHECK"
 XINFERENCE_ENV_DISABLE_METRICS = "XINFERENCE_DISABLE_METRICS"
 XINFERENCE_ENV_DOWNLOAD_MAX_ATTEMPTS = "XINFERENCE_DOWNLOAD_MAX_ATTEMPTS"
 XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE = "XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE"
+XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_EANBLE_VIRTUAL_ENV"
 
 
 def get_xinference_home() -> str:
@@ -55,6 +56,7 @@ XINFERENCE_LOG_DIR = os.path.join(XINFERENCE_HOME, "logs")
 XINFERENCE_IMAGE_DIR = os.path.join(XINFERENCE_HOME, "image")
 XINFERENCE_VIDEO_DIR = os.path.join(XINFERENCE_HOME, "video")
 XINFERENCE_AUTH_DIR = os.path.join(XINFERENCE_HOME, "auth")
+XINFERENCE_VIRTUAL_ENV_DIR = os.path.join(XINFERENCE_HOME, "virtualenv")
 XINFERENCE_CSG_ENDPOINT = str(
     os.environ.get(XINFERENCE_ENV_CSG_ENDPOINT, "https://hub-stg.opencsg.com/")
 )
@@ -89,3 +91,4 @@ XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE = os.environ.get(
 )
 XINFERENCE_LAUNCH_MODEL_RETRY = 3
 XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION = 30
+XINFERENCE_ENABLE_VIRTUAL_ENV = bool(int(os.getenv(XINFERENCE_ENV_VIRTUAL_ENV, "0")))
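These three additions wire up a single opt-in flag: the environment variable named by `XINFERENCE_ENV_VIRTUAL_ENV` (spelled `XINFERENCE_EANBLE_VIRTUAL_ENV` in this release, copied verbatim from the hunk above and apparently a typo for "ENABLE") toggles `XINFERENCE_ENABLE_VIRTUAL_ENV`, with `XINFERENCE_VIRTUAL_ENV_DIR` as the backing directory. A sketch of flipping it on, relying on the constants being evaluated at import time:

```python
# Sketch only: the env var name is copied verbatim from the constant above.
import os

os.environ["XINFERENCE_EANBLE_VIRTUAL_ENV"] = "1"  # any non-zero integer enables it

from xinference import constants  # module-level constants read the env var here

assert constants.XINFERENCE_ENABLE_VIRTUAL_ENV
print(constants.XINFERENCE_VIRTUAL_ENV_DIR)  # <XINFERENCE_HOME>/virtualenv
```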