xinference 0.14.4.post1__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic.

Files changed (194)
  1. xinference/_compat.py +51 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +209 -40
  4. xinference/client/restful/restful_client.py +7 -26
  5. xinference/conftest.py +1 -1
  6. xinference/constants.py +5 -0
  7. xinference/core/cache_tracker.py +1 -1
  8. xinference/core/chat_interface.py +8 -14
  9. xinference/core/event.py +1 -1
  10. xinference/core/image_interface.py +28 -0
  11. xinference/core/model.py +110 -31
  12. xinference/core/scheduler.py +37 -37
  13. xinference/core/status_guard.py +1 -1
  14. xinference/core/supervisor.py +17 -10
  15. xinference/core/utils.py +80 -22
  16. xinference/core/worker.py +17 -16
  17. xinference/deploy/cmdline.py +8 -16
  18. xinference/deploy/local.py +1 -1
  19. xinference/deploy/supervisor.py +1 -1
  20. xinference/deploy/utils.py +1 -1
  21. xinference/deploy/worker.py +1 -1
  22. xinference/model/audio/cosyvoice.py +86 -41
  23. xinference/model/audio/fish_speech.py +9 -9
  24. xinference/model/audio/model_spec.json +9 -9
  25. xinference/model/audio/whisper.py +4 -1
  26. xinference/model/embedding/core.py +52 -31
  27. xinference/model/image/core.py +2 -1
  28. xinference/model/image/model_spec.json +16 -4
  29. xinference/model/image/model_spec_modelscope.json +16 -4
  30. xinference/model/image/sdapi.py +136 -0
  31. xinference/model/image/stable_diffusion/core.py +164 -19
  32. xinference/model/llm/__init__.py +29 -11
  33. xinference/model/llm/llama_cpp/core.py +16 -33
  34. xinference/model/llm/llm_family.json +1011 -1296
  35. xinference/model/llm/llm_family.py +34 -53
  36. xinference/model/llm/llm_family_csghub.json +18 -35
  37. xinference/model/llm/llm_family_modelscope.json +981 -1122
  38. xinference/model/llm/lmdeploy/core.py +56 -88
  39. xinference/model/llm/mlx/core.py +46 -69
  40. xinference/model/llm/sglang/core.py +36 -18
  41. xinference/model/llm/transformers/chatglm.py +168 -306
  42. xinference/model/llm/transformers/cogvlm2.py +36 -63
  43. xinference/model/llm/transformers/cogvlm2_video.py +33 -223
  44. xinference/model/llm/transformers/core.py +55 -50
  45. xinference/model/llm/transformers/deepseek_v2.py +340 -0
  46. xinference/model/llm/transformers/deepseek_vl.py +53 -96
  47. xinference/model/llm/transformers/glm4v.py +55 -111
  48. xinference/model/llm/transformers/intern_vl.py +39 -70
  49. xinference/model/llm/transformers/internlm2.py +32 -54
  50. xinference/model/llm/transformers/minicpmv25.py +22 -55
  51. xinference/model/llm/transformers/minicpmv26.py +158 -68
  52. xinference/model/llm/transformers/omnilmm.py +5 -28
  53. xinference/model/llm/transformers/qwen2_audio.py +168 -0
  54. xinference/model/llm/transformers/qwen2_vl.py +234 -0
  55. xinference/model/llm/transformers/qwen_vl.py +34 -86
  56. xinference/model/llm/transformers/utils.py +32 -38
  57. xinference/model/llm/transformers/yi_vl.py +32 -72
  58. xinference/model/llm/utils.py +280 -554
  59. xinference/model/llm/vllm/core.py +161 -100
  60. xinference/model/rerank/core.py +41 -8
  61. xinference/model/rerank/model_spec.json +7 -0
  62. xinference/model/rerank/model_spec_modelscope.json +7 -1
  63. xinference/model/utils.py +1 -31
  64. xinference/thirdparty/cosyvoice/bin/export_jit.py +64 -0
  65. xinference/thirdparty/cosyvoice/bin/export_trt.py +8 -0
  66. xinference/thirdparty/cosyvoice/bin/inference.py +5 -2
  67. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +38 -22
  68. xinference/thirdparty/cosyvoice/cli/model.py +139 -26
  69. xinference/thirdparty/cosyvoice/flow/flow.py +15 -9
  70. xinference/thirdparty/cosyvoice/flow/length_regulator.py +20 -1
  71. xinference/thirdparty/cosyvoice/hifigan/generator.py +8 -4
  72. xinference/thirdparty/cosyvoice/llm/llm.py +14 -13
  73. xinference/thirdparty/cosyvoice/transformer/attention.py +7 -3
  74. xinference/thirdparty/cosyvoice/transformer/decoder.py +1 -1
  75. xinference/thirdparty/cosyvoice/transformer/embedding.py +4 -3
  76. xinference/thirdparty/cosyvoice/transformer/encoder.py +4 -2
  77. xinference/thirdparty/cosyvoice/utils/common.py +36 -0
  78. xinference/thirdparty/cosyvoice/utils/file_utils.py +16 -0
  79. xinference/thirdparty/deepseek_vl/serve/assets/Kelpy-Codos.js +100 -0
  80. xinference/thirdparty/deepseek_vl/serve/assets/avatar.png +0 -0
  81. xinference/thirdparty/deepseek_vl/serve/assets/custom.css +355 -0
  82. xinference/thirdparty/deepseek_vl/serve/assets/custom.js +22 -0
  83. xinference/thirdparty/deepseek_vl/serve/assets/favicon.ico +0 -0
  84. xinference/thirdparty/deepseek_vl/serve/examples/app.png +0 -0
  85. xinference/thirdparty/deepseek_vl/serve/examples/chart.png +0 -0
  86. xinference/thirdparty/deepseek_vl/serve/examples/mirror.png +0 -0
  87. xinference/thirdparty/deepseek_vl/serve/examples/pipeline.png +0 -0
  88. xinference/thirdparty/deepseek_vl/serve/examples/puzzle.png +0 -0
  89. xinference/thirdparty/deepseek_vl/serve/examples/rap.jpeg +0 -0
  90. xinference/thirdparty/fish_speech/fish_speech/configs/base.yaml +87 -0
  91. xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +33 -0
  92. xinference/thirdparty/fish_speech/fish_speech/configs/lora/r_8_alpha_16.yaml +4 -0
  93. xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +83 -0
  94. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text-data.proto +24 -0
  95. xinference/thirdparty/fish_speech/fish_speech/i18n/README.md +27 -0
  96. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
  97. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
  98. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
  99. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
  100. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
  101. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
  102. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
  103. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
  104. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
  105. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/.gitignore +114 -0
  106. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/README.md +36 -0
  107. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
  108. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
  109. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
  110. xinference/thirdparty/fish_speech/fish_speech/webui/css/style.css +161 -0
  111. xinference/thirdparty/fish_speech/fish_speech/webui/html/footer.html +11 -0
  112. xinference/thirdparty/fish_speech/fish_speech/webui/js/animate.js +69 -0
  113. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
  114. xinference/thirdparty/fish_speech/tools/api.py +79 -134
  115. xinference/thirdparty/fish_speech/tools/commons.py +35 -0
  116. xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
  117. xinference/thirdparty/fish_speech/tools/file.py +17 -0
  118. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
  119. xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
  120. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
  121. xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
  122. xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
  123. xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
  124. xinference/thirdparty/fish_speech/tools/sensevoice/README.md +59 -0
  125. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
  126. xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
  127. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
  128. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
  129. xinference/thirdparty/fish_speech/tools/webui.py +12 -146
  130. xinference/thirdparty/matcha/VERSION +1 -0
  131. xinference/thirdparty/matcha/hifigan/LICENSE +21 -0
  132. xinference/thirdparty/matcha/hifigan/README.md +101 -0
  133. xinference/thirdparty/omnilmm/LICENSE +201 -0
  134. xinference/thirdparty/whisper/__init__.py +156 -0
  135. xinference/thirdparty/whisper/__main__.py +3 -0
  136. xinference/thirdparty/whisper/assets/gpt2.tiktoken +50256 -0
  137. xinference/thirdparty/whisper/assets/mel_filters.npz +0 -0
  138. xinference/thirdparty/whisper/assets/multilingual.tiktoken +50257 -0
  139. xinference/thirdparty/whisper/audio.py +157 -0
  140. xinference/thirdparty/whisper/decoding.py +826 -0
  141. xinference/thirdparty/whisper/model.py +314 -0
  142. xinference/thirdparty/whisper/normalizers/__init__.py +2 -0
  143. xinference/thirdparty/whisper/normalizers/basic.py +76 -0
  144. xinference/thirdparty/whisper/normalizers/english.json +1741 -0
  145. xinference/thirdparty/whisper/normalizers/english.py +550 -0
  146. xinference/thirdparty/whisper/timing.py +386 -0
  147. xinference/thirdparty/whisper/tokenizer.py +395 -0
  148. xinference/thirdparty/whisper/transcribe.py +605 -0
  149. xinference/thirdparty/whisper/triton_ops.py +109 -0
  150. xinference/thirdparty/whisper/utils.py +316 -0
  151. xinference/thirdparty/whisper/version.py +1 -0
  152. xinference/types.py +14 -53
  153. xinference/web/ui/build/asset-manifest.json +6 -6
  154. xinference/web/ui/build/index.html +1 -1
  155. xinference/web/ui/build/static/css/{main.4bafd904.css → main.5061c4c3.css} +2 -2
  156. xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
  157. xinference/web/ui/build/static/js/main.754740c0.js +3 -0
  158. xinference/web/ui/build/static/js/{main.eb13fe95.js.LICENSE.txt → main.754740c0.js.LICENSE.txt} +2 -0
  159. xinference/web/ui/build/static/js/main.754740c0.js.map +1 -0
  160. xinference/web/ui/node_modules/.cache/babel-loader/10c69dc7a296779fcffedeff9393d832dfcb0013c36824adf623d3c518b801ff.json +1 -0
  161. xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +1 -0
  162. xinference/web/ui/node_modules/.cache/babel-loader/77d50223f3e734d4485cca538cb098a8c3a7a0a1a9f01f58cdda3af42fe1adf5.json +1 -0
  163. xinference/web/ui/node_modules/.cache/babel-loader/a56d5a642409a84988891089c98ca28ad0546432dfbae8aaa51bc5a280e1cdd2.json +1 -0
  164. xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +1 -0
  165. xinference/web/ui/node_modules/.cache/babel-loader/d9ff696a3e3471f01b46c63d18af32e491eb5dc0e43cb30202c96871466df57f.json +1 -0
  166. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
  167. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +1 -0
  168. xinference/web/ui/node_modules/.package-lock.json +37 -0
  169. xinference/web/ui/node_modules/a-sync-waterfall/package.json +21 -0
  170. xinference/web/ui/node_modules/nunjucks/node_modules/commander/package.json +48 -0
  171. xinference/web/ui/node_modules/nunjucks/package.json +112 -0
  172. xinference/web/ui/package-lock.json +38 -0
  173. xinference/web/ui/package.json +1 -0
  174. {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/METADATA +16 -10
  175. {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/RECORD +179 -127
  176. xinference/model/llm/transformers/llama_2.py +0 -108
  177. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
  178. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
  179. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
  180. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
  181. xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
  182. xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
  183. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
  184. xinference/web/ui/build/static/css/main.4bafd904.css.map +0 -1
  185. xinference/web/ui/build/static/js/main.eb13fe95.js +0 -3
  186. xinference/web/ui/build/static/js/main.eb13fe95.js.map +0 -1
  187. xinference/web/ui/node_modules/.cache/babel-loader/0b11a5339468c13b2d31ac085e7effe4303259b2071abd46a0a8eb8529233a5e.json +0 -1
  188. xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +0 -1
  189. xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +0 -1
  190. xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +0 -1
  191. {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/LICENSE +0 -0
  192. {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/WHEEL +0 -0
  193. {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/entry_points.txt +0 -0
  194. {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/top_level.txt +0 -0
xinference/_compat.py CHANGED
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Dict, Iterable, List, Literal, Optional, Union
+
 from pydantic.version import VERSION as PYDANTIC_VERSION

 PYDANTIC_V2 = PYDANTIC_VERSION.startswith("2.")
@@ -50,3 +52,52 @@ else:
     from pydantic.parse import load_str_bytes  # noqa: F401
     from pydantic.types import StrBytes  # noqa: F401
     from pydantic.utils import ROOT_KEY  # noqa: F401
+
+from openai.types.chat.chat_completion_named_tool_choice_param import (
+    ChatCompletionNamedToolChoiceParam,
+)
+from openai.types.chat.chat_completion_stream_options_param import (
+    ChatCompletionStreamOptionsParam,
+)
+from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
+
+OpenAIChatCompletionStreamOptionsParam = create_model_from_typeddict(
+    ChatCompletionStreamOptionsParam
+)
+OpenAIChatCompletionToolParam = create_model_from_typeddict(ChatCompletionToolParam)
+OpenAIChatCompletionNamedToolChoiceParam = create_model_from_typeddict(
+    ChatCompletionNamedToolChoiceParam
+)
+
+
+class CreateChatCompletionOpenAI(BaseModel):
+    """
+    Comes from source code: https://github.com/openai/openai-python/blob/main/src/openai/types/chat/completion_create_params.py
+    """
+
+    messages: List[Dict]
+    model: str
+    frequency_penalty: Optional[float]
+    logit_bias: Optional[Dict[str, int]]
+    logprobs: Optional[bool]
+    max_tokens: Optional[int]
+    n: Optional[int]
+    parallel_tool_calls: Optional[bool]
+    presence_penalty: Optional[float]
+    # we do not support this
+    # response_format: ResponseFormat
+    seed: Optional[int]
+    service_tier: Optional[Literal["auto", "default"]]
+    stop: Union[Optional[str], List[str]]
+    stream_options: Optional[OpenAIChatCompletionStreamOptionsParam]  # type: ignore
+    temperature: Optional[float]
+    tool_choice: Optional[  # type: ignore
+        Union[
+            Literal["none", "auto", "required"],
+            OpenAIChatCompletionNamedToolChoiceParam,
+        ]
+    ]
+    tools: Optional[Iterable[OpenAIChatCompletionToolParam]]  # type: ignore
+    top_logprobs: Optional[int]
+    top_p: Optional[float]
+    user: Optional[str]
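
The hunk above wraps several OpenAI TypedDict parameter types in pydantic models via create_model_from_typeddict so request payloads can be validated. A minimal sketch of that pattern, assuming the pydantic v1-style API used throughout this file; the ToolParam TypedDict below is a hypothetical stand-in, not part of the diff:

# Illustrative only: mirrors the create_model_from_typeddict pattern above.
from typing_extensions import TypedDict

from pydantic import create_model_from_typeddict  # pydantic v1 helper


class ToolParam(TypedDict, total=False):  # hypothetical stand-in for the OpenAI TypedDicts
    type: str
    function: dict


ToolParamModel = create_model_from_typeddict(ToolParam)
validated = ToolParamModel(type="function", function={"name": "get_weather"})
print(validated.type)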
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
- "date": "2024-09-03T15:42:58+0800",
+ "date": "2024-09-14T13:22:13+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "b1b7c44e6f0ad934eb8366d531c87f29cfa239a7",
- "version": "0.14.4.post1"
+ "full-revisionid": "961d355102007e3cd7963a353105b2422a31d4fd",
+ "version": "0.15.1"
 }
 ''' # END VERSION_JSON

xinference/api/restful_api.py CHANGED
@@ -57,14 +57,13 @@ from ..core.event import Event, EventCollectorActor, EventType
 from ..core.supervisor import SupervisorActor
 from ..core.utils import json_dumps
 from ..types import (
-    SPECIAL_TOOL_PROMPT,
     ChatCompletion,
-    ChatCompletionMessage,
     Completion,
     CreateChatCompletion,
     CreateCompletion,
     ImageList,
     PeftModelConfig,
+    SDAPIResult,
     VideoList,
     max_tokens_field,
 )
@@ -124,6 +123,43 @@ class TextToImageRequest(BaseModel):
     user: Optional[str] = None


+class SDAPIOptionsRequest(BaseModel):
+    sd_model_checkpoint: Optional[str] = None
+
+
+class SDAPITxt2imgRequst(BaseModel):
+    model: Optional[str]
+    prompt: Optional[str] = ""
+    negative_prompt: Optional[str] = ""
+    steps: Optional[int] = None
+    seed: Optional[int] = -1
+    cfg_scale: Optional[float] = 7.0
+    override_settings: Optional[dict] = {}
+    width: Optional[int] = 512
+    height: Optional[int] = 512
+    sampler_name: Optional[str] = None
+    denoising_strength: Optional[float] = None
+    kwargs: Optional[str] = None
+    user: Optional[str] = None
+
+
+class SDAPIImg2imgRequst(BaseModel):
+    model: Optional[str]
+    init_images: Optional[list]
+    prompt: Optional[str] = ""
+    negative_prompt: Optional[str] = ""
+    steps: Optional[int] = None
+    seed: Optional[int] = -1
+    cfg_scale: Optional[float] = 7.0
+    override_settings: Optional[dict] = {}
+    width: Optional[int] = 512
+    height: Optional[int] = 512
+    sampler_name: Optional[str] = None
+    denoising_strength: Optional[float] = None
+    kwargs: Optional[str] = None
+    user: Optional[str] = None
+
+
 class TextToVideoRequest(BaseModel):
     model: str
     prompt: Union[str, List[str]] = Field(description="The input to embed.")
@@ -165,7 +201,7 @@ class BuildGradioImageInterfaceRequest(BaseModel):
     model_name: str
     model_family: str
     model_id: str
-    controlnet: Union[None, List[Dict[str, Union[str, None]]]]
+    controlnet: Union[None, List[Dict[str, Union[str, dict, None]]]]
     model_revision: str
     model_ability: List[str]

@@ -199,14 +235,14 @@ class RESTfulAPI:
     async def _get_supervisor_ref(self) -> xo.ActorRefType[SupervisorActor]:
         if self._supervisor_ref is None:
             self._supervisor_ref = await xo.actor_ref(
-                address=self._supervisor_address, uid=SupervisorActor.uid()
+                address=self._supervisor_address, uid=SupervisorActor.default_uid()
             )
         return self._supervisor_ref

     async def _get_event_collector_ref(self) -> xo.ActorRefType[EventCollectorActor]:
         if self._event_collector_ref is None:
             self._event_collector_ref = await xo.actor_ref(
-                address=self._supervisor_address, uid=EventCollectorActor.uid()
+                address=self._supervisor_address, uid=EventCollectorActor.default_uid()
             )
         return self._event_collector_ref

@@ -521,6 +557,59 @@ class RESTfulAPI:
                 else None
             ),
         )
+        # SD WebUI API
+        self._router.add_api_route(
+            "/sdapi/v1/options",
+            self.sdapi_options,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/sdapi/v1/sd-models",
+            self.sdapi_sd_models,
+            methods=["GET"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/sdapi/v1/samplers",
+            self.sdapi_samplers,
+            methods=["GET"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/sdapi/v1/txt2img",
+            self.sdapi_txt2img,
+            methods=["POST"],
+            response_model=SDAPIResult,
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/sdapi/v1/img2img",
+            self.sdapi_img2img,
+            methods=["POST"],
+            response_model=SDAPIResult,
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
         self._router.add_api_route(
             "/v1/video/generations",
             self.create_videos,
@@ -1431,6 +1520,118 @@ class RESTfulAPI:
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))

+    async def sdapi_options(self, request: Request) -> Response:
+        body = SDAPIOptionsRequest.parse_obj(await request.json())
+        model_uid = body.sd_model_checkpoint
+
+        try:
+            if not model_uid:
+                raise ValueError("Unknown model")
+            await (await self._get_supervisor_ref()).get_model(model_uid)
+            return Response()
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+    async def sdapi_sd_models(self, request: Request) -> Response:
+        try:
+            models = await (await self._get_supervisor_ref()).list_models()
+            sd_models = []
+            for model_name, info in models.items():
+                if info["model_type"] != "image":
+                    continue
+                sd_models.append({"model_name": model_name, "config": None})
+            return JSONResponse(content=sd_models)
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+
+    async def sdapi_samplers(self, request: Request) -> Response:
+        try:
+            from ..model.image.stable_diffusion.core import SAMPLING_METHODS
+
+            samplers = [
+                {"name": sample_method, "alias": [], "options": {}}
+                for sample_method in SAMPLING_METHODS
+            ]
+            return JSONResponse(content=samplers)
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+
+    async def sdapi_txt2img(self, request: Request) -> Response:
+        body = SDAPITxt2imgRequst.parse_obj(await request.json())
+        model_uid = body.model or body.override_settings.get("sd_model_checkpoint")
+
+        try:
+            if not model_uid:
+                raise ValueError("Unknown model")
+            model = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        try:
+            kwargs = dict(body)
+            kwargs.update(json.loads(body.kwargs) if body.kwargs else {})
+            image_list = await model.txt2img(
+                **kwargs,
+            )
+            return Response(content=image_list, media_type="application/json")
+        except RuntimeError as re:
+            logger.error(re, exc_info=True)
+            await self._report_error_event(model_uid, str(re))
+            self.handle_request_limit_error(re)
+            raise HTTPException(status_code=400, detail=str(re))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+    async def sdapi_img2img(self, request: Request) -> Response:
+        body = SDAPIImg2imgRequst.parse_obj(await request.json())
+        model_uid = body.model or body.override_settings.get("sd_model_checkpoint")
+
+        try:
+            if not model_uid:
+                raise ValueError("Unknown model")
+            model = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        try:
+            kwargs = dict(body)
+            kwargs.update(json.loads(body.kwargs) if body.kwargs else {})
+            image_list = await model.img2img(
+                **kwargs,
+            )
+            return Response(content=image_list, media_type="application/json")
+        except RuntimeError as re:
+            logger.error(re, exc_info=True)
+            await self._report_error_event(model_uid, str(re))
+            self.handle_request_limit_error(re)
+            raise HTTPException(status_code=400, detail=str(re))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def create_variations(
         self,
         model: str = Form(...),
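
The two hunks above register and implement Automatic1111-style (SD WebUI) endpoints on top of Xinference's image models. A hedged usage sketch against a local server; the host/port and model uid are placeholders, and the response is assumed to follow the SD WebUI convention of base64-encoded images:

# Sketch only: call the new /sdapi/v1/txt2img route on a local Xinference server.
import base64

import requests

resp = requests.post(
    "http://127.0.0.1:9997/sdapi/v1/txt2img",   # default endpoint port; adjust as needed
    json={
        "model": "my-sd-model",                 # uid of a launched image model (placeholder)
        "prompt": "an astronaut riding a horse",
        "steps": 20,
        "width": 512,
        "height": 512,
    },
)
resp.raise_for_status()
images = resp.json()["images"]                  # assumed base64 payload, per SD WebUI convention
with open("txt2img.png", "wb") as f:
    f.write(base64.b64decode(images[0]))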
@@ -1627,33 +1828,7 @@
                 status_code=400, detail="Invalid input. Please specify the prompt."
             )

-        system_messages: List["ChatCompletionMessage"] = []
-        system_messages_contents = []
-        non_system_messages = []
-        for msg in messages:
-            assert (
-                msg.get("content") != SPECIAL_TOOL_PROMPT
-            ), f"Invalid message content {SPECIAL_TOOL_PROMPT}"
-            if msg["role"] == "system":
-                system_messages_contents.append(msg["content"])
-            else:
-                non_system_messages.append(msg)
-        system_messages.append(
-            {"role": "system", "content": ". ".join(system_messages_contents)}
-        )
-
         has_tool_message = messages[-1].get("role") == "tool"
-        if has_tool_message:
-            prompt = SPECIAL_TOOL_PROMPT
-            system_prompt = system_messages[0]["content"] if system_messages else None
-            chat_history = non_system_messages  # exclude the prompt
-        else:
-            prompt = None
-            if non_system_messages:
-                prompt = non_system_messages[-1]["content"]
-            system_prompt = system_messages[0]["content"] if system_messages else None
-            chat_history = non_system_messages[:-1]  # exclude the prompt
-
         model_uid = body.model

         try:
@@ -1681,9 +1856,7 @@
         from ..model.llm.utils import GLM4_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY

         model_family = desc.get("model_family", "")
-        function_call_models = (
-            ["gorilla-openfunctions-v1"] + QWEN_TOOL_CALL_FAMILY + GLM4_TOOL_CALL_FAMILY
-        )
+        function_call_models = QWEN_TOOL_CALL_FAMILY + GLM4_TOOL_CALL_FAMILY

         if model_family not in function_call_models:
             if body.tools:
@@ -1716,9 +1889,7 @@
         try:
             try:
                 iterator = await model.chat(
-                    prompt,
-                    system_prompt,
-                    chat_history,
+                    messages,
                     kwargs,
                     raw_params=raw_kwargs,
                 )
@@ -1750,9 +1921,7 @@
         else:
             try:
                 data = await model.chat(
-                    prompt,
-                    system_prompt,
-                    chat_history,
+                    messages,
                     kwargs,
                     raw_params=raw_kwargs,
                 )
xinference/client/restful/restful_client.py CHANGED
@@ -13,7 +13,6 @@
 # limitations under the License.
 import json
 import typing
-import warnings
 from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union

 import requests
@@ -470,9 +469,7 @@ class RESTfulGenerateModelHandle(RESTfulModelHandle):
 class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
     def chat(
         self,
-        prompt: str,
-        system_prompt: Optional[str] = None,
-        chat_history: Optional[List["ChatCompletionMessage"]] = None,
+        messages: List[Dict],
         tools: Optional[List[Dict]] = None,
         generate_config: Optional[
             Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]
@@ -483,11 +480,7 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):

         Parameters
         ----------
-        prompt: str
-            The user's input.
-        system_prompt: Optional[str]
-            The system context provide to Model prior to any chats.
-        chat_history: Optional[List["ChatCompletionMessage"]]
+        messages: List[Dict]
             A list of messages comprising the conversation so far.
         tools: Optional[List[Dict]]
             A tool list.
@@ -509,25 +502,11 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
         Report the failure to generate the chat from the server. Detailed information provided in error message.

         """
-        warnings.warn(
-            "The parameters `prompt`, `system_prompt` and `chat_history` will be deprecated in version v0.15.0, "
-            "and will be replaced by the parameter `messages`, "
-            "similar to the OpenAI API: https://platform.openai.com/docs/guides/chat-completions/getting-started",
-            category=DeprecationWarning,
-            stacklevel=2,
-        )
-
         url = f"{self._base_url}/v1/chat/completions"

-        if chat_history is None:
-            chat_history = []
-
-        chat_history = handle_system_prompts(chat_history, system_prompt)
-        chat_history.append({"role": "user", "content": prompt})  # type: ignore
-
         request_body: Dict[str, Any] = {
             "model": self._model_uid,
-            "messages": chat_history,
+            "messages": messages,
         }
         if tools is not None:
             request_body["tools"] = tools
@@ -730,10 +709,12 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
                 )
             )
             response = requests.post(
-                url, data=params, files=files, headers=self.auth_headers
+                url, data=params, files=files, headers=self.auth_headers, stream=stream
             )
         else:
-            response = requests.post(url, json=params, headers=self.auth_headers)
+            response = requests.post(
+                url, json=params, headers=self.auth_headers, stream=stream
+            )
         if response.status_code != 200:
             raise RuntimeError(
                 f"Failed to speech the text, detail: {_get_error_string(response)}"
xinference/conftest.py CHANGED
@@ -144,7 +144,7 @@ async def _start_test_cluster(
         address=f"test://{address}", logging_conf=logging_conf
     )
     await xo.create_actor(
-        SupervisorActor, address=address, uid=SupervisorActor.uid()
+        SupervisorActor, address=address, uid=SupervisorActor.default_uid()
     )
     await start_worker_components(
         address=address,
xinference/constants.py CHANGED
@@ -38,6 +38,10 @@ def get_xinference_home() -> str:
         # if user has already set `XINFERENCE_HOME` env, change huggingface and modelscope default download path
         os.environ["HUGGINGFACE_HUB_CACHE"] = os.path.join(home_path, "huggingface")
         os.environ["MODELSCOPE_CACHE"] = os.path.join(home_path, "modelscope")
+        # In multi-tenant mode,
+        # gradio's temporary files are stored in their respective home directories,
+        # to prevent insufficient permissions
+        os.environ["GRADIO_TEMP_DIR"] = os.path.join(home_path, "tmp", "gradio")
     return home_path


@@ -59,6 +63,7 @@ XINFERENCE_DEFAULT_ENDPOINT_PORT = 9997
 XINFERENCE_DEFAULT_LOG_FILE_NAME = "xinference.log"
 XINFERENCE_LOG_MAX_BYTES = 100 * 1024 * 1024
 XINFERENCE_LOG_BACKUP_COUNT = 30
+XINFERENCE_LOG_ARG_MAX_LENGTH = 100
 XINFERENCE_HEALTH_CHECK_FAILURE_THRESHOLD = int(
    os.environ.get(XINFERENCE_ENV_HEALTH_CHECK_FAILURE_THRESHOLD, 5)
 )
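
For reference, a sketch of what the first hunk above implies when XINFERENCE_HOME is set; the path is an example only:

# Example only: with XINFERENCE_HOME=/data/xinference, get_xinference_home() now also sets
#   HUGGINGFACE_HUB_CACHE -> /data/xinference/huggingface
#   MODELSCOPE_CACHE      -> /data/xinference/modelscope
#   GRADIO_TEMP_DIR       -> /data/xinference/tmp/gradio   (new in this release)
import os

os.environ["XINFERENCE_HOME"] = "/data/xinference"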
xinference/core/cache_tracker.py CHANGED
@@ -25,7 +25,7 @@ class CacheTrackerActor(xo.Actor):
         self._model_name_to_version_info: Dict[str, List[Dict]] = {}  # type: ignore

     @classmethod
-    def uid(cls) -> str:
+    def default_uid(cls) -> str:
         return "cache_tracker"

     @staticmethod
xinference/core/chat_interface.py CHANGED
@@ -16,7 +16,7 @@ import base64
 import logging
 import os
 from io import BytesIO
-from typing import Generator, List, Optional
+from typing import Dict, Generator, List, Optional

 import gradio as gr
 import PIL.Image
@@ -27,7 +27,6 @@ from ..client.restful.restful_client import (
     RESTfulChatModelHandle,
     RESTfulGenerateModelHandle,
 )
-from ..types import ChatCompletionMessage

 logger = logging.getLogger(__name__)

@@ -96,11 +95,11 @@ class GradioInterface:
                 flat_list += row
             return flat_list

-        def to_chat(lst: List[str]) -> List[ChatCompletionMessage]:
+        def to_chat(lst: List[str]) -> List[Dict]:
             res = []
             for i in range(len(lst)):
                 role = "assistant" if i % 2 == 1 else "user"
-                res.append(ChatCompletionMessage(role=role, content=lst[i]))
+                res.append(dict(role=role, content=lst[i]))
             return res

         def generate_wrapper(
@@ -116,11 +115,12 @@ class GradioInterface:
            client._set_token(self._access_token)
            model = client.get_model(self.model_uid)
            assert isinstance(model, RESTfulChatModelHandle)
+            messages = to_chat(flatten(history))
+            messages.append(dict(role="user", content=message))

            response_content = ""
            for chunk in model.chat(
-                prompt=message,
-                chat_history=to_chat(flatten(history)),
+                messages,
                generate_config={
                    "max_tokens": int(max_tokens),
                    "temperature": temperature,
@@ -191,15 +191,10 @@ class GradioInterface:
            model = client.get_model(self.model_uid)
            assert isinstance(model, RESTfulChatModelHandle)

-            prompt = history[-1]
-            assert prompt["role"] == "user"
-            prompt = prompt["content"]
-            # multimodal chat does not support stream.
            if stream:
                response_content = ""
                for chunk in model.chat(
-                    prompt=prompt,
-                    chat_history=history[:-1],
+                    messages=history,
                    generate_config={
                        "max_tokens": max_tokens,
                        "temperature": temperature,
@@ -224,8 +219,7 @@ class GradioInterface:
                yield history, bot
            else:
                response = model.chat(
-                    prompt=prompt,
-                    chat_history=history[:-1],
+                    messages=history,
                    generate_config={
                        "max_tokens": max_tokens,
                        "temperature": temperature,
xinference/core/event.py CHANGED
@@ -41,7 +41,7 @@ class EventCollectorActor(xo.StatelessActor):
         )

     @classmethod
-    def uid(cls) -> str:
+    def default_uid(cls) -> str:
         return "event_collector"

     def get_model_events(self, model_uid: str) -> List[Dict]:
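
Several built-in actors (SupervisorActor, EventCollectorActor, CacheTrackerActor) rename their uid() classmethod to default_uid() in this release. Callers resolve references as in this hedged sketch; the address is a placeholder:

# Sketch of resolving an actor reference with the renamed helper.
import xoscar as xo

from xinference.core.supervisor import SupervisorActor


async def get_supervisor_ref(supervisor_address: str):
    # 0.15.x: use default_uid() where uid() was used before
    return await xo.actor_ref(address=supervisor_address, uid=SupervisorActor.default_uid())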