xinference 0.14.0__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of xinference might be problematic.

Files changed (53)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +62 -1
  3. xinference/client/handlers.py +0 -3
  4. xinference/client/restful/restful_client.py +51 -134
  5. xinference/constants.py +1 -0
  6. xinference/core/chat_interface.py +1 -4
  7. xinference/core/image_interface.py +33 -5
  8. xinference/core/model.py +28 -2
  9. xinference/core/supervisor.py +37 -0
  10. xinference/core/worker.py +130 -84
  11. xinference/deploy/cmdline.py +1 -4
  12. xinference/model/audio/core.py +11 -3
  13. xinference/model/audio/funasr.py +114 -0
  14. xinference/model/audio/model_spec.json +20 -0
  15. xinference/model/audio/model_spec_modelscope.json +21 -0
  16. xinference/model/audio/whisper.py +1 -1
  17. xinference/model/core.py +12 -0
  18. xinference/model/embedding/core.py +6 -6
  19. xinference/model/image/core.py +3 -4
  20. xinference/model/image/model_spec.json +41 -13
  21. xinference/model/image/model_spec_modelscope.json +30 -10
  22. xinference/model/image/stable_diffusion/core.py +53 -2
  23. xinference/model/llm/__init__.py +2 -0
  24. xinference/model/llm/llm_family.json +83 -1
  25. xinference/model/llm/llm_family_modelscope.json +85 -1
  26. xinference/model/llm/pytorch/core.py +1 -0
  27. xinference/model/llm/pytorch/minicpmv26.py +247 -0
  28. xinference/model/llm/sglang/core.py +72 -34
  29. xinference/model/llm/vllm/core.py +38 -0
  30. xinference/model/video/__init__.py +62 -0
  31. xinference/model/video/core.py +178 -0
  32. xinference/model/video/diffusers.py +180 -0
  33. xinference/model/video/model_spec.json +11 -0
  34. xinference/model/video/model_spec_modelscope.json +12 -0
  35. xinference/types.py +10 -24
  36. xinference/web/ui/build/asset-manifest.json +3 -3
  37. xinference/web/ui/build/index.html +1 -1
  38. xinference/web/ui/build/static/js/{main.af906659.js → main.17ca0398.js} +3 -3
  39. xinference/web/ui/build/static/js/main.17ca0398.js.map +1 -0
  40. xinference/web/ui/node_modules/.cache/babel-loader/2f40209b32e7e46a2eab6b8c8a355eb42c3caa8bc3228dd929f32fd2b3940294.json +1 -0
  41. xinference/web/ui/node_modules/.cache/babel-loader/71684495d995c7e266eecc6a0ad8ea0284cc785f80abddf863789c57a6134969.json +1 -0
  42. xinference/web/ui/node_modules/.cache/babel-loader/80acd1edf31542ab1dcccfad02cb4b38f3325cff847a781fcce97500cfd6f878.json +1 -0
  43. {xinference-0.14.0.dist-info → xinference-0.14.1.dist-info}/METADATA +128 -122
  44. {xinference-0.14.0.dist-info → xinference-0.14.1.dist-info}/RECORD +49 -42
  45. {xinference-0.14.0.dist-info → xinference-0.14.1.dist-info}/WHEEL +1 -1
  46. xinference/web/ui/build/static/js/main.af906659.js.map +0 -1
  47. xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +0 -1
  48. xinference/web/ui/node_modules/.cache/babel-loader/2cd5e4279ad7e13a1f41d486e9fca7756295bfad5bd77d90992f4ac3e10b496d.json +0 -1
  49. xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +0 -1
  50. /xinference/web/ui/build/static/js/{main.af906659.js.LICENSE.txt → main.17ca0398.js.LICENSE.txt} +0 -0
  51. {xinference-0.14.0.dist-info → xinference-0.14.1.dist-info}/LICENSE +0 -0
  52. {xinference-0.14.0.dist-info → xinference-0.14.1.dist-info}/entry_points.txt +0 -0
  53. {xinference-0.14.0.dist-info → xinference-0.14.1.dist-info}/top_level.txt +0 -0
xinference/model/video/diffusers.py ADDED
@@ -0,0 +1,180 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import base64
+import logging
+import os
+import sys
+import time
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from functools import partial
+from typing import TYPE_CHECKING, List, Union
+
+import numpy as np
+import PIL.Image
+import torch
+
+from ...constants import XINFERENCE_VIDEO_DIR
+from ...device_utils import move_model_to_available_device
+from ...types import Video, VideoList
+
+if TYPE_CHECKING:
+    from .core import VideoModelFamilyV1
+
+
+logger = logging.getLogger(__name__)
+
+
+def export_to_video_imageio(
+    video_frames: Union[List[np.ndarray], List["PIL.Image.Image"]],
+    output_video_path: str,
+    fps: int = 8,
+) -> str:
+    """
+    Export the video frames to a video file using imageio lib to Avoid "green screen" issue (for example CogVideoX)
+    """
+    import imageio
+
+    if isinstance(video_frames[0], PIL.Image.Image):
+        video_frames = [np.array(frame) for frame in video_frames]
+    with imageio.get_writer(output_video_path, fps=fps) as writer:
+        for frame in video_frames:
+            writer.append_data(frame)
+    return output_video_path
+
+
+class DiffUsersVideoModel:
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "VideoModelFamilyV1",
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._model = None
+        self._kwargs = kwargs
+
+    @property
+    def model_spec(self):
+        return self._model_spec
+
+    def load(self):
+        import torch
+
+        torch_dtype = self._kwargs.get("torch_dtype")
+        if sys.platform != "darwin" and torch_dtype is None:
+            # The following params crashes on Mac M2
+            self._kwargs["torch_dtype"] = torch.float16
+            self._kwargs["variant"] = "fp16"
+            self._kwargs["use_safetensors"] = True
+        if isinstance(torch_dtype, str):
+            self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)
+
+        if self._model_spec.model_family == "CogVideoX":
+            from diffusers import CogVideoXPipeline
+
+            self._model = CogVideoXPipeline.from_pretrained(
+                self._model_path, **self._kwargs
+            )
+        else:
+            raise Exception(
+                f"Unsupported model family: {self._model_spec.model_family}"
+            )
+
+        if self._kwargs.get("cpu_offload", False):
+            logger.debug("CPU offloading model")
+            self._model.enable_model_cpu_offload()
+        elif not self._kwargs.get("device_map"):
+            logger.debug("Loading model to available device")
+            self._model = move_model_to_available_device(self._model)
+        # Recommended if your computer has < 64 GB of RAM
+        self._model.enable_attention_slicing()
+
+    def text_to_video(
+        self,
+        prompt: str,
+        n: int = 1,
+        num_inference_steps: int = 50,
+        guidance_scale: int = 6,
+        response_format: str = "b64_json",
+        **kwargs,
+    ) -> VideoList:
+        import gc
+
+        # cv2 bug will cause the video cannot be normally displayed
+        # thus we use the imageio one
+        # from diffusers.utils import export_to_video
+        from ...device_utils import empty_cache
+
+        logger.debug(
+            "diffusers text_to_video args: %s",
+            kwargs,
+        )
+        assert self._model is not None
+        if self._kwargs.get("cpu_offload"):
+            # if enabled cpu offload,
+            # the model.device would be CPU
+            device = "cuda"
+        else:
+            device = self._model.device
+        prompt_embeds, _ = self._model.encode_prompt(
+            prompt=prompt,
+            do_classifier_free_guidance=True,
+            num_videos_per_prompt=n,
+            max_sequence_length=226,
+            device=device,
+            dtype=torch.float16,
+        )
+        assert callable(self._model)
+        output = self._model(
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+            prompt_embeds=prompt_embeds,
+            **kwargs,
+        )
+
+        # clean cache
+        gc.collect()
+        empty_cache()
+
+        os.makedirs(XINFERENCE_VIDEO_DIR, exist_ok=True)
+        urls = []
+        for f in output.frames:
+            path = os.path.join(XINFERENCE_VIDEO_DIR, uuid.uuid4().hex + ".mp4")
+            p = export_to_video_imageio(f, path, fps=8)
+            urls.append(p)
+        if response_format == "url":
+            return VideoList(
+                created=int(time.time()),
+                data=[Video(url=url, b64_json=None) for url in urls],
+            )
+        elif response_format == "b64_json":
+
+            def _gen_base64_video(_video_url):
+                try:
+                    with open(_video_url, "rb") as f:
+                        return base64.b64encode(f.read()).decode()
+                finally:
+                    os.remove(_video_url)
+
+            with ThreadPoolExecutor() as executor:
+                results = list(map(partial(executor.submit, _gen_base64_video), urls))  # type: ignore
+            video_list = [Video(url=None, b64_json=s.result()) for s in results]
+            return VideoList(created=int(time.time()), data=video_list)
+        else:
+            raise ValueError(f"Unsupported response format: {response_format}")
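
The class above is the entirety of the new text-to-video backend. As orientation, here is a minimal sketch (not part of the diff) of driving DiffUsersVideoModel directly. The spec construction mirrors the model_spec.json entry added below, but the VideoModelFamilyV1 constructor signature, the local weights path, and the availability of a CUDA device are all assumptions; in xinference itself the registry code added in xinference/model/video/core.py handles model discovery and download.

# Illustrative sketch only; assumes CogVideoX-2b weights are already
# downloaded locally and a CUDA device is available.
from xinference.model.video.core import VideoModelFamilyV1
from xinference.model.video.diffusers import DiffUsersVideoModel

# Field names mirror model_spec.json below; pydantic-style construction
# of VideoModelFamilyV1 is an assumption.
spec = VideoModelFamilyV1(
    model_name="CogVideoX-2b",
    model_family="CogVideoX",
    model_id="THUDM/CogVideoX-2b",
    model_revision="4bbfb1de622b80bc1b77b6e9aced75f816be0e38",
    model_ability=["text2video"],
)

model = DiffUsersVideoModel(
    model_uid="cogvideox-2b-0",          # any unique id
    model_path="/path/to/CogVideoX-2b",  # placeholder for the downloaded weights
    model_spec=spec,
    cpu_offload=True,                    # routes load() through enable_model_cpu_offload()
)
model.load()

result = model.text_to_video(
    "A panda playing guitar in a bamboo forest",
    n=1,
    response_format="url",  # keep the .mp4 on disk instead of base64-encoding it
)
print(result["data"][0]["url"])
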
xinference/model/video/model_spec.json ADDED
@@ -0,0 +1,11 @@
+[
+    {
+        "model_name": "CogVideoX-2b",
+        "model_family": "CogVideoX",
+        "model_id": "THUDM/CogVideoX-2b",
+        "model_revision": "4bbfb1de622b80bc1b77b6e9aced75f816be0e38",
+        "model_ability": [
+            "text2video"
+        ]
+    }
+]
xinference/model/video/model_spec_modelscope.json ADDED
@@ -0,0 +1,12 @@
+[
+    {
+        "model_name": "CogVideoX-2b",
+        "model_family": "CogVideoX",
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/CogVideoX-2b",
+        "model_revision": "master",
+        "model_ability": [
+            "text2video"
+        ]
+    }
+]
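
Note that the two spec files register the same model against different hubs: the Hugging Face entry pins an exact commit revision, while the ModelScope entry carries an explicit "model_hub" field and tracks the "master" revision. Below is a rough sketch of how such entries could be indexed by (name, hub); the loader is illustrative only, since the actual registry added in xinference/model/video/core.py is not shown in this diff.

import json

def load_video_specs(path: str, default_hub: str = "huggingface") -> dict:
    """Index spec entries by (model_name, hub). Entries without an explicit
    "model_hub" field (the Hugging Face file above) fall back to the default."""
    with open(path) as f:
        entries = json.load(f)
    return {(e["model_name"], e.get("model_hub", default_hub)): e for e in entries}

specs = load_video_specs("xinference/model/video/model_spec.json")
specs.update(load_video_specs("xinference/model/video/model_spec_modelscope.json"))
# ("CogVideoX-2b", "huggingface") -> THUDM/CogVideoX-2b at a pinned revision
# ("CogVideoX-2b", "modelscope")  -> ZhipuAI/CogVideoX-2b at revision "master"
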
xinference/types.py CHANGED
@@ -52,6 +52,16 @@ class ImageList(TypedDict):
     data: List[Image]
 
 
+class Video(TypedDict):
+    url: Optional[str]
+    b64_json: Optional[str]
+
+
+class VideoList(TypedDict):
+    created: int
+    data: List[Video]
+
+
 class EmbeddingUsage(TypedDict):
     prompt_tokens: int
     total_tokens: int
@@ -206,30 +216,6 @@ class ChatCompletionChunk(TypedDict):
     usage: NotRequired[CompletionUsage]
 
 
-class ChatglmCppModelConfig(TypedDict, total=False):
-    pass
-
-
-class ChatglmCppGenerateConfig(TypedDict, total=False):
-    max_tokens: int
-    top_p: float
-    temperature: float
-    stream: bool
-    lora_name: Optional[str]
-    stream_options: Optional[Union[dict, None]]
-
-
-class QWenCppModelConfig(TypedDict, total=False):
-    pass
-
-
-class QWenCppGenerateConfig(TypedDict, total=False):
-    max_tokens: int
-    top_p: float
-    temperature: float
-    stream: bool
-
-
 StoppingCriteria = Callable[[List[int], List[float]], bool]
 
 
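The new Video and VideoList TypedDicts mirror the existing Image and ImageList shapes, and exactly one of url or b64_json is populated depending on the response_format passed to text_to_video (see diffusers.py above). A small sketch of how the return value is assembled; the file path here is a made-up example:

import time
from xinference.types import Video, VideoList

# With response_format="url", the .mp4 stays on disk and only its path is returned.
video = Video(url="/tmp/xinference/video/1a2b3c.mp4", b64_json=None)
result = VideoList(created=int(time.time()), data=[video])
assert result["data"][0]["b64_json"] is None  # url and b64_json are mutually exclusive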
 
xinference/web/ui/build/asset-manifest.json CHANGED
@@ -1,14 +1,14 @@
 {
   "files": {
     "main.css": "./static/css/main.4bafd904.css",
-    "main.js": "./static/js/main.af906659.js",
+    "main.js": "./static/js/main.17ca0398.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
     "main.4bafd904.css.map": "./static/css/main.4bafd904.css.map",
-    "main.af906659.js.map": "./static/js/main.af906659.js.map"
+    "main.17ca0398.js.map": "./static/js/main.17ca0398.js.map"
   },
   "entrypoints": [
     "static/css/main.4bafd904.css",
-    "static/js/main.af906659.js"
+    "static/js/main.17ca0398.js"
   ]
 }
xinference/web/ui/build/index.html CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.af906659.js"></script><link href="./static/css/main.4bafd904.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.17ca0398.js"></script><link href="./static/css/main.4bafd904.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>