xinference 0.7.5__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/oauth2/__init__.py +13 -0
- xinference/api/oauth2/common.py +14 -0
- xinference/api/oauth2/core.py +93 -0
- xinference/api/oauth2/types.py +36 -0
- xinference/api/oauth2/utils.py +44 -0
- xinference/api/restful_api.py +216 -27
- xinference/client/oscar/actor_client.py +18 -18
- xinference/client/restful/restful_client.py +96 -33
- xinference/conftest.py +63 -1
- xinference/constants.py +1 -0
- xinference/core/chat_interface.py +143 -3
- xinference/core/metrics.py +83 -0
- xinference/core/model.py +244 -181
- xinference/core/status_guard.py +86 -0
- xinference/core/supervisor.py +57 -7
- xinference/core/worker.py +134 -13
- xinference/deploy/cmdline.py +142 -16
- xinference/deploy/local.py +39 -7
- xinference/deploy/supervisor.py +2 -0
- xinference/deploy/worker.py +33 -5
- xinference/fields.py +4 -1
- xinference/model/core.py +8 -1
- xinference/model/embedding/core.py +3 -2
- xinference/model/embedding/model_spec_modelscope.json +60 -18
- xinference/model/image/stable_diffusion/core.py +4 -3
- xinference/model/llm/__init__.py +7 -0
- xinference/model/llm/ggml/llamacpp.py +3 -2
- xinference/model/llm/llm_family.json +87 -3
- xinference/model/llm/llm_family.py +15 -5
- xinference/model/llm/llm_family_modelscope.json +92 -3
- xinference/model/llm/pytorch/chatglm.py +70 -28
- xinference/model/llm/pytorch/core.py +11 -30
- xinference/model/llm/pytorch/internlm2.py +155 -0
- xinference/model/llm/pytorch/utils.py +0 -153
- xinference/model/llm/utils.py +37 -8
- xinference/model/llm/vllm/core.py +15 -3
- xinference/model/multimodal/__init__.py +15 -8
- xinference/model/multimodal/core.py +8 -1
- xinference/model/multimodal/model_spec.json +9 -0
- xinference/model/multimodal/model_spec_modelscope.json +45 -0
- xinference/model/multimodal/qwen_vl.py +5 -9
- xinference/model/utils.py +7 -2
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.b83095c2.js +3 -0
- xinference/web/ui/build/static/js/{main.236e72e7.js.LICENSE.txt → main.b83095c2.js.LICENSE.txt} +7 -0
- xinference/web/ui/build/static/js/main.b83095c2.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0a853b2fa1902551e262a2f1a4b7894341f27b3dd9587f2ef7aaea195af89518.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/101923c539819f26ad11fbcbd6f6e56436b285efbb090dcc7dd648c6e924c4a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/193e7ba39e70d4bb2895a5cb317f6f293a5fd02e7e324c02a1eba2f83216419c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/22858de5265f2d279fca9f2f54dfb147e4b2704200dfb5d2ad3ec9769417328f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/27696db5fcd4fcf0e7974cadf1e4a2ab89690474045c3188eafd586323ad13bb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/27bdbe25deab8cf08f7fab8f05f8f26cf84a98809527a37986a4ab73a57ba96a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/30670751f55508ef3b861e13dd71b9e5a10d2561373357a12fc3831a0b77fd93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3605cd3a96ff2a3b443c70a101575482279ad26847924cab0684d165ba0d2492.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3789ef437d3ecbf945bb9cea39093d1f16ebbfa32dbe6daf35abcfb6d48de6f1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4942da6bc03bf7373af068e22f916341aabc5b5df855d73c1d348c696724ce37.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4d933e35e0fe79867d3aa6c46db28804804efddf5490347cb6c2c2879762a157.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4d96f071168af43965e0fab2ded658fa0a15b8d9ca03789a5ef9c5c16a4e3cee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52a6136cb2dbbf9c51d461724d9b283ebe74a73fb19d5df7ba8e13c42bd7174d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c408307c982f07f9c09c85c98212d1b1c22548a9194c69548750a3016b91b88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/663adbcb60b942e9cf094c8d9fabe57517f5e5e6e722d28b4948a40b7445a3b8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/666bb2e1b250dc731311a7e4880886177885dfa768508d2ed63e02630cc78725.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/71493aadd34d568fbe605cacaba220aa69bd09273251ee4ba27930f8d01fccd8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8b071db2a5a9ef68dc14d5f606540bd23d9785e365a11997c510656764d2dccf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8b246d79cd3f6fc78f11777e6a6acca6a2c5d4ecce7f2dd4dcf9a48126440d3c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a4d72d3b806ba061919115f0c513738726872e3c79cf258f007519d3f91d1a16.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b4e4fccaf8f2489a29081f0bf3b191656bd452fb3c8b5e3c6d92d94f680964d5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b53eb7c7967f6577bd3e678293c44204fb03ffa7fdc1dd59d3099015c68f6f7f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d06af85a84e5c5a29d3acf2dbb5b30c0cf75c8aec4ab5f975e6096f944ee4324.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d5e150bff31715977d8f537c970f06d4fe3de9909d7e8342244a83a9f6447121.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de36e5c08fd524e341d664883dda6cb1745acc852a4f1b011a35a0b4615f72fa.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f037ffef5992af0892d6d991053c1dace364cd39a3f11f1a41f92776e8a59459.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f23ab356a8603d4a2aaa74388c2f381675c207d37c4d1c832df922e9655c9a6b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f7c23b0922f4087b9e2e3e46f15c946b772daa46c28c3a12426212ecaf481deb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +36 -0
- xinference/web/ui/node_modules/@types/cookie/package.json +30 -0
- xinference/web/ui/node_modules/@types/hoist-non-react-statics/package.json +33 -0
- xinference/web/ui/node_modules/react-cookie/package.json +55 -0
- xinference/web/ui/node_modules/universal-cookie/package.json +48 -0
- xinference/web/ui/package-lock.json +37 -0
- xinference/web/ui/package.json +3 -2
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/METADATA +17 -6
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/RECORD +101 -66
- xinference/web/ui/build/static/js/main.236e72e7.js +0 -3
- xinference/web/ui/build/static/js/main.236e72e7.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0cccfbe5d963b8e31eb679f9d9677392839cedd04aa2956ac6b33cf19599d597.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f3b6cc71b7c83bdc85aa4835927aeb86af2ce0d2ac241917ecfbf90f75c6d27.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2f651cf60b1bde50c0601c7110f77dd44819fb6e2501ff748a631724d91445d4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/42bb623f337ad08ed076484185726e072ca52bb88e373d72c7b052db4c273342.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/57af83639c604bd3362d0f03f7505e81c6f67ff77bee7c6bb31f6e5523eba185.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/667753ce39ce1d4bcbf9a5f1a103d653be1d19d42f4e1fbaceb9b507679a52c7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/66ed1bd4c06748c1b176a625c25c856997edc787856c73162f82f2b465c5d956.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/78f2521da2e2a98b075a2666cb782c7e2c019cd3c72199eecd5901c82d8655df.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d2b0b3c6988d1894694dcbbe708ef91cfe62d62dac317031f09915ced637953.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9427ae7f1e94ae8dcd2333fb361e381f4054fde07394fe5448658e3417368476.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bcee2b4e76b07620f9087989eb86d43c645ba3c7a74132cf926260af1164af0e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/cc2ddd02ccc1dad1a2737ac247c79e6f6ed2c7836c6b68e511e3048f666b64af.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d2e8e6665a7efc832b43907dadf4e3c896a59eaf8129f9a520882466c8f2e489.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d8a42e9df7157de9f28eecefdf178fd113bf2280d28471b6e32a8a45276042df.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e26750d9556e9741912333349e4da454c53dbfddbfc6002ab49518dcf02af745.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ef42ec014d7bc373b874b2a1ff0dcd785490f125e913698bc049b0bd778e4d66.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fe3eb4d76c79ca98833f686d642224eeeb94cc83ad14300d281623796d087f0a.json +0 -1
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/LICENSE +0 -0
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/WHEEL +0 -0
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/top_level.txt +0 -0
xinference/core/chat_interface.py

@@ -12,11 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import base64
 import logging
 import os
-from
+from io import BytesIO
+from typing import Generator, List, Optional
 
 import gradio as gr
+import PIL.Image
 from gradio.components import Markdown, Textbox
 from gradio.layouts import Accordion, Column, Row
 
@@ -24,39 +27,48 @@ from ..client.restful.restful_client import (
     RESTfulChatglmCppChatModelHandle,
     RESTfulChatModelHandle,
     RESTfulGenerateModelHandle,
+    RESTfulMultimodalModelHandle,
 )
 from ..types import ChatCompletionMessage
 
 logger = logging.getLogger(__name__)
 
 
-class LLMInterface:
+class GradioInterface:
     def __init__(
         self,
         endpoint: str,
         model_uid: str,
         model_name: str,
         model_size_in_billions: int,
+        model_type: str,
         model_format: str,
         quantization: str,
         context_length: int,
         model_ability: List[str],
         model_description: str,
         model_lang: List[str],
+        access_token: Optional[str],
     ):
         self.endpoint = endpoint
         self.model_uid = model_uid
         self.model_name = model_name
         self.model_size_in_billions = model_size_in_billions
+        self.model_type = model_type
         self.model_format = model_format
         self.quantization = quantization
         self.context_length = context_length
         self.model_ability = model_ability
         self.model_description = model_description
         self.model_lang = model_lang
+        self._access_token = (
+            access_token.replace("Bearer ", "") if access_token is not None else None
+        )
 
     def build(self) -> "gr.Blocks":
-        if "chat" in self.model_ability:
+        if self.model_type == "multimodal":
+            interface = self.build_chat_vl_interface()
+        elif "chat" in self.model_ability:
             interface = self.build_chat_interface()
         else:
             interface = self.build_generate_interface()
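The class rename from LLMInterface to GradioInterface comes with two new constructor parameters, model_type and access_token. A minimal sketch of constructing it, where the endpoint, uid, and model metadata are illustrative placeholders rather than values from the diff:

from xinference.core.chat_interface import GradioInterface

# All values below are hypothetical; in practice the web UI fills them in
# from the launched model's spec.
demo = GradioInterface(
    endpoint="http://127.0.0.1:9997",
    model_uid="my-model-uid",
    model_name="qwen-vl-chat",
    model_size_in_billions=7,
    model_type="multimodal",  # new: routes build() to build_chat_vl_interface()
    model_format="pytorch",
    quantization="none",
    context_length=2048,
    model_ability=["chat"],
    model_description="placeholder description",
    model_lang=["en", "zh"],
    access_token=None,  # new: a leading "Bearer " prefix is stripped if present
)
blocks = demo.build()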
@@ -102,6 +114,7 @@ class LLMInterface:
             from ..client import RESTfulClient
 
             client = RESTfulClient(self.endpoint)
+            client._set_token(self._access_token)
             model = client.get_model(self.model_uid)
             assert isinstance(
                 model, (RESTfulChatModelHandle, RESTfulChatglmCppChatModelHandle)
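Each Gradio callback now creates a per-request RESTfulClient and attaches the stripped token before fetching the model handle. The equivalent standalone usage, as a sketch (endpoint, token, and uid are placeholders; _set_token is the same private helper the diff itself calls):

from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
# Pass the bare token, mirroring the diff: the "Bearer " prefix has already
# been stripped in GradioInterface.__init__.
client._set_token("sk-placeholder-token")
model = client.get_model("my-model-uid")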
@@ -168,6 +181,131 @@ class LLMInterface:
             analytics_enabled=False,
         )
 
+    def build_chat_vl_interface(
+        self,
+    ) -> "gr.Blocks":
+        def predict(history, bot):
+            logger.debug("Predict model: %s, history: %s", self.model_uid, history)
+            from ..client import RESTfulClient
+
+            client = RESTfulClient(self.endpoint)
+            client._set_token(self._access_token)
+            model = client.get_model(self.model_uid)
+            assert isinstance(model, RESTfulMultimodalModelHandle)
+
+            prompt = history[-1]
+            assert prompt["role"] == "user"
+            prompt = prompt["content"]
+            # multimodal chat does not support stream.
+            response = model.chat(prompt=prompt, chat_history=history[:-1])
+            history.append(response["choices"][0]["message"])
+            bot[-1][1] = history[-1]["content"]
+            return history, bot
+
+        def add_text(history, bot, text, image):
+            logger.debug("Add text, text: %s, image: %s", text, image)
+            if image:
+                buffered = BytesIO()
+                with PIL.Image.open(image) as img:
+                    img.thumbnail((500, 500))
+                    img.save(buffered, format="JPEG")
+                img_b64_str = base64.b64encode(buffered.getvalue()).decode()
+                display_content = f'<img src="data:image/png;base64,{img_b64_str}" alt="user upload image" />\n{text}'
+                message = {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": text},
+                        {"type": "image_url", "image_url": {"url": image}},
+                    ],
+                }
+            else:
+                display_content = text
+                message = {"role": "user", "content": text}
+            history = history + [message]
+            bot = bot + [(display_content, None)]
+            return history, bot, "", None
+
+        def clear_history():
+            logger.debug("Clear history.")
+            return [], None, "", None
+
+        def update_button(text):
+            return gr.update(interactive=bool(text))
+
+        with gr.Blocks(
+            title=f"🚀 Xinference Chat Bot : {self.model_name} 🚀",
+            css="""
+            .center{
+                display: flex;
+                justify-content: center;
+                align-items: center;
+                padding: 0px;
+                color: #9ea4b0 !important;
+            }
+            """,
+            analytics_enabled=False,
+        ) as chat_vl_interface:
+            Markdown(
+                f"""
+                <h1 style='text-align: center; margin-bottom: 1rem'>🚀 Xinference Chat Bot : {self.model_name} 🚀</h1>
+                """
+            )
+            Markdown(
+                f"""
+                <div class="center">
+                Model ID: {self.model_uid}
+                </div>
+                <div class="center">
+                Model Size: {self.model_size_in_billions} Billion Parameters
+                </div>
+                <div class="center">
+                Model Format: {self.model_format}
+                </div>
+                <div class="center">
+                Model Quantization: {self.quantization}
+                </div>
+                """
+            )
+
+            state = gr.State([])
+            with gr.Row():
+                chatbot = gr.Chatbot(
+                    elem_id="chatbot", label=self.model_name, height=550, scale=7
+                )
+                with gr.Column(scale=3):
+                    imagebox = gr.Image(type="filepath")
+                    textbox = gr.Textbox(
+                        show_label=False,
+                        placeholder="Enter text and press ENTER",
+                        container=False,
+                    )
+                    submit_btn = gr.Button(
+                        value="Send", variant="primary", interactive=False
+                    )
+                    clear_btn = gr.Button(value="Clear")
+
+            textbox.change(update_button, [textbox], [submit_btn], queue=False)
+
+            textbox.submit(
+                add_text,
+                [state, chatbot, textbox, imagebox],
+                [state, chatbot, textbox, imagebox],
+                queue=False,
+            ).then(predict, [state, chatbot], [state, chatbot])
+
+            submit_btn.click(
+                add_text,
+                [state, chatbot, textbox, imagebox],
+                [state, chatbot, textbox, imagebox],
+                queue=False,
+            ).then(predict, [state, chatbot], [state, chatbot])
+
+            clear_btn.click(
+                clear_history, None, [state, chatbot, textbox, imagebox], queue=False
+            )
+
+        return chat_vl_interface
+
     def build_generate_interface(
         self,
     ):
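The add_text handler above builds an OpenAI-style message whose content is a list of text and image_url parts. Outside Gradio, the same shape can be sent straight to the multimodal handle. A sketch assuming a multimodal model is already launched; the uid and image path are placeholders, and while the UI passes a local file path as the url, a base64 data URL is shown here on the assumption the server accepts it:

import base64

from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
model = client.get_model("my-qwen-vl-uid")  # hypothetical uid

with open("example.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode()

prompt = [
    {"type": "text", "text": "What is in this picture?"},
    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
]
# As the comment in predict() notes, multimodal chat here does not stream.
response = model.chat(prompt=prompt, chat_history=[])
print(response["choices"][0]["message"]["content"])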
@@ -198,6 +336,7 @@ class LLMInterface:
             from ..client import RESTfulClient
 
             client = RESTfulClient(self.endpoint)
+            client._set_token(self._access_token)
             model = client.get_model(self.model_uid)
             assert isinstance(model, RESTfulGenerateModelHandle)
 
@@ -234,6 +373,7 @@ class LLMInterface:
             from ..client import RESTfulClient
 
             client = RESTfulClient(self.endpoint)
+            client._set_token(self._access_token)
             model = client.get_model(self.model_uid)
             assert isinstance(model, RESTfulGenerateModelHandle)
 
xinference/core/metrics.py (new file)

@@ -0,0 +1,83 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+
+import uvicorn
+from aioprometheus import Counter, Gauge
+from aioprometheus.asgi.starlette import metrics
+from fastapi import FastAPI
+from fastapi.responses import RedirectResponse
+
+DEFAULT_METRICS_SERVER_LOG_LEVEL = "warning"
+
+
+generate_throughput = Gauge(
+    "xinference:generate_tokens_per_s", "Generate throughput in tokens/s."
+)
+# Latency
+time_to_first_token = Gauge(
+    "xinference:time_to_first_token_ms", "First token latency in ms."
+)
+# Tokens counter
+input_tokens_total_counter = Counter(
+    "xinference:input_tokens_total_counter", "Total number of input tokens."
+)
+output_tokens_total_counter = Counter(
+    "xinference:output_tokens_total_counter", "Total number of output tokens."
+)
+
+
+def record_metrics(name, op, kwargs):
+    collector = globals().get(name)
+    getattr(collector, op)(**kwargs)
+
+
+def launch_metrics_export_server(q, host=None, port=None):
+    app = FastAPI()
+    app.add_route("/metrics", metrics)
+
+    @app.get("/")
+    async def root():
+        response = RedirectResponse(url="/metrics")
+        return response
+
+    async def main():
+        if host is not None and port is not None:
+            config = uvicorn.Config(
+                app, host=host, port=port, log_level=DEFAULT_METRICS_SERVER_LOG_LEVEL
+            )
+        elif host is not None:
+            config = uvicorn.Config(
+                app, host=host, port=0, log_level=DEFAULT_METRICS_SERVER_LOG_LEVEL
+            )
+        elif port is not None:
+            config = uvicorn.Config(
+                app, port=port, log_level=DEFAULT_METRICS_SERVER_LOG_LEVEL
+            )
+        else:
+            config = uvicorn.Config(app, log_level=DEFAULT_METRICS_SERVER_LOG_LEVEL)
+
+        server = uvicorn.Server(config)
+        task = asyncio.create_task(server.serve())
+
+        while not server.started and not task.done():
+            await asyncio.sleep(0.1)
+
+        for server in server.servers:
+            for socket in server.sockets:
+                q.put(socket.getsockname())
+        await task
+
+    asyncio.run(main())