xinference 0.7.5__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of xinference might be problematic.

Files changed (120)
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/__init__.py +13 -0
  3. xinference/api/oauth2/common.py +14 -0
  4. xinference/api/oauth2/core.py +93 -0
  5. xinference/api/oauth2/types.py +36 -0
  6. xinference/api/oauth2/utils.py +44 -0
  7. xinference/api/restful_api.py +216 -27
  8. xinference/client/oscar/actor_client.py +18 -18
  9. xinference/client/restful/restful_client.py +96 -33
  10. xinference/conftest.py +63 -1
  11. xinference/constants.py +1 -0
  12. xinference/core/chat_interface.py +143 -3
  13. xinference/core/metrics.py +83 -0
  14. xinference/core/model.py +244 -181
  15. xinference/core/status_guard.py +86 -0
  16. xinference/core/supervisor.py +57 -7
  17. xinference/core/worker.py +134 -13
  18. xinference/deploy/cmdline.py +142 -16
  19. xinference/deploy/local.py +39 -7
  20. xinference/deploy/supervisor.py +2 -0
  21. xinference/deploy/worker.py +33 -5
  22. xinference/fields.py +4 -1
  23. xinference/model/core.py +8 -1
  24. xinference/model/embedding/core.py +3 -2
  25. xinference/model/embedding/model_spec_modelscope.json +60 -18
  26. xinference/model/image/stable_diffusion/core.py +4 -3
  27. xinference/model/llm/__init__.py +7 -0
  28. xinference/model/llm/ggml/llamacpp.py +3 -2
  29. xinference/model/llm/llm_family.json +87 -3
  30. xinference/model/llm/llm_family.py +15 -5
  31. xinference/model/llm/llm_family_modelscope.json +92 -3
  32. xinference/model/llm/pytorch/chatglm.py +70 -28
  33. xinference/model/llm/pytorch/core.py +11 -30
  34. xinference/model/llm/pytorch/internlm2.py +155 -0
  35. xinference/model/llm/pytorch/utils.py +0 -153
  36. xinference/model/llm/utils.py +37 -8
  37. xinference/model/llm/vllm/core.py +15 -3
  38. xinference/model/multimodal/__init__.py +15 -8
  39. xinference/model/multimodal/core.py +8 -1
  40. xinference/model/multimodal/model_spec.json +9 -0
  41. xinference/model/multimodal/model_spec_modelscope.json +45 -0
  42. xinference/model/multimodal/qwen_vl.py +5 -9
  43. xinference/model/utils.py +7 -2
  44. xinference/types.py +2 -0
  45. xinference/web/ui/build/asset-manifest.json +3 -3
  46. xinference/web/ui/build/index.html +1 -1
  47. xinference/web/ui/build/static/js/main.b83095c2.js +3 -0
  48. xinference/web/ui/build/static/js/{main.236e72e7.js.LICENSE.txt → main.b83095c2.js.LICENSE.txt} +7 -0
  49. xinference/web/ui/build/static/js/main.b83095c2.js.map +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/0a853b2fa1902551e262a2f1a4b7894341f27b3dd9587f2ef7aaea195af89518.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/101923c539819f26ad11fbcbd6f6e56436b285efbb090dcc7dd648c6e924c4a8.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/193e7ba39e70d4bb2895a5cb317f6f293a5fd02e7e324c02a1eba2f83216419c.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/22858de5265f2d279fca9f2f54dfb147e4b2704200dfb5d2ad3ec9769417328f.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/27696db5fcd4fcf0e7974cadf1e4a2ab89690474045c3188eafd586323ad13bb.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/27bdbe25deab8cf08f7fab8f05f8f26cf84a98809527a37986a4ab73a57ba96a.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/30670751f55508ef3b861e13dd71b9e5a10d2561373357a12fc3831a0b77fd93.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/3605cd3a96ff2a3b443c70a101575482279ad26847924cab0684d165ba0d2492.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/3789ef437d3ecbf945bb9cea39093d1f16ebbfa32dbe6daf35abcfb6d48de6f1.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/4942da6bc03bf7373af068e22f916341aabc5b5df855d73c1d348c696724ce37.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/4d933e35e0fe79867d3aa6c46db28804804efddf5490347cb6c2c2879762a157.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/4d96f071168af43965e0fab2ded658fa0a15b8d9ca03789a5ef9c5c16a4e3cee.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/52a6136cb2dbbf9c51d461724d9b283ebe74a73fb19d5df7ba8e13c42bd7174d.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/5c408307c982f07f9c09c85c98212d1b1c22548a9194c69548750a3016b91b88.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/663adbcb60b942e9cf094c8d9fabe57517f5e5e6e722d28b4948a40b7445a3b8.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/666bb2e1b250dc731311a7e4880886177885dfa768508d2ed63e02630cc78725.json +1 -0
  69. xinference/web/ui/node_modules/.cache/babel-loader/71493aadd34d568fbe605cacaba220aa69bd09273251ee4ba27930f8d01fccd8.json +1 -0
  70. xinference/web/ui/node_modules/.cache/babel-loader/8b071db2a5a9ef68dc14d5f606540bd23d9785e365a11997c510656764d2dccf.json +1 -0
  71. xinference/web/ui/node_modules/.cache/babel-loader/8b246d79cd3f6fc78f11777e6a6acca6a2c5d4ecce7f2dd4dcf9a48126440d3c.json +1 -0
  72. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +1 -0
  73. xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +1 -0
  74. xinference/web/ui/node_modules/.cache/babel-loader/a4d72d3b806ba061919115f0c513738726872e3c79cf258f007519d3f91d1a16.json +1 -0
  75. xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/b4e4fccaf8f2489a29081f0bf3b191656bd452fb3c8b5e3c6d92d94f680964d5.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/b53eb7c7967f6577bd3e678293c44204fb03ffa7fdc1dd59d3099015c68f6f7f.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +1 -0
  79. xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +1 -0
  80. xinference/web/ui/node_modules/.cache/babel-loader/d06af85a84e5c5a29d3acf2dbb5b30c0cf75c8aec4ab5f975e6096f944ee4324.json +1 -0
  81. xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +1 -0
  82. xinference/web/ui/node_modules/.cache/babel-loader/d5e150bff31715977d8f537c970f06d4fe3de9909d7e8342244a83a9f6447121.json +1 -0
  83. xinference/web/ui/node_modules/.cache/babel-loader/de36e5c08fd524e341d664883dda6cb1745acc852a4f1b011a35a0b4615f72fa.json +1 -0
  84. xinference/web/ui/node_modules/.cache/babel-loader/f037ffef5992af0892d6d991053c1dace364cd39a3f11f1a41f92776e8a59459.json +1 -0
  85. xinference/web/ui/node_modules/.cache/babel-loader/f23ab356a8603d4a2aaa74388c2f381675c207d37c4d1c832df922e9655c9a6b.json +1 -0
  86. xinference/web/ui/node_modules/.cache/babel-loader/f7c23b0922f4087b9e2e3e46f15c946b772daa46c28c3a12426212ecaf481deb.json +1 -0
  87. xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +1 -0
  88. xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +1 -0
  89. xinference/web/ui/node_modules/.package-lock.json +36 -0
  90. xinference/web/ui/node_modules/@types/cookie/package.json +30 -0
  91. xinference/web/ui/node_modules/@types/hoist-non-react-statics/package.json +33 -0
  92. xinference/web/ui/node_modules/react-cookie/package.json +55 -0
  93. xinference/web/ui/node_modules/universal-cookie/package.json +48 -0
  94. xinference/web/ui/package-lock.json +37 -0
  95. xinference/web/ui/package.json +3 -2
  96. {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/METADATA +17 -6
  97. {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/RECORD +101 -66
  98. xinference/web/ui/build/static/js/main.236e72e7.js +0 -3
  99. xinference/web/ui/build/static/js/main.236e72e7.js.map +0 -1
  100. xinference/web/ui/node_modules/.cache/babel-loader/0cccfbe5d963b8e31eb679f9d9677392839cedd04aa2956ac6b33cf19599d597.json +0 -1
  101. xinference/web/ui/node_modules/.cache/babel-loader/0f3b6cc71b7c83bdc85aa4835927aeb86af2ce0d2ac241917ecfbf90f75c6d27.json +0 -1
  102. xinference/web/ui/node_modules/.cache/babel-loader/2f651cf60b1bde50c0601c7110f77dd44819fb6e2501ff748a631724d91445d4.json +0 -1
  103. xinference/web/ui/node_modules/.cache/babel-loader/42bb623f337ad08ed076484185726e072ca52bb88e373d72c7b052db4c273342.json +0 -1
  104. xinference/web/ui/node_modules/.cache/babel-loader/57af83639c604bd3362d0f03f7505e81c6f67ff77bee7c6bb31f6e5523eba185.json +0 -1
  105. xinference/web/ui/node_modules/.cache/babel-loader/667753ce39ce1d4bcbf9a5f1a103d653be1d19d42f4e1fbaceb9b507679a52c7.json +0 -1
  106. xinference/web/ui/node_modules/.cache/babel-loader/66ed1bd4c06748c1b176a625c25c856997edc787856c73162f82f2b465c5d956.json +0 -1
  107. xinference/web/ui/node_modules/.cache/babel-loader/78f2521da2e2a98b075a2666cb782c7e2c019cd3c72199eecd5901c82d8655df.json +0 -1
  108. xinference/web/ui/node_modules/.cache/babel-loader/8d2b0b3c6988d1894694dcbbe708ef91cfe62d62dac317031f09915ced637953.json +0 -1
  109. xinference/web/ui/node_modules/.cache/babel-loader/9427ae7f1e94ae8dcd2333fb361e381f4054fde07394fe5448658e3417368476.json +0 -1
  110. xinference/web/ui/node_modules/.cache/babel-loader/bcee2b4e76b07620f9087989eb86d43c645ba3c7a74132cf926260af1164af0e.json +0 -1
  111. xinference/web/ui/node_modules/.cache/babel-loader/cc2ddd02ccc1dad1a2737ac247c79e6f6ed2c7836c6b68e511e3048f666b64af.json +0 -1
  112. xinference/web/ui/node_modules/.cache/babel-loader/d2e8e6665a7efc832b43907dadf4e3c896a59eaf8129f9a520882466c8f2e489.json +0 -1
  113. xinference/web/ui/node_modules/.cache/babel-loader/d8a42e9df7157de9f28eecefdf178fd113bf2280d28471b6e32a8a45276042df.json +0 -1
  114. xinference/web/ui/node_modules/.cache/babel-loader/e26750d9556e9741912333349e4da454c53dbfddbfc6002ab49518dcf02af745.json +0 -1
  115. xinference/web/ui/node_modules/.cache/babel-loader/ef42ec014d7bc373b874b2a1ff0dcd785490f125e913698bc049b0bd778e4d66.json +0 -1
  116. xinference/web/ui/node_modules/.cache/babel-loader/fe3eb4d76c79ca98833f686d642224eeeb94cc83ad14300d281623796d087f0a.json +0 -1
  117. {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/LICENSE +0 -0
  118. {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/WHEEL +0 -0
  119. {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/entry_points.txt +0 -0
  120. {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/vllm/core.py CHANGED
@@ -94,6 +94,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "code-llama-python",
     "code-llama-instruct",
     "mistral-instruct-v0.1",
+    "mistral-instruct-v0.2",
     "chatglm3",
 ]
 
@@ -170,7 +171,7 @@ class VLLMModel(LLM):
         )
         sanitized.setdefault("temperature", generate_config.get("temperature", 1.0))
         sanitized.setdefault("top_p", generate_config.get("top_p", 1.0))
-        sanitized.setdefault("max_tokens", generate_config.get("max_tokens", 16))
+        sanitized.setdefault("max_tokens", generate_config.get("max_tokens", 1024))
         sanitized.setdefault("stop", generate_config.get("stop", None))
         sanitized.setdefault(
             "stop_token_ids", generate_config.get("stop_token_ids", None)
@@ -303,6 +304,16 @@ class VLLMModel(LLM):
                     delta = choice["text"][len(previous_texts[i]) :]
                     previous_texts[i] = choice["text"]
                     choice["text"] = delta
+                prompt_tokens = len(_request_output.prompt_token_ids)
+                completion_tokens = sum(
+                    len(output.token_ids) for output in _request_output.outputs
+                )
+                total_tokens = prompt_tokens + completion_tokens
+                chunk["usage"] = CompletionUsage(
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=total_tokens,
+                )
                 yield chunk
 
         if stream:
@@ -379,7 +390,8 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
 
         generate_config = self._sanitize_chat_config(generate_config)
         # TODO(codingl2k1): qwen hacky to set stop for function call.
-        if tools and self.model_family.model_name == "qwen-chat":
+        model_family = self.model_family.model_family or self.model_family.model_name
+        if tools and "qwen-chat" == model_family:
             stop = generate_config.get("stop")
             if isinstance(stop, str):
                 generate_config["stop"] = [stop, "Observation:"]
@@ -400,6 +412,6 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
             assert not isinstance(c, AsyncGenerator)
             if tools:
                 return self._tool_calls_completion(
-                    self.model_family.model_name, self.model_uid, c, tools
+                    self.model_family, self.model_uid, c, tools
                 )
             return self._to_chat_completion(c)
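
Two behavioral notes on the hunks above: the server-side default for max_tokens rises from 16 to 1024 when the caller sets none, and every streamed vLLM chunk now carries a cumulative usage payload. A minimal client-side sketch, assuming a running Xinference server with an already-launched generate-capable model; the endpoint and model UID are placeholders:

from xinference.client import RESTfulClient

client = RESTfulClient("http://localhost:9997")  # placeholder endpoint
model = client.get_model("my-model-uid")         # placeholder model UID

# Each streamed chunk should now include a cumulative "usage" entry
# alongside the incremental text delta.
for chunk in model.generate(
    "What is the capital of France?",
    generate_config={"stream": True},
):
    delta = chunk["choices"][0]["text"]
    usage = chunk.get("usage")  # NotRequired: absent on pre-0.8.1 servers
    if usage is not None:
        print(f"{delta!r} ({usage['total_tokens']} tokens so far)")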
xinference/model/multimodal/__init__.py CHANGED
@@ -30,16 +30,23 @@ MODEL_CLASSES.append(QwenVLChat)
 
 
 def _install():
-    json_path = os.path.join(
+    json_path_huggingface = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "model_spec.json"
     )
-    for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
-        model_family = LVLMFamilyV1.parse_obj(json_obj)
-        BUILTIN_LVLM_FAMILIES.append(model_family)
-        for model_spec in model_family.model_specs:
-            MODEL_NAME_TO_REVISION[model_family.model_name].append(
-                model_spec.model_revision
-            )
+    json_path_modelscope = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "model_spec_modelscope.json"
+    )
+    for builtin_family, json_path in [
+        (BUILTIN_LVLM_FAMILIES, json_path_huggingface),
+        (BUILTIN_MODELSCOPE_LVLM_FAMILIES, json_path_modelscope),
+    ]:
+        for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
+            model_family = LVLMFamilyV1.parse_obj(json_obj)
+            builtin_family.append(model_family)
+            for model_spec in model_family.model_specs:
+                MODEL_NAME_TO_REVISION[model_family.model_name].append(
+                    model_spec.model_revision
+                )
 
 
 _install()
xinference/model/multimodal/core.py CHANGED
@@ -203,6 +203,8 @@ def match_multimodal(
                 and matched_quantization is None
             ):
                 continue
+            # Copy spec to avoid _apply_format_to_model_id modify the original spec.
+            spec = spec.copy()
             if quantization:
                 return (
                     family,
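
The added spec.copy() guards the shared registry entry: _apply_format_to_model_id later substitutes the {quantization} placeholder into model_id in place, which would otherwise corrupt the builtin spec for subsequent lookups. A hypothetical mini-repro of that hazard, using a stand-in pydantic model rather than the real LVLMSpecV1:

from pydantic import BaseModel

class FakeSpec(BaseModel):  # stand-in for LVLMSpecV1
    model_id: str

registry = [FakeSpec(model_id="Qwen/Qwen-VL-Chat-{quantization}")]

spec = registry[0].copy()  # work on a copy, as the diff now does
spec.model_id = spec.model_id.format(quantization="Int4")

assert spec.model_id == "Qwen/Qwen-VL-Chat-Int4"
assert "{quantization}" in registry[0].model_id  # registry entry stays pristine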
@@ -328,6 +330,11 @@ def _skip_download(
             logger.warning(f"Cache {cache_dir} exists, but it was from {hub}")
             return True
         return False
+    elif model_format in ["ggmlv3", "ggufv2", "gptq"]:
+        assert quantization is not None
+        return os.path.exists(
+            _get_meta_path(cache_dir, model_format, model_hub, quantization)
+        )
     else:
         raise ValueError(f"Unsupported format: {model_format}")
 
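For the single-file formats (ggmlv3, ggufv2, gptq) the cache is tracked per quantization, so the skip check reduces to testing for a completion marker. A rough sketch of the idea; the marker naming below is hypothetical and merely stands in for the package's real _get_meta_path:

import os

def _meta_marker(cache_dir: str, model_format: str, model_hub: str, quantization: str) -> str:
    # Hypothetical naming scheme; the real layout comes from _get_meta_path.
    return os.path.join(cache_dir, f"__valid_{model_format}_{model_hub}_{quantization}.done")

def skip_download(cache_dir: str, model_format: str, model_hub: str, quantization: str) -> bool:
    # A finished per-quantization download is detected by its marker file.
    return os.path.exists(_meta_marker(cache_dir, model_format, model_hub, quantization))
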
@@ -414,7 +421,7 @@ def cache_from_huggingface(
     ):
         return cache_dir
 
-    if model_spec.model_format in ["pytorch"]:
+    if model_spec.model_format in ["pytorch", "gptq"]:
         assert isinstance(model_spec, LVLMSpecV1)
         retry_download(
             huggingface_hub.snapshot_download,
xinference/model/multimodal/model_spec.json CHANGED
@@ -20,6 +20,15 @@
             ],
             "model_id": "Qwen/Qwen-VL-Chat",
             "model_revision": "6665c780ade5ff3f08853b4262dcb9c8f9598d42"
+        },
+        {
+            "model_format": "gptq",
+            "model_size_in_billions": 7,
+            "quantizations": [
+                "Int4"
+            ],
+            "model_id": "Qwen/Qwen-VL-Chat-{quantization}",
+            "model_revision": "5d3a5aa033ed2c502300d426c81cc5b13bcd1409"
         }
     ],
     "prompt_style": {
xinference/model/multimodal/model_spec_modelscope.json ADDED
@@ -0,0 +1,45 @@
+[
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "qwen-vl-chat",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "Qwen-VL-Chat supports more flexible interaction, such as multiple image inputs, multi-round question answering, and creative capabilities.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "Qwen/Qwen-VL-Chat",
+                "model_revision": "master"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "Qwen/Qwen-VL-Chat-{quantization}",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "QWEN",
+            "system_prompt": "You are a helpful assistant.",
+            "roles": [
+                "user",
+                "assistant"
+            ]
+        }
+    }
+]
xinference/model/multimodal/qwen_vl.py CHANGED
@@ -18,7 +18,6 @@ import tempfile
 import time
 import uuid
 from typing import Dict, Iterator, List, Optional, Union
-from urllib.parse import urlparse
 
 from ...types import (
     ChatCompletion,
@@ -73,14 +72,7 @@ class QwenVLChat(LVLM):
 
     def _message_content_to_qwen(self, content) -> str:
         def _ensure_url(_url):
-            try:
-                if _url.startswith("data:"):
-                    raise "Not a valid url."
-                parsed = urlparse(_url)
-                if not parsed.scheme:
-                    raise "Not a valid url."
-                return _url
-            except Exception:
+            if _url.startswith("data:"):
                 logging.info("Parse url by base64 decoder.")
                 # https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
                 # e.g. f"data:image/jpeg;base64,{base64_image}"
@@ -93,6 +85,10 @@ class QwenVLChat(LVLM):
                     f.write(data)
                 logging.info("Dump base64 data to %s", f.name)
                 return f.name
+            else:
+                if len(_url) > 2048:
+                    raise Exception(f"Image url is too long, {len(_url)} > 2048.")
+                return _url
 
         if not isinstance(content, str):
             # TODO(codingl2k1): Optimize _ensure_url
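
The rewritten _ensure_url branches on the prefix alone: a data: URL is base64-decoded into a temp file, and anything else passes through as a plain URL capped at 2048 characters. A self-contained sketch of the decode branch, with a helper name of our own choosing rather than the package's:

import base64
import tempfile

def dump_data_url_to_file(data_url: str) -> str:
    # e.g. "data:image/jpeg;base64,<payload>", per the OpenAI vision docs
    header, _, payload = data_url.partition(",")
    suffix = "." + header.split(";")[0].split("/")[-1]  # "data:image/png;base64" -> ".png"
    data = base64.b64decode(payload)
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as f:
        f.write(data)
    return f.name

path = dump_data_url_to_file("data:image/png;base64," + base64.b64encode(b"\x89PNG").decode())
print(path)  # temp file holding the decoded bytes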
xinference/model/utils.py CHANGED
@@ -153,8 +153,13 @@ def is_model_cached(model_spec: Any, name_to_revisions_mapping: Dict):
 
 
 def is_valid_model_name(model_name: str) -> bool:
-    model_name = model_name.strip()
-    return 0 < len(model_name) <= 100
+    import re
+
+    if len(model_name) == 0:
+        return False
+
+    # check if contains +/?%#&=\s
+    return re.match(r"^[^+\/?%#&=\s]*$", model_name) is not None
 
 
 def parse_uri(uri: str) -> Tuple[str, str]:
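
The new validator swaps the old length rule (1-100 characters after stripping) for a character blacklist: any name containing one of the URL-special characters + / ? % # & = or whitespace is rejected, and the 100-character upper bound is gone. A quick check of the behavior, reusing the regex from the diff:

import re

def is_valid_model_name(model_name: str) -> bool:
    if len(model_name) == 0:
        return False
    return re.match(r"^[^+\/?%#&=\s]*$", model_name) is not None

assert is_valid_model_name("my-model_v2")
assert not is_valid_model_name("my model")  # whitespace
assert not is_valid_model_name("llama/7b")  # slash
assert not is_valid_model_name("")          # empty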
xinference/types.py CHANGED
@@ -110,6 +110,7 @@ class CompletionChunk(TypedDict):
     created: int
     model: str
     choices: List[CompletionChoice]
+    usage: NotRequired[CompletionUsage]
 
 
 class Completion(TypedDict):
@@ -160,6 +161,7 @@ class ChatCompletionChunk(TypedDict):
     object: Literal["chat.completion.chunk"]
     created: int
     choices: List[ChatCompletionChunkChoice]
+    usage: NotRequired[CompletionUsage]
 
 
 class ChatglmCppModelConfig(TypedDict, total=False):
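
Because usage is NotRequired, the key may be missing from a chunk, so consumers should read it with .get rather than indexing. A minimal typed sketch, with the chunk fields trimmed to the ones relevant here:

from typing_extensions import NotRequired, TypedDict

class CompletionUsage(TypedDict):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

class CompletionChunk(TypedDict):
    choices: list  # trimmed; the real type also carries id/object/created/model
    usage: NotRequired[CompletionUsage]

def total_tokens_so_far(chunk: CompletionChunk) -> int:
    usage = chunk.get("usage")
    return usage["total_tokens"] if usage is not None else 0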
xinference/web/ui/build/asset-manifest.json CHANGED
@@ -1,11 +1,11 @@
 {
   "files": {
-    "main.js": "./static/js/main.236e72e7.js",
+    "main.js": "./static/js/main.b83095c2.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
-    "main.236e72e7.js.map": "./static/js/main.236e72e7.js.map"
+    "main.b83095c2.js.map": "./static/js/main.b83095c2.js.map"
   },
   "entrypoints": [
-    "static/js/main.236e72e7.js"
+    "static/js/main.b83095c2.js"
   ]
 }
xinference/web/ui/build/index.html CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.236e72e7.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.b83095c2.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>