xinference 1.2.2__py3-none-any.whl → 1.3.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (68)
  1. xinference/_version.py +3 -3
  2. xinference/client/restful/restful_client.py +9 -1
  3. xinference/core/model.py +19 -0
  4. xinference/core/resource.py +7 -1
  5. xinference/core/status_guard.py +1 -0
  6. xinference/core/supervisor.py +228 -19
  7. xinference/core/utils.py +1 -29
  8. xinference/core/worker.py +28 -2
  9. xinference/deploy/cmdline.py +33 -3
  10. xinference/deploy/test/test_cmdline.py +32 -0
  11. xinference/device_utils.py +43 -1
  12. xinference/model/audio/kokoro.py +19 -36
  13. xinference/model/audio/model_spec.json +1 -1
  14. xinference/model/image/stable_diffusion/core.py +15 -6
  15. xinference/model/llm/llm_family.json +521 -6
  16. xinference/model/llm/llm_family.py +3 -1
  17. xinference/model/llm/llm_family_modelscope.json +559 -6
  18. xinference/model/llm/reasoning_parsers/__init__.py +13 -0
  19. xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +98 -0
  20. xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py +140 -0
  21. xinference/model/llm/sglang/core.py +99 -11
  22. xinference/model/llm/transformers/intern_vl.py +23 -14
  23. xinference/model/llm/utils.py +55 -18
  24. xinference/model/llm/vllm/core.py +23 -2
  25. xinference/model/llm/vllm/xavier/executor.py +2 -2
  26. xinference/model/llm/vllm/xavier/scheduler.py +3 -3
  27. xinference/thirdparty/internvl/conversation.py +26 -17
  28. xinference/types.py +2 -0
  29. xinference/web/ui/build/asset-manifest.json +6 -6
  30. xinference/web/ui/build/index.html +1 -1
  31. xinference/web/ui/build/static/css/main.f8177338.css +2 -0
  32. xinference/web/ui/build/static/css/main.f8177338.css.map +1 -0
  33. xinference/web/ui/build/static/js/main.ad42919c.js +3 -0
  34. xinference/web/ui/build/static/js/main.ad42919c.js.map +1 -0
  35. xinference/web/ui/node_modules/.cache/babel-loader/074a42304bbbaa79e1bfc3b28502457a390df55708de9006f4cc8e35c60aea87.json +1 -0
  36. xinference/web/ui/node_modules/.cache/babel-loader/0acb065326560592b10888234242f94f67efe28458b90f273d4d4fba9daa0cd2.json +1 -0
  37. xinference/web/ui/node_modules/.cache/babel-loader/279ace390216236a82b3d8995c78eca4d637ac9a523e9f521a2d9c76607a43d7.json +1 -0
  38. xinference/web/ui/node_modules/.cache/babel-loader/630a7bd592596cc6e291fc32238ce7c08238038a64ed8ccee0eb0c13c9902910.json +1 -0
  39. xinference/web/ui/node_modules/.cache/babel-loader/6cb9f6c62ab4042f0b11c5d75e51187188e9d6f5f08b1d63e796e051bafdb457.json +1 -0
  40. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +1 -0
  41. xinference/web/ui/node_modules/.cache/babel-loader/914c33e91c1012e3bcd3e96f3a25884cbef148290632d0266dab972b8cc1e95f.json +1 -0
  42. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +1 -0
  43. xinference/web/ui/node_modules/.cache/babel-loader/b7939cd3a48adf12fccfdd0803019b5cc235ff7de3a297dae70ce635e0eea13e.json +1 -0
  44. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +1 -0
  45. xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +1 -0
  46. xinference/web/ui/node_modules/.cache/babel-loader/fecf076bcd198a458c2a6ab0e85e40dc1c99994c353164e79c469be162cb74c9.json +1 -0
  47. xinference/web/ui/src/locales/en.json +14 -1
  48. xinference/web/ui/src/locales/zh.json +14 -1
  49. {xinference-1.2.2.dist-info → xinference-1.3.0.post1.dist-info}/METADATA +11 -11
  50. {xinference-1.2.2.dist-info → xinference-1.3.0.post1.dist-info}/RECORD +55 -49
  51. xinference/web/ui/build/static/css/main.51a587ff.css +0 -2
  52. xinference/web/ui/build/static/css/main.51a587ff.css.map +0 -1
  53. xinference/web/ui/build/static/js/main.b0936c54.js +0 -3
  54. xinference/web/ui/build/static/js/main.b0936c54.js.map +0 -1
  55. xinference/web/ui/node_modules/.cache/babel-loader/0c2fb5375667931c4a331c99e0d87dc145e8f327cea3f44d6e56f54c7c1d4020.json +0 -1
  56. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +0 -1
  57. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +0 -1
  58. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +0 -1
  59. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +0 -1
  60. xinference/web/ui/node_modules/.cache/babel-loader/a7f1a71f6580dfe810c685a9c1d68e318f71e1fa258fbe50b87a6ac37cc0a598.json +0 -1
  61. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +0 -1
  62. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +0 -1
  63. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +0 -1
  64. /xinference/web/ui/build/static/js/{main.b0936c54.js.LICENSE.txt → main.ad42919c.js.LICENSE.txt} +0 -0
  65. {xinference-1.2.2.dist-info → xinference-1.3.0.post1.dist-info}/LICENSE +0 -0
  66. {xinference-1.2.2.dist-info → xinference-1.3.0.post1.dist-info}/WHEEL +0 -0
  67. {xinference-1.2.2.dist-info → xinference-1.3.0.post1.dist-info}/entry_points.txt +0 -0
  68. {xinference-1.2.2.dist-info → xinference-1.3.0.post1.dist-info}/top_level.txt +0 -0
xinference/deploy/cmdline.py

@@ -770,11 +770,17 @@ def remove_cache(
     type=int,
     help="The replica count of the model, default is 1.",
 )
+@click.option(
+    "--n-worker",
+    default=1,
+    type=int,
+    help="The number of workers used by the model, default is 1.",
+)
 @click.option(
     "--n-gpu",
     default="auto",
     type=str,
-    help='The number of GPUs used by the model, default is "auto".',
+    help='The number of GPUs used by the model, if n_worker>1, means number of GPUs per worker, default is "auto".',
 )
 @click.option(
     "--lora-modules",
@@ -815,6 +821,12 @@ def remove_cache(
     type=bool,
     help="Whether or not to allow for custom models defined on the Hub in their own modeling files.",
 )
+@click.option(
+    "--reasoning-content",
+    default=False,
+    type=bool,
+    help="Whether or not to enable reasoning content in model responses.",
+)
 @click.option(
     "--api-key",
     "-ak",
@@ -822,6 +834,7 @@ def remove_cache(
     type=str,
     help="Api-Key for access xinference api with authorization.",
 )
+@click.option("--model-path", "-mp", default=None, type=str, help="Model path to run.")
 @click.pass_context
 def model_launch(
     ctx,
@@ -834,6 +847,7 @@ def model_launch(
     model_format: str,
     quantization: str,
     replica: int,
+    n_worker: int,
     n_gpu: str,
     lora_modules: Optional[Tuple],
     image_lora_load_kwargs: Optional[Tuple],
@@ -841,15 +855,28 @@ def model_launch(
     worker_ip: Optional[str],
     gpu_idx: Optional[str],
     trust_remote_code: bool,
+    reasoning_content: bool,
     api_key: Optional[str],
+    model_path: Optional[str],
 ):
     kwargs = {}
     for i in range(0, len(ctx.args), 2):
         if not ctx.args[i].startswith("--"):
             raise ValueError(
-                f"You must specify extra kwargs with `--` prefix. There is an error in parameter passing that is {ctx.args[i]}."
+                f"You must specify extra kwargs with `--` prefix. "
+                f"There is an error in parameter passing that is {ctx.args[i]}."
             )
-        kwargs[ctx.args[i][2:]] = handle_click_args_type(ctx.args[i + 1])
+        param_name = ctx.args[i][2:]
+        param_value = handle_click_args_type(ctx.args[i + 1])
+        if param_name == "model_path":
+            # fix for --model_path which is the old fashion to set model_path,
+            # now model_path is a builtin option, try to make it compatible
+            if model_path is None:
+                model_path = param_value
+                continue
+            else:
+                raise ValueError("Cannot set both for --model-path and --model_path")
+        kwargs[param_name] = param_value
     print(f"Launch model name: {model_name} with kwargs: {kwargs}", file=sys.stderr)

     if model_type == "LLM" and model_engine is None:
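
The loop above walks ctx.args as --key value pairs, which is how Click hands through unrecognized extra options. A stripped-down illustration of the new dispatch, with handle_click_args_type's coercion omitted and placeholder values:

ctx_args = ["--temperature", "0.7", "--model_path", "/data/my-model"]
kwargs, model_path = {}, None
for i in range(0, len(ctx_args), 2):
    name, value = ctx_args[i][2:], ctx_args[i + 1]
    if name == "model_path":
        # The legacy --model_path spelling folds into the builtin option...
        if model_path is not None:
            # ...unless both spellings were given at once.
            raise ValueError("Cannot set both for --model-path and --model_path")
        model_path = value
        continue
    kwargs[name] = value
# kwargs == {"temperature": "0.7"}; model_path == "/data/my-model"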
@@ -914,11 +941,14 @@ def model_launch(
         model_format=model_format,
         quantization=quantization,
         replica=replica,
+        n_worker=n_worker,
         n_gpu=_n_gpu,
         peft_model_config=peft_model_config,
         worker_ip=worker_ip,
         gpu_idx=_gpu_idx,
         trust_remote_code=trust_remote_code,
+        model_path=model_path,
+        reasoning_content=reasoning_content,
         **kwargs,
     )

xinference/deploy/test/test_cmdline.py

@@ -147,6 +147,38 @@ def test_cmdline(setup, stream, model_uid):
     assert model_uid not in result.stdout


+def test_cmdline_model_path_error(setup):
+    endpoint, _ = setup
+    runner = CliRunner(mix_stderr=False)
+
+    # launch model
+    result = runner.invoke(
+        model_launch,
+        [
+            "--endpoint",
+            endpoint,
+            "--model-name",
+            "tiny-llama",
+            "--size-in-billions",
+            1,
+            "--model-format",
+            "ggufv2",
+            "--quantization",
+            "Q2_K",
+            "--model-path",
+            "/path/to/model",
+            "--model_path",
+            "/path/to/model",
+        ],
+    )
+    assert result.exit_code > 0
+    with pytest.raises(
+        ValueError, match="Cannot set both for --model-path and --model_path"
+    ):
+        t, e, tb = result.exc_info
+        raise e.with_traceback(tb)
+
+
 def test_cmdline_of_custom_model(setup):
     endpoint, _ = setup
     runner = CliRunner()
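
CliRunner captures exceptions instead of letting them propagate, so the new test re-raises result.exc_info inside pytest.raises to assert on the original ValueError rather than only on the non-zero exit code.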
xinference/device_utils.py

@@ -13,9 +13,9 @@
 # limitations under the License.

 import os
+from typing import Dict, Literal, Union

 import torch
-from typing_extensions import Literal, Union

 DeviceType = Literal["cuda", "mps", "xpu", "npu", "cpu"]
 DEVICE_TO_ENV_NAME = {
@@ -122,3 +122,45 @@ def gpu_count():
         return torch.npu.device_count()
     else:
         return 0
+
+
+def _get_nvidia_gpu_mem_info(gpu_id: int) -> Dict[str, float]:
+    from pynvml import (
+        nvmlDeviceGetHandleByIndex,
+        nvmlDeviceGetMemoryInfo,
+        nvmlDeviceGetName,
+        nvmlDeviceGetUtilizationRates,
+    )
+
+    handler = nvmlDeviceGetHandleByIndex(gpu_id)
+    gpu_name = nvmlDeviceGetName(handler)
+    mem_info = nvmlDeviceGetMemoryInfo(handler)
+    utilization = nvmlDeviceGetUtilizationRates(handler)
+    return {
+        "name": gpu_name,
+        "total": mem_info.total,
+        "used": mem_info.used,
+        "free": mem_info.free,
+        "util": utilization.gpu,
+    }
+
+
+def get_nvidia_gpu_info() -> Dict:
+    from pynvml import nvmlDeviceGetCount, nvmlInit, nvmlShutdown
+
+    try:
+        nvmlInit()
+        device_count = nvmlDeviceGetCount()
+        res = {}
+        for i in range(device_count):
+            res[f"gpu-{i}"] = _get_nvidia_gpu_mem_info(i)
+        return res
+    except:
+        # TODO: add log here
+        # logger.debug(f"Cannot init nvml. Maybe due to lack of NVIDIA GPUs or incorrect installation of CUDA.")
+        return {}
+    finally:
+        try:
+            nvmlShutdown()
+        except:
+            pass
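
get_nvidia_gpu_info() keys devices as gpu-0, gpu-1, ..., with memory figures in bytes and utilization as a 0-100 percentage, as pynvml reports them; the bare except means hosts without an NVIDIA driver simply get {}. A hedged usage sketch:

from xinference.device_utils import get_nvidia_gpu_info

for gpu, stats in get_nvidia_gpu_info().items():
    # total/used/free are raw bytes from pynvml; util is a 0-100 busy percentage.
    print(f"{gpu}: {stats['name']} "
          f"{stats['used'] / 1024**3:.1f}/{stats['total'] / 1024**3:.1f} GiB "
          f"({stats['util']}% busy)")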
xinference/model/audio/kokoro.py

@@ -26,36 +26,6 @@ logger = logging.getLogger(__name__)


 class KokoroModel:
-    # The available voices, should keep sync with https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
-    VOICES = [
-        "af_alloy",
-        "af_aoede",
-        "af_bella",
-        "af_jessica",
-        "af_kore",
-        "af_nicole",
-        "af_nova",
-        "af_river",
-        "af_sarah",
-        "af_sky",
-        "am_adam",
-        "am_echo",
-        "am_eric",
-        "am_fenrir",
-        "am_liam",
-        "am_michael",
-        "am_onyx",
-        "am_puck",
-        "bf_alice",
-        "bf_emma",
-        "bf_isabella",
-        "bf_lily",
-        "bm_daniel",
-        "bm_fable",
-        "bm_george",
-        "bm_lewis",
-    ]
-
     def __init__(
         self,
         model_uid: str,
@@ -89,10 +59,25 @@ class KokoroModel:
         config_path = os.path.join(self._model_path, "config.json")
         model_path = os.path.join(self._model_path, "kokoro-v1_0.pth")
         # LANG_CODES = dict(
+        #     # pip install misaki[en]
         #     a='American English',
         #     b='British English',
+        #
+        #     # espeak-ng
+        #     e='es',
+        #     f='fr-fr',
+        #     h='hi',
+        #     i='it',
+        #     p='pt-br',
+        #
+        #     # pip install misaki[ja]
+        #     j='Japanese',
+        #
+        #     # pip install misaki[zh]
+        #     z='Mandarin Chinese',
         # )
         lang_code = self._kwargs.get("lang_code", "a")
+        logger.info("Launching Kokoro model with language code: %s", lang_code)
         self._model = KPipeline(
             lang_code=lang_code,
             model=KModel(config=config_path, model=model_path),
@@ -114,14 +99,12 @@ class KokoroModel:
             raise Exception("Kokoro does not support stream mode.")
         assert self._model is not None
         if not voice:
-            voice = next(iter(self.VOICES))
+            voice = "af_alloy"
             logger.info("Auto select speaker: %s", voice)
-        elif not voice.endswith(".pt") and voice not in self.VOICES:
-            raise ValueError(
-                f"Invalid voice: {voice}, available speakers: {self.VOICES}"
-            )
-        else:
+        elif voice.endswith(".pt"):
             logger.info("Using custom voice pt: %s", voice)
+        else:
+            logger.info("Using voice: %s", voice)
         logger.info("Speech kwargs: %s", kwargs)
         generator = self._model(text=input, voice=voice, speed=speed, **kwargs)
         results = list(generator)
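
With the VOICES allow-list removed, voice ids are forwarded to KPipeline unvalidated, so an unknown name now fails inside Kokoro rather than up front, and anything ending in .pt is treated as a custom voice tensor. A hedged client-side sketch; the model uid and file path are placeholders:

model = client.get_model("Kokoro-82M")  # placeholder uid
# Built-in voice id: passed through as-is, no longer checked against a local list.
audio = model.speech(input="Hello there.", voice="af_bella")
# Custom voice tensor: names ending in ".pt" are used directly.
audio = model.speech(input="Hello there.", voice="/path/to/voice.pt")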
xinference/model/audio/model_spec.json

@@ -343,7 +343,7 @@
     "model_name": "Kokoro-82M",
     "model_family": "Kokoro",
     "model_id": "hexgrad/Kokoro-82M",
-    "model_revision": "7a29fcdf8e997bac6d6f5f6f0c2f0b92912f6102",
+    "model_revision": "7884269d6fd3f9beabc271b6f1308e5699281fa9",
     "model_ability": "text-to-audio",
     "multilingual": true
 }
xinference/model/image/stable_diffusion/core.py

@@ -22,7 +22,6 @@ import logging
 import os
 import re
 import sys
-import warnings
 from glob import glob
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

@@ -412,12 +411,22 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         else:
             raise ValueError(f"Unknown sampler: {sampler_name}")

-    @staticmethod
+    def _need_set_scheduler(self, scheduler: Any) -> bool:
+        """Determine whether it is necessary to set up a scheduler"""
+        if self._model_spec is None:
+            return False
+        if scheduler is None:
+            return False
+        if "FLUX" in self._model_spec.model_name:
+            logger.warning("FLUX model, skipping scheduler setup")
+            return False
+        return True
+
     @contextlib.contextmanager
-    def _reset_when_done(model: Any, sampler_name: str):
-        assert model is not None
+    def _reset_when_done(self, model: Any, sampler_name: str):
         scheduler = DiffusionModel._get_scheduler(model, sampler_name)
-        if scheduler:
+        if self._need_set_scheduler(scheduler):
+            logger.debug("Use scheduler %s", scheduler)
             default_scheduler = model.scheduler
             model.scheduler = scheduler
             try:
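
Making _reset_when_done an instance method lets it consult self._model_spec and skip the scheduler swap for FLUX models. The underlying pattern, temporarily replacing an attribute and guaranteeing restoration, is a standard contextmanager idiom; a self-contained sketch of the same shape (not xinference code):

import contextlib

@contextlib.contextmanager
def swap_attr(obj, name, value):
    """Temporarily replace obj.<name>, restoring it even if the body raises."""
    old = getattr(obj, name)
    setattr(obj, name, value)
    try:
        yield obj
    finally:
        setattr(obj, name, old)

# e.g. with swap_attr(pipeline, "scheduler", new_scheduler): pipeline(prompt)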
@@ -517,7 +526,7 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         for key in list(kwargs):
             allow_key = model_accept_param(key, model)
             if not allow_key:
-                warnings.warn(f"{type(model)} cannot accept `{key}`, will ignore it")
+                logger.warning(f"{type(model)} cannot accept `{key}`, will ignore it")
                 kwargs.pop(key)

     def text_to_image(