xinference 0.15.0__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (83)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +204 -1
  3. xinference/client/restful/restful_client.py +4 -2
  4. xinference/core/image_interface.py +28 -0
  5. xinference/core/model.py +28 -0
  6. xinference/core/supervisor.py +6 -0
  7. xinference/model/audio/fish_speech.py +9 -9
  8. xinference/model/audio/model_spec.json +9 -9
  9. xinference/model/audio/whisper.py +4 -1
  10. xinference/model/image/core.py +2 -1
  11. xinference/model/image/model_spec.json +16 -4
  12. xinference/model/image/model_spec_modelscope.json +16 -4
  13. xinference/model/image/sdapi.py +136 -0
  14. xinference/model/image/stable_diffusion/core.py +148 -20
  15. xinference/model/llm/__init__.py +8 -0
  16. xinference/model/llm/llm_family.json +393 -0
  17. xinference/model/llm/llm_family.py +3 -1
  18. xinference/model/llm/llm_family_modelscope.json +408 -3
  19. xinference/model/llm/sglang/core.py +3 -0
  20. xinference/model/llm/transformers/chatglm.py +1 -1
  21. xinference/model/llm/transformers/core.py +6 -0
  22. xinference/model/llm/transformers/deepseek_v2.py +340 -0
  23. xinference/model/llm/transformers/qwen2_audio.py +168 -0
  24. xinference/model/llm/transformers/qwen2_vl.py +31 -5
  25. xinference/model/llm/utils.py +104 -84
  26. xinference/model/llm/vllm/core.py +8 -0
  27. xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +2 -3
  28. xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +1 -1
  29. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
  30. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
  31. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
  32. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
  33. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
  34. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
  35. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
  36. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
  37. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
  38. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
  39. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
  40. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
  41. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
  42. xinference/thirdparty/fish_speech/tools/api.py +79 -134
  43. xinference/thirdparty/fish_speech/tools/commons.py +35 -0
  44. xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
  45. xinference/thirdparty/fish_speech/tools/file.py +17 -0
  46. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
  47. xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
  48. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
  49. xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
  50. xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
  51. xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
  52. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
  53. xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
  54. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
  55. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
  56. xinference/thirdparty/fish_speech/tools/webui.py +12 -146
  57. xinference/types.py +7 -4
  58. xinference/web/ui/build/asset-manifest.json +6 -6
  59. xinference/web/ui/build/index.html +1 -1
  60. xinference/web/ui/build/static/css/{main.632e9148.css → main.5061c4c3.css} +2 -2
  61. xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
  62. xinference/web/ui/build/static/js/{main.9cfafbd6.js → main.754740c0.js} +3 -3
  63. xinference/web/ui/build/static/js/main.754740c0.js.map +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
  66. {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/METADATA +9 -3
  67. {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/RECORD +72 -74
  68. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
  69. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
  70. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
  71. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
  72. xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
  73. xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
  74. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
  75. xinference/web/ui/build/static/css/main.632e9148.css.map +0 -1
  76. xinference/web/ui/build/static/js/main.9cfafbd6.js.map +0 -1
  77. xinference/web/ui/node_modules/.cache/babel-loader/01d6d198156bacbd436c51435edbd4b2cacd47a79db929105eba30f74b67d48d.json +0 -1
  78. xinference/web/ui/node_modules/.cache/babel-loader/59eb25f514afcc4fefd1b309d192b2455f1e0aec68a9de598ca4b2333fe2c774.json +0 -1
  79. /xinference/web/ui/build/static/js/{main.9cfafbd6.js.LICENSE.txt → main.754740c0.js.LICENSE.txt} +0 -0
  80. {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/LICENSE +0 -0
  81. {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/WHEEL +0 -0
  82. {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/entry_points.txt +0 -0
  83. {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/utils.py
@@ -301,99 +301,89 @@ class ChatModelMixin:
         }
 
     @staticmethod
-    def _eval_glm_chat_arguments(c):
+    def _eval_glm_chat_arguments(c) -> List[Tuple]:
+        """
+        Currently, glm4 tool call only supports one function
+        """
         try:
             if isinstance(c, dict):
-                return None, c["name"], c["arguments"]
+                return [(None, c["name"], c["arguments"])]
         except KeyError:
             logger.error("Can't parse glm output: %s", c)
-            return str(c), None, None
+            return [(str(c), None, None)]
         else:
-            return str(c), None, None
+            return [(str(c), None, None)]
 
-    @staticmethod
-    def _eval_qwen_chat_arguments(c):
+    @classmethod
+    def _handle_qwen_tool_result(cls, text: str) -> List[Tuple]:
+        text: str = text.strip()  # type: ignore
+        contents: List[str] = text.split(QWEN_TOOL_CALL_SYMBOLS[1])
+        results: List[Tuple] = []
+        for content in contents:
+            content = content.strip()
+            if content:
+                if content.startswith(QWEN_TOOL_CALL_SYMBOLS[0]):
+                    content = content[len(QWEN_TOOL_CALL_SYMBOLS[0]) :]
+                content = content.strip()
+                try:
+                    res = json.loads(content)
+                    results.append((None, res["name"], res["arguments"]))
+                except Exception as e:
+                    logger.error(
+                        "Can't parse single qwen tool call output: %s. Error: %s",
+                        content,
+                        e,
+                    )
+                    results.append((content, None, None))
+        return results
+
+    @classmethod
+    def _eval_qwen_chat_arguments(cls, c) -> List[Tuple]:
         text = c["choices"][0]["text"]
-        text: str = text.strip()
-        if text.startswith(QWEN_TOOL_CALL_SYMBOLS[0]):
-            text = text[len(QWEN_TOOL_CALL_SYMBOLS[0]) :]
-        if text.endswith(QWEN_TOOL_CALL_SYMBOLS[1]):
-            text = text[: -len(QWEN_TOOL_CALL_SYMBOLS[1])]
-        text = text.strip()
-        try:
-            content = json.loads(text)
-            return None, content["name"], content["arguments"]
-        except Exception as e:
-            logger.error("Can't parse qwen tool call output: %s. Error: %s", text, e)
-            return text, None, None
+        return cls._handle_qwen_tool_result(text)
 
     @classmethod
     def _eval_tool_arguments(cls, model_family, c):
         family = model_family.model_family or model_family.model_name
         if family in GLM4_TOOL_CALL_FAMILY:
-            content, func, args = cls._eval_glm_chat_arguments(c)
+            result = cls._eval_glm_chat_arguments(c)
         elif family in QWEN_TOOL_CALL_FAMILY:
-            content, func, args = cls._eval_qwen_chat_arguments(c)
+            result = cls._eval_qwen_chat_arguments(c)
         else:
             raise Exception(
                 f"Model {model_family.model_name} is not support tool calls."
             )
-        logger.debug("Tool call content: %s, func: %s, args: %s", content, func, args)
-        return content, func, args
-
-    @classmethod
-    def _tools_token_filter(cls, model_family):
-        """
-        Generates a filter function for Qwen series models to retain outputs after "\nFinal Answer:".
-
-        Returns:
-            A function that takes tokens (string output by the model so far) and delta (new tokens added) as input,
-            returns the part after "\nFinal Answer:" if found, else returns delta.
-        """
-        family = model_family.model_family or model_family.model_name
-        if family in QWEN_TOOL_CALL_FAMILY:
-            # Encapsulating function to reset 'found' after each call
-            found = False
-
-            def process_tokens(tokens: str, delta: str):
-                nonlocal found
-                # Once "Final Answer:" is found, future tokens are allowed.
-                if found:
-                    return delta
-                # Check if the token ends with "\nFinal Answer:" and update `found`.
-                final_answer_idx = tokens.lower().rfind("\nfinal answer:")
-                if final_answer_idx != -1:
-                    found = True
-                    return tokens[final_answer_idx + len("\nfinal answer:") :]
-                return ""
-
-            return process_tokens
-        else:
-            return lambda tokens, delta: delta
+        logger.debug(f"Tool call content: {result}")
+        return result
 
     @classmethod
     def _tool_calls_completion_chunk(cls, model_family, model_uid, c):
         _id = str(uuid.uuid4())
-        content, func, args = cls._eval_tool_arguments(model_family, c)
-        if func:
-            d = {
-                "role": "assistant",
-                "content": content,
-                "tool_calls": [
-                    {
-                        "id": f"call_{_id}",
-                        "type": "function",
-                        "function": {
-                            "name": func,
-                            "arguments": json.dumps(args, ensure_ascii=False),
-                        },
-                    }
-                ],
-            }
-            finish_reason = "tool_calls"
-        else:
-            d = {"role": "assistant", "content": content, "tool_calls": []}
-            finish_reason = "stop"
+        tool_result = cls._eval_tool_arguments(model_family, c)
+        tool_calls = []
+        failed_contents = []
+        for content, func, args in tool_result:
+            if func:
+                tool_calls.append(
+                    [
+                        {
+                            "id": f"call_{_id}",
+                            "type": "function",
+                            "function": {
+                                "name": func,
+                                "arguments": json.dumps(args, ensure_ascii=False),
+                            },
+                        }
+                    ]
+                )
+            else:
+                failed_contents.append(content)
+        finish_reason = "tool_calls" if tool_calls else "stop"
+        d = {
+            "role": "assistant",
+            "content": ". ".join(failed_contents) if failed_contents else None,
+            "tool_calls": tool_calls,
+        }
         try:
             usage = c.get("usage")
             assert "prompt_tokens" in usage
@@ -422,12 +412,13 @@ class ChatModelMixin:
     @classmethod
     def _tool_calls_completion(cls, model_family, model_uid, c):
         _id = str(uuid.uuid4())
-        content, func, args = cls._eval_tool_arguments(model_family, c)
-        if func:
-            m = {
-                "role": "assistant",
-                "content": content,
-                "tool_calls": [
+        tool_result = cls._eval_tool_arguments(model_family, c)
+
+        tool_calls = []
+        failed_contents = []
+        for content, func, args in tool_result:
+            if func:
+                tool_calls.append(
                     {
                         "id": f"call_{_id}",
                         "type": "function",
@@ -436,12 +427,15 @@ class ChatModelMixin:
                             "arguments": json.dumps(args, ensure_ascii=False),
                         },
                     }
-                ],
-            }
-            finish_reason = "tool_calls"
-        else:
-            m = {"role": "assistant", "content": content, "tool_calls": []}
-            finish_reason = "stop"
+                )
+            else:
+                failed_contents.append(content)
+        finish_reason = "tool_calls" if tool_calls else "stop"
+        m = {
+            "role": "assistant",
+            "content": ". ".join(failed_contents) if failed_contents else None,
+            "tool_calls": tool_calls,
+        }
         try:
             usage = c.get("usage")
             assert "prompt_tokens" in usage
@@ -555,6 +549,32 @@ def generate_completion_chunk(
     )
 
 
+def generate_completion(
+    model_uid: str,
+    response: str,
+    prompt_tokens=-1,
+    completion_tokens=-1,
+    total_tokens=-1,
+    finish_reason="stop",
+) -> Completion:
+    return Completion(
+        id=str(uuid.uuid1()),
+        object="text_completion",
+        created=int(time.time()),
+        model=model_uid,
+        choices=[
+            CompletionChoice(
+                text=response, index=0, logprobs=None, finish_reason=finish_reason
+            )
+        ],
+        usage=CompletionUsage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens,
+        ),
+    )
+
+
 def generate_chat_completion(
     model_uid: str,
     response: str,
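The new generate_completion helper mirrors the existing generate_chat_completion but wraps plain text completions. A hedged usage sketch, assuming Completion and its companions are the TypedDicts from xinference/types.py:

    completion = generate_completion(
        "my-model-uid",          # hypothetical model uid
        "Hello, world!",
        prompt_tokens=5,
        completion_tokens=4,
        total_tokens=9,
    )
    # Completion is a TypedDict, so fields are plain dict keys:
    assert completion["choices"][0]["text"] == "Hello, world!"
    assert completion["usage"]["total_tokens"] == 9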
xinference/model/llm/vllm/core.py
@@ -104,6 +104,7 @@ VLLM_SUPPORTED_MODELS = [
     "code-llama-python",
     "deepseek",
     "deepseek-coder",
+    "yi-coder",
 ]
 VLLM_SUPPORTED_CHAT_MODELS = [
     "llama-2-chat",
@@ -130,6 +131,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "codegeex4",
     "deepseek-chat",
     "deepseek-coder-instruct",
+    "yi-coder-chat",
 ]
 if VLLM_INSTALLED and vllm.__version__ >= "0.3.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen1.5-chat")
@@ -149,6 +151,12 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.4.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen2-moe-instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("c4ai-command-r-v01")
 
+if VLLM_INSTALLED and vllm.__version__ >= "0.5.1":
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2-chat")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2-chat-0628")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2.5")
+
+
 if VLLM_INSTALLED and vllm.__version__ >= "0.5.3":
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-2-it")
     VLLM_SUPPORTED_CHAT_MODELS.append("mistral-nemo-instruct")
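Note that these gates compare vllm.__version__ as a plain string, which happens to hold for the versions listed but is not version-aware: lexicographically, "0.10.0" sorts below "0.5.1". A short sketch of the pitfall and a packaging-based alternative (an illustration, not what xinference ships):

    from packaging.version import Version

    # String comparison fails once a component reaches two digits:
    assert ("0.10.0" >= "0.5.1") is False   # lexicographic: '1' < '5'

    # Version-aware comparison gets it right:
    assert Version("0.10.0") >= Version("0.5.1")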
xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml
@@ -22,13 +22,12 @@ head:
   resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
   num_mels: 512
   upsample_initial_channel: 512
-  use_template: false
   pre_conv_kernel_size: 13
   post_conv_kernel_size: 13
 quantizer:
   _target_: fish_speech.models.vqgan.modules.fsq.DownsampleFiniteScalarQuantize
   input_dim: 512
-  n_groups: 4
+  n_groups: 8
   n_codebooks: 1
   levels: [8, 5, 5, 5]
-  downsample_factor: [2]
+  downsample_factor: [2, 2]
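The quantizer change doubles both the group count (4 to 8) and the temporal downsampling: assuming each downsample_factor entry is a per-stage stride, as is typical for FSQ downsampling stacks, the overall stride is their product:

    import math

    old_stride = math.prod([2])     # -> 2x temporal downsampling
    new_stride = math.prod([2, 2])  # -> 4x temporal downsampling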
xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml
@@ -4,7 +4,7 @@ defaults:
 
 project: text2semantic_finetune_dual_ar
 max_length: 4096
-pretrained_ckpt_path: checkpoints/fish-speech-1.2-sft
+pretrained_ckpt_path: checkpoints/fish-speech-1.4
 
 # Lightning Trainer
 trainer:
xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json
@@ -72,7 +72,7 @@
   "Put your text here.": "Put your text here.",
   "Reference Audio": "Reference Audio",
   "Reference Text": "Reference Text",
-  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.",
+  "Related code and weights are released under CC BY-NC-SA 4.0 License.": "Related code and weights are released under CC BY-NC-SA 4.0 License.",
   "Remove Selected Data": "Remove Selected Data",
   "Removed path successfully!": "Removed path successfully!",
   "Repetition Penalty": "Repetition Penalty",
xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json
@@ -72,7 +72,7 @@
   "Put your text here.": "Ponga su texto aquí.",
   "Reference Audio": "Audio de Referencia",
   "Reference Text": "Texto de Referencia",
-  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "El código relacionado se publica bajo la Licencia BSD-3-Clause, y los pesos se publican bajo la Licencia CC BY-NC-SA 4.0.",
+  "Related code and weights are released under CC BY-NC-SA 4.0 License.": "El código relacionado y los pesos se publican bajo la Licencia CC BY-NC-SA 4.0.",
   "Remove Selected Data": "Eliminar Datos Seleccionados",
   "Removed path successfully!": "¡Ruta eliminada exitosamente!",
   "Repetition Penalty": "Penalización por Repetición",
xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json
@@ -72,7 +72,7 @@
   "Put your text here.": "ここにテキストを入力してください。",
   "Reference Audio": "リファレンスオーディオ",
   "Reference Text": "リファレンステキスト",
-  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "関連コードはBSD-3-Clauseライセンスの下でリリースされ、重みはCC BY-NC-SA 4.0ライセンスの下でリリースされます。",
+  "Related code and weights are released under CC BY-NC-SA 4.0 License.": "関連コードと重みはCC BY-NC-SA 4.0ライセンスの下でリリースされます。",
   "Remove Selected Data": "選択したデータを削除",
   "Removed path successfully!": "パスの削除に成功しました!",
   "Repetition Penalty": "反復ペナルティ",
xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json
@@ -84,7 +84,7 @@
   "Reference Text": "Texto de Referência",
   "warning": "Aviso",
   "Pre-processing begins...": "O pré-processamento começou!",
-  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "O código relacionado é licenciado sob a Licença BSD-3-Clause, e os pesos sob a Licença CC BY-NC-SA 4.0.",
+  "Related code and weights are released under CC BY-NC-SA 4.0 License.": "O código relacionado e os pesos são licenciados sob a Licença CC BY-NC-SA 4.0.",
   "Remove Selected Data": "Remover Dados Selecionados",
   "Removed path successfully!": "Caminho removido com sucesso!",
   "Repetition Penalty": "Penalidade de Repetição",
xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json
@@ -72,7 +72,7 @@
   "Put your text here.": "在此处输入文本.",
   "Reference Audio": "参考音频",
   "Reference Text": "参考文本",
-  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "相关代码使用 BSD-3-Clause 许可证发布,权重使用 CC BY-NC-SA 4.0 许可证发布.",
+  "Related code and weights are released under CC BY-NC-SA 4.0 License.": "相关代码和权重使用 CC BY-NC-SA 4.0 许可证发布.",
   "Remove Selected Data": "移除选中数据",
   "Removed path successfully!": "移除路径成功!",
   "Repetition Penalty": "重复惩罚",
xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py
@@ -353,7 +353,7 @@ class BaseTransformer(nn.Module):
 
         if "int8" in str(Path(path)):
             logger.info("Using int8 weight-only quantization!")
-            from ...tools.llama.quantize import WeightOnlyInt8QuantHandler
+            from tools.llama.quantize import WeightOnlyInt8QuantHandler
 
             simple_quantizer = WeightOnlyInt8QuantHandler(model)
             model = simple_quantizer.convert_for_runtime()
@@ -363,7 +363,7 @@ class BaseTransformer(nn.Module):
             path_comps = path.name.split("-")
             assert path_comps[-2].startswith("g")
             groupsize = int(path_comps[-2][1:])
-            from ...tools.llama.quantize import WeightOnlyInt4QuantHandler
+            from tools.llama.quantize import WeightOnlyInt4QuantHandler
 
             simple_quantizer = WeightOnlyInt4QuantHandler(model, groupsize)
             model = simple_quantizer.convert_for_runtime()
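Both hunks swap the relative ...tools.llama.quantize import for a top-level tools package, which only resolves when the vendored fish_speech root (the directory containing tools/) is importable. A hedged sketch of what the caller must guarantee; the path used here is hypothetical:

    import sys
    from pathlib import Path

    # Hypothetical: put the fish_speech checkout root on sys.path so that
    # `tools` is importable as a top-level package.
    fish_speech_root = Path("/path/to/xinference/thirdparty/fish_speech")
    if str(fish_speech_root) not in sys.path:
        sys.path.insert(0, str(fish_speech_root))

    from tools.llama.quantize import WeightOnlyInt8QuantHandler  # noqa: E402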
xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py
@@ -1,3 +0,0 @@
-from .lit_module import VQGAN
-
-__all__ = ["VQGAN"]