xinference 1.6.1__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference has been flagged as potentially problematic.

Files changed (76)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +79 -2
  3. xinference/client/restful/restful_client.py +64 -2
  4. xinference/core/media_interface.py +123 -0
  5. xinference/core/model.py +31 -0
  6. xinference/core/supervisor.py +8 -17
  7. xinference/core/worker.py +5 -17
  8. xinference/deploy/cmdline.py +6 -2
  9. xinference/model/audio/chattts.py +24 -39
  10. xinference/model/audio/cosyvoice.py +18 -30
  11. xinference/model/audio/funasr.py +42 -0
  12. xinference/model/audio/model_spec.json +18 -0
  13. xinference/model/audio/model_spec_modelscope.json +19 -1
  14. xinference/model/audio/utils.py +75 -0
  15. xinference/model/core.py +1 -0
  16. xinference/model/embedding/__init__.py +74 -18
  17. xinference/model/embedding/core.py +98 -597
  18. xinference/model/embedding/embed_family.py +133 -0
  19. xinference/model/embedding/flag/__init__.py +13 -0
  20. xinference/model/embedding/flag/core.py +282 -0
  21. xinference/model/embedding/model_spec.json +24 -0
  22. xinference/model/embedding/model_spec_modelscope.json +24 -0
  23. xinference/model/embedding/sentence_transformers/__init__.py +13 -0
  24. xinference/model/embedding/sentence_transformers/core.py +399 -0
  25. xinference/model/embedding/vllm/__init__.py +0 -0
  26. xinference/model/embedding/vllm/core.py +95 -0
  27. xinference/model/image/model_spec.json +20 -2
  28. xinference/model/image/model_spec_modelscope.json +21 -2
  29. xinference/model/image/stable_diffusion/core.py +144 -53
  30. xinference/model/llm/llama_cpp/memory.py +4 -2
  31. xinference/model/llm/llm_family.json +57 -0
  32. xinference/model/llm/llm_family_modelscope.json +61 -0
  33. xinference/model/llm/sglang/core.py +4 -0
  34. xinference/model/llm/utils.py +11 -0
  35. xinference/model/llm/vllm/core.py +3 -0
  36. xinference/model/rerank/core.py +86 -4
  37. xinference/model/rerank/model_spec.json +24 -0
  38. xinference/model/rerank/model_spec_modelscope.json +24 -0
  39. xinference/model/rerank/utils.py +4 -3
  40. xinference/model/utils.py +38 -1
  41. xinference/model/video/diffusers.py +65 -3
  42. xinference/model/video/model_spec.json +31 -4
  43. xinference/model/video/model_spec_modelscope.json +32 -4
  44. xinference/web/ui/build/asset-manifest.json +6 -6
  45. xinference/web/ui/build/index.html +1 -1
  46. xinference/web/ui/build/static/css/main.013f296b.css +2 -0
  47. xinference/web/ui/build/static/css/main.013f296b.css.map +1 -0
  48. xinference/web/ui/build/static/js/main.8a9e3ba0.js +3 -0
  49. xinference/web/ui/build/static/js/main.8a9e3ba0.js.map +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/34cfbfb7836e136ba3261cfd411cc554bf99ba24b35dcceebeaa4f008cb3c9dc.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/6595880facebca7ceace6f17cf21c3a5a9219a2f52fb0ba9f3cf1131eddbcf6b.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/aa998bc2d9c11853add6b8a2e08f50327f56d8824ccaaec92d6dde1b305f0d85.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/c748246b1d7bcebc16153be69f37e955bb2145526c47dd425aeeff70d3004dbc.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/e31234e95d60a5a7883fbcd70de2475dc1c88c90705df1a530abb68f86f80a51.json +1 -0
  56. xinference/web/ui/src/locales/en.json +18 -7
  57. xinference/web/ui/src/locales/ja.json +224 -0
  58. xinference/web/ui/src/locales/ko.json +224 -0
  59. xinference/web/ui/src/locales/zh.json +18 -7
  60. {xinference-1.6.1.dist-info → xinference-1.7.0.dist-info}/METADATA +9 -8
  61. {xinference-1.6.1.dist-info → xinference-1.7.0.dist-info}/RECORD +66 -57
  62. xinference/web/ui/build/static/css/main.337afe76.css +0 -2
  63. xinference/web/ui/build/static/css/main.337afe76.css.map +0 -1
  64. xinference/web/ui/build/static/js/main.ddf9eaee.js +0 -3
  65. xinference/web/ui/build/static/js/main.ddf9eaee.js.map +0 -1
  66. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +0 -1
  67. xinference/web/ui/node_modules/.cache/babel-loader/12e637ed5fa9ca6491b03892b6949c03afd4960fe36ac25744488e7e1982aa19.json +0 -1
  68. xinference/web/ui/node_modules/.cache/babel-loader/77ac2665a784e99501ae95d32ef5937837a0439a47e965d291b38e99cb619f5b.json +0 -1
  69. xinference/web/ui/node_modules/.cache/babel-loader/d4ed4e82bfe69915999ec83f5feaa4301c75ecc6bdf1c78f2d03e4671ecbefc8.json +0 -1
  70. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +0 -1
  71. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +0 -1
  72. /xinference/web/ui/build/static/js/{main.ddf9eaee.js.LICENSE.txt → main.8a9e3ba0.js.LICENSE.txt} +0 -0
  73. {xinference-1.6.1.dist-info → xinference-1.7.0.dist-info}/WHEEL +0 -0
  74. {xinference-1.6.1.dist-info → xinference-1.7.0.dist-info}/entry_points.txt +0 -0
  75. {xinference-1.6.1.dist-info → xinference-1.7.0.dist-info}/licenses/LICENSE +0 -0
  76. {xinference-1.6.1.dist-info → xinference-1.7.0.dist-info}/top_level.txt +0 -0
xinference/model/audio/funasr.py CHANGED
@@ -44,6 +44,44 @@ class FunASRModel:
     def model_ability(self):
         return self._model_spec.model_ability
 
+    def convert_to_openai_format(self, input_data):
+        if "timestamp" not in input_data:
+            return {"task": "transcribe", "text": input_data["text"]}
+        start_time = input_data["timestamp"][0][0] / 1000
+        end_time = input_data["timestamp"][-1][1] / 1000
+        duration = end_time - start_time
+        word_timestamps = []
+        for ts in input_data["timestamp"]:
+            word_timestamps.append({"start": ts[0] / 1000, "end": ts[1] / 1000})
+        if "sentence_info" not in input_data:
+            return {
+                "task": "transcribe",
+                "text": input_data["text"],
+                "words": word_timestamps,
+                "duration": duration,
+            }
+        output = {
+            "task": "transcribe",
+            "duration": duration,
+            "text": input_data["text"],
+            "words": word_timestamps,
+            "segments": [],
+        }
+        for sentence in input_data["sentence_info"]:
+            seg_start = sentence["start"] / 1000
+            seg_end = sentence["end"] / 1000
+            output["segments"].append(
+                {
+                    "id": len(output["segments"]),
+                    "start": seg_start,
+                    "end": seg_end,
+                    "text": sentence["text"],
+                    "speaker": sentence["spk"],
+                }
+            )
+
+        return output
+
     def load(self):
         try:
             from funasr import AutoModel
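
For reference, the dict that convert_to_openai_format builds mirrors OpenAI's verbose_json transcription schema; the concrete values below are illustrative, not taken from the release:

    # Illustrative shape of the result returned by convert_to_openai_format when the
    # FunASR output carries "timestamp" and "sentence_info" (values are made up).
    example_verbose_json = {
        "task": "transcribe",
        "duration": 3.42,
        "text": "hello world",
        "words": [{"start": 0.12, "end": 0.48}, {"start": 0.52, "end": 0.95}],
        "segments": [
            {"id": 0, "start": 0.12, "end": 1.9, "text": "hello world", "speaker": 0}
        ],
    }
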
@@ -103,6 +141,10 @@ class FunASRModel:
 
         if response_format == "json":
             return {"text": text}
+        elif response_format == "verbose_json":
+            verbose = result[0]
+            verbose["text"] = text
+            return self.convert_to_openai_format(verbose)
         else:
             raise ValueError(f"Unsupported response format: {response_format}")
 
xinference/model/audio/model_spec.json CHANGED
@@ -230,6 +230,7 @@
             "punc_model": "ct-punc"
         },
         "default_transcription_config": {
+            "hotword": "",
             "batch_size_s": 300
         }
     },
@@ -255,11 +256,28 @@
         "model_revision": "36abd64af4392fe02bf76453bc86c081cf1ca6da",
         "model_ability": ["audio2text"],
         "multilingual": false,
+        "default_model_config": {
+            "vad_model": "fsmn-vad",
+            "punc_model": "ct-punc",
+            "spk_model":"cam++"
+        },
+        "default_transcription_config": {
+            "batch_size_s": 300
+        }
+    },
+    {
+        "model_name": "seaco-paraformer-zh",
+        "model_family": "funasr",
+        "model_id": "JunHowie/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
+        "model_revision": "42e6be00854cf8de0f40002794f99df2a444fa97",
+        "model_ability": ["audio2text"],
+        "multilingual": false,
         "default_model_config": {
             "vad_model": "fsmn-vad",
             "punc_model": "ct-punc"
         },
         "default_transcription_config": {
+            "hotword": "",
             "batch_size_s": 300
         }
     },
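
The new seaco-paraformer-zh entry (and the hotword/spk_model defaults above) can be exercised through the regular client API. A minimal sketch, assuming a running local Xinference server and a sample.wav file (both assumptions, not part of the diff):

    # Hypothetical usage sketch: launch the newly added model and request the
    # verbose_json format introduced in funasr.py above.
    from xinference.client import Client

    client = Client("http://localhost:9997")
    model_uid = client.launch_model(
        model_name="seaco-paraformer-zh", model_type="audio"
    )
    model = client.get_model(model_uid)

    with open("sample.wav", "rb") as f:
        result = model.transcriptions(f.read(), response_format="verbose_json")
    # "hotword" defaults to "" in default_transcription_config and can be overridden
    # server-side to bias SeACo-Paraformer toward domain phrases.
    print(result["text"])
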
xinference/model/audio/model_spec_modelscope.json CHANGED
@@ -106,12 +106,30 @@
         "multilingual": false,
         "default_model_config": {
             "vad_model": "fsmn-vad",
-            "punc_model": "ct-punc"
+            "punc_model": "ct-punc",
+            "spk_model":"cam++"
         },
         "default_transcription_config": {
             "batch_size_s": 300
         }
     },
+    {
+        "model_name": "seaco-paraformer-zh",
+        "model_family": "funasr",
+        "model_hub": "modelscope",
+        "model_id": "iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
+        "model_revision": "master",
+        "model_ability": ["audio2text"],
+        "multilingual": false,
+        "default_model_config": {
+            "vad_model": "fsmn-vad",
+            "punc_model": "ct-punc"
+        },
+        "default_transcription_config": {
+            "hotword": "",
+            "batch_size_s": 300
+        }
+    },
     {
         "model_name": "ChatTTS",
         "model_family": "ChatTTS",
xinference/model/audio/utils.py CHANGED
@@ -13,16 +13,30 @@
 # limitations under the License.
 
 import io
+import logging
+import types
+import wave
+from collections.abc import Callable
 
 import numpy as np
+import torch
 
 from .core import AudioModelFamilyV1
 
+logger = logging.getLogger(__name__)
+
 
 def get_model_version(audio_model: AudioModelFamilyV1) -> str:
     return audio_model.model_name
 
 
+def _extract_pcm_from_wav_bytes(wav_bytes):
+    with io.BytesIO(wav_bytes) as wav_io:
+        with wave.open(wav_io, "rb") as wav_file:
+            num_frames = wav_file.getnframes()
+            return wav_file.readframes(num_frames)
+
+
 def ensure_sample_rate(
     audio: np.ndarray, old_sample_rate: int, sample_rate: int
 ) -> np.ndarray:
@@ -48,3 +62,64 @@ def ensure_sample_rate(
         audio, sr = sf.read(buffer, dtype="float32")
 
     return audio
+
+
+def audio_stream_generator(
+    response_format: str,
+    sample_rate: int,
+    output_generator: types.GeneratorType,
+    output_chunk_transformer: Callable,
+):
+    import torch
+    import torchaudio
+
+    response_pcm = response_format.lower() == "pcm"
+    with io.BytesIO() as out:
+        if response_pcm:
+            logger.info(
+                f"PCM stream output, num_channels: 1, sample_rate: {sample_rate}"
+            )
+            writer = torchaudio.io.StreamWriter(out, format="wav")
+            writer.add_audio_stream(
+                sample_rate=sample_rate, num_channels=1, format="s16"
+            )
+        else:
+            writer = torchaudio.io.StreamWriter(out, format=response_format)
+            writer.add_audio_stream(sample_rate=sample_rate, num_channels=1)
+        strip_header = True
+        last_pos = 0
+        with writer.open():
+            for chunk in output_generator:
+                trans_chunk = output_chunk_transformer(chunk)
+                if response_pcm:
+                    trans_chunk = trans_chunk.to(torch.float32)
+                    trans_chunk = (
+                        (trans_chunk * 32767).clamp(-32768, 32767).to(torch.int16)
+                    )
+                writer.write_audio_chunk(0, trans_chunk)
+                new_last_pos = out.tell()
+                if new_last_pos != last_pos:
+                    out.seek(last_pos)
+                    encoded_bytes = out.read()
+                    if response_pcm and strip_header:
+                        # http://soundfile.sapp.org/doc/WaveFormat
+                        yield _extract_pcm_from_wav_bytes(encoded_bytes)
+                        strip_header = False
+                    else:
+                        yield encoded_bytes
+                    last_pos = new_last_pos
+
+
+def audio_to_bytes(response_format: str, sample_rate: int, tensor: "torch.Tensor"):
+    import torchaudio
+
+    response_pcm = response_format.lower() == "pcm"
+    with io.BytesIO() as out:
+        if response_pcm:
+            logger.info(f"PCM output, num_channels: 1, sample_rate: {sample_rate}")
+            torchaudio.save(out, tensor, sample_rate, format="wav", encoding="PCM_S")
+            # http://soundfile.sapp.org/doc/WaveFormat
+            return _extract_pcm_from_wav_bytes(out.getvalue())
+        else:
+            torchaudio.save(out, tensor, sample_rate, format=response_format)
+            return out.getvalue()
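
audio_stream_generator incrementally encodes whatever a TTS model yields and strips the WAV header once when raw PCM is requested. The toy generator and transformer below are stand-ins for a real model's streaming output (assumptions, not release code); they only illustrate how the helpers above are driven:

    # Hypothetical driver for the helpers above; requires torch and torchaudio
    # (with the ffmpeg-backed StreamWriter) at runtime.
    import torch

    from xinference.model.audio.utils import audio_stream_generator, audio_to_bytes


    def fake_tts_chunks():
        # Two 0.1 s chunks of silence at 24 kHz, standing in for model output.
        for _ in range(2):
            yield torch.zeros(2400)


    for encoded in audio_stream_generator(
        response_format="pcm",
        sample_rate=24000,
        output_generator=fake_tts_chunks(),
        # output_chunk_transformer reshapes each chunk to (frames, channels)
        output_chunk_transformer=lambda chunk: chunk.reshape(-1, 1),
    ):
        print(len(encoded))  # raw 16-bit PCM bytes, ready to append to an HTTP stream

    # Non-streaming counterpart: encode a whole (channels, frames) waveform at once.
    wav_bytes = audio_to_bytes("wav", 24000, torch.zeros(1, 24000))
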
xinference/model/core.py CHANGED
@@ -97,6 +97,7 @@ def create_model_instance(
         devices,
         model_uid,
         model_name,
+        model_engine,
         download_hub,
         model_path,
         **kwargs,
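
The extra model_engine argument carries the embedding-engine choice from the API layer down to model creation. A hedged sketch of the client-side call: launch_model already takes model_engine for LLMs, and this release appears to extend the same selector to embedding models (the model name and server address are assumptions):

    # Hypothetical launch of an embedding model on an explicit engine; "vllm" is one
    # of the engines registered in the embedding hunks below and must be installed.
    from xinference.client import Client

    client = Client("http://localhost:9997")
    model_uid = client.launch_model(
        model_name="bge-large-zh-v1.5",
        model_type="embedding",
        model_engine="vllm",  # or "sentence_transformers" / "flag"
    )
    embedding = client.get_model(model_uid).create_embedding("hello")
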
xinference/model/embedding/__init__.py CHANGED
@@ -16,7 +16,7 @@ import codecs
 import json
 import os
 import warnings
-from typing import Any, Dict
+from typing import Any, Dict, List
 
 from .core import (
     EMBEDDING_MODEL_DESCRIPTIONS,
@@ -32,9 +32,15 @@ from .custom import (
     register_embedding,
     unregister_embedding,
 )
-
-BUILTIN_EMBEDDING_MODELS: Dict[str, Any] = {}
-MODELSCOPE_EMBEDDING_MODELS: Dict[str, Any] = {}
+from .embed_family import (
+    BUILTIN_EMBEDDING_MODELS,
+    EMBEDDING_ENGINES,
+    FLAG_EMBEDDER_CLASSES,
+    MODELSCOPE_EMBEDDING_MODELS,
+    SENTENCE_TRANSFORMER_CLASSES,
+    SUPPORTED_ENGINES,
+    VLLM_CLASSES,
+)
 
 
 def register_custom_model():
@@ -55,12 +61,56 @@ def register_custom_model():
                 warnings.warn(f"{user_defined_embedding_dir}/{f} has error, {e}")
 
 
+def generate_engine_config_by_model_name(model_spec: "EmbeddingModelSpec"):
+    model_name = model_spec.model_name
+    engines: Dict[str, List[Dict[str, Any]]] = EMBEDDING_ENGINES.get(
+        model_name, {}
+    )  # structure for engine query
+    for engine in SUPPORTED_ENGINES:
+        CLASSES = SUPPORTED_ENGINES[engine]
+        for cls in CLASSES:
+            # Every engine needs to implement match method
+            if cls.match(model_spec):
+                # we only match the first class for an engine
+                engines[engine] = [
+                    {
+                        "model_name": model_name,
+                        "embedding_class": cls,
+                    }
+                ]
+                break
+    EMBEDDING_ENGINES[model_name] = engines
+
+
+# will be called in xinference/model/__init__.py
 def _install():
-    load_model_family_from_json("model_spec.json", BUILTIN_EMBEDDING_MODELS)
-    load_model_family_from_json(
-        "model_spec_modelscope.json", MODELSCOPE_EMBEDDING_MODELS
+    _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
+    _model_spec_modelscope_json = os.path.join(
+        os.path.dirname(__file__), "model_spec_modelscope.json"
+    )
+    ################### HuggingFace Model List Info Init ###################
+    BUILTIN_EMBEDDING_MODELS.update(
+        dict(
+            (spec["model_name"], EmbeddingModelSpec(**spec))
+            for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
+        )
+    )
+    for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items():
+        MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
+
+    ################### ModelScope Model List Info Init ###################
+    MODELSCOPE_EMBEDDING_MODELS.update(
+        dict(
+            (spec["model_name"], EmbeddingModelSpec(**spec))
+            for spec in json.load(
+                codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
+            )
+        )
     )
+    for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items():
+        MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 
+    # TODO: consider support more download hub in future...
     # register model description after recording model revision
     for model_spec_info in [BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS]:
         for model_name, model_spec in model_spec_info.items():
@@ -77,16 +127,22 @@ def _install():
             generate_embedding_description(ud_embedding)
         )
 
+    from .flag.core import FlagEmbeddingModel
+    from .sentence_transformers.core import SentenceTransformerEmbeddingModel
+    from .vllm.core import VLLMEmbeddingModel
 
-def load_model_family_from_json(json_filename, target_families):
-    json_path = os.path.join(os.path.dirname(__file__), json_filename)
-    target_families.update(
-        dict(
-            (spec["model_name"], EmbeddingModelSpec(**spec))
-            for spec in json.load(codecs.open(json_path, "r", encoding="utf-8"))
-        )
-    )
-    for model_name, model_spec in target_families.items():
-        MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
+    SENTENCE_TRANSFORMER_CLASSES.extend([SentenceTransformerEmbeddingModel])
+    FLAG_EMBEDDER_CLASSES.extend([FlagEmbeddingModel])
+    VLLM_CLASSES.extend([VLLMEmbeddingModel])
+
+    SUPPORTED_ENGINES["sentence_transformers"] = SENTENCE_TRANSFORMER_CLASSES
+    SUPPORTED_ENGINES["flag"] = FLAG_EMBEDDER_CLASSES
+    SUPPORTED_ENGINES["vllm"] = VLLM_CLASSES
+
+    # Init embedding engine
+    for model_infos in [BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS]:
+        for model_spec in model_infos.values():
+            generate_engine_config_by_model_name(model_spec)
 
-    del json_path
+    del _model_spec_json
+    del _model_spec_modelscope_json
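
After _install() runs, EMBEDDING_ENGINES maps each built-in model name to at most one implementation class per engine. A small lookup sketch; the helper itself is illustrative and not part of this release:

    # Hypothetical consumer of the registry populated by
    # generate_engine_config_by_model_name above.
    from xinference.model.embedding.embed_family import EMBEDDING_ENGINES


    def resolve_embedding_class(model_name: str, engine: str):
        engines = EMBEDDING_ENGINES.get(model_name, {})
        if engine not in engines:
            raise ValueError(f"Model {model_name} cannot run on engine {engine}")
        # Each engine key holds a one-element list of
        # {"model_name": ..., "embedding_class": ...} records.
        return engines[engine][0]["embedding_class"]
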