xinference 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_compat.py +22 -2
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +91 -6
- xinference/client/restful/restful_client.py +39 -0
- xinference/core/model.py +41 -13
- xinference/deploy/cmdline.py +3 -1
- xinference/deploy/test/test_cmdline.py +56 -0
- xinference/isolation.py +24 -0
- xinference/model/audio/__init__.py +12 -0
- xinference/model/audio/core.py +26 -4
- xinference/model/audio/f5tts.py +195 -0
- xinference/model/audio/fish_speech.py +71 -35
- xinference/model/audio/model_spec.json +88 -0
- xinference/model/audio/model_spec_modelscope.json +9 -0
- xinference/model/audio/whisper_mlx.py +208 -0
- xinference/model/embedding/core.py +322 -6
- xinference/model/embedding/model_spec.json +8 -1
- xinference/model/embedding/model_spec_modelscope.json +9 -1
- xinference/model/llm/__init__.py +4 -2
- xinference/model/llm/llm_family.json +479 -53
- xinference/model/llm/llm_family_modelscope.json +423 -17
- xinference/model/llm/mlx/core.py +230 -50
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/chatglm.py +9 -5
- xinference/model/llm/transformers/core.py +1 -0
- xinference/model/llm/transformers/glm_edge_v.py +230 -0
- xinference/model/llm/transformers/utils.py +16 -8
- xinference/model/llm/utils.py +23 -1
- xinference/model/llm/vllm/core.py +89 -2
- xinference/thirdparty/f5_tts/__init__.py +0 -0
- xinference/thirdparty/f5_tts/api.py +166 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
- xinference/thirdparty/f5_tts/eval/README.md +49 -0
- xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
- xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
- xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
- xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
- xinference/thirdparty/f5_tts/infer/README.md +191 -0
- xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
- xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
- xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
- xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
- xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
- xinference/thirdparty/f5_tts/model/__init__.py +10 -0
- xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
- xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
- xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
- xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
- xinference/thirdparty/f5_tts/model/cfm.py +285 -0
- xinference/thirdparty/f5_tts/model/dataset.py +319 -0
- xinference/thirdparty/f5_tts/model/modules.py +658 -0
- xinference/thirdparty/f5_tts/model/trainer.py +366 -0
- xinference/thirdparty/f5_tts/model/utils.py +185 -0
- xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
- xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
- xinference/thirdparty/f5_tts/socket_server.py +159 -0
- xinference/thirdparty/f5_tts/train/README.md +77 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
- xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
- xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
- xinference/thirdparty/f5_tts/train/train.py +75 -0
- xinference/types.py +2 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.2f269bb3.js → main.4eb4ee80.js} +3 -3
- xinference/web/ui/build/static/js/main.4eb4ee80.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8c5eeb02f772d02cbe8b89c05428d0dd41a97866f75f7dc1c2164a67f5a1cf98.json +1 -0
- {xinference-1.0.0.dist-info → xinference-1.1.0.dist-info}/METADATA +39 -18
- {xinference-1.0.0.dist-info → xinference-1.1.0.dist-info}/RECORD +92 -39
- {xinference-1.0.0.dist-info → xinference-1.1.0.dist-info}/WHEEL +1 -1
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
- /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.4eb4ee80.js.LICENSE.txt} +0 -0
- {xinference-1.0.0.dist-info → xinference-1.1.0.dist-info}/LICENSE +0 -0
- {xinference-1.0.0.dist-info → xinference-1.1.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.0.0.dist-info → xinference-1.1.0.dist-info}/top_level.txt +0 -0
|
@@ -21,6 +21,7 @@ from typing import Dict, List, Literal, Optional, Tuple, Union, no_type_check
|
|
|
21
21
|
import numpy as np
|
|
22
22
|
import torch
|
|
23
23
|
|
|
24
|
+
from ..._compat import ROOT_KEY, ErrorWrapper, ValidationError
|
|
24
25
|
from ...device_utils import empty_cache
|
|
25
26
|
from ...types import Embedding, EmbeddingData, EmbeddingUsage
|
|
26
27
|
from ..core import CacheableModelSpec, ModelDescription
|
|
@@ -193,6 +194,29 @@ class EmbeddingModel:
|
|
|
193
194
|
device=self._device,
|
|
194
195
|
model_kwargs=model_kwargs,
|
|
195
196
|
)
|
|
197
|
+
elif (
|
|
198
|
+
self._kwargs.get("hybrid_mode")
|
|
199
|
+
and "m3" in self._model_spec.model_name.lower()
|
|
200
|
+
):
|
|
201
|
+
try:
|
|
202
|
+
from FlagEmbedding import BGEM3FlagModel
|
|
203
|
+
except ImportError:
|
|
204
|
+
error_message = "Failed to import module 'BGEM3FlagModel'"
|
|
205
|
+
installation_guide = [
|
|
206
|
+
"Please make sure 'FlagEmbedding' is installed. ",
|
|
207
|
+
"You can install it by `pip install FlagEmbedding`\n",
|
|
208
|
+
]
|
|
209
|
+
raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
|
|
210
|
+
|
|
211
|
+
if torch_dtype and torch_dtype == torch.float16:
|
|
212
|
+
model_kwargs = {"use_fp16": True}
|
|
213
|
+
else:
|
|
214
|
+
model_kwargs = {}
|
|
215
|
+
self._model = BGEM3FlagModel(
|
|
216
|
+
self._model_path,
|
|
217
|
+
device=self._device,
|
|
218
|
+
**model_kwargs,
|
|
219
|
+
)
|
|
196
220
|
else:
|
|
197
221
|
model_kwargs = {"torch_dtype": torch_dtype} if torch_dtype else None
|
|
198
222
|
self._model = SentenceTransformer(
|
|
@@ -202,11 +226,209 @@ class EmbeddingModel:
|
|
|
202
226
|
trust_remote_code=True,
|
|
203
227
|
)
|
|
204
228
|
|
|
205
|
-
def
|
|
229
|
+
def _fix_langchain_openai_inputs(
|
|
230
|
+
self, sentences: Union[str, List[str], Dict[str, str], List[Dict[str, str]]]
|
|
231
|
+
):
|
|
232
|
+
# Check if sentences is a two-dimensional list of integers
|
|
233
|
+
if (
|
|
234
|
+
isinstance(sentences, list)
|
|
235
|
+
and len(sentences) > 0
|
|
236
|
+
and isinstance(sentences[0], list)
|
|
237
|
+
and len(sentences[0]) > 0
|
|
238
|
+
and isinstance(sentences[0][0], int)
|
|
239
|
+
):
|
|
240
|
+
# List[List[int]] stands for encoded inputs
|
|
241
|
+
import tiktoken
|
|
242
|
+
|
|
243
|
+
enc = tiktoken.get_encoding("cl100k_base")
|
|
244
|
+
lines_decoded = []
|
|
245
|
+
|
|
246
|
+
for line in sentences:
|
|
247
|
+
try:
|
|
248
|
+
# Decode each token into bytes, then join them into a complete string
|
|
249
|
+
output = b"".join(
|
|
250
|
+
enc.decode_single_token_bytes(token) for token in line
|
|
251
|
+
)
|
|
252
|
+
# Convert the byte sequence into a UTF-8 encoded string
|
|
253
|
+
decoded_line = output.decode("utf-8")
|
|
254
|
+
lines_decoded.append(decoded_line)
|
|
255
|
+
except (ValueError, TypeError, UnicodeDecodeError) as e:
|
|
256
|
+
raise ValidationError([ErrorWrapper(e, loc=ROOT_KEY)], self)
|
|
257
|
+
|
|
258
|
+
# Update sentences to be the list of decoded strings
|
|
259
|
+
if len(lines_decoded) == 1:
|
|
260
|
+
sentences = lines_decoded[0]
|
|
261
|
+
else:
|
|
262
|
+
sentences = lines_decoded
|
|
263
|
+
return sentences
|
|
264
|
+
|
|
265
|
+
def create_embedding(
|
|
266
|
+
self,
|
|
267
|
+
sentences: Union[str, List[str]],
|
|
268
|
+
**kwargs,
|
|
269
|
+
):
|
|
270
|
+
sentences = self._fix_langchain_openai_inputs(sentences)
|
|
271
|
+
|
|
206
272
|
from sentence_transformers import SentenceTransformer
|
|
207
273
|
|
|
208
274
|
kwargs.setdefault("normalize_embeddings", True)
|
|
209
275
|
|
|
276
|
+
try:
|
|
277
|
+
from FlagEmbedding import BGEM3FlagModel
|
|
278
|
+
|
|
279
|
+
@no_type_check
|
|
280
|
+
def _encode_bgem3(
|
|
281
|
+
model: Union[SentenceTransformer, BGEM3FlagModel],
|
|
282
|
+
sentences: Union[str, List[str]],
|
|
283
|
+
batch_size: int = 32,
|
|
284
|
+
show_progress_bar: bool = None,
|
|
285
|
+
output_value: str = "sparse_embedding",
|
|
286
|
+
convert_to_numpy: bool = True,
|
|
287
|
+
convert_to_tensor: bool = False,
|
|
288
|
+
device: str = None,
|
|
289
|
+
normalize_embeddings: bool = False,
|
|
290
|
+
**kwargs,
|
|
291
|
+
):
|
|
292
|
+
"""
|
|
293
|
+
Computes sentence embeddings with bge-m3 model
|
|
294
|
+
Nothing special here, just replace sentence-transformer with FlagEmbedding
|
|
295
|
+
TODO: think about how to solve the redundant code of encode method in the future
|
|
296
|
+
|
|
297
|
+
:param sentences: the sentences to embed
|
|
298
|
+
:param batch_size: the batch size used for the computation
|
|
299
|
+
:param show_progress_bar: Output a progress bar when encode sentences
|
|
300
|
+
:param output_value: Default sentence_embedding, to get sentence embeddings. Can be set to token_embeddings to get wordpiece token embeddings. Set to None, to get all output values
|
|
301
|
+
:param convert_to_numpy: If true, the output is a list of numpy vectors. Else, it is a list of pytorch tensors.
|
|
302
|
+
:param convert_to_tensor: If true, you get one large tensor as return. Overwrites any setting from convert_to_numpy
|
|
303
|
+
:param device: Which torch.device to use for the computation
|
|
304
|
+
:param normalize_embeddings: If set to true, returned vectors will have length 1. In that case, the faster dot-product (util.dot_score) instead of cosine similarity can be used.
|
|
305
|
+
|
|
306
|
+
:return:
|
|
307
|
+
By default, a list of tensors is returned. If convert_to_tensor, a stacked tensor is returned. If convert_to_numpy, a numpy matrix is returned.
|
|
308
|
+
"""
|
|
309
|
+
import torch
|
|
310
|
+
from tqdm.autonotebook import trange
|
|
311
|
+
|
|
312
|
+
if show_progress_bar is None:
|
|
313
|
+
show_progress_bar = (
|
|
314
|
+
logger.getEffectiveLevel() == logging.INFO
|
|
315
|
+
or logger.getEffectiveLevel() == logging.DEBUG
|
|
316
|
+
)
|
|
317
|
+
|
|
318
|
+
if convert_to_tensor:
|
|
319
|
+
convert_to_numpy = False
|
|
320
|
+
|
|
321
|
+
if output_value != "sparse_embedding":
|
|
322
|
+
convert_to_tensor = False
|
|
323
|
+
convert_to_numpy = False
|
|
324
|
+
|
|
325
|
+
input_was_string = False
|
|
326
|
+
if isinstance(sentences, str) or not hasattr(
|
|
327
|
+
sentences, "__len__"
|
|
328
|
+
): # Cast an individual sentence to a list with length 1
|
|
329
|
+
sentences = [sentences]
|
|
330
|
+
input_was_string = True
|
|
331
|
+
|
|
332
|
+
if device is None:
|
|
333
|
+
# Same as SentenceTransformer.py
|
|
334
|
+
from sentence_transformers.util import get_device_name
|
|
335
|
+
|
|
336
|
+
device = get_device_name()
|
|
337
|
+
logger.info(f"Use pytorch device_name: {device}")
|
|
338
|
+
|
|
339
|
+
all_embeddings = []
|
|
340
|
+
all_token_nums = 0
|
|
341
|
+
|
|
342
|
+
# The original code does not support other inference engines
|
|
343
|
+
def _text_length(text):
|
|
344
|
+
if isinstance(text, dict): # {key: value} case
|
|
345
|
+
return len(next(iter(text.values())))
|
|
346
|
+
elif not hasattr(text, "__len__"): # Object has no len() method
|
|
347
|
+
return 1
|
|
348
|
+
elif len(text) == 0 or isinstance(
|
|
349
|
+
text[0], int
|
|
350
|
+
): # Empty string or list of ints
|
|
351
|
+
return len(text)
|
|
352
|
+
else:
|
|
353
|
+
return sum(
|
|
354
|
+
[len(t) for t in text]
|
|
355
|
+
) # Sum of length of individual strings
|
|
356
|
+
|
|
357
|
+
length_sorted_idx = np.argsort(
|
|
358
|
+
[-_text_length(sen) for sen in sentences]
|
|
359
|
+
)
|
|
360
|
+
sentences_sorted = [sentences[idx] for idx in length_sorted_idx]
|
|
361
|
+
|
|
362
|
+
for start_index in trange(
|
|
363
|
+
0,
|
|
364
|
+
len(sentences),
|
|
365
|
+
batch_size,
|
|
366
|
+
desc="Batches",
|
|
367
|
+
disable=not show_progress_bar,
|
|
368
|
+
):
|
|
369
|
+
sentences_batch = sentences_sorted[
|
|
370
|
+
start_index : start_index + batch_size
|
|
371
|
+
]
|
|
372
|
+
|
|
373
|
+
with torch.no_grad():
|
|
374
|
+
out_features = model.encode(sentences_batch, **kwargs)
|
|
375
|
+
|
|
376
|
+
if output_value == "token_embeddings":
|
|
377
|
+
embeddings = []
|
|
378
|
+
for token_emb, attention in zip(
|
|
379
|
+
out_features[output_value],
|
|
380
|
+
out_features["attention_mask"],
|
|
381
|
+
):
|
|
382
|
+
last_mask_id = len(attention) - 1
|
|
383
|
+
while (
|
|
384
|
+
last_mask_id > 0
|
|
385
|
+
and attention[last_mask_id].item() == 0
|
|
386
|
+
):
|
|
387
|
+
last_mask_id -= 1
|
|
388
|
+
|
|
389
|
+
embeddings.append(token_emb[0 : last_mask_id + 1])
|
|
390
|
+
elif output_value is None: # Return all outputs
|
|
391
|
+
embeddings = []
|
|
392
|
+
for sent_idx in range(
|
|
393
|
+
len(out_features["sentence_embedding"])
|
|
394
|
+
):
|
|
395
|
+
row = {
|
|
396
|
+
name: out_features[name][sent_idx]
|
|
397
|
+
for name in out_features
|
|
398
|
+
}
|
|
399
|
+
embeddings.append(row)
|
|
400
|
+
# for sparse embedding
|
|
401
|
+
else:
|
|
402
|
+
if kwargs.get("return_sparse"):
|
|
403
|
+
embeddings = out_features["lexical_weights"]
|
|
404
|
+
else:
|
|
405
|
+
embeddings = out_features["dense_vecs"]
|
|
406
|
+
|
|
407
|
+
if convert_to_numpy:
|
|
408
|
+
embeddings = embeddings.cpu()
|
|
409
|
+
|
|
410
|
+
all_embeddings.extend(embeddings)
|
|
411
|
+
|
|
412
|
+
all_embeddings = [
|
|
413
|
+
all_embeddings[idx] for idx in np.argsort(length_sorted_idx)
|
|
414
|
+
]
|
|
415
|
+
|
|
416
|
+
if convert_to_tensor:
|
|
417
|
+
if len(all_embeddings):
|
|
418
|
+
all_embeddings = torch.stack(all_embeddings)
|
|
419
|
+
else:
|
|
420
|
+
all_embeddings = torch.Tensor()
|
|
421
|
+
elif convert_to_numpy:
|
|
422
|
+
all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
|
|
423
|
+
|
|
424
|
+
if input_was_string:
|
|
425
|
+
all_embeddings = all_embeddings[0]
|
|
426
|
+
|
|
427
|
+
return all_embeddings, all_token_nums
|
|
428
|
+
|
|
429
|
+
except ImportError:
|
|
430
|
+
_encode_bgem3 = None
|
|
431
|
+
|
|
210
432
|
# copied from sentence-transformers, and modify it to return tokens num
|
|
211
433
|
@no_type_check
|
|
212
434
|
def encode(
|
|
@@ -323,7 +545,11 @@ class EmbeddingModel:
|
|
|
323
545
|
features.update(extra_features)
|
|
324
546
|
# when batching, the attention mask 1 means there is a token
|
|
325
547
|
# thus we just sum up it to get the total number of tokens
|
|
326
|
-
|
|
548
|
+
if "clip" in self._model_spec.model_name.lower():
|
|
549
|
+
all_token_nums += features["input_ids"].numel()
|
|
550
|
+
all_token_nums += features["pixel_values"].numel()
|
|
551
|
+
else:
|
|
552
|
+
all_token_nums += features["attention_mask"].sum().item()
|
|
327
553
|
|
|
328
554
|
with torch.no_grad():
|
|
329
555
|
out_features = model.forward(features, **kwargs)
|
|
@@ -379,6 +605,10 @@ class EmbeddingModel:
|
|
|
379
605
|
|
|
380
606
|
return all_embeddings, all_token_nums
|
|
381
607
|
|
|
608
|
+
is_bge_m3_flag_model = (
|
|
609
|
+
self._kwargs.get("hybrid_mode")
|
|
610
|
+
and "m3" in self._model_spec.model_name.lower()
|
|
611
|
+
)
|
|
382
612
|
if (
|
|
383
613
|
"gte" in self._model_spec.model_name.lower()
|
|
384
614
|
and "qwen2" in self._model_spec.model_name.lower()
|
|
@@ -390,6 +620,45 @@ class EmbeddingModel:
|
|
|
390
620
|
convert_to_numpy=False,
|
|
391
621
|
**kwargs,
|
|
392
622
|
)
|
|
623
|
+
elif is_bge_m3_flag_model:
|
|
624
|
+
assert _encode_bgem3 is not None
|
|
625
|
+
all_embeddings, all_token_nums = _encode_bgem3(
|
|
626
|
+
self._model, sentences, convert_to_numpy=False, **kwargs
|
|
627
|
+
)
|
|
628
|
+
elif "clip" in self._model_spec.model_name.lower():
|
|
629
|
+
import base64
|
|
630
|
+
import re
|
|
631
|
+
from io import BytesIO
|
|
632
|
+
|
|
633
|
+
from PIL import Image
|
|
634
|
+
|
|
635
|
+
def base64_to_image(base64_str: str) -> Image.Image:
|
|
636
|
+
# base64_data = re.sub("^data:image/.+;base64,", "", base64_str)
|
|
637
|
+
base64_data = base64_str.split(",", 1)[1]
|
|
638
|
+
byte_data = base64.b64decode(base64_data)
|
|
639
|
+
image_data = BytesIO(byte_data)
|
|
640
|
+
img = Image.open(image_data)
|
|
641
|
+
return img
|
|
642
|
+
|
|
643
|
+
objs: list[dict[str, str]] = []
|
|
644
|
+
for item in sentences:
|
|
645
|
+
if isinstance(item, dict):
|
|
646
|
+
if item.get("text") is not None:
|
|
647
|
+
objs.append(item["text"])
|
|
648
|
+
elif item.get("image") is not None:
|
|
649
|
+
if re.match(r"^data:image/.+;base64,", item["image"]):
|
|
650
|
+
image = base64_to_image(item["image"])
|
|
651
|
+
objs.append(image)
|
|
652
|
+
else:
|
|
653
|
+
objs.append(item["image"])
|
|
654
|
+
else:
|
|
655
|
+
logger.error("Please check the input data.")
|
|
656
|
+
all_embeddings, all_token_nums = encode(
|
|
657
|
+
self._model,
|
|
658
|
+
objs,
|
|
659
|
+
convert_to_numpy=False,
|
|
660
|
+
**self._kwargs,
|
|
661
|
+
)
|
|
393
662
|
else:
|
|
394
663
|
all_embeddings, all_token_nums = encode(
|
|
395
664
|
self._model,
|
|
@@ -401,14 +670,29 @@ class EmbeddingModel:
|
|
|
401
670
|
all_embeddings = [all_embeddings]
|
|
402
671
|
embedding_list = []
|
|
403
672
|
for index, data in enumerate(all_embeddings):
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
673
|
+
if kwargs.get("return_sparse") and is_bge_m3_flag_model:
|
|
674
|
+
embedding_list.append(
|
|
675
|
+
EmbeddingData(
|
|
676
|
+
index=index,
|
|
677
|
+
object="embedding",
|
|
678
|
+
embedding={k: float(v) for k, v in data.items()},
|
|
679
|
+
)
|
|
680
|
+
)
|
|
681
|
+
else:
|
|
682
|
+
embedding_list.append(
|
|
683
|
+
EmbeddingData(
|
|
684
|
+
index=index, object="embedding", embedding=data.tolist()
|
|
685
|
+
)
|
|
686
|
+
)
|
|
407
687
|
usage = EmbeddingUsage(
|
|
408
688
|
prompt_tokens=all_token_nums, total_tokens=all_token_nums
|
|
409
689
|
)
|
|
410
690
|
result = Embedding(
|
|
411
|
-
object=
|
|
691
|
+
object=(
|
|
692
|
+
"list" # type: ignore
|
|
693
|
+
if not is_bge_m3_flag_model and not kwargs.get("return_sparse")
|
|
694
|
+
else "dict"
|
|
695
|
+
),
|
|
412
696
|
model=self._model_uid,
|
|
413
697
|
data=embedding_list,
|
|
414
698
|
usage=usage,
|
|
@@ -430,6 +714,38 @@ class EmbeddingModel:
|
|
|
430
714
|
|
|
431
715
|
return result
|
|
432
716
|
|
|
717
|
+
def convert_ids_to_tokens(
|
|
718
|
+
self,
|
|
719
|
+
batch_token_ids: Union[List[Union[int, str]], List[List[Union[int, str]]]],
|
|
720
|
+
**kwargs,
|
|
721
|
+
) -> Union[List[str]]:
|
|
722
|
+
batch_decoded_texts: List[str] = []
|
|
723
|
+
|
|
724
|
+
assert self._model is not None
|
|
725
|
+
|
|
726
|
+
if isinstance(batch_token_ids, (int, str)):
|
|
727
|
+
return self._model.tokenizer.convert_ids_to_tokens(
|
|
728
|
+
[int(str(batch_token_ids))]
|
|
729
|
+
)[0]
|
|
730
|
+
|
|
731
|
+
# check if it's a nested list
|
|
732
|
+
if (
|
|
733
|
+
isinstance(batch_token_ids, list)
|
|
734
|
+
and batch_token_ids
|
|
735
|
+
and isinstance(batch_token_ids[0], list)
|
|
736
|
+
):
|
|
737
|
+
for token_ids in batch_token_ids:
|
|
738
|
+
token_ids = [int(token_id) for token_id in token_ids]
|
|
739
|
+
batch_decoded_texts.append(
|
|
740
|
+
self._model.tokenizer.convert_ids_to_tokens(token_ids)
|
|
741
|
+
)
|
|
742
|
+
else:
|
|
743
|
+
batch_token_ids = [int(token_id) for token_id in batch_token_ids]
|
|
744
|
+
batch_decoded_texts = self._model.tokenizer.convert_ids_to_tokens(
|
|
745
|
+
batch_token_ids
|
|
746
|
+
)
|
|
747
|
+
return batch_decoded_texts
|
|
748
|
+
|
|
433
749
|
|
|
434
750
|
def match_embedding(
|
|
435
751
|
model_name: str,
|
|
@@ -233,7 +233,7 @@
|
|
|
233
233
|
},
|
|
234
234
|
{
|
|
235
235
|
"model_name": "gte-Qwen2",
|
|
236
|
-
"dimensions":
|
|
236
|
+
"dimensions": 3584,
|
|
237
237
|
"max_tokens": 32000,
|
|
238
238
|
"language": ["zh", "en"],
|
|
239
239
|
"model_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
|
|
@@ -245,5 +245,12 @@
|
|
|
245
245
|
"max_tokens": 8192,
|
|
246
246
|
"language": ["zh", "en"],
|
|
247
247
|
"model_id": "jinaai/jina-embeddings-v3"
|
|
248
|
+
},
|
|
249
|
+
{
|
|
250
|
+
"model_name": "jina-clip-v2",
|
|
251
|
+
"dimensions": 1024,
|
|
252
|
+
"max_tokens": 8192,
|
|
253
|
+
"language": ["89 languages supported"],
|
|
254
|
+
"model_id": "jinaai/jina-clip-v2"
|
|
248
255
|
}
|
|
249
256
|
]
|
|
@@ -235,7 +235,7 @@
|
|
|
235
235
|
},
|
|
236
236
|
{
|
|
237
237
|
"model_name": "gte-Qwen2",
|
|
238
|
-
"dimensions":
|
|
238
|
+
"dimensions": 3584,
|
|
239
239
|
"max_tokens": 32000,
|
|
240
240
|
"language": ["zh", "en"],
|
|
241
241
|
"model_id": "iic/gte_Qwen2-7B-instruct",
|
|
@@ -248,5 +248,13 @@
|
|
|
248
248
|
"language": ["zh", "en"],
|
|
249
249
|
"model_id": "jinaai/jina-embeddings-v3",
|
|
250
250
|
"model_hub": "modelscope"
|
|
251
|
+
},
|
|
252
|
+
{
|
|
253
|
+
"model_name": "jina-clip-v2",
|
|
254
|
+
"dimensions": 1024,
|
|
255
|
+
"max_tokens": 8192,
|
|
256
|
+
"language": ["89 languages supported"],
|
|
257
|
+
"model_id": "jinaai/jina-clip-v2",
|
|
258
|
+
"model_hub": "modelscope"
|
|
251
259
|
}
|
|
252
260
|
]
|
xinference/model/llm/__init__.py
CHANGED
|
@@ -131,7 +131,7 @@ def register_custom_model():
|
|
|
131
131
|
def _install():
|
|
132
132
|
from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel
|
|
133
133
|
from .lmdeploy.core import LMDeployChatModel, LMDeployModel
|
|
134
|
-
from .mlx.core import MLXChatModel, MLXModel
|
|
134
|
+
from .mlx.core import MLXChatModel, MLXModel, MLXVisionModel
|
|
135
135
|
from .sglang.core import SGLANGChatModel, SGLANGModel
|
|
136
136
|
from .transformers.chatglm import ChatglmPytorchChatModel
|
|
137
137
|
from .transformers.cogvlm2 import CogVLM2Model
|
|
@@ -143,6 +143,7 @@ def _install():
|
|
|
143
143
|
)
|
|
144
144
|
from .transformers.deepseek_vl import DeepSeekVLChatModel
|
|
145
145
|
from .transformers.glm4v import Glm4VModel
|
|
146
|
+
from .transformers.glm_edge_v import GlmEdgeVModel
|
|
146
147
|
from .transformers.intern_vl import InternVLChatModel
|
|
147
148
|
from .transformers.internlm2 import Internlm2PytorchChatModel
|
|
148
149
|
from .transformers.minicpmv25 import MiniCPMV25Model
|
|
@@ -171,7 +172,7 @@ def _install():
|
|
|
171
172
|
)
|
|
172
173
|
SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
|
|
173
174
|
VLLM_CLASSES.extend([VLLMModel, VLLMChatModel, VLLMVisionModel])
|
|
174
|
-
MLX_CLASSES.extend([MLXModel, MLXChatModel])
|
|
175
|
+
MLX_CLASSES.extend([MLXModel, MLXChatModel, MLXVisionModel])
|
|
175
176
|
LMDEPLOY_CLASSES.extend([LMDeployModel, LMDeployChatModel])
|
|
176
177
|
TRANSFORMERS_CLASSES.extend(
|
|
177
178
|
[
|
|
@@ -193,6 +194,7 @@ def _install():
|
|
|
193
194
|
DeepSeekV2PytorchModel,
|
|
194
195
|
DeepSeekV2PytorchChatModel,
|
|
195
196
|
OptPytorchModel,
|
|
197
|
+
GlmEdgeVModel,
|
|
196
198
|
]
|
|
197
199
|
)
|
|
198
200
|
if OmniLMMModel: # type: ignore
|