xinference 1.6.0__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/client/restful/restful_client.py +1 -1
- xinference/conftest.py +0 -7
- xinference/core/media_interface.py +9 -8
- xinference/core/model.py +13 -6
- xinference/core/scheduler.py +1 -10
- xinference/core/worker.py +0 -10
- xinference/model/audio/model_spec.json +53 -1
- xinference/model/audio/model_spec_modelscope.json +57 -1
- xinference/model/embedding/core.py +19 -11
- xinference/model/image/model_spec.json +10 -1
- xinference/model/image/model_spec_modelscope.json +20 -0
- xinference/model/llm/__init__.py +6 -54
- xinference/model/llm/core.py +19 -5
- xinference/model/llm/llama_cpp/core.py +59 -3
- xinference/model/llm/llama_cpp/memory.py +455 -0
- xinference/model/llm/llm_family.json +185 -397
- xinference/model/llm/llm_family.py +88 -16
- xinference/model/llm/llm_family_modelscope.json +199 -421
- xinference/model/llm/llm_family_openmind_hub.json +0 -34
- xinference/model/llm/sglang/core.py +4 -0
- xinference/model/llm/transformers/__init__.py +27 -6
- xinference/model/llm/transformers/chatglm.py +4 -2
- xinference/model/llm/transformers/core.py +49 -28
- xinference/model/llm/transformers/deepseek_v2.py +6 -49
- xinference/model/llm/transformers/gemma3.py +119 -164
- xinference/{thirdparty/omnilmm/train → model/llm/transformers/multimodal}/__init__.py +1 -1
- xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
- xinference/model/llm/transformers/multimodal/core.py +205 -0
- xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
- xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
- xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
- xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
- xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
- xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
- xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
- xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
- xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
- xinference/model/llm/transformers/opt.py +4 -2
- xinference/model/llm/transformers/utils.py +6 -37
- xinference/model/llm/vllm/core.py +4 -0
- xinference/model/rerank/core.py +7 -1
- xinference/model/rerank/utils.py +17 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.ddf9eaee.js +3 -0
- xinference/web/ui/build/static/js/main.ddf9eaee.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e637ed5fa9ca6491b03892b6949c03afd4960fe36ac25744488e7e1982aa19.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/77ac2665a784e99501ae95d32ef5937837a0439a47e965d291b38e99cb619f5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d4ed4e82bfe69915999ec83f5feaa4301c75ecc6bdf1c78f2d03e4671ecbefc8.json +1 -0
- xinference/web/ui/src/locales/en.json +3 -1
- xinference/web/ui/src/locales/zh.json +3 -1
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/METADATA +16 -14
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/RECORD +60 -76
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/WHEEL +1 -1
- xinference/model/llm/transformers/cogvlm2.py +0 -442
- xinference/model/llm/transformers/cogvlm2_video.py +0 -333
- xinference/model/llm/transformers/deepseek_vl.py +0 -280
- xinference/model/llm/transformers/glm_edge_v.py +0 -213
- xinference/model/llm/transformers/intern_vl.py +0 -526
- xinference/model/llm/transformers/internlm2.py +0 -94
- xinference/model/llm/transformers/minicpmv25.py +0 -193
- xinference/model/llm/transformers/omnilmm.py +0 -132
- xinference/model/llm/transformers/qwen2_audio.py +0 -179
- xinference/model/llm/transformers/qwen_vl.py +0 -360
- xinference/thirdparty/omnilmm/LICENSE +0 -201
- xinference/thirdparty/omnilmm/__init__.py +0 -0
- xinference/thirdparty/omnilmm/chat.py +0 -218
- xinference/thirdparty/omnilmm/constants.py +0 -4
- xinference/thirdparty/omnilmm/conversation.py +0 -332
- xinference/thirdparty/omnilmm/model/__init__.py +0 -1
- xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
- xinference/thirdparty/omnilmm/model/resampler.py +0 -166
- xinference/thirdparty/omnilmm/model/utils.py +0 -578
- xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
- xinference/thirdparty/omnilmm/utils.py +0 -134
- xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
- xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
- /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.ddf9eaee.js.LICENSE.txt} +0 -0
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/top_level.txt +0 -0
xinference/thirdparty/omnilmm/train/train_utils.py
@@ -1,150 +0,0 @@
-import copy
-import warnings
-from typing import Dict, Sequence
-
-import numpy as np
-import transformers
-
-IGNORE_INDEX = -100
-DEFAULT_IMAGE_TOKEN = "<image>"
-DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"
-DEFAULT_IM_START_TOKEN = "<im_start>"
-DEFAULT_IM_END_TOKEN = "<im_end>"
-
-
-def _tokenize_fn(
-    strings: Sequence[str], tokenizer: transformers.PreTrainedTokenizer
-) -> Dict:
-    """Tokenize a list of strings."""
-    tokenized_list = [
-        tokenizer(
-            text,
-            return_tensors="pt",
-            padding="longest",
-            max_length=tokenizer.model_max_length,
-            truncation=True,
-        )
-        for text in strings
-    ]
-    input_ids = labels = [tokenized.input_ids[0] for tokenized in tokenized_list]
-    input_ids_lens = labels_lens = [
-        tokenized.input_ids.ne(tokenizer.pad_token_id).sum().item()
-        for tokenized in tokenized_list
-    ]
-    return dict(
-        input_ids=input_ids,
-        labels=labels,
-        input_ids_lens=input_ids_lens,
-        labels_lens=labels_lens,
-    )
-
-
-def omni_preprocess(
-    sources, tokenizer: transformers.PreTrainedTokenizer, generation=False
-):
-    system_content = "You are an artificial intelligence assistant, which gives helpful, detailed, and polite answers to the human's questions."
-    ignore_index = -100
-
-    response_template = "\n<|assistant|>\n"
-    instruction_template = "\n<|user|>\n"
-    response_token_ids = tokenizer.encode(response_template, add_special_tokens=False)
-    instruction_token_ids = tokenizer.encode(
-        instruction_template, add_special_tokens=False
-    )
-
-    batch_input_ids = []
-    batch_labels = []
-    for i in range(len(sources)):
-        new_source = []
-        prev_role = "unexpect"
-        for conv_turn in sources[i]:
-            role = conv_turn["from"] if "from" in conv_turn else conv_turn["role"]
-            content = (
-                conv_turn["value"] if "value" in conv_turn else conv_turn["content"]
-            )
-
-            role = "user" if role == "human" else role
-            role = "assistant" if role == "gpt" else role
-
-            assert role in ["user", "assistant"]
-            assert role != prev_role, f"role={role}, prev_role={prev_role}"
-            prev_role = role
-
-            new_turn = {"role": role, "content": content}
-            new_source.append(new_turn)
-        if new_source[0]["role"] != "system":
-            new_source.insert(0, {"role": "system", "content": system_content})
-
-        # TODO: this automatically add '\n' to the end
-        res_text = tokenizer.apply_chat_template(
-            new_source, tokenize=False, add_generation_prompt=generation
-        )
-        if not generation:
-            res_text = res_text.strip()
-
-        conversations_tokenized = _tokenize_fn([res_text], tokenizer)
-        res_input_ids = conversations_tokenized["input_ids"][0]
-
-        # since labels and input_ids are reference towards the same object
-        res_labels = copy.deepcopy(conversations_tokenized["labels"][0])
-
-        response_token_ids_idxs = []
-        human_token_ids_idxs = []
-
-        for assistant_idx in np.where(res_labels == response_token_ids[0])[0]:
-            # find the indexes of the start of a response.
-            if (
-                response_token_ids
-                == res_labels[
-                    assistant_idx : assistant_idx + len(response_token_ids)
-                ].tolist()
-            ):
-                response_token_ids_idxs.append(assistant_idx + len(response_token_ids))
-
-        if len(response_token_ids_idxs) == 0:
-            warnings.warn(
-                f"Could not find response key `{response_template}` in the "
-                f"following instance: @===>{tokenizer.decode(res_input_ids)}<===@ "
-                f"Raw text is @===>{res_text}<===@"
-                f"Raw source is @===>{new_source}<===@"
-                f"This instance will be ignored in loss calculation. "
-                f"Note, if this happens often, consider increasing the `max_seq_length`."
-            )
-            res_labels[:] = ignore_index
-
-        human_token_ids = instruction_token_ids
-        for human_idx in np.where(res_labels == human_token_ids[0])[0]:
-            # find the indexes of the start of a human answer.
-            if (
-                human_token_ids
-                == res_labels[human_idx : human_idx + len(human_token_ids)].tolist()
-            ):
-                human_token_ids_idxs.append(human_idx)
-
-        if len(human_token_ids_idxs) == 0:
-            warnings.warn(
-                f"Could not find instruction key `{instruction_template}` in the "
-                f"following instance: @===>{tokenizer.decode(res_input_ids)}<===@ "
-                f"Raw text is @===>{res_text}<===@"
-                f"Raw source is @===>{new_source}<===@"
-                f"This instance will be ignored in loss calculation. "
-                f"Note, if this happens often, consider increasing the `max_seq_length`."
-            )
-            res_labels[:] = ignore_index
-
-        for idx, (start, end) in enumerate(
-            zip(human_token_ids_idxs, response_token_ids_idxs)
-        ):
-            # Make pytorch loss function ignore all non response tokens
-            if idx != 0:
-                res_labels[start:end] = ignore_index
-            else:
-                res_labels[:end] = ignore_index
-
-        if len(response_token_ids_idxs) < len(human_token_ids_idxs):
-            res_labels[human_token_ids_idxs[-1] :] = ignore_index
-
-        batch_input_ids.append(res_input_ids)
-        batch_labels.append(res_labels)
-
-    return dict(input_ids=batch_input_ids, labels=batch_labels)
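For readers skimming the removed train_utils.py above: its core job was building supervised-fine-tuning labels in which everything except assistant responses is masked to -100 (IGNORE_INDEX), by locating the instruction/response marker positions in the tokenized text. Below is a minimal, self-contained sketch of that masking idea; the helper name and the toy token IDs are made up for illustration and are not part of xinference.

# Toy illustration of the label-masking scheme in the removed
# omni_preprocess: every token outside an assistant response is set to
# IGNORE_INDEX (-100) so the loss only covers assistant tokens.
# All token IDs below are invented for the example.
IGNORE_INDEX = -100

def mask_non_response(input_ids, response_marker, instruction_marker):
    """Return labels where only tokens after a response marker are kept."""
    labels = list(input_ids)
    n = len(input_ids)

    def find_starts(marker):
        return [
            i for i in range(n - len(marker) + 1)
            if input_ids[i : i + len(marker)] == marker
        ]

    # Responses begin right after the response marker; instructions begin
    # at the instruction marker (mirrors the removed implementation).
    response_idxs = [i + len(response_marker) for i in find_starts(response_marker)]
    human_idxs = find_starts(instruction_marker)

    for idx, (start, end) in enumerate(zip(human_idxs, response_idxs)):
        if idx == 0:
            labels[:end] = [IGNORE_INDEX] * end  # mask up to the first response
        else:
            labels[start:end] = [IGNORE_INDEX] * (end - start)
    # A trailing user turn with no answer is masked as well.
    if len(response_idxs) < len(human_idxs):
        labels[human_idxs[-1]:] = [IGNORE_INDEX] * (n - human_idxs[-1])
    return labels

# Pretend "\n<|user|>\n" encodes to [1, 2] and "\n<|assistant|>\n" to [1, 3].
ids = [9, 1, 2, 5, 5, 1, 3, 7, 7, 1, 2, 6]
print(mask_non_response(ids, [1, 3], [1, 2]))
# -> [-100]*7 + [7, 7] + [-100]*3: only the assistant tokens keep their IDs.

Masking the prompt this way makes the cross-entropy loss count only assistant tokens, which is what the zip over instruction/response positions in the removed code implements.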
xinference/thirdparty/omnilmm/utils.py
@@ -1,134 +0,0 @@
-import logging
-import logging.handlers
-import os
-import sys
-
-import requests
-
-from .constants import LOGDIR
-
-server_error_msg = (
-    "**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
-)
-moderation_msg = (
-    "YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES. PLEASE TRY AGAIN."
-)
-
-handler = None
-
-
-def build_logger(logger_name, logger_filename):
-    global handler
-
-    formatter = logging.Formatter(
-        fmt="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
-        datefmt="%Y-%m-%d %H:%M:%S",
-    )
-
-    # Set the format of root handlers
-    if not logging.getLogger().handlers:
-        logging.basicConfig(level=logging.INFO)
-    logging.getLogger().handlers[0].setFormatter(formatter)
-
-    # Redirect stdout and stderr to loggers
-    stdout_logger = logging.getLogger("stdout")
-    stdout_logger.setLevel(logging.INFO)
-    sl = StreamToLogger(stdout_logger, logging.INFO)
-    sys.stdout = sl
-
-    stderr_logger = logging.getLogger("stderr")
-    stderr_logger.setLevel(logging.ERROR)
-    sl = StreamToLogger(stderr_logger, logging.ERROR)
-    sys.stderr = sl
-
-    # Get logger
-    logger = logging.getLogger(logger_name)
-    logger.setLevel(logging.INFO)
-
-    # Add a file handler for all loggers
-    if handler is None:
-        os.makedirs(LOGDIR, exist_ok=True)
-        filename = os.path.join(LOGDIR, logger_filename)
-        handler = logging.handlers.TimedRotatingFileHandler(
-            filename, when="D", utc=True
-        )
-        handler.setFormatter(formatter)
-
-        for name, item in logging.root.manager.loggerDict.items():
-            if isinstance(item, logging.Logger):
-                item.addHandler(handler)
-
-    return logger
-
-
-class StreamToLogger(object):
-    """
-    Fake file-like stream object that redirects writes to a logger instance.
-    """
-
-    def __init__(self, logger, log_level=logging.INFO):
-        self.terminal = sys.stdout
-        self.logger = logger
-        self.log_level = log_level
-        self.linebuf = ""
-
-    def __getattr__(self, attr):
-        return getattr(self.terminal, attr)
-
-    def write(self, buf):
-        temp_linebuf = self.linebuf + buf
-        self.linebuf = ""
-        for line in temp_linebuf.splitlines(True):
-            # From the io.TextIOWrapper docs:
-            # On output, if newline is None, any '\n' characters written
-            # are translated to the system default line separator.
-            # By default sys.stdout.write() expects '\n' newlines and then
-            # translates them so this is still cross platform.
-            if line[-1] == "\n":
-                self.logger.log(self.log_level, line.rstrip())
-            else:
-                self.linebuf += line
-
-    def flush(self):
-        if self.linebuf != "":
-            self.logger.log(self.log_level, self.linebuf.rstrip())
-            self.linebuf = ""
-
-
-def disable_torch_init():
-    """
-    Disable the redundant torch default initialization to accelerate model creation.
-    """
-    import torch
-
-    setattr(torch.nn.Linear, "reset_parameters", lambda self: None)
-    setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None)
-
-
-def violates_moderation(text):
-    """
-    Check whether the text violates OpenAI moderation API.
-    """
-    url = "https://api.openai.com/v1/moderations"
-    headers = {
-        "Content-Type": "application/json",
-        "Authorization": "Bearer " + os.environ["OPENAI_API_KEY"],
-    }
-    text = text.replace("\n", "")
-    data = "{" + '"input": ' + f'"{text}"' + "}"
-    data = data.encode("utf-8")
-    try:
-        ret = requests.post(url, headers=headers, data=data, timeout=5)
-        flagged = ret.json()["results"][0]["flagged"]
-    except requests.exceptions.RequestException:
-        flagged = False
-    except KeyError:
-        flagged = False
-
-    return flagged
-
-
-def pretty_print_semaphore(semaphore):
-    if semaphore is None:
-        return "None"
-    return f"Semaphore(value={semaphore._value}, locked={semaphore.locked()})"
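The removed utils.py is mostly logging plumbing: build_logger installs a shared TimedRotatingFileHandler and replaces sys.stdout/sys.stderr with StreamToLogger, a file-like shim that buffers partial writes and logs each complete line. Below is a minimal, self-contained sketch of that pattern; the class name and setup are illustrative, not the xinference API, and the file-handler/LOGDIR parts are omitted.

import io
import logging
import sys

class StreamShim(io.TextIOBase):
    """Line-buffering stand-in for sys.stdout that forwards complete lines
    to a logger, mirroring the removed StreamToLogger."""

    def __init__(self, logger, level=logging.INFO):
        self.logger = logger
        self.level = level
        self.linebuf = ""

    def write(self, buf):
        self.linebuf += buf
        # Emit every complete line; keep any trailing partial line buffered.
        *complete, self.linebuf = self.linebuf.split("\n")
        for line in complete:
            self.logger.log(self.level, line)
        return len(buf)

    def flush(self):
        if self.linebuf:
            self.logger.log(self.level, self.linebuf)
            self.linebuf = ""

# Same format string as the removed build_logger.
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    level=logging.INFO,
)
sys.stdout = StreamShim(logging.getLogger("stdout"))
print("hello")  # logged as: <timestamp> | INFO | stdout | hello

Note that logging's default StreamHandler writes to the original sys.stderr, so redirecting stdout through the shim does not recurse; the removed build_logger relied on the same property when it swapped both streams.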