xinference 0.9.4__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +47 -18
- xinference/api/oauth2/types.py +1 -0
- xinference/api/restful_api.py +34 -7
- xinference/client/oscar/actor_client.py +4 -3
- xinference/client/restful/restful_client.py +20 -4
- xinference/conftest.py +13 -2
- xinference/core/supervisor.py +48 -1
- xinference/core/worker.py +139 -20
- xinference/deploy/cmdline.py +119 -20
- xinference/model/embedding/core.py +1 -2
- xinference/model/llm/__init__.py +4 -6
- xinference/model/llm/ggml/llamacpp.py +2 -10
- xinference/model/llm/llm_family.json +877 -13
- xinference/model/llm/llm_family.py +15 -0
- xinference/model/llm/llm_family_modelscope.json +571 -0
- xinference/model/llm/pytorch/chatglm.py +2 -0
- xinference/model/llm/pytorch/core.py +22 -26
- xinference/model/llm/pytorch/deepseek_vl.py +232 -0
- xinference/model/llm/pytorch/internlm2.py +2 -0
- xinference/model/llm/pytorch/omnilmm.py +153 -0
- xinference/model/llm/pytorch/qwen_vl.py +2 -0
- xinference/model/llm/pytorch/yi_vl.py +4 -2
- xinference/model/llm/utils.py +53 -5
- xinference/model/llm/vllm/core.py +54 -6
- xinference/model/rerank/core.py +3 -0
- xinference/thirdparty/deepseek_vl/__init__.py +31 -0
- xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
- xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
- xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
- xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
- xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
- xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
- xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
- xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
- xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
- xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
- xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
- xinference/thirdparty/omnilmm/__init__.py +0 -0
- xinference/thirdparty/omnilmm/chat.py +216 -0
- xinference/thirdparty/omnilmm/constants.py +4 -0
- xinference/thirdparty/omnilmm/conversation.py +332 -0
- xinference/thirdparty/omnilmm/model/__init__.py +1 -0
- xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
- xinference/thirdparty/omnilmm/model/resampler.py +166 -0
- xinference/thirdparty/omnilmm/model/utils.py +563 -0
- xinference/thirdparty/omnilmm/train/__init__.py +13 -0
- xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
- xinference/thirdparty/omnilmm/utils.py +134 -0
- xinference/types.py +15 -19
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.76ef2b17.js +3 -0
- xinference/web/ui/build/static/js/main.76ef2b17.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/15e2cf8cd8d0989719b6349428ff576f9009ff4c2dcc52378be0bd938e82495e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/35d0e4a317e5582cbb79d901302e9d706520ac53f8a734c2fd8bfde6eb5a4f02.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3c2f277c93c5f1638e08db38df0d0fb4e58d1c5571aea03241a5c04ff4094704.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9cbcb6d77ba21b22c6950b6fb5b305d23c19cf747f99f7d48b6b046f8f7b1b0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d076fd56cf3b15ed2433e3744b98c6b4e4410a19903d1db4de5bba0e1a1b3347.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/daad8131d91134f6d7aef895a0c9c32e1cb928277cb5aa66c01028126d215be0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e6eccc9aa641e7da833492e27846dc965f9750281420977dc84654ca6ed221e4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f16aec63602a77bd561d0e67fa00b76469ac54b8033754bba114ec5eb3257964.json +1 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/METADATA +25 -12
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/RECORD +79 -58
- xinference/model/llm/ggml/ctransformers.py +0 -281
- xinference/model/llm/ggml/ctransformers_util.py +0 -161
- xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
- xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/18e5d5422e2464abf4a3e6d38164570e2e426e0a921e9a2628bbae81b18da353.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/98b7ef307f436affe13d75a4f265b27e828ccc2b10ffae6513abe2681bc11971.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +0 -1
- /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.76ef2b17.js.LICENSE.txt} +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/LICENSE +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/WHEEL +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/top_level.txt +0 -0
xinference/thirdparty/omnilmm/train/train_utils.py ADDED
@@ -0,0 +1,150 @@
+import copy
+import warnings
+from typing import Dict, Sequence
+
+import numpy as np
+import transformers
+
+IGNORE_INDEX = -100
+DEFAULT_IMAGE_TOKEN = "<image>"
+DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"
+DEFAULT_IM_START_TOKEN = "<im_start>"
+DEFAULT_IM_END_TOKEN = "<im_end>"
+
+
+def _tokenize_fn(
+    strings: Sequence[str], tokenizer: transformers.PreTrainedTokenizer
+) -> Dict:
+    """Tokenize a list of strings."""
+    tokenized_list = [
+        tokenizer(
+            text,
+            return_tensors="pt",
+            padding="longest",
+            max_length=tokenizer.model_max_length,
+            truncation=True,
+        )
+        for text in strings
+    ]
+    input_ids = labels = [tokenized.input_ids[0] for tokenized in tokenized_list]
+    input_ids_lens = labels_lens = [
+        tokenized.input_ids.ne(tokenizer.pad_token_id).sum().item()
+        for tokenized in tokenized_list
+    ]
+    return dict(
+        input_ids=input_ids,
+        labels=labels,
+        input_ids_lens=input_ids_lens,
+        labels_lens=labels_lens,
+    )
+
+
+def omni_preprocess(
+    sources, tokenizer: transformers.PreTrainedTokenizer, generation=False
+):
+    system_content = "You are an artificial intelligence assistant, which gives helpful, detailed, and polite answers to the human's questions."
+    ignore_index = -100
+
+    response_template = "\n<|assistant|>\n"
+    instruction_template = "\n<|user|>\n"
+    response_token_ids = tokenizer.encode(response_template, add_special_tokens=False)
+    instruction_token_ids = tokenizer.encode(
+        instruction_template, add_special_tokens=False
+    )
+
+    batch_input_ids = []
+    batch_labels = []
+    for i in range(len(sources)):
+        new_source = []
+        prev_role = "unexpect"
+        for conv_turn in sources[i]:
+            role = conv_turn["from"] if "from" in conv_turn else conv_turn["role"]
+            content = (
+                conv_turn["value"] if "value" in conv_turn else conv_turn["content"]
+            )
+
+            role = "user" if role == "human" else role
+            role = "assistant" if role == "gpt" else role
+
+            assert role in ["user", "assistant"]
+            assert role != prev_role, f"role={role}, prev_role={prev_role}"
+            prev_role = role
+
+            new_turn = {"role": role, "content": content}
+            new_source.append(new_turn)
+        if new_source[0]["role"] != "system":
+            new_source.insert(0, {"role": "system", "content": system_content})
+
+        # TODO: this automatically add '\n' to the end
+        res_text = tokenizer.apply_chat_template(
+            new_source, tokenize=False, add_generation_prompt=generation
+        )
+        if not generation:
+            res_text = res_text.strip()
+
+        conversations_tokenized = _tokenize_fn([res_text], tokenizer)
+        res_input_ids = conversations_tokenized["input_ids"][0]
+
+        # since labels and input_ids are reference towards the same object
+        res_labels = copy.deepcopy(conversations_tokenized["labels"][0])
+
+        response_token_ids_idxs = []
+        human_token_ids_idxs = []
+
+        for assistant_idx in np.where(res_labels == response_token_ids[0])[0]:
+            # find the indexes of the start of a response.
+            if (
+                response_token_ids
+                == res_labels[
+                    assistant_idx : assistant_idx + len(response_token_ids)
+                ].tolist()
+            ):
+                response_token_ids_idxs.append(assistant_idx + len(response_token_ids))
+
+        if len(response_token_ids_idxs) == 0:
+            warnings.warn(
+                f"Could not find response key `{response_template}` in the "
+                f"following instance: @===>{tokenizer.decode(res_input_ids)}<===@ "
+                f"Raw text is @===>{res_text}<===@"
+                f"Raw source is @===>{new_source}<===@"
+                f"This instance will be ignored in loss calculation. "
+                f"Note, if this happens often, consider increasing the `max_seq_length`."
+            )
+            res_labels[:] = ignore_index
+
+        human_token_ids = instruction_token_ids
+        for human_idx in np.where(res_labels == human_token_ids[0])[0]:
+            # find the indexes of the start of a human answer.
+            if (
+                human_token_ids
+                == res_labels[human_idx : human_idx + len(human_token_ids)].tolist()
+            ):
+                human_token_ids_idxs.append(human_idx)
+
+        if len(human_token_ids_idxs) == 0:
+            warnings.warn(
+                f"Could not find instruction key `{instruction_template}` in the "
+                f"following instance: @===>{tokenizer.decode(res_input_ids)}<===@ "
+                f"Raw text is @===>{res_text}<===@"
+                f"Raw source is @===>{new_source}<===@"
+                f"This instance will be ignored in loss calculation. "
+                f"Note, if this happens often, consider increasing the `max_seq_length`."
+            )
+            res_labels[:] = ignore_index
+
+        for idx, (start, end) in enumerate(
+            zip(human_token_ids_idxs, response_token_ids_idxs)
+        ):
+            # Make pytorch loss function ignore all non response tokens
+            if idx != 0:
+                res_labels[start:end] = ignore_index
+            else:
+                res_labels[:end] = ignore_index
+
+        if len(response_token_ids_idxs) < len(human_token_ids_idxs):
+            res_labels[human_token_ids_idxs[-1] :] = ignore_index
+
+        batch_input_ids.append(res_input_ids)
+        batch_labels.append(res_labels)
+
+    return dict(input_ids=batch_input_ids, labels=batch_labels)
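
For context, a minimal usage sketch of omni_preprocess (not part of the diff). It assumes a HuggingFace tokenizer whose chat template emits the "\n<|user|>\n" and "\n<|assistant|>\n" markers the function searches for; the model id below is a placeholder.

    from transformers import AutoTokenizer
    from xinference.thirdparty.omnilmm.train.train_utils import omni_preprocess

    # Placeholder model id: any chat model whose template uses the
    # <|user|>/<|assistant|> markers expected by omni_preprocess.
    tokenizer = AutoTokenizer.from_pretrained("some-org/omnilmm-compatible-model")
    sources = [
        [
            {"from": "human", "value": "What is in the image?\n<image>"},
            {"from": "gpt", "value": "A cat sitting on a sofa."},
        ]
    ]
    batch = omni_preprocess(sources, tokenizer, generation=False)
    # batch["labels"][0] is -100 (ignore_index) everywhere except the tokens of
    # the assistant reply, so the training loss covers only model responses.
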
xinference/thirdparty/omnilmm/utils.py ADDED
@@ -0,0 +1,134 @@
+import logging
+import logging.handlers
+import os
+import sys
+
+import requests
+
+from .constants import LOGDIR
+
+server_error_msg = (
+    "**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
+)
+moderation_msg = (
+    "YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES. PLEASE TRY AGAIN."
+)
+
+handler = None
+
+
+def build_logger(logger_name, logger_filename):
+    global handler
+
+    formatter = logging.Formatter(
+        fmt="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+
+    # Set the format of root handlers
+    if not logging.getLogger().handlers:
+        logging.basicConfig(level=logging.INFO)
+    logging.getLogger().handlers[0].setFormatter(formatter)
+
+    # Redirect stdout and stderr to loggers
+    stdout_logger = logging.getLogger("stdout")
+    stdout_logger.setLevel(logging.INFO)
+    sl = StreamToLogger(stdout_logger, logging.INFO)
+    sys.stdout = sl
+
+    stderr_logger = logging.getLogger("stderr")
+    stderr_logger.setLevel(logging.ERROR)
+    sl = StreamToLogger(stderr_logger, logging.ERROR)
+    sys.stderr = sl
+
+    # Get logger
+    logger = logging.getLogger(logger_name)
+    logger.setLevel(logging.INFO)
+
+    # Add a file handler for all loggers
+    if handler is None:
+        os.makedirs(LOGDIR, exist_ok=True)
+        filename = os.path.join(LOGDIR, logger_filename)
+        handler = logging.handlers.TimedRotatingFileHandler(
+            filename, when="D", utc=True
+        )
+        handler.setFormatter(formatter)
+
+        for name, item in logging.root.manager.loggerDict.items():
+            if isinstance(item, logging.Logger):
+                item.addHandler(handler)
+
+    return logger
+
+
+class StreamToLogger(object):
+    """
+    Fake file-like stream object that redirects writes to a logger instance.
+    """
+
+    def __init__(self, logger, log_level=logging.INFO):
+        self.terminal = sys.stdout
+        self.logger = logger
+        self.log_level = log_level
+        self.linebuf = ""
+
+    def __getattr__(self, attr):
+        return getattr(self.terminal, attr)
+
+    def write(self, buf):
+        temp_linebuf = self.linebuf + buf
+        self.linebuf = ""
+        for line in temp_linebuf.splitlines(True):
+            # From the io.TextIOWrapper docs:
+            #   On output, if newline is None, any '\n' characters written
+            #   are translated to the system default line separator.
+            # By default sys.stdout.write() expects '\n' newlines and then
+            # translates them so this is still cross platform.
+            if line[-1] == "\n":
+                self.logger.log(self.log_level, line.rstrip())
+            else:
+                self.linebuf += line
+
+    def flush(self):
+        if self.linebuf != "":
+            self.logger.log(self.log_level, self.linebuf.rstrip())
+            self.linebuf = ""
+
+
+def disable_torch_init():
+    """
+    Disable the redundant torch default initialization to accelerate model creation.
+    """
+    import torch
+
+    setattr(torch.nn.Linear, "reset_parameters", lambda self: None)
+    setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None)
+
+
+def violates_moderation(text):
+    """
+    Check whether the text violates OpenAI moderation API.
+    """
+    url = "https://api.openai.com/v1/moderations"
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": "Bearer " + os.environ["OPENAI_API_KEY"],
+    }
+    text = text.replace("\n", "")
+    data = "{" + '"input": ' + f'"{text}"' + "}"
+    data = data.encode("utf-8")
+    try:
+        ret = requests.post(url, headers=headers, data=data, timeout=5)
+        flagged = ret.json()["results"][0]["flagged"]
+    except requests.exceptions.RequestException:
+        flagged = False
+    except KeyError:
+        flagged = False
+
+    return flagged
+
+
+def pretty_print_semaphore(semaphore):
+    if semaphore is None:
+        return "None"
+    return f"Semaphore(value={semaphore._value}, locked={semaphore.locked()})"
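
A short sketch of how these logging helpers are typically used (not part of the diff; LOGDIR comes from the sibling constants module listed above):

    from xinference.thirdparty.omnilmm.utils import build_logger

    logger = build_logger("omnilmm", "omnilmm.log")
    logger.info("model loaded")
    # After the first build_logger call, sys.stdout/sys.stderr are wrapped in
    # StreamToLogger, so bare print() output also lands in the rotating log
    # file under LOGDIR.
    print("this line is captured and logged at INFO level")
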
xinference/types.py
CHANGED
@@ -91,11 +91,23 @@ class CompletionLogprobs(TypedDict):
     top_logprobs: List[Optional[Dict[str, float]]]
 
 
+class ToolCallFunction(TypedDict):
+    name: str
+    arguments: str
+
+
+class ToolCalls(TypedDict):
+    id: str
+    type: Literal["function"]
+    function: ToolCallFunction
+
+
 class CompletionChoice(TypedDict):
     text: str
     index: int
     logprobs: Optional[CompletionLogprobs]
     finish_reason: Optional[str]
+    tool_calls: NotRequired[List[ToolCalls]]
 
 
 class CompletionUsage(TypedDict):
@@ -147,6 +159,7 @@ class ChatCompletion(TypedDict):
 class ChatCompletionChunkDelta(TypedDict):
     role: NotRequired[str]
     content: NotRequired[str]
+    tool_calls: NotRequired[List[ToolCalls]]
 
 
 class ChatCompletionChunkChoice(TypedDict):
@@ -232,6 +245,8 @@ class LlamaCppModelConfig(TypedDict, total=False):
     n_ctx: int
     n_parts: int
     n_gpu_layers: int
+    split_mode: int
+    main_gpu: int
     seed: int
     f16_kv: bool
     logits_all: bool
@@ -355,21 +370,6 @@ try:
 except ImportError:
     CreateCompletionLlamaCpp = create_model("CreateCompletionLlamaCpp")
 
-CreateCompletionCTransformers: BaseModel
-try:
-    from ctransformers.llm import LLM
-
-    CreateCompletionCTransformers = get_pydantic_model_from_method(
-        LLM.generate,
-        exclude_fields=["tokens"],
-        include_fields={
-            "max_tokens": (Optional[int], max_tokens_field),
-            "stream": (Optional[bool], stream_field),
-        },
-    )
-except ImportError:
-    CreateCompletionCTransformers = create_model("CreateCompletionCTransformers")
-
 
 # This type is for openai API compatibility
 CreateCompletionOpenAI: BaseModel
@@ -415,7 +415,6 @@ class CreateCompletion(
     ModelAndPrompt,
     CreateCompletionTorch,
     CreateCompletionLlamaCpp,
-    CreateCompletionCTransformers,
     CreateCompletionOpenAI,
 ):
     pass
@@ -428,8 +427,6 @@ class CreateChatModel(BaseModel):
 # Currently, chat calls generates, so the params share the same one.
 CreateChatCompletionTorch = CreateCompletionTorch
 CreateChatCompletionLlamaCpp: BaseModel = CreateCompletionLlamaCpp
-CreateChatCompletionCTransformers: BaseModel = CreateCompletionCTransformers
-
 
 # This type is for openai API compatibility
 CreateChatCompletionOpenAI: BaseModel
@@ -450,7 +447,6 @@ class CreateChatCompletion(
     CreateChatModel,
     CreateChatCompletionTorch,
     CreateChatCompletionLlamaCpp,
-    CreateChatCompletionCTransformers,
     CreateChatCompletionOpenAI,
 ):
     pass
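
Illustrative only (all values invented): a CompletionChoice carrying the new optional tool_calls field, which mirrors the OpenAI function-calling shape:

    from xinference.types import CompletionChoice

    choice: CompletionChoice = {
        "text": "",
        "index": 0,
        "logprobs": None,
        "finish_reason": "tool_calls",
        "tool_calls": [
            {
                "id": "call_0",
                "type": "function",
                "function": {"name": "get_weather", "arguments": '{"city": "Paris"}'},
            }
        ],
    }
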
xinference/web/ui/build/asset-manifest.json CHANGED
@@ -1,11 +1,11 @@
 {
   "files": {
-    "main.js": "./static/js/main.66b1c4fb.js",
+    "main.js": "./static/js/main.76ef2b17.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
-    "main.66b1c4fb.js.map": "./static/js/main.66b1c4fb.js.map"
+    "main.76ef2b17.js.map": "./static/js/main.76ef2b17.js.map"
   },
   "entrypoints": [
-    "static/js/main.66b1c4fb.js"
+    "static/js/main.76ef2b17.js"
   ]
 }
xinference/web/ui/build/index.html CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.66b1c4fb.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.76ef2b17.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>