xinference 0.11.3__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +143 -6
- xinference/client/restful/restful_client.py +144 -5
- xinference/constants.py +5 -0
- xinference/core/cache_tracker.py +48 -28
- xinference/core/model.py +160 -19
- xinference/core/scheduler.py +446 -0
- xinference/core/supervisor.py +99 -24
- xinference/core/worker.py +68 -2
- xinference/deploy/cmdline.py +86 -2
- xinference/deploy/test/test_cmdline.py +19 -10
- xinference/isolation.py +9 -2
- xinference/model/audio/__init__.py +14 -1
- xinference/model/audio/chattts.py +84 -0
- xinference/model/audio/core.py +22 -4
- xinference/model/audio/custom.py +6 -4
- xinference/model/audio/model_spec.json +20 -0
- xinference/model/audio/model_spec_modelscope.json +20 -0
- xinference/model/llm/__init__.py +38 -2
- xinference/model/llm/llm_family.json +509 -1
- xinference/model/llm/llm_family.py +86 -1
- xinference/model/llm/llm_family_csghub.json +66 -0
- xinference/model/llm/llm_family_modelscope.json +411 -2
- xinference/model/llm/pytorch/chatglm.py +20 -13
- xinference/model/llm/pytorch/cogvlm2.py +76 -17
- xinference/model/llm/pytorch/core.py +141 -6
- xinference/model/llm/pytorch/glm4v.py +268 -0
- xinference/model/llm/pytorch/minicpmv25.py +232 -0
- xinference/model/llm/pytorch/qwen_vl.py +1 -1
- xinference/model/llm/pytorch/utils.py +405 -8
- xinference/model/llm/utils.py +14 -13
- xinference/model/llm/vllm/core.py +16 -4
- xinference/model/utils.py +8 -2
- xinference/thirdparty/ChatTTS/__init__.py +1 -0
- xinference/thirdparty/ChatTTS/core.py +200 -0
- xinference/thirdparty/ChatTTS/experimental/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/experimental/llm.py +40 -0
- xinference/thirdparty/ChatTTS/infer/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/infer/api.py +125 -0
- xinference/thirdparty/ChatTTS/model/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/model/dvae.py +155 -0
- xinference/thirdparty/ChatTTS/model/gpt.py +265 -0
- xinference/thirdparty/ChatTTS/utils/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/utils/gpu_utils.py +23 -0
- xinference/thirdparty/ChatTTS/utils/infer_utils.py +141 -0
- xinference/thirdparty/ChatTTS/utils/io_utils.py +14 -0
- xinference/types.py +3 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.074e2b31.css +2 -0
- xinference/web/ui/build/static/css/main.074e2b31.css.map +1 -0
- xinference/web/ui/build/static/js/main.a58ff436.js +3 -0
- xinference/web/ui/build/static/js/main.a58ff436.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/10262a281dec3bc2b185f4385ceb6846626f52d41cb4d46c7c649e719f979d4d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/762a75a62daf3bec2cfc97ec8612798493fb34ef87087dcad6aad64ab7f14345.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7f3bdb3a48fa00c046c8b185acd4da6f2e2940a20dbd77f9373d60de3fd6633e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f2f73bfdc13b12b02c8cbc4769b0b8e6367e9b6d8331c322d94318491a0b3653.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f51bf63ddaa7afd125ef2254a105789333eecc1c94fdf5157a9b88ef7ad0a5bd.json +1 -0
- {xinference-0.11.3.dist-info → xinference-0.12.1.dist-info}/METADATA +26 -9
- {xinference-0.11.3.dist-info → xinference-0.12.1.dist-info}/RECORD +65 -47
- xinference/web/ui/build/static/css/main.54bca460.css +0 -2
- xinference/web/ui/build/static/css/main.54bca460.css.map +0 -1
- xinference/web/ui/build/static/js/main.551aa479.js +0 -3
- xinference/web/ui/build/static/js/main.551aa479.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1e86938a0cdf706d21e99b21f5d868fa247c0c88b26807047e26dcdc4d9a9db3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1fa824d82b2af519de7700c594e50bde4bbca60d13bd3fabff576802e4070304.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3e737bcdbcbc407ccd65b90e199ef0c3214b261e8e41dbf14d921384a717d9ee.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/46b6dd1f6d1109cd0e2455a0ea0be3e9bda1097cd4ebec9c4040070372671cfc.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a6da6bc3d0d2191adebee87fb58ecebe82d071087bd2f7f3a9c7fdd2ada130f2.json +0 -1
- /xinference/web/ui/build/static/js/{main.551aa479.js.LICENSE.txt → main.a58ff436.js.LICENSE.txt} +0 -0
- {xinference-0.11.3.dist-info → xinference-0.12.1.dist-info}/LICENSE +0 -0
- {xinference-0.11.3.dist-info → xinference-0.12.1.dist-info}/WHEEL +0 -0
- {xinference-0.11.3.dist-info → xinference-0.12.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.11.3.dist-info → xinference-0.12.1.dist-info}/top_level.txt +0 -0
xinference/thirdparty/ChatTTS/model/gpt.py
ADDED
@@ -0,0 +1,265 @@
+import os
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+import logging
+from tqdm import tqdm
+from einops import rearrange
+from transformers.cache_utils import Cache
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.nn.utils.parametrize as P
+from torch.nn.utils.parametrizations import weight_norm
+from transformers import LlamaModel, LlamaConfig
+
+
+class LlamaMLP(nn.Module):
+    def __init__(self, hidden_size, intermediate_size):
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
+        self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
+        self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)
+        self.act_fn = F.silu
+
+    def forward(self, x):
+        down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
+        return down_proj
+
+
+class GPT_warpper(nn.Module):
+    def __init__(
+        self,
+        gpt_config,
+        num_audio_tokens,
+        num_text_tokens,
+        num_vq=4,
+        **kwargs,
+    ):
+        super().__init__()
+
+        self.logger = logging.getLogger(__name__)
+        self.gpt = self.build_model(gpt_config)
+        self.model_dim = self.gpt.config.hidden_size
+
+        self.num_vq = num_vq
+        self.emb_code = nn.ModuleList([nn.Embedding(num_audio_tokens, self.model_dim) for i in range(self.num_vq)])
+        self.emb_text = nn.Embedding(num_text_tokens, self.model_dim)
+        self.head_text = weight_norm(nn.Linear(self.model_dim, num_text_tokens, bias=False), name='weight')
+        self.head_code = nn.ModuleList([weight_norm(nn.Linear(self.model_dim, num_audio_tokens, bias=False), name='weight') for i in range(self.num_vq)])
+
+    def build_model(self, config):
+
+        configuration = LlamaConfig(**config)
+        model = LlamaModel(configuration)
+        del model.embed_tokens
+
+        return model
+
+    def get_emb(self, input_ids, text_mask, **kwargs):
+
+        emb_text = self.emb_text(input_ids[text_mask][:, 0])
+
+        emb_code = [self.emb_code[i](input_ids[~text_mask][:, i]) for i in range(self.num_vq)]
+        emb_code = torch.stack(emb_code, 2).sum(2)
+
+        emb = torch.zeros((input_ids.shape[:-1])+(emb_text.shape[-1],), device=emb_text.device, dtype=emb_text.dtype)
+        emb[text_mask] = emb_text
+        emb[~text_mask] = emb_code.to(emb.dtype)
+
+        return emb
+
+    def prepare_inputs_for_generation(
+        self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, cache_position=None, **kwargs
+    ):
+        # With static cache, the `past_key_values` is None
+        # TODO joao: standardize interface for the different Cache classes and remove of this if
+        has_static_cache = False
+        if past_key_values is None:
+            past_key_values = getattr(self.gpt.layers[0].self_attn, "past_key_value", None)
+            has_static_cache = past_key_values is not None
+
+        past_length = 0
+        if past_key_values is not None:
+            if isinstance(past_key_values, Cache):
+                past_length = cache_position[0] if cache_position is not None else past_key_values.get_seq_length()
+                max_cache_length = (
+                    torch.tensor(past_key_values.get_max_length(), device=input_ids.device)
+                    if past_key_values.get_max_length() is not None
+                    else None
+                )
+                cache_length = past_length if max_cache_length is None else torch.min(max_cache_length, past_length)
+            # TODO joao: remove this `else` after `generate` prioritizes `Cache` objects
+            else:
+                cache_length = past_length = past_key_values[0][0].shape[2]
+                max_cache_length = None
+
+            # Keep only the unprocessed tokens:
+            # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where
+            # some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as
+            # input)
+            if attention_mask is not None and attention_mask.shape[1] > input_ids.shape[1]:
+                input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :]
+            # 2 - If the past_length is smaller than input_ids', then input_ids holds all input tokens. We can discard
+            # input_ids based on the past_length.
+            elif past_length < input_ids.shape[1]:
+                input_ids = input_ids[:, past_length:]
+            # 3 - Otherwise (past_length >= input_ids.shape[1]), let's assume input_ids only has unprocessed tokens.
+
+            # If we are about to go beyond the maximum cache length, we need to crop the input attention mask.
+            if (
+                max_cache_length is not None
+                and attention_mask is not None
+                and cache_length + input_ids.shape[1] > max_cache_length
+            ):
+                attention_mask = attention_mask[:, -max_cache_length:]
+
+        position_ids = kwargs.get("position_ids", None)
+        if attention_mask is not None and position_ids is None:
+            # create position_ids on the fly for batch generation
+            position_ids = attention_mask.long().cumsum(-1) - 1
+            position_ids.masked_fill_(attention_mask == 0, 1)
+            if past_key_values:
+                position_ids = position_ids[:, -input_ids.shape[1] :]
+
+        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
+        if inputs_embeds is not None and past_key_values is None:
+            model_inputs = {"inputs_embeds": inputs_embeds}
+        else:
+            # The `contiguous()` here is necessary to have a static stride during decoding. torchdynamo otherwise
+            # recompiles graphs as the stride of the inputs is a guard. Ref: https://github.com/huggingface/transformers/pull/29114
+            # TODO: use `next_tokens` directly instead.
+            model_inputs = {"input_ids": input_ids.contiguous()}
+
+        input_length = position_ids.shape[-1] if position_ids is not None else input_ids.shape[-1]
+        if cache_position is None:
+            cache_position = torch.arange(past_length, past_length + input_length, device=input_ids.device)
+        else:
+            cache_position = cache_position[-input_length:]
+
+        if has_static_cache:
+            past_key_values = None
+
+        model_inputs.update(
+            {
+                "position_ids": position_ids,
+                "cache_position": cache_position,
+                "past_key_values": past_key_values,
+                "use_cache": kwargs.get("use_cache"),
+                "attention_mask": attention_mask,
+            }
+        )
+        return model_inputs
+
+    def generate(
+        self,
+        emb,
+        inputs_ids,
+        temperature,
+        eos_token,
+        attention_mask = None,
+        max_new_token = 2048,
+        min_new_token = 0,
+        LogitsWarpers = [],
+        LogitsProcessors = [],
+        infer_text=False,
+        return_attn=False,
+        return_hidden=False,
+    ):
+
+        with torch.no_grad():
+
+            attentions = []
+            hiddens = []
+
+            start_idx, end_idx = inputs_ids.shape[1], torch.zeros(inputs_ids.shape[0], device=inputs_ids.device, dtype=torch.long)
+            finish = torch.zeros(inputs_ids.shape[0], device=inputs_ids.device).bool()
+
+            temperature = temperature[None].expand(inputs_ids.shape[0], -1)
+            temperature = rearrange(temperature, "b n -> (b n) 1")
+
+            attention_mask_cache = torch.ones((inputs_ids.shape[0], inputs_ids.shape[1]+max_new_token,), dtype=torch.bool, device=inputs_ids.device)
+            if attention_mask is not None:
+                attention_mask_cache[:, :attention_mask.shape[1]] = attention_mask
+
+            for i in tqdm(range(max_new_token)):
+
+                model_input = self.prepare_inputs_for_generation(inputs_ids,
+                    outputs.past_key_values if i!=0 else None,
+                    attention_mask_cache[:, :inputs_ids.shape[1]], use_cache=True)
+
+                if i == 0:
+                    model_input['inputs_embeds'] = emb
+                else:
+                    if infer_text:
+                        model_input['inputs_embeds'] = self.emb_text(model_input['input_ids'][:,:,0])
+                    else:
+                        code_emb = [self.emb_code[i](model_input['input_ids'][:,:,i]) for i in range(self.num_vq)]
+                        model_input['inputs_embeds'] = torch.stack(code_emb, 3).sum(3)
+
+                model_input['input_ids'] = None
+                outputs = self.gpt.forward(**model_input, output_attentions=return_attn)
+                attentions.append(outputs.attentions)
+                hidden_states = outputs[0] # 🐻
+                if return_hidden:
+                    hiddens.append(hidden_states[:, -1])
+
+                with P.cached():
+                    if infer_text:
+                        logits = self.head_text(hidden_states)
+                    else:
+                        logits = torch.stack([self.head_code[i](hidden_states) for i in range(self.num_vq)], 3)
+
+                logits = logits[:, -1].float()
+
+                if not infer_text:
+                    logits = rearrange(logits, "b c n -> (b n) c")
+                    logits_token = rearrange(inputs_ids[:, start_idx:], "b c n -> (b n) c")
+                else:
+                    logits_token = inputs_ids[:, start_idx:, 0]
+
+                logits = logits / temperature
+
+                for logitsProcessors in LogitsProcessors:
+                    logits = logitsProcessors(logits_token, logits)
+
+                for logitsWarpers in LogitsWarpers:
+                    logits = logitsWarpers(logits_token, logits)
+
+                if i < min_new_token:
+                    logits[:, eos_token] = -torch.inf
+
+                scores = F.softmax(logits, dim=-1)
+
+                idx_next = torch.multinomial(scores, num_samples=1)
+
+                if not infer_text:
+                    idx_next = rearrange(idx_next, "(b n) 1 -> b n", n=self.num_vq)
+                    finish = finish | (idx_next == eos_token).any(1)
+                    inputs_ids = torch.cat([inputs_ids, idx_next.unsqueeze(1)], 1)
+                else:
+                    finish = finish | (idx_next == eos_token).any(1)
+                    inputs_ids = torch.cat([inputs_ids, idx_next.unsqueeze(-1).expand(-1, -1, self.num_vq)], 1)
+
+                end_idx = end_idx + (~finish).int()
+
+                if finish.all():
+                    break
+
+            inputs_ids = [inputs_ids[idx, start_idx: start_idx+i] for idx, i in enumerate(end_idx.int())]
+            inputs_ids = [i[:, 0] for i in inputs_ids] if infer_text else inputs_ids
+
+            if return_hidden:
+                hiddens = torch.stack(hiddens, 1)
+                hiddens = [hiddens[idx, :i] for idx, i in enumerate(end_idx.int())]
+
+            if not finish.all():
+                self.logger.warn(f'Incomplete result. hit max_new_token: {max_new_token}')
+
+            return {
+                'ids': inputs_ids,
+                'attentions': attentions,
+                'hiddens':hiddens,
+            }
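For orientation, here is a minimal sketch of driving this wrapper end to end. Everything below is illustrative: the config is a deliberately tiny `LlamaConfig`, and the token counts stand in for whatever the real ChatTTS checkpoint supplies; the `Cache` handling above assumes a transformers version contemporary with this release (around 4.41).

```python
import torch
from xinference.thirdparty.ChatTTS.model.gpt import GPT_warpper

# Tiny, illustrative config so the sketch runs without real weights; the
# actual hidden sizes and token counts come from the ChatTTS checkpoint.
gpt_config = {
    "hidden_size": 64,
    "intermediate_size": 128,
    "num_attention_heads": 4,
    "num_hidden_layers": 2,
    "max_position_embeddings": 256,
    "vocab_size": 8,  # unused: build_model deletes embed_tokens
}
model = GPT_warpper(
    gpt_config, num_audio_tokens=626, num_text_tokens=21178, num_vq=4
).eval()

# inputs_ids carries one column per VQ codebook: (batch, seq, num_vq).
# For text positions only column 0 is used (see get_emb).
inputs_ids = torch.zeros(1, 5, 4, dtype=torch.long)
text_mask = torch.ones(1, 5, dtype=torch.bool)  # every position is text here
emb = model.get_emb(inputs_ids, text_mask)

result = model.generate(
    emb,
    inputs_ids,
    temperature=torch.tensor([0.3]),
    eos_token=0,
    max_new_token=8,
    infer_text=True,  # sample from head_text rather than the num_vq code heads
)
print([t.shape for t in result["ids"]])  # one 1-D tensor of new ids per batch row
```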
xinference/thirdparty/ChatTTS/utils/__init__.py
File without changes
xinference/thirdparty/ChatTTS/utils/gpu_utils.py
ADDED
@@ -0,0 +1,23 @@
+
+import torch
+import logging
+
+def select_device(min_memory = 2048):
+    logger = logging.getLogger(__name__)
+    if torch.cuda.is_available():
+        available_gpus = []
+        for i in range(torch.cuda.device_count()):
+            props = torch.cuda.get_device_properties(i)
+            free_memory = props.total_memory - torch.cuda.memory_reserved(i)
+            available_gpus.append((i, free_memory))
+        selected_gpu, max_free_memory = max(available_gpus, key=lambda x: x[1])
+        device = torch.device(f'cuda:{selected_gpu}')
+        free_memory_mb = max_free_memory / (1024 * 1024)
+        if free_memory_mb < min_memory:
+            logger.log(logging.WARNING, f'GPU {selected_gpu} has {round(free_memory_mb, 2)} MB memory left.')
+            device = torch.device('cpu')
+    else:
+        logger.log(logging.WARNING, f'No GPU found, use CPU instead')
+        device = torch.device('cpu')
+
+    return device
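Note that the "free" memory here is `total_memory - memory_reserved(i)`, which only accounts for this process's CUDA allocator, not memory held by other processes. A minimal usage sketch:

```python
import torch
from xinference.thirdparty.ChatTTS.utils.gpu_utils import select_device

# Chooses the CUDA device with the most apparently-free memory, falling
# back to CPU when fewer than min_memory MB appear to remain.
device = select_device(min_memory=2048)
x = torch.zeros(1, device=device)  # tensors/models can now target it
```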
xinference/thirdparty/ChatTTS/utils/infer_utils.py
ADDED
@@ -0,0 +1,141 @@
+
+import re
+import torch
+import torch.nn.functional as F
+
+
+class CustomRepetitionPenaltyLogitsProcessorRepeat():
+
+    def __init__(self, penalty: float, max_input_ids, past_window):
+        if not isinstance(penalty, float) or not (penalty > 0):
+            raise ValueError(f"`penalty` has to be a strictly positive float, but is {penalty}")
+
+        self.penalty = penalty
+        self.max_input_ids = max_input_ids
+        self.past_window = past_window
+
+    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
+
+        input_ids = input_ids[:, -self.past_window:]
+        freq = F.one_hot(input_ids, scores.size(1)).sum(1)
+        freq[self.max_input_ids:] = 0
+        alpha = self.penalty**freq
+        scores = torch.where(scores < 0, scores*alpha, scores/alpha)
+
+        return scores
+
+class CustomRepetitionPenaltyLogitsProcessor():
+
+    def __init__(self, penalty: float, max_input_ids, past_window):
+        if not isinstance(penalty, float) or not (penalty > 0):
+            raise ValueError(f"`penalty` has to be a strictly positive float, but is {penalty}")
+
+        self.penalty = penalty
+        self.max_input_ids = max_input_ids
+        self.past_window = past_window
+
+    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
+
+        input_ids = input_ids[:, -self.past_window:]
+        score = torch.gather(scores, 1, input_ids)
+        _score = score.detach().clone()
+        score = torch.where(score < 0, score * self.penalty, score / self.penalty)
+        score[input_ids>=self.max_input_ids] = _score[input_ids>=self.max_input_ids]
+        scores.scatter_(1, input_ids, score)
+
+        return scores
+
+def count_invalid_characters(s):
+
+    s = re.sub(r'\[uv_break\]|\[laugh\]|\[lbreak\]', '', s)
+    pattern = re.compile(r'[^\u4e00-\u9fffA-Za-z,。、,\. ]')
+    non_alphabetic_chinese_chars = pattern.findall(s)
+    return set(non_alphabetic_chinese_chars)
+
+def detect_language(sentence):
+
+    chinese_char_pattern = re.compile(r'[\u4e00-\u9fff]')
+    english_word_pattern = re.compile(r'\b[A-Za-z]+\b')
+
+    chinese_chars = chinese_char_pattern.findall(sentence)
+    english_words = english_word_pattern.findall(sentence)
+
+    if len(chinese_chars) > len(english_words):
+        return "zh"
+    else:
+        return "en"
+
+
+character_map = {
+    ':': ',',
+    ';': ',',
+    '!': '。',
+    '(': ',',
+    ')': ',',
+    '【': ',',
+    '】': ',',
+    '『': ',',
+    '』': ',',
+    '「': ',',
+    '」': ',',
+    '《': ',',
+    '》': ',',
+    '-': ',',
+    '‘': '',
+    '“': '',
+    '’': '',
+    '”': '',
+    ':': ',',
+    ';': ',',
+    '!': '.',
+    '(': ',',
+    ')': ',',
+    '[': ',',
+    ']': ',',
+    '>': ',',
+    '<': ',',
+    '-': ',',
+}
+
+halfwidth_2_fullwidth_map = {
+    '!': '!',
+    '"': '“',
+    "'": '‘',
+    '#': '#',
+    '$': '$',
+    '%': '%',
+    '&': '&',
+    '(': '(',
+    ')': ')',
+    ',': ',',
+    '-': '-',
+    '*': '*',
+    '+': '+',
+    '.': '。',
+    '/': '/',
+    ':': ':',
+    ';': ';',
+    '<': '<',
+    '=': '=',
+    '>': '>',
+    '?': '?',
+    '@': '@',
+    # '[': '[',
+    '\\': '\',
+    # ']': ']',
+    '^': '^',
+    # '_': '_',
+    '`': '`',
+    '{': '{',
+    '|': '|',
+    '}': '}',
+    '~': '~'
+}
+
+def apply_half2full_map(text):
+    translation_table = str.maketrans(halfwidth_2_fullwidth_map)
+    return text.translate(translation_table)
+
+def apply_character_map(text):
+    translation_table = str.maketrans(character_map)
+    return text.translate(translation_table)
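A short sketch of how these helpers compose in a text-to-speech preprocessing and sampling path; the vocabulary size, penalty, and window below are illustrative, not values taken from ChatTTS:

```python
import torch
from xinference.thirdparty.ChatTTS.utils.infer_utils import (
    CustomRepetitionPenaltyLogitsProcessorRepeat,
    apply_character_map,
    count_invalid_characters,
    detect_language,
)

text = "Hello, world! (demo)"
if detect_language(text) == "en":
    text = apply_character_map(text)   # '!' -> '.', '(' / ')' -> ','
print(count_invalid_characters(text))  # chars the TTS vocab cannot voice

# Down-weight tokens already sampled in the last 16 steps; illustrative values.
processor = CustomRepetitionPenaltyLogitsProcessorRepeat(
    penalty=1.05, max_input_ids=626, past_window=16
)
scores = torch.randn(2, 700)              # (batch, vocab) logits
history = torch.randint(0, 626, (2, 32))  # previously generated ids
scores = processor(history, scores)
```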
xinference/thirdparty/ChatTTS/utils/io_utils.py
ADDED
@@ -0,0 +1,14 @@
+
+import os
+import logging
+
+def get_latest_modified_file(directory):
+    logger = logging.getLogger(__name__)
+
+    files = [os.path.join(directory, f) for f in os.listdir(directory)]
+    if not files:
+        logger.log(logging.WARNING, f'No files found in the directory: {directory}')
+        return None
+    latest_file = max(files, key=os.path.getmtime)
+
+    return latest_file
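A trivial usage sketch; the directory path is hypothetical (ChatTTS presumably points this at its downloaded-asset folder to pick the newest snapshot):

```python
from xinference.thirdparty.ChatTTS.utils.io_utils import get_latest_modified_file

latest = get_latest_modified_file("/tmp/chattts-assets")  # hypothetical path
if latest is None:
    raise FileNotFoundError("no assets downloaded yet")
```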
xinference/types.py
CHANGED
@@ -284,6 +284,7 @@ class PytorchGenerateConfig(TypedDict, total=False):
     tools: Optional[List[Dict]]
     lora_name: Optional[str]
     stream_options: Optional[Union[dict, None]]
+    request_id: Optional[str]
 
 
 class PytorchModelConfig(TypedDict, total=False):
@@ -297,6 +298,7 @@ class PytorchModelConfig(TypedDict, total=False):
     gptq_groupsize: int
     gptq_act_order: bool
     trust_remote_code: bool
+    max_num_seqs: int
 
 
 def get_pydantic_model_from_method(
@@ -361,6 +363,7 @@ class CreateCompletionTorch(BaseModel):
     top_p: float = top_p_field
     top_k: int = top_k_field
     lora_name: Optional[str]
+    request_id: Optional[str]
 
 
 CreateCompletionLlamaCpp: BaseModel
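Both additions are optional TypedDict keys: `request_id` lets a caller tag a generation request (plausibly for the per-request tracking that the new `xinference/core/scheduler.py` in this release implies), and `max_num_seqs` caps concurrent sequences for the PyTorch backend. A client-side sketch; the surrounding field values are illustrative:

```python
from xinference.types import PytorchGenerateConfig, PytorchModelConfig

# TypedDicts with total=False: every key, including the new ones, is optional.
gen_cfg: PytorchGenerateConfig = {
    "temperature": 0.7,
    "request_id": "req-42",  # new in 0.12.x
}
model_cfg: PytorchModelConfig = {
    "trust_remote_code": True,
    "max_num_seqs": 16,      # new in 0.12.x
}
```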
xinference/web/ui/build/asset-manifest.json
CHANGED
@@ -1,14 +1,14 @@
 {
   "files": {
-    "main.css": "./static/css/main.54bca460.css",
-    "main.js": "./static/js/main.551aa479.js",
+    "main.css": "./static/css/main.074e2b31.css",
+    "main.js": "./static/js/main.a58ff436.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
-    "main.54bca460.css.map": "./static/css/main.54bca460.css.map",
-    "main.551aa479.js.map": "./static/js/main.551aa479.js.map"
+    "main.074e2b31.css.map": "./static/css/main.074e2b31.css.map",
+    "main.a58ff436.js.map": "./static/js/main.a58ff436.js.map"
   },
   "entrypoints": [
-    "static/css/main.54bca460.css",
-    "static/js/main.551aa479.js"
+    "static/css/main.074e2b31.css",
+    "static/js/main.a58ff436.js"
   ]
 }
xinference/web/ui/build/index.html
CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.551aa479.js"></script><link href="./static/css/main.54bca460.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.a58ff436.js"></script><link href="./static/css/main.074e2b31.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
xinference/web/ui/build/static/css/main.074e2b31.css
ADDED
@@ -0,0 +1,2 @@
+.container{cursor:pointer;display:block}.container,.descriptionCard{border-radius:20px;height:300px;position:relative;width:300px}.descriptionCard{left:-1px;padding:20px;top:-1px}.drawerCard{min-height:100%;padding:20px 80px 0;position:relative;width:60vw}.p{-webkit-line-clamp:4;-webkit-box-orient:vertical;display:-webkit-box;font-size:14px;overflow:hidden;padding:0 10px;text-overflow:ellipsis;word-break:break-word}.formContainer{height:80%;overflow:scroll;padding:0 10px}.buttonsContainer{align-items:center;bottom:50px;display:flex;justify-content:space-between;left:100px;position:absolute;right:100px}.buttonContainer{background-color:initial;border-width:0;width:45%}.buttonItem{border:1px solid #e5e7eb;border-radius:4px;padding:5px;width:100%}.instructionText{color:#666;font-size:12px;font-style:italic;margin:30px 0;text-align:center}.iconRow{bottom:20px;justify-content:space-between;left:20px;position:absolute;right:20px}.iconItem,.iconRow{align-items:center;display:flex}.iconItem{flex-direction:column;margin:20px}.boldIconText{font-size:1.2em;font-weight:700}.muiIcon{font-size:1.5em}.smallText{font-size:.8em}.dialogBox{background-color:#fff;height:607px;margin:32px;overflow-x:scroll;width:1241px}.dialogTitle{color:#000;display:flex;justify-content:space-between;padding:20px 20px 7px}.dialogTitle-model_name{font-size:18px;font-weight:700}.pathBox{cursor:pointer;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;width:160px}.pathBox2{width:300px}.copyPath{color:#555;cursor:pointer;font-size:14px!important}.empty{color:#555;font-size:20px;left:50%;position:absolute;top:30%;-webkit-transform:translate(-50%);transform:translate(-50%)}.deleteDialog{align-items:center;display:flex}.warningIcon{color:#ed6c02;margin-right:10px}.formBox{max-height:80vh;max-width:50vw;min-width:50vw;overflow:auto;padding:40px 20px 0 0;position:relative;transition:all .4s ease-in-out}.broaden{max-width:100%;min-width:100%;padding-right:0}.show-json{align-items:center;color:#444;display:flex;position:fixed;right:60px;top:90px}.icon{cursor:pointer;margin-left:20px;position:absolute;right:-40px}.icon:hover{color:#1976d2}.arrow{font-size:24px!important}.jsonBox{min-height:80vh;position:relative;transition:all .4s ease-in-out;width:100%}.hide{overflow:hidden;-webkit-transform:translate(30vw);transform:translate(30vw);width:0}.jsonBox-header{font-weight:700;line-height:40px}.textarea{border:1px solid #ddd;border-radius:5px;color:#444;height:calc(100% - 40px);padding:5px 10px;resize:none;width:100%}.copyIcon{color:#555;cursor:pointer;font-size:16px!important;position:absolute;right:5px;top:13px}.copyIcon:hover{color:#1976d2}.addBtn{margin-left:20px!important}.item{background-color:#eee;border-radius:10px;margin:10px 50px 0;overflow:hidden;padding:20px;position:relative}.item:hover .deleteBtn{-webkit-transform:translateX(-50px);transform:translateX(-50px)}.deleteBtn{background-color:#1976d2;border-radius:25px;height:50px;line-height:70px;position:absolute;right:20px;text-align:center;top:calc(50% - 25px);-webkit-transform:translateX(80px);transform:translateX(80px);transition:all .3s ease-in-out;width:50px}.deleteBtn:hover{box-shadow:0 0 10px #aaa;cursor:pointer}.deleteIcon{color:#fff;font-size:28px!important}
+/*# sourceMappingURL=main.074e2b31.css.map*/
xinference/web/ui/build/static/css/main.074e2b31.css.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"static/css/main.074e2b31.css","mappings":"AAAA,WAKE,cAAe,CAJf,aAMF,CACA,4BAFE,kBAAmB,CAFnB,YAAa,CAFb,iBAAkB,CAClB,WAaF,CARA,iBAGE,SAAU,CAGV,YAAa,CAJb,QAMF,CACA,YAGE,eAAgB,CADhB,mBAAoB,CADpB,iBAAkB,CAGlB,UACF,CACA,GAEE,oBAAqB,CACrB,2BAA4B,CAF5B,mBAAoB,CAMpB,cAAe,CAHf,eAAgB,CAIhB,cAAiB,CAHjB,sBAAuB,CACvB,qBAGF,CACA,eACE,UAAW,CACX,eAAgB,CAChB,cACF,CACA,kBAOE,kBAAmB,CALnB,WAAY,CAGZ,YAAa,CACb,6BAA8B,CAH9B,UAAW,CAFX,iBAAkB,CAGlB,WAIF,CACA,iBAGE,wBAA6B,CAD7B,cAAiB,CADjB,SAGF,CACA,YAME,wBAAqB,CAHrB,iBAAkB,CADlB,WAAY,CADZ,UAMF,CACA,iBAEE,UAAc,CADd,cAAe,CAEf,iBAAkB,CAClB,aAAc,CACd,iBACF,CACA,SAEE,WAAY,CAIZ,6BAA8B,CAH9B,SAAU,CAFV,iBAAkB,CAGlB,UAIF,CACA,mBAFE,kBAAmB,CAFnB,YASF,CALA,UAEE,qBAAsB,CAEtB,WACF,CACA,cAEE,eAAgB,CADhB,eAEF,CACA,SACE,eACF,CACA,WACE,cACF,CACA,WAGE,qBAAsB,CADtB,YAAa,CAEb,WAAY,CACZ,iBAAkB,CAJlB,YAKF,CACA,aAIE,UAAW,CAHX,YAAa,CACb,6BAA8B,CAC9B,qBAEF,CACA,wBACE,cAAe,CACf,eACF,CACA,SAEE,cAAe,CACf,eAAgB,CAEhB,sBAAuB,CADvB,kBAAmB,CAHnB,WAKF,CACA,UACE,WACF,CACA,UAGE,UAAW,CADX,cAAe,CADf,wBAGF,CACA,OAKE,UAAW,CADX,cAAe,CAFf,QAAS,CADT,iBAAkB,CAElB,OAAQ,CAGR,iCAA6B,CAA7B,yBACF,CACA,cAEE,kBAAmB,CADnB,YAEF,CAEA,aAEE,aAAuB,CADvB,iBAEF,CC5IA,SAIE,eAAgB,CAFhB,cAAe,CACf,cAAe,CAEf,aAAc,CACd,qBAAsB,CALtB,iBAAkB,CAMlB,8BACF,CAEA,SACE,cAAe,CACf,cAAe,CACf,eACF,CAEA,WAEE,kBAAmB,CAInB,UAAW,CALX,YAAa,CAEb,cAAe,CAEf,UAAW,CADX,QAGF,CAEA,MAGE,cAAe,CACf,gBAAiB,CAHjB,iBAAkB,CAClB,WAGF,CAEA,YACE,aACF,CAEA,OACE,wBACF,CAEA,SAEE,eAAgB,CADhB,iBAAkB,CAGlB,8BAAgC,CADhC,UAEF,CAEA,MAGE,eAAgB,CADhB,iCAA6B,CAA7B,yBAA6B,CAD7B,OAGF,CAEA,gBAEE,eAAgB,CADhB,gBAEF,CAEA,UAIE,qBAAsB,CACtB,iBAAkB,CAElB,UAAW,CALX,wBAAyB,CACzB,gBAAiB,CAGjB,WAAY,CALZ,UAOF,CAEA,UAME,UAAW,CALX,cAAe,CAIf,wBAA0B,CAH1B,iBAAkB,CAElB,SAAU,CADV,QAIF,CAEA,gBACE,aACF,CAEA,QACE,0BACF,CAEA,MAEE,qBAAsB,CAGtB,kBAAmB,CAFnB,kBAAmB,CAGnB,eAAgB,CAFhB,YAAa,CAHb,iBAMF,CAEA,uBACE,mCAA4B,CAA5B,2BACF,CAEA,WAUE,wBAAyB,CADzB,kBAAmB,CAJnB,WAAY,CAGZ,gBAAiB,CAPjB,iBAAkB,CAClB,UAAW,CAKX,iBAAkB,CAJlB,oBAAqB,CAGrB,kCAA2B,CAA3B,0BAA2B,CAK3B,8BAAgC,CAPhC,UAQF,CAEA,iBAEE,wBAAyB,CADzB,cAEF,CAEA,YAEE,UAAW,CADX,wBAEF","sources":["scenes/launch_model/styles/modelCardStyle.css","scenes/register_model/styles/registerModelStyle.css"],"sourcesContent":[".container {\n display: block;\n position: relative;\n width: 300px;\n height: 300px;\n cursor: pointer;\n border-radius: 20px;\n}\n.descriptionCard {\n position: relative;\n top: -1px;\n left: -1px;\n width: 300px;\n height: 300px;\n padding: 20px;\n border-radius: 20px;\n}\n.drawerCard {\n position: relative;\n padding: 20px 80px 0;\n min-height: 100%;\n width: 60vw;\n}\n.p {\n display: -webkit-box;\n -webkit-line-clamp: 4;\n -webkit-box-orient: vertical;\n overflow: hidden;\n text-overflow: ellipsis;\n word-break: break-word;\n font-size: 14px;\n padding: 0px 10px;\n}\n.formContainer {\n height: 80%;\n overflow: scroll;\n padding: 0 10px;\n}\n.buttonsContainer {\n position: absolute;\n bottom: 50px;\n left: 100px;\n right: 100px;\n display: flex;\n justify-content: space-between;\n align-items: center;\n}\n.buttonContainer {\n width: 45%;\n border-width: 0px;\n background-color: transparent;\n}\n.buttonItem {\n width: 100%;\n padding: 5px;\n border-radius: 4px;\n border: 1px solid #e5e7eb;\n border-width: 1px;\n border-color: #e5e7eb;\n}\n.instructionText {\n font-size: 12px;\n color: #666666;\n font-style: italic;\n margin: 30px 0;\n text-align: center;\n}\n.iconRow {\n position: absolute;\n bottom: 20px;\n left: 20px;\n right: 20px;\n display: flex;\n justify-content: space-between;\n align-items: center;\n}\n.iconItem {\n display: flex;\n flex-direction: column;\n align-items: center;\n margin: 20px;\n}\n.boldIconText {\n font-weight: bold;\n font-size: 1.2em;\n}\n.muiIcon {\n font-size: 1.5em;\n}\n.smallText {\n font-size: 0.8em;\n}\n.dialogBox {\n width: 1241px;\n height: 607px;\n background-color: #fff;\n margin: 32px;\n overflow-x: scroll;\n}\n.dialogTitle {\n display: flex;\n justify-content: space-between;\n padding: 20px 20px 7px;\n color: #000;\n}\n.dialogTitle-model_name {\n font-size: 18px;\n font-weight: 700;\n}\n.pathBox {\n width: 160px;\n cursor: pointer;\n overflow: hidden;\n white-space: nowrap;\n text-overflow: ellipsis;\n}\n.pathBox2 {\n width: 300px;\n}\n.copyPath {\n font-size: 14px !important;\n cursor: pointer;\n color: #555;\n}\n.empty {\n position: absolute;\n left: 50%;\n top: 30%;\n font-size: 20px;\n color: #555;\n transform: translate(-50%, 0);\n}\n.deleteDialog {\n display: flex;\n align-items: center;\n}\n\n.warningIcon {\n margin-right: 10px;\n color: rgb(237, 108, 2);\n}\n",".formBox {\n position: relative;\n max-width: 50vw;\n min-width: 50vw;\n max-height: 80vh;\n overflow: auto;\n padding: 40px 20px 0 0;\n transition: all 0.4s ease-in-out;\n}\n\n.broaden {\n max-width: 100%;\n min-width: 100%;\n padding-right: 0;\n}\n\n.show-json {\n display: flex;\n align-items: center;\n position: fixed;\n top: 90px;\n right: 60px;\n color: #444;\n}\n\n.icon {\n position: absolute;\n right: -40px;\n cursor: pointer;\n margin-left: 20px;\n}\n\n.icon:hover {\n color: #1976d2;\n}\n\n.arrow {\n font-size: 24px !important;\n}\n\n.jsonBox {\n position: relative;\n min-height: 80vh;\n width: 100%;\n transition: all 0.4s ease-in-out;\n}\n\n.hide {\n width: 0;\n transform: translate(30vw, 0);\n overflow: hidden;\n}\n\n.jsonBox-header {\n line-height: 40px;\n font-weight: 700;\n}\n\n.textarea {\n width: 100%;\n height: calc(100% - 40px);\n padding: 5px 10px;\n border: 1px solid #ddd;\n border-radius: 5px;\n resize: none;\n color: #444;\n}\n\n.copyIcon {\n cursor: pointer;\n position: absolute;\n top: 13px;\n right: 5px;\n font-size: 16px !important;\n color: #555;\n}\n\n.copyIcon:hover {\n color: #1976d2;\n}\n\n.addBtn {\n margin-left: 20px !important;\n}\n\n.item {\n position: relative;\n background-color: #eee;\n margin: 10px 50px 0;\n padding: 20px;\n border-radius: 10px;\n overflow: hidden;\n}\n\n.item:hover .deleteBtn {\n transform: translateX(-50px);\n}\n\n.deleteBtn {\n position: absolute;\n right: 20px;\n top: calc(50% - 25px);\n width: 50px;\n height: 50px;\n transform: translateX(80px);\n text-align: center;\n line-height: 70px;\n border-radius: 25px;\n background-color: #1976d2;\n transition: all 0.3s ease-in-out;\n}\n\n.deleteBtn:hover {\n cursor: pointer;\n box-shadow: 0 0 10px #aaa;\n}\n\n.deleteIcon {\n font-size: 28px !important;\n color: #fff;\n}\n"],"names":[],"sourceRoot":""}