xinference 1.6.0.post1__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +79 -2
- xinference/client/restful/restful_client.py +65 -3
- xinference/conftest.py +0 -7
- xinference/core/media_interface.py +132 -8
- xinference/core/model.py +44 -6
- xinference/core/scheduler.py +1 -10
- xinference/core/supervisor.py +8 -17
- xinference/core/worker.py +5 -27
- xinference/deploy/cmdline.py +6 -2
- xinference/model/audio/chattts.py +24 -39
- xinference/model/audio/cosyvoice.py +18 -30
- xinference/model/audio/funasr.py +42 -0
- xinference/model/audio/model_spec.json +71 -1
- xinference/model/audio/model_spec_modelscope.json +76 -2
- xinference/model/audio/utils.py +75 -0
- xinference/model/core.py +1 -0
- xinference/model/embedding/__init__.py +74 -18
- xinference/model/embedding/core.py +98 -589
- xinference/model/embedding/embed_family.py +133 -0
- xinference/{thirdparty/omnilmm/train → model/embedding/flag}/__init__.py +1 -1
- xinference/model/embedding/flag/core.py +282 -0
- xinference/model/embedding/model_spec.json +24 -0
- xinference/model/embedding/model_spec_modelscope.json +24 -0
- xinference/model/embedding/sentence_transformers/__init__.py +13 -0
- xinference/model/embedding/sentence_transformers/core.py +399 -0
- xinference/model/embedding/vllm/core.py +95 -0
- xinference/model/image/model_spec.json +30 -3
- xinference/model/image/model_spec_modelscope.json +41 -2
- xinference/model/image/stable_diffusion/core.py +144 -53
- xinference/model/llm/__init__.py +6 -54
- xinference/model/llm/core.py +19 -5
- xinference/model/llm/llama_cpp/core.py +59 -3
- xinference/model/llm/llama_cpp/memory.py +457 -0
- xinference/model/llm/llm_family.json +247 -402
- xinference/model/llm/llm_family.py +88 -16
- xinference/model/llm/llm_family_modelscope.json +260 -421
- xinference/model/llm/llm_family_openmind_hub.json +0 -34
- xinference/model/llm/sglang/core.py +8 -0
- xinference/model/llm/transformers/__init__.py +27 -6
- xinference/model/llm/transformers/chatglm.py +4 -2
- xinference/model/llm/transformers/core.py +49 -28
- xinference/model/llm/transformers/deepseek_v2.py +6 -49
- xinference/model/llm/transformers/gemma3.py +119 -164
- xinference/model/llm/transformers/multimodal/__init__.py +13 -0
- xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
- xinference/model/llm/transformers/multimodal/core.py +205 -0
- xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
- xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
- xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
- xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
- xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
- xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
- xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
- xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
- xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
- xinference/model/llm/transformers/opt.py +4 -2
- xinference/model/llm/transformers/utils.py +6 -37
- xinference/model/llm/utils.py +11 -0
- xinference/model/llm/vllm/core.py +7 -0
- xinference/model/rerank/core.py +91 -3
- xinference/model/rerank/model_spec.json +24 -0
- xinference/model/rerank/model_spec_modelscope.json +24 -0
- xinference/model/rerank/utils.py +20 -2
- xinference/model/utils.py +38 -1
- xinference/model/video/diffusers.py +65 -3
- xinference/model/video/model_spec.json +31 -4
- xinference/model/video/model_spec_modelscope.json +32 -4
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.013f296b.css +2 -0
- xinference/web/ui/build/static/css/main.013f296b.css.map +1 -0
- xinference/web/ui/build/static/js/main.8a9e3ba0.js +3 -0
- xinference/web/ui/build/static/js/main.8a9e3ba0.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/34cfbfb7836e136ba3261cfd411cc554bf99ba24b35dcceebeaa4f008cb3c9dc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6595880facebca7ceace6f17cf21c3a5a9219a2f52fb0ba9f3cf1131eddbcf6b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/aa998bc2d9c11853add6b8a2e08f50327f56d8824ccaaec92d6dde1b305f0d85.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c748246b1d7bcebc16153be69f37e955bb2145526c47dd425aeeff70d3004dbc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e31234e95d60a5a7883fbcd70de2475dc1c88c90705df1a530abb68f86f80a51.json +1 -0
- xinference/web/ui/src/locales/en.json +21 -8
- xinference/web/ui/src/locales/ja.json +224 -0
- xinference/web/ui/src/locales/ko.json +224 -0
- xinference/web/ui/src/locales/zh.json +21 -8
- {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/METADATA +14 -11
- {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/RECORD +93 -100
- {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/WHEEL +1 -1
- xinference/model/llm/transformers/cogvlm2.py +0 -442
- xinference/model/llm/transformers/cogvlm2_video.py +0 -333
- xinference/model/llm/transformers/deepseek_vl.py +0 -280
- xinference/model/llm/transformers/glm_edge_v.py +0 -213
- xinference/model/llm/transformers/intern_vl.py +0 -526
- xinference/model/llm/transformers/internlm2.py +0 -94
- xinference/model/llm/transformers/minicpmv25.py +0 -193
- xinference/model/llm/transformers/omnilmm.py +0 -132
- xinference/model/llm/transformers/qwen2_audio.py +0 -179
- xinference/model/llm/transformers/qwen_vl.py +0 -360
- xinference/thirdparty/omnilmm/LICENSE +0 -201
- xinference/thirdparty/omnilmm/chat.py +0 -218
- xinference/thirdparty/omnilmm/constants.py +0 -4
- xinference/thirdparty/omnilmm/conversation.py +0 -332
- xinference/thirdparty/omnilmm/model/__init__.py +0 -1
- xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
- xinference/thirdparty/omnilmm/model/resampler.py +0 -166
- xinference/thirdparty/omnilmm/model/utils.py +0 -578
- xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
- xinference/thirdparty/omnilmm/utils.py +0 -134
- xinference/web/ui/build/static/css/main.337afe76.css +0 -2
- xinference/web/ui/build/static/css/main.337afe76.css.map +0 -1
- xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
- xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +0 -1
- /xinference/{thirdparty/omnilmm → model/embedding/vllm}/__init__.py +0 -0
- /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.8a9e3ba0.js.LICENSE.txt} +0 -0
- {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/top_level.txt +0 -0
xinference/thirdparty/omnilmm/model/resampler.py (deleted)
@@ -1,166 +0,0 @@
-# Copyright (c) Alibaba Cloud.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-import math
-from functools import partial
-
-import numpy as np
-import torch
-from torch import nn
-from torch.nn import functional as F
-from torch.nn.init import trunc_normal_
-
-
-def get_abs_pos(abs_pos, tgt_size):
-    # abs_pos: L, C
-    # tgt_size: M
-    # return: M, C
-    src_size = int(math.sqrt(abs_pos.size(0)))
-    tgt_size = int(math.sqrt(tgt_size))
-    dtype = abs_pos.dtype
-
-    if src_size != tgt_size:
-        return (
-            F.interpolate(
-                abs_pos.float().reshape(1, src_size, src_size, -1).permute(0, 3, 1, 2),
-                size=(tgt_size, tgt_size),
-                mode="bicubic",
-                align_corners=False,
-            )
-            .permute(0, 2, 3, 1)
-            .flatten(0, 2)
-            .to(dtype=dtype)
-        )
-    else:
-        return abs_pos
-
-
-# https://github.com/facebookresearch/mae/blob/efb2a8062c206524e35e47d04501ed4f544c0ae8/util/pos_embed.py#L20
-def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
-    """
-    grid_size: int of the grid height and width
-    return:
-    pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token)
-    """
-    grid_h = np.arange(grid_size, dtype=np.float32)
-    grid_w = np.arange(grid_size, dtype=np.float32)
-    grid = np.meshgrid(grid_w, grid_h)  # here w goes first
-    grid = np.stack(grid, axis=0)
-
-    grid = grid.reshape([2, 1, grid_size, grid_size])
-    pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
-    if cls_token:
-        pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
-    return pos_embed
-
-
-def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
-    assert embed_dim % 2 == 0
-
-    # use half of dimensions to encode grid_h
-    emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0])  # (H*W, D/2)
-    emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1])  # (H*W, D/2)
-
-    emb = np.concatenate([emb_h, emb_w], axis=1)  # (H*W, D)
-    return emb
-
-
-def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
-    """
-    embed_dim: output dimension for each position
-    pos: a list of positions to be encoded: size (M,)
-    out: (M, D)
-    """
-    assert embed_dim % 2 == 0
-    omega = np.arange(embed_dim // 2, dtype=np.float32)
-    omega /= embed_dim / 2.0
-    omega = 1.0 / 10000**omega  # (D/2,)
-
-    pos = pos.reshape(-1)  # (M,)
-    out = np.einsum("m,d->md", pos, omega)  # (M, D/2), outer product
-
-    emb_sin = np.sin(out)  # (M, D/2)
-    emb_cos = np.cos(out)  # (M, D/2)
-
-    emb = np.concatenate([emb_sin, emb_cos], axis=1)  # (M, D)
-    return emb
-
-
-class Resampler(nn.Module):
-    """
-    A 2D perceiver-resampler network with one cross attention layers by
-    (grid_size**2) learnable queries and 2d sincos pos_emb
-    Outputs:
-        A tensor with the shape of (grid_size**2, embed_dim)
-    """
-
-    def __init__(
-        self,
-        grid_size,
-        embed_dim,
-        num_heads,
-        kv_dim=None,
-        norm_layer=partial(nn.LayerNorm, eps=1e-6),
-    ):
-        super().__init__()
-        self.num_queries = grid_size**2
-        self.embed_dim = embed_dim
-        self.num_heads = num_heads
-
-        self.pos_embed = nn.Parameter(
-            torch.from_numpy(get_2d_sincos_pos_embed(embed_dim, grid_size)).float()
-        ).requires_grad_(False)
-
-        self.query = nn.Parameter(torch.zeros(self.num_queries, embed_dim))
-        trunc_normal_(self.query, std=0.02)
-
-        if kv_dim is not None and kv_dim != embed_dim:
-            self.kv_proj = nn.Linear(kv_dim, embed_dim, bias=False)
-        else:
-            self.kv_proj = nn.Identity()
-
-        self.attn = nn.MultiheadAttention(embed_dim, num_heads)
-        self.ln_q = norm_layer(embed_dim)
-        self.ln_kv = norm_layer(embed_dim)
-
-        self.ln_post = norm_layer(embed_dim)
-        self.proj = nn.Parameter(
-            (embed_dim**-0.5) * torch.randn(embed_dim, embed_dim)
-        )
-
-        self.apply(self._init_weights)
-
-    def _init_weights(self, m):
-        if isinstance(m, nn.Linear):
-            trunc_normal_(m.weight, std=0.02)
-            if isinstance(m, nn.Linear) and m.bias is not None:
-                nn.init.constant_(m.bias, 0)
-        elif isinstance(m, nn.LayerNorm):
-            nn.init.constant_(m.bias, 0)
-            nn.init.constant_(m.weight, 1.0)
-
-    def forward(self, x, attn_mask=None):
-        pos_embed = get_abs_pos(self.pos_embed, x.size(1))
-
-        x = self.kv_proj(x)
-        x = self.ln_kv(x).permute(1, 0, 2)
-
-        N = x.shape[1]
-        q = self.ln_q(self.query)
-        # print((self._repeat(q, N) + self.pos_embed.unsqueeze(1)).dtype, (x + pos_embed.unsqueeze(1)).dtype, x.dtype)
-        out = self.attn(
-            self._repeat(q, N) + self.pos_embed.unsqueeze(1),
-            x + pos_embed.unsqueeze(1),
-            x,
-            attn_mask=attn_mask,
-        )[0]
-        x = out.permute(1, 0, 2)
-
-        x = self.ln_post(x)
-        x = x @ self.proj
-        return x
-
-    def _repeat(self, query, N: int):
-        return query.unsqueeze(1).repeat(1, N, 1)