xinference 1.6.0__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/client/restful/restful_client.py +1 -1
- xinference/conftest.py +0 -7
- xinference/core/media_interface.py +9 -8
- xinference/core/model.py +13 -6
- xinference/core/scheduler.py +1 -10
- xinference/core/worker.py +0 -10
- xinference/model/audio/model_spec.json +53 -1
- xinference/model/audio/model_spec_modelscope.json +57 -1
- xinference/model/embedding/core.py +19 -11
- xinference/model/image/model_spec.json +10 -1
- xinference/model/image/model_spec_modelscope.json +20 -0
- xinference/model/llm/__init__.py +6 -54
- xinference/model/llm/core.py +19 -5
- xinference/model/llm/llama_cpp/core.py +59 -3
- xinference/model/llm/llama_cpp/memory.py +455 -0
- xinference/model/llm/llm_family.json +185 -397
- xinference/model/llm/llm_family.py +88 -16
- xinference/model/llm/llm_family_modelscope.json +199 -421
- xinference/model/llm/llm_family_openmind_hub.json +0 -34
- xinference/model/llm/sglang/core.py +4 -0
- xinference/model/llm/transformers/__init__.py +27 -6
- xinference/model/llm/transformers/chatglm.py +4 -2
- xinference/model/llm/transformers/core.py +49 -28
- xinference/model/llm/transformers/deepseek_v2.py +6 -49
- xinference/model/llm/transformers/gemma3.py +119 -164
- xinference/{thirdparty/omnilmm/train → model/llm/transformers/multimodal}/__init__.py +1 -1
- xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
- xinference/model/llm/transformers/multimodal/core.py +205 -0
- xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
- xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
- xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
- xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
- xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
- xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
- xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
- xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
- xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
- xinference/model/llm/transformers/opt.py +4 -2
- xinference/model/llm/transformers/utils.py +6 -37
- xinference/model/llm/vllm/core.py +4 -0
- xinference/model/rerank/core.py +7 -1
- xinference/model/rerank/utils.py +17 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.ddf9eaee.js +3 -0
- xinference/web/ui/build/static/js/main.ddf9eaee.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e637ed5fa9ca6491b03892b6949c03afd4960fe36ac25744488e7e1982aa19.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/77ac2665a784e99501ae95d32ef5937837a0439a47e965d291b38e99cb619f5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d4ed4e82bfe69915999ec83f5feaa4301c75ecc6bdf1c78f2d03e4671ecbefc8.json +1 -0
- xinference/web/ui/src/locales/en.json +3 -1
- xinference/web/ui/src/locales/zh.json +3 -1
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/METADATA +16 -14
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/RECORD +60 -76
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/WHEEL +1 -1
- xinference/model/llm/transformers/cogvlm2.py +0 -442
- xinference/model/llm/transformers/cogvlm2_video.py +0 -333
- xinference/model/llm/transformers/deepseek_vl.py +0 -280
- xinference/model/llm/transformers/glm_edge_v.py +0 -213
- xinference/model/llm/transformers/intern_vl.py +0 -526
- xinference/model/llm/transformers/internlm2.py +0 -94
- xinference/model/llm/transformers/minicpmv25.py +0 -193
- xinference/model/llm/transformers/omnilmm.py +0 -132
- xinference/model/llm/transformers/qwen2_audio.py +0 -179
- xinference/model/llm/transformers/qwen_vl.py +0 -360
- xinference/thirdparty/omnilmm/LICENSE +0 -201
- xinference/thirdparty/omnilmm/__init__.py +0 -0
- xinference/thirdparty/omnilmm/chat.py +0 -218
- xinference/thirdparty/omnilmm/constants.py +0 -4
- xinference/thirdparty/omnilmm/conversation.py +0 -332
- xinference/thirdparty/omnilmm/model/__init__.py +0 -1
- xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
- xinference/thirdparty/omnilmm/model/resampler.py +0 -166
- xinference/thirdparty/omnilmm/model/utils.py +0 -578
- xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
- xinference/thirdparty/omnilmm/utils.py +0 -134
- xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
- xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
- /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.ddf9eaee.js.LICENSE.txt} +0 -0
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/top_level.txt +0 -0
xinference/thirdparty/omnilmm/model/resampler.py (deleted)
@@ -1,166 +0,0 @@
-# Copyright (c) Alibaba Cloud.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-import math
-from functools import partial
-
-import numpy as np
-import torch
-from torch import nn
-from torch.nn import functional as F
-from torch.nn.init import trunc_normal_
-
-
-def get_abs_pos(abs_pos, tgt_size):
-    # abs_pos: L, C
-    # tgt_size: M
-    # return: M, C
-    src_size = int(math.sqrt(abs_pos.size(0)))
-    tgt_size = int(math.sqrt(tgt_size))
-    dtype = abs_pos.dtype
-
-    if src_size != tgt_size:
-        return (
-            F.interpolate(
-                abs_pos.float().reshape(1, src_size, src_size, -1).permute(0, 3, 1, 2),
-                size=(tgt_size, tgt_size),
-                mode="bicubic",
-                align_corners=False,
-            )
-            .permute(0, 2, 3, 1)
-            .flatten(0, 2)
-            .to(dtype=dtype)
-        )
-    else:
-        return abs_pos
-
-
-# https://github.com/facebookresearch/mae/blob/efb2a8062c206524e35e47d04501ed4f544c0ae8/util/pos_embed.py#L20
-def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
-    """
-    grid_size: int of the grid height and width
-    return:
-    pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token)
-    """
-    grid_h = np.arange(grid_size, dtype=np.float32)
-    grid_w = np.arange(grid_size, dtype=np.float32)
-    grid = np.meshgrid(grid_w, grid_h)  # here w goes first
-    grid = np.stack(grid, axis=0)
-
-    grid = grid.reshape([2, 1, grid_size, grid_size])
-    pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
-    if cls_token:
-        pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
-    return pos_embed
-
-
-def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
-    assert embed_dim % 2 == 0
-
-    # use half of dimensions to encode grid_h
-    emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0])  # (H*W, D/2)
-    emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1])  # (H*W, D/2)
-
-    emb = np.concatenate([emb_h, emb_w], axis=1)  # (H*W, D)
-    return emb
-
-
-def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
-    """
-    embed_dim: output dimension for each position
-    pos: a list of positions to be encoded: size (M,)
-    out: (M, D)
-    """
-    assert embed_dim % 2 == 0
-    omega = np.arange(embed_dim // 2, dtype=np.float32)
-    omega /= embed_dim / 2.0
-    omega = 1.0 / 10000**omega  # (D/2,)
-
-    pos = pos.reshape(-1)  # (M,)
-    out = np.einsum("m,d->md", pos, omega)  # (M, D/2), outer product
-
-    emb_sin = np.sin(out)  # (M, D/2)
-    emb_cos = np.cos(out)  # (M, D/2)
-
-    emb = np.concatenate([emb_sin, emb_cos], axis=1)  # (M, D)
-    return emb
-
-
-class Resampler(nn.Module):
-    """
-    A 2D perceiver-resampler network with one cross attention layers by
-    (grid_size**2) learnable queries and 2d sincos pos_emb
-    Outputs:
-    A tensor with the shape of (grid_size**2, embed_dim)
-    """
-
-    def __init__(
-        self,
-        grid_size,
-        embed_dim,
-        num_heads,
-        kv_dim=None,
-        norm_layer=partial(nn.LayerNorm, eps=1e-6),
-    ):
-        super().__init__()
-        self.num_queries = grid_size**2
-        self.embed_dim = embed_dim
-        self.num_heads = num_heads
-
-        self.pos_embed = nn.Parameter(
-            torch.from_numpy(get_2d_sincos_pos_embed(embed_dim, grid_size)).float()
-        ).requires_grad_(False)
-
-        self.query = nn.Parameter(torch.zeros(self.num_queries, embed_dim))
-        trunc_normal_(self.query, std=0.02)
-
-        if kv_dim is not None and kv_dim != embed_dim:
-            self.kv_proj = nn.Linear(kv_dim, embed_dim, bias=False)
-        else:
-            self.kv_proj = nn.Identity()
-
-        self.attn = nn.MultiheadAttention(embed_dim, num_heads)
-        self.ln_q = norm_layer(embed_dim)
-        self.ln_kv = norm_layer(embed_dim)
-
-        self.ln_post = norm_layer(embed_dim)
-        self.proj = nn.Parameter(
-            (embed_dim**-0.5) * torch.randn(embed_dim, embed_dim)
-        )
-
-        self.apply(self._init_weights)
-
-    def _init_weights(self, m):
-        if isinstance(m, nn.Linear):
-            trunc_normal_(m.weight, std=0.02)
-            if isinstance(m, nn.Linear) and m.bias is not None:
-                nn.init.constant_(m.bias, 0)
-        elif isinstance(m, nn.LayerNorm):
-            nn.init.constant_(m.bias, 0)
-            nn.init.constant_(m.weight, 1.0)
-
-    def forward(self, x, attn_mask=None):
-        pos_embed = get_abs_pos(self.pos_embed, x.size(1))
-
-        x = self.kv_proj(x)
-        x = self.ln_kv(x).permute(1, 0, 2)
-
-        N = x.shape[1]
-        q = self.ln_q(self.query)
-        # print((self._repeat(q, N) + self.pos_embed.unsqueeze(1)).dtype, (x + pos_embed.unsqueeze(1)).dtype, x.dtype)
-        out = self.attn(
-            self._repeat(q, N) + self.pos_embed.unsqueeze(1),
-            x + pos_embed.unsqueeze(1),
-            x,
-            attn_mask=attn_mask,
-        )[0]
-        x = out.permute(1, 0, 2)
-
-        x = self.ln_post(x)
-        x = x @ self.proj
-        return x
-
-    def _repeat(self, query, N: int):
-        return query.unsqueeze(1).repeat(1, N, 1)
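
For context when comparing the two versions: the deleted class above is the perceiver-style resampler that the removed OmniLMM third-party code used to cross-attend a fixed set of grid_size**2 learnable queries over incoming visual features, producing a fixed-length sequence of embed_dim-wide tokens. The sketch below shows how it could be instantiated and called; it is a minimal illustration, and the grid size, embedding width, head count, input feature dimension, and batch shape are assumptions chosen for the example rather than values taken from this diff.

import torch

# Illustrative sizes only (not from the xinference source).
grid_size, embed_dim, num_heads, kv_dim = 16, 1024, 8, 1408

resampler = Resampler(
    grid_size=grid_size,   # grid_size**2 = 256 learnable query tokens
    embed_dim=embed_dim,   # width of the output tokens
    num_heads=num_heads,   # heads for the single cross-attention layer
    kv_dim=kv_dim,         # input features are projected from kv_dim to embed_dim
)

# A batch of 2 images, each encoded as 1024 (= 32 x 32) patch features of width kv_dim.
# The patch count must be a perfect square so get_abs_pos can interpolate the 2D pos_embed.
vision_features = torch.randn(2, 1024, kv_dim)

with torch.no_grad():
    pooled = resampler(vision_features)

print(pooled.shape)  # torch.Size([2, 256, 1024]): a fixed number of tokens per image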
|