xinference 0.9.4__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic; see the registry's advisory page for more details.
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +47 -18
- xinference/api/oauth2/types.py +1 -0
- xinference/api/restful_api.py +34 -7
- xinference/client/oscar/actor_client.py +4 -3
- xinference/client/restful/restful_client.py +20 -4
- xinference/conftest.py +13 -2
- xinference/core/supervisor.py +48 -1
- xinference/core/worker.py +139 -20
- xinference/deploy/cmdline.py +119 -20
- xinference/model/embedding/core.py +1 -2
- xinference/model/llm/__init__.py +4 -6
- xinference/model/llm/ggml/llamacpp.py +2 -10
- xinference/model/llm/llm_family.json +877 -13
- xinference/model/llm/llm_family.py +15 -0
- xinference/model/llm/llm_family_modelscope.json +571 -0
- xinference/model/llm/pytorch/chatglm.py +2 -0
- xinference/model/llm/pytorch/core.py +22 -26
- xinference/model/llm/pytorch/deepseek_vl.py +232 -0
- xinference/model/llm/pytorch/internlm2.py +2 -0
- xinference/model/llm/pytorch/omnilmm.py +153 -0
- xinference/model/llm/pytorch/qwen_vl.py +2 -0
- xinference/model/llm/pytorch/yi_vl.py +4 -2
- xinference/model/llm/utils.py +53 -5
- xinference/model/llm/vllm/core.py +54 -6
- xinference/model/rerank/core.py +3 -0
- xinference/thirdparty/deepseek_vl/__init__.py +31 -0
- xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
- xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
- xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
- xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
- xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
- xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
- xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
- xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
- xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
- xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
- xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
- xinference/thirdparty/omnilmm/__init__.py +0 -0
- xinference/thirdparty/omnilmm/chat.py +216 -0
- xinference/thirdparty/omnilmm/constants.py +4 -0
- xinference/thirdparty/omnilmm/conversation.py +332 -0
- xinference/thirdparty/omnilmm/model/__init__.py +1 -0
- xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
- xinference/thirdparty/omnilmm/model/resampler.py +166 -0
- xinference/thirdparty/omnilmm/model/utils.py +563 -0
- xinference/thirdparty/omnilmm/train/__init__.py +13 -0
- xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
- xinference/thirdparty/omnilmm/utils.py +134 -0
- xinference/types.py +15 -19
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.76ef2b17.js +3 -0
- xinference/web/ui/build/static/js/main.76ef2b17.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/15e2cf8cd8d0989719b6349428ff576f9009ff4c2dcc52378be0bd938e82495e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/35d0e4a317e5582cbb79d901302e9d706520ac53f8a734c2fd8bfde6eb5a4f02.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3c2f277c93c5f1638e08db38df0d0fb4e58d1c5571aea03241a5c04ff4094704.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9cbcb6d77ba21b22c6950b6fb5b305d23c19cf747f99f7d48b6b046f8f7b1b0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d076fd56cf3b15ed2433e3744b98c6b4e4410a19903d1db4de5bba0e1a1b3347.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/daad8131d91134f6d7aef895a0c9c32e1cb928277cb5aa66c01028126d215be0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e6eccc9aa641e7da833492e27846dc965f9750281420977dc84654ca6ed221e4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f16aec63602a77bd561d0e67fa00b76469ac54b8033754bba114ec5eb3257964.json +1 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/METADATA +25 -12
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/RECORD +79 -58
- xinference/model/llm/ggml/ctransformers.py +0 -281
- xinference/model/llm/ggml/ctransformers_util.py +0 -161
- xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
- xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/18e5d5422e2464abf4a3e6d38164570e2e426e0a921e9a2628bbae81b18da353.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/98b7ef307f436affe13d75a4f265b27e828ccc2b10ffae6513abe2681bc11971.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +0 -1
- /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.76ef2b17.js.LICENSE.txt} +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/LICENSE +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/WHEEL +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# Copyright (c) Alibaba Cloud.
|
|
2
|
+
#
|
|
3
|
+
# This source code is licensed under the license found in the
|
|
4
|
+
# LICENSE file in the root directory of this source tree.
|
|
5
|
+
|
|
6
|
+
import math
|
|
7
|
+
from functools import partial
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import torch
|
|
11
|
+
from torch import nn
|
|
12
|
+
from torch.nn import functional as F
|
|
13
|
+
from torch.nn.init import trunc_normal_
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_abs_pos(abs_pos, tgt_size):
    """Resize a flattened square grid of absolute position embeddings.

    abs_pos: (L, C) tensor whose L rows form a sqrt(L) x sqrt(L) grid.
    tgt_size: target sequence length M (assumed to be a perfect square).
    Returns an (M, C) tensor — bicubically resampled when the grid sides
    differ, otherwise the input unchanged.
    """
    src_side = int(math.sqrt(abs_pos.size(0)))
    tgt_side = int(math.sqrt(tgt_size))

    # Same grid resolution: nothing to do.
    if src_side == tgt_side:
        return abs_pos

    orig_dtype = abs_pos.dtype
    # Reshape to NCHW for F.interpolate, resample, then flatten back to (M, C).
    grid = abs_pos.float().reshape(1, src_side, src_side, -1).permute(0, 3, 1, 2)
    resized = F.interpolate(
        grid,
        size=(tgt_side, tgt_side),
        mode="bicubic",
        align_corners=False,
    )
    return resized.permute(0, 2, 3, 1).flatten(0, 2).to(dtype=orig_dtype)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# https://github.com/facebookresearch/mae/blob/efb2a8062c206524e35e47d04501ed4f544c0ae8/util/pos_embed.py#L20
def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
    """Build a 2D sin-cos positional embedding table.

    grid_size: int, height and width of the square grid.
    Returns pos_embed of shape [grid_size*grid_size, embed_dim], or
    [1+grid_size*grid_size, embed_dim] with a leading all-zero row when
    cls_token is True.
    """
    coords = np.arange(grid_size, dtype=np.float32)
    # meshgrid with 'xy' indexing: the w coordinate varies fastest.
    mesh_w, mesh_h = np.meshgrid(coords, coords)
    grid = np.stack([mesh_w, mesh_h], axis=0).reshape([2, 1, grid_size, grid_size])

    pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
    if cls_token:
        # Prepend a zero vector as the [CLS] position.
        pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
    return pos_embed
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
    """Encode a [2, ...] coordinate grid: half of embed_dim per axis.

    Returns an (H*W, D) array — axis-0 and axis-1 encodings concatenated.
    """
    assert embed_dim % 2 == 0
    half = embed_dim // 2

    axis_embeds = [
        get_1d_sincos_pos_embed_from_grid(half, grid[0]),  # (H*W, D/2)
        get_1d_sincos_pos_embed_from_grid(half, grid[1]),  # (H*W, D/2)
    ]
    return np.concatenate(axis_embeds, axis=1)  # (H*W, D)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
    """
    embed_dim: output dimension for each position (must be even)
    pos: positions to be encoded — any array-like of shape (M,)
         (plain Python lists are accepted, as the contract states)
    out: (M, D)
    """
    assert embed_dim % 2 == 0
    omega = np.arange(embed_dim // 2, dtype=np.float32)
    omega /= embed_dim / 2.0
    omega = 1.0 / 10000**omega  # (D/2,) geometric frequency ladder

    # np.asarray makes the documented "list of positions" input actually work;
    # plain lists have no .reshape. Ndarray inputs pass through unchanged.
    pos = np.asarray(pos).reshape(-1)  # (M,)
    out = np.einsum("m,d->md", pos, omega)  # (M, D/2), outer product

    emb_sin = np.sin(out)  # (M, D/2)
    emb_cos = np.cos(out)  # (M, D/2)

    emb = np.concatenate([emb_sin, emb_cos], axis=1)  # (M, D)
    return emb
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class Resampler(nn.Module):
    """
    A 2D perceiver-resampler network with one cross attention layers by
    (grid_size**2) learnable queries and 2d sincos pos_emb
    Outputs:
        A tensor with the shape of (grid_size**2, embed_dim)
    """

    def __init__(
        self,
        grid_size,
        embed_dim,
        num_heads,
        kv_dim=None,
        norm_layer=partial(nn.LayerNorm, eps=1e-6),
    ):
        super().__init__()
        # Number of learnable latent queries = one per grid cell.
        self.num_queries = grid_size**2
        self.embed_dim = embed_dim
        self.num_heads = num_heads

        # Fixed (non-trainable) 2D sin-cos positional table for the queries.
        self.pos_embed = nn.Parameter(
            torch.from_numpy(get_2d_sincos_pos_embed(embed_dim, grid_size)).float()
        ).requires_grad_(False)

        self.query = nn.Parameter(torch.zeros(self.num_queries, embed_dim))
        trunc_normal_(self.query, std=0.02)

        # Project keys/values into embed_dim only when their width differs;
        # otherwise pass them through untouched.
        if kv_dim is not None and kv_dim != embed_dim:
            self.kv_proj = nn.Linear(kv_dim, embed_dim, bias=False)
        else:
            self.kv_proj = nn.Identity()

        self.attn = nn.MultiheadAttention(embed_dim, num_heads)
        self.ln_q = norm_layer(embed_dim)
        self.ln_kv = norm_layer(embed_dim)

        self.ln_post = norm_layer(embed_dim)
        # Final learned output projection, scaled like attention weights.
        self.proj = nn.Parameter(
            (embed_dim**-0.5) * torch.randn(embed_dim, embed_dim)
        )

        self.apply(self._init_weights)

    def _init_weights(self, m):
        # ViT-style init: truncated-normal Linear weights, zero biases,
        # unit-weight / zero-bias LayerNorm.
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=0.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def forward(self, x, attn_mask=None):
        # x: assumed (batch, seq_len, kv_dim) — TODO confirm against callers.
        # Resample the positional table to the incoming sequence length.
        pos_embed = get_abs_pos(self.pos_embed, x.size(1))

        x = self.kv_proj(x)
        # Permute to (seq, batch, dim) as nn.MultiheadAttention expects.
        x = self.ln_kv(x).permute(1, 0, 2)

        N = x.shape[1]
        q = self.ln_q(self.query)
        # print((self._repeat(q, N) + self.pos_embed.unsqueeze(1)).dtype, (x + pos_embed.unsqueeze(1)).dtype, x.dtype)
        # Cross-attention: queries carry the fixed-size table (they always
        # have num_queries rows); keys carry the resized pos_embed.
        out = self.attn(
            self._repeat(q, N) + self.pos_embed.unsqueeze(1),
            x + pos_embed.unsqueeze(1),
            x,
            attn_mask=attn_mask,
        )[0]
        # Back to (batch, num_queries, dim).
        x = out.permute(1, 0, 2)

        x = self.ln_post(x)
        x = x @ self.proj
        return x

    def _repeat(self, query, N: int):
        # (num_queries, C) -> (num_queries, N, C): tile queries per batch item.
        return query.unsqueeze(1).repeat(1, N, 1)
|