xinference 1.6.0.post1__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (124)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +79 -2
  3. xinference/client/restful/restful_client.py +65 -3
  4. xinference/conftest.py +0 -7
  5. xinference/core/media_interface.py +132 -8
  6. xinference/core/model.py +44 -6
  7. xinference/core/scheduler.py +1 -10
  8. xinference/core/supervisor.py +8 -17
  9. xinference/core/worker.py +5 -27
  10. xinference/deploy/cmdline.py +6 -2
  11. xinference/model/audio/chattts.py +24 -39
  12. xinference/model/audio/cosyvoice.py +18 -30
  13. xinference/model/audio/funasr.py +42 -0
  14. xinference/model/audio/model_spec.json +71 -1
  15. xinference/model/audio/model_spec_modelscope.json +76 -2
  16. xinference/model/audio/utils.py +75 -0
  17. xinference/model/core.py +1 -0
  18. xinference/model/embedding/__init__.py +74 -18
  19. xinference/model/embedding/core.py +98 -589
  20. xinference/model/embedding/embed_family.py +133 -0
  21. xinference/{thirdparty/omnilmm/train → model/embedding/flag}/__init__.py +1 -1
  22. xinference/model/embedding/flag/core.py +282 -0
  23. xinference/model/embedding/model_spec.json +24 -0
  24. xinference/model/embedding/model_spec_modelscope.json +24 -0
  25. xinference/model/embedding/sentence_transformers/__init__.py +13 -0
  26. xinference/model/embedding/sentence_transformers/core.py +399 -0
  27. xinference/model/embedding/vllm/core.py +95 -0
  28. xinference/model/image/model_spec.json +30 -3
  29. xinference/model/image/model_spec_modelscope.json +41 -2
  30. xinference/model/image/stable_diffusion/core.py +144 -53
  31. xinference/model/llm/__init__.py +6 -54
  32. xinference/model/llm/core.py +19 -5
  33. xinference/model/llm/llama_cpp/core.py +59 -3
  34. xinference/model/llm/llama_cpp/memory.py +457 -0
  35. xinference/model/llm/llm_family.json +247 -402
  36. xinference/model/llm/llm_family.py +88 -16
  37. xinference/model/llm/llm_family_modelscope.json +260 -421
  38. xinference/model/llm/llm_family_openmind_hub.json +0 -34
  39. xinference/model/llm/sglang/core.py +8 -0
  40. xinference/model/llm/transformers/__init__.py +27 -6
  41. xinference/model/llm/transformers/chatglm.py +4 -2
  42. xinference/model/llm/transformers/core.py +49 -28
  43. xinference/model/llm/transformers/deepseek_v2.py +6 -49
  44. xinference/model/llm/transformers/gemma3.py +119 -164
  45. xinference/model/llm/transformers/multimodal/__init__.py +13 -0
  46. xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
  47. xinference/model/llm/transformers/multimodal/core.py +205 -0
  48. xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
  49. xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
  50. xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
  51. xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
  52. xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
  53. xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
  54. xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
  55. xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
  56. xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
  57. xinference/model/llm/transformers/opt.py +4 -2
  58. xinference/model/llm/transformers/utils.py +6 -37
  59. xinference/model/llm/utils.py +11 -0
  60. xinference/model/llm/vllm/core.py +7 -0
  61. xinference/model/rerank/core.py +91 -3
  62. xinference/model/rerank/model_spec.json +24 -0
  63. xinference/model/rerank/model_spec_modelscope.json +24 -0
  64. xinference/model/rerank/utils.py +20 -2
  65. xinference/model/utils.py +38 -1
  66. xinference/model/video/diffusers.py +65 -3
  67. xinference/model/video/model_spec.json +31 -4
  68. xinference/model/video/model_spec_modelscope.json +32 -4
  69. xinference/web/ui/build/asset-manifest.json +6 -6
  70. xinference/web/ui/build/index.html +1 -1
  71. xinference/web/ui/build/static/css/main.013f296b.css +2 -0
  72. xinference/web/ui/build/static/css/main.013f296b.css.map +1 -0
  73. xinference/web/ui/build/static/js/main.8a9e3ba0.js +3 -0
  74. xinference/web/ui/build/static/js/main.8a9e3ba0.js.map +1 -0
  75. xinference/web/ui/node_modules/.cache/babel-loader/34cfbfb7836e136ba3261cfd411cc554bf99ba24b35dcceebeaa4f008cb3c9dc.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/6595880facebca7ceace6f17cf21c3a5a9219a2f52fb0ba9f3cf1131eddbcf6b.json +1 -0
  79. xinference/web/ui/node_modules/.cache/babel-loader/aa998bc2d9c11853add6b8a2e08f50327f56d8824ccaaec92d6dde1b305f0d85.json +1 -0
  80. xinference/web/ui/node_modules/.cache/babel-loader/c748246b1d7bcebc16153be69f37e955bb2145526c47dd425aeeff70d3004dbc.json +1 -0
  81. xinference/web/ui/node_modules/.cache/babel-loader/e31234e95d60a5a7883fbcd70de2475dc1c88c90705df1a530abb68f86f80a51.json +1 -0
  82. xinference/web/ui/src/locales/en.json +21 -8
  83. xinference/web/ui/src/locales/ja.json +224 -0
  84. xinference/web/ui/src/locales/ko.json +224 -0
  85. xinference/web/ui/src/locales/zh.json +21 -8
  86. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/METADATA +14 -11
  87. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/RECORD +93 -100
  88. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/WHEEL +1 -1
  89. xinference/model/llm/transformers/cogvlm2.py +0 -442
  90. xinference/model/llm/transformers/cogvlm2_video.py +0 -333
  91. xinference/model/llm/transformers/deepseek_vl.py +0 -280
  92. xinference/model/llm/transformers/glm_edge_v.py +0 -213
  93. xinference/model/llm/transformers/intern_vl.py +0 -526
  94. xinference/model/llm/transformers/internlm2.py +0 -94
  95. xinference/model/llm/transformers/minicpmv25.py +0 -193
  96. xinference/model/llm/transformers/omnilmm.py +0 -132
  97. xinference/model/llm/transformers/qwen2_audio.py +0 -179
  98. xinference/model/llm/transformers/qwen_vl.py +0 -360
  99. xinference/thirdparty/omnilmm/LICENSE +0 -201
  100. xinference/thirdparty/omnilmm/chat.py +0 -218
  101. xinference/thirdparty/omnilmm/constants.py +0 -4
  102. xinference/thirdparty/omnilmm/conversation.py +0 -332
  103. xinference/thirdparty/omnilmm/model/__init__.py +0 -1
  104. xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
  105. xinference/thirdparty/omnilmm/model/resampler.py +0 -166
  106. xinference/thirdparty/omnilmm/model/utils.py +0 -578
  107. xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
  108. xinference/thirdparty/omnilmm/utils.py +0 -134
  109. xinference/web/ui/build/static/css/main.337afe76.css +0 -2
  110. xinference/web/ui/build/static/css/main.337afe76.css.map +0 -1
  111. xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
  112. xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
  113. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +0 -1
  114. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
  115. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
  116. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
  117. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
  118. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +0 -1
  119. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +0 -1
  120. /xinference/{thirdparty/omnilmm → model/embedding/vllm}/__init__.py +0 -0
  121. /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.8a9e3ba0.js.LICENSE.txt} +0 -0
  122. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/entry_points.txt +0 -0
  123. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/licenses/LICENSE +0 -0
  124. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/top_level.txt +0 -0
xinference/thirdparty/omnilmm/model/resampler.py (deleted)
@@ -1,166 +0,0 @@
- # Copyright (c) Alibaba Cloud.
- #
- # This source code is licensed under the license found in the
- # LICENSE file in the root directory of this source tree.
-
- import math
- from functools import partial
-
- import numpy as np
- import torch
- from torch import nn
- from torch.nn import functional as F
- from torch.nn.init import trunc_normal_
-
-
- def get_abs_pos(abs_pos, tgt_size):
-     # abs_pos: L, C
-     # tgt_size: M
-     # return: M, C
-     src_size = int(math.sqrt(abs_pos.size(0)))
-     tgt_size = int(math.sqrt(tgt_size))
-     dtype = abs_pos.dtype
-
-     if src_size != tgt_size:
-         return (
-             F.interpolate(
-                 abs_pos.float().reshape(1, src_size, src_size, -1).permute(0, 3, 1, 2),
-                 size=(tgt_size, tgt_size),
-                 mode="bicubic",
-                 align_corners=False,
-             )
-             .permute(0, 2, 3, 1)
-             .flatten(0, 2)
-             .to(dtype=dtype)
-         )
-     else:
-         return abs_pos
-
-
- # https://github.com/facebookresearch/mae/blob/efb2a8062c206524e35e47d04501ed4f544c0ae8/util/pos_embed.py#L20
- def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
-     """
-     grid_size: int of the grid height and width
-     return:
-     pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token)
-     """
-     grid_h = np.arange(grid_size, dtype=np.float32)
-     grid_w = np.arange(grid_size, dtype=np.float32)
-     grid = np.meshgrid(grid_w, grid_h)  # here w goes first
-     grid = np.stack(grid, axis=0)
-
-     grid = grid.reshape([2, 1, grid_size, grid_size])
-     pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
-     if cls_token:
-         pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
-     return pos_embed
-
-
- def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
-     assert embed_dim % 2 == 0
-
-     # use half of dimensions to encode grid_h
-     emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0])  # (H*W, D/2)
-     emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1])  # (H*W, D/2)
-
-     emb = np.concatenate([emb_h, emb_w], axis=1)  # (H*W, D)
-     return emb
-
-
- def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
-     """
-     embed_dim: output dimension for each position
-     pos: a list of positions to be encoded: size (M,)
-     out: (M, D)
-     """
-     assert embed_dim % 2 == 0
-     omega = np.arange(embed_dim // 2, dtype=np.float32)
-     omega /= embed_dim / 2.0
-     omega = 1.0 / 10000**omega  # (D/2,)
-
-     pos = pos.reshape(-1)  # (M,)
-     out = np.einsum("m,d->md", pos, omega)  # (M, D/2), outer product
-
-     emb_sin = np.sin(out)  # (M, D/2)
-     emb_cos = np.cos(out)  # (M, D/2)
-
-     emb = np.concatenate([emb_sin, emb_cos], axis=1)  # (M, D)
-     return emb
-
-
- class Resampler(nn.Module):
-     """
-     A 2D perceiver-resampler network with one cross attention layers by
-     (grid_size**2) learnable queries and 2d sincos pos_emb
-     Outputs:
-         A tensor with the shape of (grid_size**2, embed_dim)
-     """
-
-     def __init__(
-         self,
-         grid_size,
-         embed_dim,
-         num_heads,
-         kv_dim=None,
-         norm_layer=partial(nn.LayerNorm, eps=1e-6),
-     ):
-         super().__init__()
-         self.num_queries = grid_size**2
-         self.embed_dim = embed_dim
-         self.num_heads = num_heads
-
-         self.pos_embed = nn.Parameter(
-             torch.from_numpy(get_2d_sincos_pos_embed(embed_dim, grid_size)).float()
-         ).requires_grad_(False)
-
-         self.query = nn.Parameter(torch.zeros(self.num_queries, embed_dim))
-         trunc_normal_(self.query, std=0.02)
-
-         if kv_dim is not None and kv_dim != embed_dim:
-             self.kv_proj = nn.Linear(kv_dim, embed_dim, bias=False)
-         else:
-             self.kv_proj = nn.Identity()
-
-         self.attn = nn.MultiheadAttention(embed_dim, num_heads)
-         self.ln_q = norm_layer(embed_dim)
-         self.ln_kv = norm_layer(embed_dim)
-
-         self.ln_post = norm_layer(embed_dim)
-         self.proj = nn.Parameter(
-             (embed_dim**-0.5) * torch.randn(embed_dim, embed_dim)
-         )
-
-         self.apply(self._init_weights)
-
-     def _init_weights(self, m):
-         if isinstance(m, nn.Linear):
-             trunc_normal_(m.weight, std=0.02)
-             if isinstance(m, nn.Linear) and m.bias is not None:
-                 nn.init.constant_(m.bias, 0)
-         elif isinstance(m, nn.LayerNorm):
-             nn.init.constant_(m.bias, 0)
-             nn.init.constant_(m.weight, 1.0)
-
-     def forward(self, x, attn_mask=None):
-         pos_embed = get_abs_pos(self.pos_embed, x.size(1))
-
-         x = self.kv_proj(x)
-         x = self.ln_kv(x).permute(1, 0, 2)
-
-         N = x.shape[1]
-         q = self.ln_q(self.query)
-         # print((self._repeat(q, N) + self.pos_embed.unsqueeze(1)).dtype, (x + pos_embed.unsqueeze(1)).dtype, x.dtype)
-         out = self.attn(
-             self._repeat(q, N) + self.pos_embed.unsqueeze(1),
-             x + pos_embed.unsqueeze(1),
-             x,
-             attn_mask=attn_mask,
-         )[0]
-         x = out.permute(1, 0, 2)
-
-         x = self.ln_post(x)
-         x = x @ self.proj
-         return x
-
-     def _repeat(self, query, N: int):
-         return query.unsqueeze(1).repeat(1, N, 1)
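
For context on what was removed: the hunk above deletes the OmniLMM perceiver-resampler, which maps a variable-length grid of vision features onto a fixed set of (grid_size**2) learnable queries via a single cross-attention layer with 2D sin-cos positional embeddings. Below is a minimal, hypothetical usage sketch of that module; every dimension is a made-up example value, not configuration taken from this package.

import torch

# Assumed, illustrative dimensions only (not values used by xinference).
resampler = Resampler(grid_size=8, embed_dim=512, num_heads=8, kv_dim=1024)

# x: (batch, num_vision_tokens, kv_dim); the token count should be a perfect
# square so get_abs_pos can bicubically resize the 2D sin-cos position grid.
x = torch.randn(2, 16 * 16, 1024)
out = resampler(x)  # -> (batch, grid_size**2, embed_dim) == (2, 64, 512)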