xinference 1.11.0__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference has been flagged as potentially problematic.
- xinference/__init__.py +8 -0
- xinference/_version.py +3 -3
- xinference/api/oauth2/utils.py +26 -5
- xinference/core/model.py +1 -10
- xinference/device_utils.py +11 -1
- xinference/model/embedding/model_spec.json +70 -0
- xinference/model/image/core.py +20 -10
- xinference/model/image/model_spec.json +55 -3
- xinference/model/image/ocr/__init__.py +5 -0
- xinference/model/image/ocr/deepseek_ocr.py +958 -0
- xinference/model/llm/core.py +2 -0
- xinference/model/llm/llama_cpp/core.py +2 -0
- xinference/model/llm/llm_family.json +319 -6
- xinference/model/llm/lmdeploy/core.py +2 -0
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/core.py +22 -36
- xinference/model/llm/transformers/multimodal/qwen-omni.py +60 -11
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
- xinference/model/llm/transformers/utils.py +0 -20
- xinference/model/llm/vllm/core.py +2 -0
- xinference/model/rerank/model_spec.json +368 -252
- xinference/model/rerank/sentence_transformers/core.py +10 -2
- xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +71 -5
- xinference/thirdparty/indextts/gpt/transformers_gpt2.py +51 -1
- xinference/ui/gradio/media_interface.py +469 -4
- xinference/ui/gradio/utils/__init__.py +19 -0
- xinference/ui/gradio/utils/latex.py +342 -0
- xinference/ui/web/ui/build/asset-manifest.json +3 -3
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/js/{main.45e78536.js → main.87d6859b.js} +3 -3
- xinference/ui/web/ui/build/static/js/main.87d6859b.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/412a6b414a8267c7a349d9beda4593cdf218abf32edaaf339e6a230df40397b8.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e6770a05771952175c9fbf48fce283c9bb1bc8b5763e39edc36d099d1fe16b4a.json +1 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/METADATA +11 -11
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/RECORD +40 -37
- xinference/ui/web/ui/build/static/js/main.45e78536.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +0 -1
- /xinference/ui/web/ui/build/static/js/{main.45e78536.js.LICENSE.txt → main.87d6859b.js.LICENSE.txt} +0 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/WHEEL +0 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/top_level.txt +0 -0
xinference/model/rerank/sentence_transformers/core.py

@@ -81,6 +81,7 @@ class SentenceTransformerRerankModel(RerankModel):
         if (
             self.model_family.type == "normal"
             and "qwen3" not in self.model_family.model_name.lower()
+            and "jina-reranker-v3" not in self.model_family.model_name.lower()
         ):
             try:
                 import sentence_transformers
@@ -109,7 +110,10 @@ class SentenceTransformerRerankModel(RerankModel):
         )
         if self._use_fp16:
             self._model.model.half()
-        elif "qwen3" in self.model_family.model_name.lower():
+        elif (
+            "qwen3" in self.model_family.model_name.lower()
+            or "jina-reranker-v3" in self.model_family.model_name.lower()
+        ):
             # qwen3-reranker
             # now we use transformers
             # TODO: support engines for rerank models
@@ -225,6 +229,7 @@ class SentenceTransformerRerankModel(RerankModel):
         if (
             self.model_family.type == "normal"
             and "qwen3" not in self.model_family.model_name.lower()
+            and "jina-reranker-v3" not in self.model_family.model_name.lower()
         ):
             logger.debug("Passing processed sentences: %s", sentence_combinations)
             similarity_scores = self._model.predict(
@@ -235,7 +240,10 @@ class SentenceTransformerRerankModel(RerankModel):
             ).cpu()
             if similarity_scores.dtype == torch.bfloat16:
                 similarity_scores = similarity_scores.float()
-        elif "qwen3" in self.model_family.model_name.lower():
+        elif (
+            "qwen3" in self.model_family.model_name.lower()
+            or "jina-reranker-v3" in self.model_family.model_name.lower()
+        ):
 
             def format_instruction(instruction, query, doc):
                 if instruction is None:
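Net effect of the four hunks above: `jina-reranker-v3` now takes the same transformers-driven code path that `qwen3` rerankers already used, instead of being loaded and scored through `sentence_transformers.CrossEncoder`. A minimal sketch of the resulting dispatch rule (the helper name is hypothetical, for illustration only, not xinference API):

```python
# Hypothetical helper restating the dispatch rule introduced above;
# not part of xinference's actual code.
def uses_transformers_backend(family_type: str, model_name: str) -> bool:
    name = model_name.lower()
    # "normal" rerankers go through sentence-transformers, except the
    # qwen3 and jina-reranker-v3 families, which run via transformers.
    return family_type != "normal" or "qwen3" in name or "jina-reranker-v3" in name

assert uses_transformers_backend("normal", "jina-reranker-v3")
assert not uses_transformers_backend("normal", "bge-reranker-base")
```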
xinference/thirdparty/indextts/gpt/transformers_generation_utils.py

@@ -30,8 +30,12 @@ from transformers.cache_utils import (
     DynamicCache,
     EncoderDecoderCache,
     OffloadedCache,
-    QuantizedCacheConfig,
+    QuantizedCache,
     StaticCache,
+    SlidingWindowCache,
+    SinkCache,
+    HybridCache,
+    HybridChunkedCache,
 )
 from transformers.configuration_utils import PretrainedConfig
 from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
@@ -55,13 +59,10 @@ from transformers.generation.candidate_generator import (
     AssistedCandidateGeneratorDifferentTokenizers,
     CandidateGenerator,
     PromptLookupCandidateGenerator,
-    _crop_past_key_values,
     _prepare_attention_mask,
     _prepare_token_type_ids,
 )
 from transformers.generation.configuration_utils import (
-    NEED_SETUP_CACHE_CLASSES_MAPPING,
-    QUANT_BACKEND_CLASSES_MAPPING,
     GenerationConfig,
     GenerationMode,
 )
@@ -111,6 +112,70 @@ if TYPE_CHECKING:
 
 logger = logging.get_logger(__name__)
 
+# Compatibility with transformers 4.57.1+
+# These mappings are needed for the removed constants
+NEED_SETUP_CACHE_CLASSES_MAPPING = {
+    "auto": Cache,
+    "dynamic": DynamicCache,
+    "static": StaticCache,
+    "offloaded": OffloadedCache,
+    "sliding_window": SlidingWindowCache,
+    "sink": SinkCache,
+    "hybrid": HybridCache,
+    "hybrid_chunked": HybridChunkedCache,
+}
+
+# Mapping for quantized cache backends
+QUANT_BACKEND_CLASSES_MAPPING = {
+    "quanto": QuantizedCache,
+    "hqq": QuantizedCache,
+}
+
+# Compatibility class for removed QuantizedCacheConfig
+class QuantizedCacheConfig:
+    def __init__(self, backend: str = "quanto", nbits: int = 4,
+                 axis_key: int = 0, axis_value: int = 0,
+                 q_group_size: int = 64, residual_length: int = 128):
+        self.backend = backend
+        self.nbits = nbits
+        self.axis_key = axis_key
+        self.axis_value = axis_value
+        self.q_group_size = q_group_size
+        self.residual_length = residual_length
+
+# Compatibility function for removed _crop_past_key_values
+def _crop_past_key_values(model, past_key_values, max_length):
+    """
+    Crop past key values to a maximum length.
+    This is a compatibility function for the removed _crop_past_key_values.
+    """
+    if past_key_values is None:
+        return past_key_values
+
+    # If past_key_values is a Cache object
+    if hasattr(past_key_values, 'crop'):
+        return past_key_values.crop(max_length)
+
+    # If it's a tuple of tensors (legacy format)
+    if isinstance(past_key_values, tuple):
+        cropped_past_key_values = []
+        for layer_past_key_values in past_key_values:
+            if isinstance(layer_past_key_values, tuple) and len(layer_past_key_values) == 2:
+                # Standard format: (key, value)
+                key, value = layer_past_key_values
+                if key.shape[-2] > max_length:
+                    key = key[..., :max_length, :]
+                if value.shape[-2] > max_length:
+                    value = value[..., :max_length, :]
+                cropped_past_key_values.append((key, value))
+            else:
+                # Other formats, just append as is
+                cropped_past_key_values.append(layer_past_key_values)
+        return tuple(cropped_past_key_values)
+
+    # For other cache types, return as is
+    return past_key_values
+
 if is_accelerate_available():
     from accelerate.hooks import AlignDevicesHook, add_hook_to_module
 
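These shims keep the vendored indextts generation code importable against transformers 4.57.1+, which dropped the cache mappings, `QuantizedCacheConfig`, and `_crop_past_key_values`. For legacy tuple-format caches the reintroduced `_crop_past_key_values` simply slices keys and values along the sequence axis; a standalone sanity check of that slicing logic (re-stated here rather than importing the vendored module):

```python
import torch

def crop_legacy_cache(past_key_values, max_length):
    # Mirrors the tuple branch of the _crop_past_key_values shim above:
    # truncate each layer's key/value tensors along the sequence axis (dim -2).
    return tuple(
        (k[..., :max_length, :], v[..., :max_length, :])
        for k, v in past_key_values
    )

# One (key, value) pair per layer, shaped (batch, heads, seq_len, head_dim).
past = tuple((torch.zeros(1, 2, 10, 4), torch.zeros(1, 2, 10, 4)) for _ in range(3))
cropped = crop_legacy_cache(past, max_length=6)
assert all(k.shape[-2] == 6 and v.shape[-2] == 6 for k, v in cropped)
```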
@@ -1002,7 +1067,8 @@ class GenerationMixin:
                 device=device,
             )
         )
-        if generation_config.forced_decoder_ids is not None:
+        # Compatibility with transformers 4.57.1+: forced_decoder_ids has been removed
+        if hasattr(generation_config, 'forced_decoder_ids') and generation_config.forced_decoder_ids is not None:
             # TODO (sanchit): move this exception to GenerationConfig.validate() when TF & FLAX are aligned with PT
             raise ValueError(
                 "You have explicitly specified `forced_decoder_ids`. Please remove the `forced_decoder_ids` argument "
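The `hasattr` guard is what keeps this check version-agnostic: on transformers 4.57.1+ the attribute no longer exists on `GenerationConfig`, so the branch becomes a no-op, while legacy configs that still set it keep raising the explanatory `ValueError`. A minimal sketch of the pattern (both config classes are fabricated stand-ins):

```python
# Fabricated stand-ins for old/new GenerationConfig, for illustration only.
class OldConfig:
    forced_decoder_ids = [[1, 50259]]

class NewConfig:  # transformers 4.57.1+: the attribute is gone entirely
    pass

def check(cfg):
    if hasattr(cfg, "forced_decoder_ids") and cfg.forced_decoder_ids is not None:
        raise ValueError("forced_decoder_ids is no longer supported")

check(NewConfig())  # silently skipped on new configs
try:
    check(OldConfig())  # still raises for legacy usage
except ValueError as err:
    print(err)
```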
xinference/thirdparty/indextts/gpt/transformers_gpt2.py

@@ -32,7 +32,57 @@ import transformers
 
 from indextts.gpt.transformers_generation_utils import GenerationMixin
 from indextts.gpt.transformers_modeling_utils import PreTrainedModel
-from transformers.modeling_utils import SequenceSummary
+# SequenceSummary has been removed in transformers 4.57.1+
+# Adding compatibility implementation
+class SequenceSummary(nn.Module):
+    """
+    Compute a single vector summary of a sequence hidden states.
+    """
+    def __init__(self, config):
+        super().__init__()
+        self.summary_type = getattr(config, 'summary_type', 'last')
+        self.summary_use_proj = getattr(config, 'summary_use_proj', True)
+        self.summary_activation = getattr(config, 'summary_activation', None)
+        self.summary_proj_to_labels = getattr(config, 'summary_proj_to_labels', True)
+        self.summary_first_dropout = getattr(config, 'summary_first_dropout', 0.1)
+
+        if self.summary_use_proj:
+            if hasattr(config, 'summary_proj_to_labels') and config.summary_proj_to_labels and config.num_labels > 0:
+                num_classes = config.num_labels
+            else:
+                num_classes = config.hidden_size
+            self.summary = nn.Linear(config.hidden_size, num_classes)
+
+        if hasattr(config, 'summary_activation') and config.summary_activation == 'tanh':
+            self.activation = nn.Tanh()
+        else:
+            self.activation = lambda x: x
+
+        if hasattr(config, 'summary_first_dropout') and config.summary_first_dropout > 0:
+            self.dropout = nn.Dropout(config.summary_first_dropout)
+        else:
+            self.dropout = lambda x: x
+
+    def forward(self, hidden_states, cls_token_index=None):
+        if self.summary_type == 'last':
+            output = hidden_states[:, -1]
+        elif self.summary_type == 'first':
+            output = hidden_states[:, 0]
+        elif self.summary_type == 'mean':
+            output = hidden_states.mean(dim=1)
+        elif self.summary_type == 'cls_index':
+            if cls_token_index is None:
+                raise ValueError("cls_token_index must be specified when summary_type='cls_index'")
+            batch_size = hidden_states.size(0)
+            output = hidden_states[batch_size, cls_token_index]
+        else:
+            output = hidden_states[:, -1]  # fallback to last
+
+        output = self.dropout(output)
+        if self.summary_use_proj:
+            output = self.summary(output)
+        output = self.activation(output)
+        return output
 
 from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask_for_sdpa, _prepare_4d_causal_attention_mask_for_sdpa
 from transformers.modeling_outputs import (
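The compatibility class reimplements the common `summary_type` pooling options (`last`, `first`, `mean`, `cls_index`) of the removed original. A usage sketch, assuming the `SequenceSummary` class above and `torch`/`torch.nn` are in scope; the config object is fabricated for illustration, real callers pass the model's GPT-2-style config:

```python
import torch
from types import SimpleNamespace

# Fabricated config for illustration only.
cfg = SimpleNamespace(
    summary_type="mean",
    summary_use_proj=True,
    summary_proj_to_labels=False,
    summary_activation=None,
    summary_first_dropout=0.0,
    num_labels=0,
    hidden_size=8,
)
summary = SequenceSummary(cfg)
pooled = summary(torch.randn(2, 5, 8))  # (batch, seq, hidden) -> (batch, hidden)
assert pooled.shape == (2, 8)
```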