xinference 1.11.0__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (43)
  1. xinference/__init__.py +8 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/oauth2/utils.py +26 -5
  4. xinference/core/model.py +1 -10
  5. xinference/device_utils.py +11 -1
  6. xinference/model/embedding/model_spec.json +70 -0
  7. xinference/model/image/core.py +20 -10
  8. xinference/model/image/model_spec.json +55 -3
  9. xinference/model/image/ocr/__init__.py +5 -0
  10. xinference/model/image/ocr/deepseek_ocr.py +958 -0
  11. xinference/model/llm/core.py +2 -0
  12. xinference/model/llm/llama_cpp/core.py +2 -0
  13. xinference/model/llm/llm_family.json +319 -6
  14. xinference/model/llm/lmdeploy/core.py +2 -0
  15. xinference/model/llm/sglang/core.py +2 -0
  16. xinference/model/llm/transformers/core.py +22 -36
  17. xinference/model/llm/transformers/multimodal/qwen-omni.py +60 -11
  18. xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
  19. xinference/model/llm/transformers/utils.py +0 -20
  20. xinference/model/llm/vllm/core.py +2 -0
  21. xinference/model/rerank/model_spec.json +368 -252
  22. xinference/model/rerank/sentence_transformers/core.py +10 -2
  23. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +71 -5
  24. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +51 -1
  25. xinference/ui/gradio/media_interface.py +469 -4
  26. xinference/ui/gradio/utils/__init__.py +19 -0
  27. xinference/ui/gradio/utils/latex.py +342 -0
  28. xinference/ui/web/ui/build/asset-manifest.json +3 -3
  29. xinference/ui/web/ui/build/index.html +1 -1
  30. xinference/ui/web/ui/build/static/js/{main.45e78536.js → main.87d6859b.js} +3 -3
  31. xinference/ui/web/ui/build/static/js/main.87d6859b.js.map +1 -0
  32. xinference/ui/web/ui/node_modules/.cache/babel-loader/412a6b414a8267c7a349d9beda4593cdf218abf32edaaf339e6a230df40397b8.json +1 -0
  33. xinference/ui/web/ui/node_modules/.cache/babel-loader/e6770a05771952175c9fbf48fce283c9bb1bc8b5763e39edc36d099d1fe16b4a.json +1 -0
  34. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/METADATA +11 -11
  35. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/RECORD +40 -37
  36. xinference/ui/web/ui/build/static/js/main.45e78536.js.map +0 -1
  37. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +0 -1
  38. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +0 -1
  39. /xinference/ui/web/ui/build/static/js/{main.45e78536.js.LICENSE.txt → main.87d6859b.js.LICENSE.txt} +0 -0
  40. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/WHEEL +0 -0
  41. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/entry_points.txt +0 -0
  42. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/licenses/LICENSE +0 -0
  43. {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/top_level.txt +0 -0
@@ -81,6 +81,7 @@ class SentenceTransformerRerankModel(RerankModel):
         if (
             self.model_family.type == "normal"
             and "qwen3" not in self.model_family.model_name.lower()
+            and "jina-reranker-v3" not in self.model_family.model_name.lower()
         ):
             try:
                 import sentence_transformers
@@ -109,7 +110,10 @@ class SentenceTransformerRerankModel(RerankModel):
             )
             if self._use_fp16:
                 self._model.model.half()
-        elif "qwen3" in self.model_family.model_name.lower():
+        elif (
+            "qwen3" in self.model_family.model_name.lower()
+            or "jina-reranker-v3" in self.model_family.model_name.lower()
+        ):
             # qwen3-reranker
             # now we use transformers
             # TODO: support engines for rerank models
@@ -225,6 +229,7 @@ class SentenceTransformerRerankModel(RerankModel):
         if (
             self.model_family.type == "normal"
             and "qwen3" not in self.model_family.model_name.lower()
+            and "jina-reranker-v3" not in self.model_family.model_name.lower()
         ):
             logger.debug("Passing processed sentences: %s", sentence_combinations)
             similarity_scores = self._model.predict(
@@ -235,7 +240,10 @@ class SentenceTransformerRerankModel(RerankModel):
             ).cpu()
             if similarity_scores.dtype == torch.bfloat16:
                 similarity_scores = similarity_scores.float()
-        elif "qwen3" in self.model_family.model_name.lower():
+        elif (
+            "qwen3" in self.model_family.model_name.lower()
+            or "jina-reranker-v3" in self.model_family.model_name.lower()
+        ):

             def format_instruction(instruction, query, doc):
                 if instruction is None:
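
Taken together, the four rerank hunks route jina-reranker-v3 onto the transformers-based code path that qwen3 rerankers already use, both when the model is loaded and when scores are computed. A minimal sketch of exercising that path through the REST client, assuming a locally running server and that the model is registered under the name jina-reranker-v3 (both assumptions, not confirmed by this diff):

    # Hedged sketch: endpoint URL and model name are assumptions.
    from xinference.client import Client

    client = Client("http://localhost:9997")
    uid = client.launch_model(model_name="jina-reranker-v3", model_type="rerank")
    model = client.get_model(uid)
    result = model.rerank(
        documents=["Paris is the capital of France.", "Berlin is in Germany."],
        query="What is the capital of France?",
        top_n=1,
    )
    print(result["results"][0])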
@@ -30,8 +30,12 @@ from transformers.cache_utils import (
     DynamicCache,
     EncoderDecoderCache,
     OffloadedCache,
-    QuantizedCacheConfig,
+    QuantizedCache,
     StaticCache,
+    SlidingWindowCache,
+    SinkCache,
+    HybridCache,
+    HybridChunkedCache,
 )
 from transformers.configuration_utils import PretrainedConfig
 from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
@@ -55,13 +59,10 @@ from transformers.generation.candidate_generator import (
     AssistedCandidateGeneratorDifferentTokenizers,
     CandidateGenerator,
     PromptLookupCandidateGenerator,
-    _crop_past_key_values,
     _prepare_attention_mask,
     _prepare_token_type_ids,
 )
 from transformers.generation.configuration_utils import (
-    NEED_SETUP_CACHE_CLASSES_MAPPING,
-    QUANT_BACKEND_CLASSES_MAPPING,
     GenerationConfig,
     GenerationMode,
 )
@@ -111,6 +112,70 @@ if TYPE_CHECKING:
 
 logger = logging.get_logger(__name__)
 
+# Compatibility with transformers 4.57.1+
+# These mappings are needed for the removed constants
+NEED_SETUP_CACHE_CLASSES_MAPPING = {
+    "auto": Cache,
+    "dynamic": DynamicCache,
+    "static": StaticCache,
+    "offloaded": OffloadedCache,
+    "sliding_window": SlidingWindowCache,
+    "sink": SinkCache,
+    "hybrid": HybridCache,
+    "hybrid_chunked": HybridChunkedCache,
+}
+
+# Mapping for quantized cache backends
+QUANT_BACKEND_CLASSES_MAPPING = {
+    "quanto": QuantizedCache,
+    "hqq": QuantizedCache,
+}
+
+# Compatibility class for removed QuantizedCacheConfig
+class QuantizedCacheConfig:
+    def __init__(self, backend: str = "quanto", nbits: int = 4,
+                 axis_key: int = 0, axis_value: int = 0,
+                 q_group_size: int = 64, residual_length: int = 128):
+        self.backend = backend
+        self.nbits = nbits
+        self.axis_key = axis_key
+        self.axis_value = axis_value
+        self.q_group_size = q_group_size
+        self.residual_length = residual_length
+
+# Compatibility function for removed _crop_past_key_values
+def _crop_past_key_values(model, past_key_values, max_length):
+    """
+    Crop past key values to a maximum length.
+    This is a compatibility function for the removed _crop_past_key_values.
+    """
+    if past_key_values is None:
+        return past_key_values
+
+    # If past_key_values is a Cache object
+    if hasattr(past_key_values, 'crop'):
+        return past_key_values.crop(max_length)
+
+    # If it's a tuple of tensors (legacy format)
+    if isinstance(past_key_values, tuple):
+        cropped_past_key_values = []
+        for layer_past_key_values in past_key_values:
+            if isinstance(layer_past_key_values, tuple) and len(layer_past_key_values) == 2:
+                # Standard format: (key, value)
+                key, value = layer_past_key_values
+                if key.shape[-2] > max_length:
+                    key = key[..., :max_length, :]
+                if value.shape[-2] > max_length:
+                    value = value[..., :max_length, :]
+                cropped_past_key_values.append((key, value))
+            else:
+                # Other formats, just append as is
+                cropped_past_key_values.append(layer_past_key_values)
+        return tuple(cropped_past_key_values)
+
+    # For other cache types, return as is
+    return past_key_values
+
 if is_accelerate_available():
     from accelerate.hooks import AlignDevicesHook, add_hook_to_module
 
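The block above reimplements, inside the vendored indextts code, three symbols that transformers 4.57.1+ no longer exports: the two cache-class mappings, a stand-in QuantizedCacheConfig, and a _crop_past_key_values fallback. A minimal sketch of the crop fallback on the legacy tuple layout, assuming the vendored module is importable under the same path its own imports use (tensor shapes are illustrative assumptions):

    # Hedged sketch: cropping legacy (key, value) tuples with the shim.
    # Shapes (batch, heads, seq_len, head_dim) are illustrative assumptions.
    import torch
    from indextts.gpt.transformers_generation_utils import _crop_past_key_values

    past = tuple(
        (torch.randn(1, 8, 32, 64), torch.randn(1, 8, 32, 64))
        for _ in range(2)  # two decoder layers
    )
    cropped = _crop_past_key_values(model=None, past_key_values=past, max_length=16)
    assert cropped[0][0].shape[-2] == 16  # keys truncated to 16 positions
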
@@ -1002,7 +1067,8 @@ class GenerationMixin:
                 device=device,
             )
         )
-        if generation_config.forced_decoder_ids is not None:
+        # Compatibility with transformers 4.57.1+: forced_decoder_ids has been removed
+        if hasattr(generation_config, 'forced_decoder_ids') and generation_config.forced_decoder_ids is not None:
             # TODO (sanchit): move this exception to GenerationConfig.validate() when TF & FLAX are aligned with PT
             raise ValueError(
                 "You have explicitly specified `forced_decoder_ids`. Please remove the `forced_decoder_ids` argument "
@@ -32,7 +32,57 @@ import transformers
 
 from indextts.gpt.transformers_generation_utils import GenerationMixin
 from indextts.gpt.transformers_modeling_utils import PreTrainedModel
-from transformers.modeling_utils import SequenceSummary
+# SequenceSummary has been removed in transformers 4.57.1+
+# Adding compatibility implementation
+class SequenceSummary(nn.Module):
+    """
+    Compute a single vector summary of a sequence hidden states.
+    """
+    def __init__(self, config):
+        super().__init__()
+        self.summary_type = getattr(config, 'summary_type', 'last')
+        self.summary_use_proj = getattr(config, 'summary_use_proj', True)
+        self.summary_activation = getattr(config, 'summary_activation', None)
+        self.summary_proj_to_labels = getattr(config, 'summary_proj_to_labels', True)
+        self.summary_first_dropout = getattr(config, 'summary_first_dropout', 0.1)
+
+        if self.summary_use_proj:
+            if hasattr(config, 'summary_proj_to_labels') and config.summary_proj_to_labels and config.num_labels > 0:
+                num_classes = config.num_labels
+            else:
+                num_classes = config.hidden_size
+            self.summary = nn.Linear(config.hidden_size, num_classes)
+
+        if hasattr(config, 'summary_activation') and config.summary_activation == 'tanh':
+            self.activation = nn.Tanh()
+        else:
+            self.activation = lambda x: x
+
+        if hasattr(config, 'summary_first_dropout') and config.summary_first_dropout > 0:
+            self.dropout = nn.Dropout(config.summary_first_dropout)
+        else:
+            self.dropout = lambda x: x
+
+    def forward(self, hidden_states, cls_token_index=None):
+        if self.summary_type == 'last':
+            output = hidden_states[:, -1]
+        elif self.summary_type == 'first':
+            output = hidden_states[:, 0]
+        elif self.summary_type == 'mean':
+            output = hidden_states.mean(dim=1)
+        elif self.summary_type == 'cls_index':
+            if cls_token_index is None:
+                raise ValueError("cls_token_index must be specified when summary_type='cls_index'")
+            batch_size = hidden_states.size(0)
+            output = hidden_states[batch_size, cls_token_index]
+        else:
+            output = hidden_states[:, -1]  # fallback to last
+
+        output = self.dropout(output)
+        if self.summary_use_proj:
+            output = self.summary(output)
+        output = self.activation(output)
+        return output
 
 from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask_for_sdpa, _prepare_4d_causal_attention_mask_for_sdpa
 from transformers.modeling_outputs import (
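
A minimal sketch of driving the SequenceSummary shim with its default 'last' summary type; the config object and tensor shapes are illustrative stand-ins, not values from indextts. (One caveat: the 'cls_index' branch indexes hidden_states with batch_size, which appears to index out of bounds as released, so the sketch stays on the 'last' path.)

    # Hedged sketch: the config is a stand-in with illustrative field values.
    import torch
    from types import SimpleNamespace
    from indextts.gpt.transformers_gpt2 import SequenceSummary  # assumed importable

    config = SimpleNamespace(
        summary_type="last",
        summary_use_proj=True,
        summary_proj_to_labels=False,
        summary_activation="tanh",
        summary_first_dropout=0.1,
        num_labels=0,
        hidden_size=768,
    )
    summary = SequenceSummary(config)
    hidden = torch.randn(2, 16, 768)  # (batch, seq_len, hidden_size)
    out = summary(hidden)             # hidden[:, -1] -> proj -> tanh
    assert out.shape == (2, 768)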