xinference 1.11.0__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference has been flagged as potentially problematic.
- xinference/__init__.py +8 -0
- xinference/_version.py +3 -3
- xinference/api/oauth2/utils.py +26 -5
- xinference/core/model.py +1 -10
- xinference/device_utils.py +11 -1
- xinference/model/embedding/model_spec.json +70 -0
- xinference/model/image/core.py +20 -10
- xinference/model/image/model_spec.json +55 -3
- xinference/model/image/ocr/__init__.py +5 -0
- xinference/model/image/ocr/deepseek_ocr.py +958 -0
- xinference/model/llm/core.py +2 -0
- xinference/model/llm/llama_cpp/core.py +2 -0
- xinference/model/llm/llm_family.json +319 -6
- xinference/model/llm/lmdeploy/core.py +2 -0
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/core.py +22 -36
- xinference/model/llm/transformers/multimodal/qwen-omni.py +60 -11
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
- xinference/model/llm/transformers/utils.py +0 -20
- xinference/model/llm/vllm/core.py +2 -0
- xinference/model/rerank/model_spec.json +368 -252
- xinference/model/rerank/sentence_transformers/core.py +10 -2
- xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +71 -5
- xinference/thirdparty/indextts/gpt/transformers_gpt2.py +51 -1
- xinference/ui/gradio/media_interface.py +469 -4
- xinference/ui/gradio/utils/__init__.py +19 -0
- xinference/ui/gradio/utils/latex.py +342 -0
- xinference/ui/web/ui/build/asset-manifest.json +3 -3
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/js/{main.45e78536.js → main.87d6859b.js} +3 -3
- xinference/ui/web/ui/build/static/js/main.87d6859b.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/412a6b414a8267c7a349d9beda4593cdf218abf32edaaf339e6a230df40397b8.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e6770a05771952175c9fbf48fce283c9bb1bc8b5763e39edc36d099d1fe16b4a.json +1 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/METADATA +11 -11
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/RECORD +40 -37
- xinference/ui/web/ui/build/static/js/main.45e78536.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +0 -1
- /xinference/ui/web/ui/build/static/js/{main.45e78536.js.LICENSE.txt → main.87d6859b.js.LICENSE.txt} +0 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/WHEEL +0 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.11.0.dist-info → xinference-1.12.0.dist-info}/top_level.txt +0 -0
xinference/model/rerank/sentence_transformers/core.py

@@ -81,6 +81,7 @@ class SentenceTransformerRerankModel(RerankModel):
         if (
             self.model_family.type == "normal"
             and "qwen3" not in self.model_family.model_name.lower()
+            and "jina-reranker-v3" not in self.model_family.model_name.lower()
         ):
             try:
                 import sentence_transformers
@@ -109,7 +110,10 @@ class SentenceTransformerRerankModel(RerankModel):
         )
         if self._use_fp16:
             self._model.model.half()
-        elif "qwen3" in self.model_family.model_name.lower():
+        elif (
+            "qwen3" in self.model_family.model_name.lower()
+            or "jina-reranker-v3" in self.model_family.model_name.lower()
+        ):
             # qwen3-reranker
             # now we use transformers
             # TODO: support engines for rerank models
@@ -225,6 +229,7 @@ class SentenceTransformerRerankModel(RerankModel):
         if (
             self.model_family.type == "normal"
             and "qwen3" not in self.model_family.model_name.lower()
+            and "jina-reranker-v3" not in self.model_family.model_name.lower()
         ):
             logger.debug("Passing processed sentences: %s", sentence_combinations)
             similarity_scores = self._model.predict(
@@ -235,7 +240,10 @@ class SentenceTransformerRerankModel(RerankModel):
             ).cpu()
             if similarity_scores.dtype == torch.bfloat16:
                 similarity_scores = similarity_scores.float()
-        elif "qwen3" in self.model_family.model_name.lower():
+        elif (
+            "qwen3" in self.model_family.model_name.lower()
+            or "jina-reranker-v3" in self.model_family.model_name.lower()
+        ):
 
             def format_instruction(instruction, query, doc):
                 if instruction is None:
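Net effect of the four hunks above: `jina-reranker-v3` now takes the same transformers-driven code path that `qwen3` rerankers already used, instead of being loaded and scored through `sentence_transformers.CrossEncoder`. A minimal sketch of the resulting dispatch rule (the helper name is hypothetical, for illustration only, not xinference API):

```python
# Hypothetical helper restating the dispatch rule introduced above;
# not part of xinference's actual code.
def uses_transformers_backend(family_type: str, model_name: str) -> bool:
    name = model_name.lower()
    # "normal" rerankers go through sentence-transformers, except the
    # qwen3 and jina-reranker-v3 families, which run via transformers.
    return family_type != "normal" or "qwen3" in name or "jina-reranker-v3" in name

assert uses_transformers_backend("normal", "jina-reranker-v3")
assert not uses_transformers_backend("normal", "bge-reranker-base")
```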
xinference/thirdparty/indextts/gpt/transformers_generation_utils.py

@@ -30,8 +30,12 @@ from transformers.cache_utils import (
     DynamicCache,
     EncoderDecoderCache,
     OffloadedCache,
-    QuantizedCacheConfig,
+    QuantizedCache,
     StaticCache,
+    SlidingWindowCache,
+    SinkCache,
+    HybridCache,
+    HybridChunkedCache,
 )
 from transformers.configuration_utils import PretrainedConfig
 from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
@@ -55,13 +59,10 @@ from transformers.generation.candidate_generator import (
     AssistedCandidateGeneratorDifferentTokenizers,
     CandidateGenerator,
     PromptLookupCandidateGenerator,
-    _crop_past_key_values,
     _prepare_attention_mask,
     _prepare_token_type_ids,
 )
 from transformers.generation.configuration_utils import (
-    NEED_SETUP_CACHE_CLASSES_MAPPING,
-    QUANT_BACKEND_CLASSES_MAPPING,
     GenerationConfig,
     GenerationMode,
 )
@@ -111,6 +112,70 @@ if TYPE_CHECKING:
 
 logger = logging.get_logger(__name__)
 
+# Compatibility with transformers 4.57.1+
+# These mappings are needed for the removed constants
+NEED_SETUP_CACHE_CLASSES_MAPPING = {
+    "auto": Cache,
+    "dynamic": DynamicCache,
+    "static": StaticCache,
+    "offloaded": OffloadedCache,
+    "sliding_window": SlidingWindowCache,
+    "sink": SinkCache,
+    "hybrid": HybridCache,
+    "hybrid_chunked": HybridChunkedCache,
+}
+
+# Mapping for quantized cache backends
+QUANT_BACKEND_CLASSES_MAPPING = {
+    "quanto": QuantizedCache,
+    "hqq": QuantizedCache,
+}
+
+# Compatibility class for removed QuantizedCacheConfig
+class QuantizedCacheConfig:
+    def __init__(self, backend: str = "quanto", nbits: int = 4,
+                 axis_key: int = 0, axis_value: int = 0,
+                 q_group_size: int = 64, residual_length: int = 128):
+        self.backend = backend
+        self.nbits = nbits
+        self.axis_key = axis_key
+        self.axis_value = axis_value
+        self.q_group_size = q_group_size
+        self.residual_length = residual_length
+
+# Compatibility function for removed _crop_past_key_values
+def _crop_past_key_values(model, past_key_values, max_length):
+    """
+    Crop past key values to a maximum length.
+    This is a compatibility function for the removed _crop_past_key_values.
+    """
+    if past_key_values is None:
+        return past_key_values
+
+    # If past_key_values is a Cache object
+    if hasattr(past_key_values, 'crop'):
+        return past_key_values.crop(max_length)
+
+    # If it's a tuple of tensors (legacy format)
+    if isinstance(past_key_values, tuple):
+        cropped_past_key_values = []
+        for layer_past_key_values in past_key_values:
+            if isinstance(layer_past_key_values, tuple) and len(layer_past_key_values) == 2:
+                # Standard format: (key, value)
+                key, value = layer_past_key_values
+                if key.shape[-2] > max_length:
+                    key = key[..., :max_length, :]
+                if value.shape[-2] > max_length:
+                    value = value[..., :max_length, :]
+                cropped_past_key_values.append((key, value))
+            else:
+                # Other formats, just append as is
+                cropped_past_key_values.append(layer_past_key_values)
+        return tuple(cropped_past_key_values)
+
+    # For other cache types, return as is
+    return past_key_values
+
 if is_accelerate_available():
     from accelerate.hooks import AlignDevicesHook, add_hook_to_module
 
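These shims keep the vendored indextts generation code importable against transformers 4.57.1+, which dropped the cache mappings, `QuantizedCacheConfig`, and `_crop_past_key_values`. For legacy tuple-format caches the reintroduced `_crop_past_key_values` simply slices keys and values along the sequence axis; a standalone sanity check of that slicing logic (re-stated here rather than importing the vendored module):

```python
import torch

def crop_legacy_cache(past_key_values, max_length):
    # Mirrors the tuple branch of the _crop_past_key_values shim above:
    # truncate each layer's key/value tensors along the sequence axis (dim -2).
    return tuple(
        (k[..., :max_length, :], v[..., :max_length, :])
        for k, v in past_key_values
    )

# One (key, value) pair per layer, shaped (batch, heads, seq_len, head_dim).
past = tuple((torch.zeros(1, 2, 10, 4), torch.zeros(1, 2, 10, 4)) for _ in range(3))
cropped = crop_legacy_cache(past, max_length=6)
assert all(k.shape[-2] == 6 and v.shape[-2] == 6 for k, v in cropped)
```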
@@ -1002,7 +1067,8 @@ class GenerationMixin:
                 device=device,
             )
         )
-        if generation_config.forced_decoder_ids is not None:
+        # Compatibility with transformers 4.57.1+: forced_decoder_ids has been removed
+        if hasattr(generation_config, 'forced_decoder_ids') and generation_config.forced_decoder_ids is not None:
             # TODO (sanchit): move this exception to GenerationConfig.validate() when TF & FLAX are aligned with PT
             raise ValueError(
                 "You have explicitly specified `forced_decoder_ids`. Please remove the `forced_decoder_ids` argument "
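The `hasattr` guard is what keeps this check version-agnostic: on transformers 4.57.1+ the attribute no longer exists on `GenerationConfig`, so the branch becomes a no-op, while legacy configs that still set it keep raising the explanatory `ValueError`. A minimal sketch of the pattern (both config classes are fabricated stand-ins):

```python
# Fabricated stand-ins for old/new GenerationConfig, for illustration only.
class OldConfig:
    forced_decoder_ids = [[1, 50259]]

class NewConfig:  # transformers 4.57.1+: the attribute is gone entirely
    pass

def check(cfg):
    if hasattr(cfg, "forced_decoder_ids") and cfg.forced_decoder_ids is not None:
        raise ValueError("forced_decoder_ids is no longer supported")

check(NewConfig())  # silently skipped on new configs
try:
    check(OldConfig())  # still raises for legacy usage
except ValueError as err:
    print(err)
```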
xinference/thirdparty/indextts/gpt/transformers_gpt2.py

@@ -32,7 +32,57 @@ import transformers
 
 from indextts.gpt.transformers_generation_utils import GenerationMixin
 from indextts.gpt.transformers_modeling_utils import PreTrainedModel
-from transformers.modeling_utils import SequenceSummary
+# SequenceSummary has been removed in transformers 4.57.1+
+# Adding compatibility implementation
+class SequenceSummary(nn.Module):
+    """
+    Compute a single vector summary of a sequence hidden states.
+    """
+    def __init__(self, config):
+        super().__init__()
+        self.summary_type = getattr(config, 'summary_type', 'last')
+        self.summary_use_proj = getattr(config, 'summary_use_proj', True)
+        self.summary_activation = getattr(config, 'summary_activation', None)
+        self.summary_proj_to_labels = getattr(config, 'summary_proj_to_labels', True)
+        self.summary_first_dropout = getattr(config, 'summary_first_dropout', 0.1)
+
+        if self.summary_use_proj:
+            if hasattr(config, 'summary_proj_to_labels') and config.summary_proj_to_labels and config.num_labels > 0:
+                num_classes = config.num_labels
+            else:
+                num_classes = config.hidden_size
+            self.summary = nn.Linear(config.hidden_size, num_classes)
+
+        if hasattr(config, 'summary_activation') and config.summary_activation == 'tanh':
+            self.activation = nn.Tanh()
+        else:
+            self.activation = lambda x: x
+
+        if hasattr(config, 'summary_first_dropout') and config.summary_first_dropout > 0:
+            self.dropout = nn.Dropout(config.summary_first_dropout)
+        else:
+            self.dropout = lambda x: x
+
+    def forward(self, hidden_states, cls_token_index=None):
+        if self.summary_type == 'last':
+            output = hidden_states[:, -1]
+        elif self.summary_type == 'first':
+            output = hidden_states[:, 0]
+        elif self.summary_type == 'mean':
+            output = hidden_states.mean(dim=1)
+        elif self.summary_type == 'cls_index':
+            if cls_token_index is None:
+                raise ValueError("cls_token_index must be specified when summary_type='cls_index'")
+            batch_size = hidden_states.size(0)
+            output = hidden_states[batch_size, cls_token_index]
+        else:
+            output = hidden_states[:, -1]  # fallback to last
+
+        output = self.dropout(output)
+        if self.summary_use_proj:
+            output = self.summary(output)
+        output = self.activation(output)
+        return output
 
 from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask_for_sdpa, _prepare_4d_causal_attention_mask_for_sdpa
 from transformers.modeling_outputs import (
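The compatibility class reimplements the common `summary_type` pooling options (`last`, `first`, `mean`, `cls_index`) of the removed original. A usage sketch, assuming the `SequenceSummary` class above and `torch`/`torch.nn` are in scope; the config object is fabricated for illustration, real callers pass the model's GPT-2-style config:

```python
import torch
from types import SimpleNamespace

# Fabricated config for illustration only.
cfg = SimpleNamespace(
    summary_type="mean",
    summary_use_proj=True,
    summary_proj_to_labels=False,
    summary_activation=None,
    summary_first_dropout=0.0,
    num_labels=0,
    hidden_size=8,
)
summary = SequenceSummary(cfg)
pooled = summary(torch.randn(2, 5, 8))  # (batch, seq, hidden) -> (batch, hidden)
assert pooled.shape == (2, 8)
```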