tirex-mirror 2025.10.16__tar.gz → 2025.10.18__tar.gz

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (34)
  1. {tirex_mirror-2025.10.16/src/tirex_mirror.egg-info → tirex_mirror-2025.10.18}/PKG-INFO +1 -1
  2. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/pyproject.toml +1 -1
  3. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/models/slstm/cell.py +3 -3
  4. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/models/slstm/layer.py +5 -3
  5. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/models/tirex.py +38 -35
  6. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18/src/tirex_mirror.egg-info}/PKG-INFO +1 -1
  7. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/LICENSE +0 -0
  8. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/LICENSE_MIRROR.txt +0 -0
  9. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/MANIFEST.in +0 -0
  10. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/NOTICE.txt +0 -0
  11. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/README.md +0 -0
  12. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/setup.cfg +0 -0
  13. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/__init__.py +0 -0
  14. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/api_adapter/__init__.py +0 -0
  15. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/api_adapter/forecast.py +0 -0
  16. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/api_adapter/gluon.py +0 -0
  17. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/api_adapter/hf_data.py +0 -0
  18. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/api_adapter/standard_adapter.py +0 -0
  19. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/base.py +0 -0
  20. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/models/__init__.py +0 -0
  21. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/models/patcher.py +0 -0
  22. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/models/slstm/block.py +0 -0
  23. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/util.py +0 -0
  24. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex_mirror.egg-info/SOURCES.txt +0 -0
  25. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex_mirror.egg-info/dependency_links.txt +0 -0
  26. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex_mirror.egg-info/requires.txt +0 -0
  27. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex_mirror.egg-info/top_level.txt +0 -0
  28. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/tests/test_chronos_zs.py +0 -0
  29. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/tests/test_forecast.py +0 -0
  30. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/tests/test_forecast_adapter.py +0 -0
  31. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/tests/test_jupyterlab.py +0 -0
  32. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/tests/test_slstm_torch_vs_cuda.py +0 -0
  33. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/tests/test_standard_adapter.py +0 -0
  34. {tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/tests/test_util_freq.py +0 -0

{tirex_mirror-2025.10.16/src/tirex_mirror.egg-info → tirex_mirror-2025.10.18}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tirex-mirror
-Version: 2025.10.16
+Version: 2025.10.18
 Summary: Unofficial mirror of NX-AI/tirex for packaging
 Author-email: Arpad Rozsas <rozsasarpi@gmail.com>
 License: NXAI COMMUNITY LICENSE AGREEMENT

{tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "tirex-mirror"
-version = "2025.10.16"
+version = "2025.10.18"
 description = "Unofficial mirror of NX-AI/tirex for packaging"
 readme = "README.md"
 requires-python = ">=3.11"

{tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/models/slstm/cell.py
@@ -100,7 +100,7 @@ class sLSTMCell(nn.Module):
 
     def _get_input(self, x: torch.Tensor) -> torch.Tensor:
         assert x.shape[-1] == self.config.embedding_dim * self.config.num_gates, (
-            f"Input size mismatch: Expected input size {self.config.embedding_dim * self.config.num_gates}, but got {input.size(-1)}."
+            f"Input size mismatch: Expected input size {self.config.embedding_dim * self.config.num_gates}, but got {x.size(-1)}."
         )
         return x.view(x.shape[0], x.shape[1], self.config.num_gates, self.config.num_heads, -1).permute(1, 0, 2, 3, 4)
 
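
Note on the fix above: the old message formatted input.size(-1), but the parameter is named x, so input resolved to the Python builtin. Because an assert's message expression is only evaluated when the assertion fails, a genuine size mismatch would have surfaced as an AttributeError rather than the intended message. What the old f-string would have hit:

    >>> input.size(-1)
    Traceback (most recent call last):
      ...
    AttributeError: 'builtin_function_or_method' object has no attribute 'size'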

@@ -128,7 +128,7 @@ class sLSTMCellTorch:
         states: torch.Tensor,  # [4, B, H] only the first is used for recurrence!
         R: torch.Tensor,  # [K, R*H, H] - K num_heads
         b: torch.Tensor,  # [T*H]
-    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         num_gates = 4
         num_heads = R.shape[0]
         S, B, _ = x.shape

@@ -167,7 +167,7 @@
         iraw, fraw, zraw, oraw = torch.unbind(raw.view(raw.shape[0], 4, -1), dim=1)
 
         # Equations reference the xlstm paper on page 4: https://arxiv.org/pdf/2405.04517
-        logfplusm = m + F.logsigmoid(fraw)  # eq 15
+        logfplusm = m + F.logsigmoid(torch.clamp(fraw, max=15))  # eq 15  # Clamp to avoid subnormals
         mnew = torch.where(torch.all(n == 0.0), iraw, torch.max(iraw, logfplusm))  # eq 15
         ogate = torch.sigmoid(oraw)  # eq 14
         igate = torch.minimum(torch.exp(iraw - mnew), torch.ones_like(iraw))  # eq 16
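
Note on the clamp: for large forget-gate pre-activations, F.logsigmoid(fraw) ≈ -exp(-fraw), which drifts into subnormal territory and eventually flushes to negative zero in float32, whereas clamping at 15 keeps the result a comfortably normal float (exp(-15) ≈ 3e-7). A minimal standalone check, with an input value made up for illustration:

    import torch
    import torch.nn.functional as F

    fraw = torch.tensor([200.0])              # an extreme pre-activation
    print(F.logsigmoid(fraw))                 # tensor([-0.]) -- underflowed to negative zero
    print(F.logsigmoid(fraw.clamp(max=15)))   # tensor([-3.0590e-07]) -- a normal float32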

{tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/models/slstm/layer.py
@@ -20,7 +20,7 @@ class sLSTMLayer(nn.Module):
         self.ogate = LinearHeadwiseExpand(in_features, num_heads)
 
         self.slstm_cell = sLSTMCell(self.config, backend)
-        self.group_norm = MultiHeadLayerNorm(ndim=in_features)
+        self.group_norm = MultiHeadLayerNorm(ndim=in_features, num_heads=num_heads)
 
     def forward(self, x: torch.Tensor, slstm_state: torch.Tensor | None = None) -> torch.Tensor:
         x_g = torch.cat((self.fgate(x), self.igate(x), self.zgate(x), self.ogate(x)), dim=-1)

@@ -50,18 +50,20 @@ class LinearHeadwiseExpand(nn.Module):
 
 
 class MultiHeadLayerNorm(nn.Module):
-    def __init__(self, ndim: int):
+    def __init__(self, ndim: int, num_heads: int):
         super().__init__()
         self.weight = nn.Parameter(torch.zeros(ndim))
+        self.num_heads = num_heads
 
     def forward(self, input: torch.Tensor) -> torch.Tensor:
         assert input.dim() == 4, "Input must be 4D tensor (B, NH, S, DH)"
         B, NH, S, DH = input.shape
 
+        assert NH == self.num_heads
         gn_in_1 = input.transpose(1, 2)  # (B, S, NH, DH)
         gn_in_2 = gn_in_1.reshape(B * S, NH * DH)  # (B * S, NH * DH)
         residual_weight = 1.0 + self.weight
-        out = F.group_norm(gn_in_2, num_groups=NH, weight=residual_weight)
+        out = F.group_norm(gn_in_2, num_groups=self.num_heads, weight=residual_weight)
         # (B * S), (NH * DH) -> (B, S, NH, DH) -> (B, NH, S, DH)
         out = out.view(B, S, NH, DH).transpose(1, 2)
         return out
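
Note: F.group_norm with num_groups equal to the head count normalizes each head's DH features independently, i.e. it acts as a per-head LayerNorm; carrying num_heads on the module (instead of inferring it from the input) makes the grouping explicit and lets the added assert catch a mislaid head dimension. A minimal equivalence sketch, with arbitrary shapes and no affine weight:

    import torch
    import torch.nn.functional as F

    B, NH, S, DH = 2, 4, 5, 8
    x = torch.randn(B, NH, S, DH)

    # group_norm over (B*S, NH*DH) with NH groups normalizes each head's DH
    # features per sample -- the same statistics as a per-head LayerNorm.
    flat = x.transpose(1, 2).reshape(B * S, NH * DH)
    gn = F.group_norm(flat, num_groups=NH).view(B, S, NH, DH).transpose(1, 2)
    ln = F.layer_norm(x, normalized_shape=(DH,))
    print(torch.allclose(gn, ln, atol=1e-5))  # True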

{tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18}/src/tirex/models/tirex.py
@@ -79,12 +79,18 @@ class TiRexZero(nn.Module, PretrainedModel, ForecastModel):
         training_quantile_levels = self.config.quantiles
 
         if set(quantile_levels).issubset(set(training_quantile_levels)):
-            quantiles = predictions[..., [training_quantile_levels.index(q) for q in quantile_levels]]
+            quantile_indices = torch.tensor(
+                [training_quantile_levels.index(q) for q in quantile_levels],
+                dtype=torch.long,
+                device=predictions.device,
+            )
+            quantiles = torch.index_select(predictions, dim=-1, index=quantile_indices)
         else:
             quantiles = self._interpolate_quantiles(predictions, quantile_levels)
 
         # median as mean
-        mean = predictions[:, :, training_quantile_levels.index(0.5)]
+        median_idx = torch.tensor([training_quantile_levels.index(0.5)], dtype=torch.long, device=predictions.device)
+        mean = torch.index_select(predictions, dim=-1, index=median_idx).squeeze(-1)
         return quantiles, mean
 
     @torch.inference_mode()
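
Note: the index_select form is value-equivalent to the old list-based advanced indexing; building the index once as a torch.long tensor on the predictions' device avoids converting a Python list to a tensor inside the call and is generally friendlier to graph capture (the motivation is not stated in the diff). A small check with made-up shapes:

    import torch

    predictions = torch.randn(2, 7, 9)  # hypothetical [batch, steps, num_quantiles]
    idx = torch.tensor([0, 4, 8], dtype=torch.long, device=predictions.device)

    a = predictions[..., [0, 4, 8]]                         # old style: Python-list indexing
    b = torch.index_select(predictions, dim=-1, index=idx)  # new style: on-device index
    print(torch.equal(a, b))  # True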

@@ -105,24 +111,8 @@ class TiRexZero(nn.Module, PretrainedModel, ForecastModel):
 
         context = context.to(dtype=torch.float32)
         while remaining > 0:
-            if context.shape[-1] > max_context:
-                context = context[..., -max_context:]
-            if context.shape[-1] < min_context:
-                pad = torch.full(
-                    (context.shape[0], min_context - context.shape[-1]),
-                    fill_value=torch.nan,
-                    device=context.device,
-                    dtype=context.dtype,
-                )
-                context = torch.concat((pad, context), dim=1)
-            tokenized_tensor, tokenizer_state = self.tokenizer.context_input_transform(context)
             fut_rollouts = min(remaining, max_accelerated_rollout_steps)
-            with torch.no_grad():
-                prediction, _ = self._forward_model_tokenized(input_token=tokenized_tensor, rollouts=fut_rollouts)
-            prediction = prediction[:, :, -fut_rollouts:, :].to(tokenized_tensor)  # predicted token
-            # [bs, num_quantiles, num_predicted_token, output_patch_size]
-            prediction = self.tokenizer.output_transform(prediction, tokenizer_state)
-            prediction = prediction.flatten(start_dim=2)
+            prediction, fut_rollouts = self._forecast_single_step(context, max_context, min_context, fut_rollouts)
 
             predictions.append(prediction)
             remaining -= fut_rollouts

@@ -134,6 +124,33 @@ class TiRexZero(nn.Module, PretrainedModel, ForecastModel):
 
         return torch.cat(predictions, dim=-1)[..., :prediction_length].to(dtype=torch.float32)
 
+    def _forecast_single_step(
+        self,
+        context: torch.Tensor,
+        max_context: int,
+        min_context: int,
+        new_patch_count: int = 1,
+    ) -> tuple[torch.Tensor, int]:
+        if context.shape[-1] > max_context:
+            context = context[..., -max_context:]
+        if context.shape[-1] < min_context:
+            pad = torch.full(
+                (context.shape[0], min_context - context.shape[-1]),
+                fill_value=torch.nan,
+                device=context.device,
+                dtype=context.dtype,
+            )
+            context = torch.concat((pad, context), dim=1)
+
+        tokenized_tensor, tokenizer_state = self.tokenizer.context_input_transform(context)
+        prediction, _ = self._forward_model_tokenized(input_token=tokenized_tensor, rollouts=new_patch_count)
+        prediction = prediction[:, :, -new_patch_count:, :].to(tokenized_tensor)  # predicted token
+        # Shape: [bs, num_quantiles, num_predicted_token, output_patch_size]
+        prediction = self.tokenizer.output_transform(prediction, tokenizer_state)
+        prediction = prediction.flatten(start_dim=2)
+
+        return prediction, new_patch_count
+
     def _forward_model_tokenized(
         self,
         input_token: torch.Tensor,
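
Note: _forecast_single_step is the old loop body extracted essentially verbatim, minus the torch.no_grad wrapper, which is presumably redundant under the @torch.inference_mode() decorator visible in the earlier hunk. Its windowing step truncates the context to max_context and left-pads with NaN up to min_context; the NaNs are masked downstream via nan_to_num with config.nan_mask_value. A standalone sketch of just that windowing logic, with a hypothetical fit_context helper and made-up sizes:

    import torch

    def fit_context(context: torch.Tensor, max_context: int, min_context: int) -> torch.Tensor:
        # Keep only the most recent max_context values ...
        if context.shape[-1] > max_context:
            context = context[..., -max_context:]
        # ... and left-pad with NaN up to min_context.
        if context.shape[-1] < min_context:
            pad = torch.full(
                (context.shape[0], min_context - context.shape[-1]),
                fill_value=torch.nan,
                dtype=context.dtype,
            )
            context = torch.concat((pad, context), dim=1)
        return context

    print(fit_context(torch.ones(1, 3), max_context=8, min_context=5))
    # tensor([[nan, nan, 1., 1., 1.]])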

@@ -165,21 +182,7 @@ class TiRexZero(nn.Module, PretrainedModel, ForecastModel):
 
         input_token = torch.nan_to_num(input_token, nan=self.config.nan_mask_value)
 
-        hidden_states = self.input_patch_embedding(torch.cat((input_token, input_mask), dim=2))
-
-        for block in self.blocks:
-            hidden_states = block(hidden_states)
-
-        hidden_states = self.out_norm(hidden_states)
-
-        quantile_preds = self.output_patch_embedding(hidden_states)
-        quantile_preds = torch.unflatten(
-            quantile_preds, -1, (len(self.config.quantiles), self.config.output_patch_size)
-        )
-        quantile_preds = torch.transpose(quantile_preds, 1, 2)  # switch quantile and num_token_dimension
-        # quantile_preds: [batch_size, num_quantiles, num_token, output_patch_size]
-
-        quantile_preds = self._forward_model(torch.cat((input_token, input_mask), dim=2))
+        quantile_preds, hidden_states = self._forward_model(torch.cat((input_token, input_mask), dim=2))
 
         quantile_preds = torch.unflatten(
             quantile_preds, -1, (len(self.config.quantiles), self.config.output_patch_size)

@@ -196,7 +199,7 @@ class TiRexZero(nn.Module, PretrainedModel, ForecastModel):
 
         hidden_states = self.out_norm(hidden_states)
 
-        return self.output_patch_embedding(hidden_states)
+        return self.output_patch_embedding(hidden_states), hidden_states
 
     def _interpolate_quantiles(self, predictions: torch.Tensor, quantile_levels: list[float]):
         training_quantile_levels = self.config.quantiles
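
Note on the two hunks above: before this change, _forward_model_tokenized ran the backbone inline and then called self._forward_model(...) on the same input anyway, discarding the inline result, so the backbone ran twice per call. _forward_model now returns (quantile_preds, hidden_states), so a single pass provides both. The unflatten/transpose that follows reshapes the flat head output into [batch_size, num_quantiles, num_token, output_patch_size]; a quick shape walkthrough with assumed dimensions:

    import torch

    bs, num_token = 2, 6                       # assumed sizes
    num_quantiles, output_patch_size = 9, 32   # assumed config values

    flat = torch.randn(bs, num_token, num_quantiles * output_patch_size)
    q = torch.unflatten(flat, -1, (num_quantiles, output_patch_size))  # [bs, num_token, Q, P]
    q = torch.transpose(q, 1, 2)  # [bs, Q, num_token, P] -- quantile axis moved forward
    print(q.shape)  # torch.Size([2, 9, 6, 32])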

{tirex_mirror-2025.10.16 → tirex_mirror-2025.10.18/src/tirex_mirror.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tirex-mirror
-Version: 2025.10.16
+Version: 2025.10.18
 Summary: Unofficial mirror of NX-AI/tirex for packaging
 Author-email: Arpad Rozsas <rozsasarpi@gmail.com>
 License: NXAI COMMUNITY LICENSE AGREEMENT