tirex-mirror 2025.10.2__tar.gz → 2025.10.3__tar.gz
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- {tirex_mirror-2025.10.2/src/tirex_mirror.egg-info → tirex_mirror-2025.10.3}/PKG-INFO +1 -1
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/pyproject.toml +1 -1
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/base.py +12 -4
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/models/slstm/cell.py +66 -77
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/models/tirex.py +17 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3/src/tirex_mirror.egg-info}/PKG-INFO +1 -1
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/LICENSE +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/LICENSE_MIRROR.txt +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/MANIFEST.in +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/NOTICE.txt +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/README.md +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/setup.cfg +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/__init__.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/api_adapter/__init__.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/api_adapter/forecast.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/api_adapter/gluon.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/api_adapter/hf_data.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/api_adapter/standard_adapter.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/models/__init__.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/models/patcher.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/models/slstm/block.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/models/slstm/layer.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex/util.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex_mirror.egg-info/SOURCES.txt +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex_mirror.egg-info/dependency_links.txt +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex_mirror.egg-info/requires.txt +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/src/tirex_mirror.egg-info/top_level.txt +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/tests/test_chronos_zs.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/tests/test_forecast.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/tests/test_forecast_adapter.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/tests/test_slstm_torch_vs_cuda.py +0 -0
- {tirex_mirror-2025.10.2 → tirex_mirror-2025.10.3}/tests/test_standard_adapter.py +0 -0
The substantive changes are in src/tirex/base.py (+12 -4), src/tirex/models/slstm/cell.py (+66 -77), and src/tirex/models/tirex.py (+17 -0). Removed lines whose text was not captured in this view are marked with an ellipsis (…).

--- tirex_mirror-2025.10.2/src/tirex/base.py
+++ tirex_mirror-2025.10.3/src/tirex/base.py
@@ -1,6 +1,7 @@
 # Copyright (c) NXAI GmbH.
 # This software may be used and distributed according to the terms of the NXAI Community License Agreement.
 
+import logging
 import os
 from abc import ABC, abstractmethod
 from typing import Literal, TypeVar
@@ -8,6 +9,8 @@ from typing import Literal, TypeVar
 import torch
 from huggingface_hub import hf_hub_download
 
+from tirex.models.slstm.cell import sLSTMCellTorch
+
 T = TypeVar("T", bound="PretrainedModel")
 
 
@@ -38,7 +41,7 @@ class PretrainedModel(ABC):
 
     @classmethod
     def from_pretrained(
-        cls: type[T], path: str, backend: str, device: str | None = None, hf_kwargs=None, ckp_kwargs=None
+        cls: type[T], path: str, backend: str, device: str | None = None, compile=False, hf_kwargs=None, ckp_kwargs=None
     ) -> T:
         if hf_kwargs is None:
             hf_kwargs = {}
@@ -58,9 +61,10 @@ class PretrainedModel(ABC):
         model: T = cls(backend=backend, **checkpoint["hyper_parameters"])
         model.on_load_checkpoint(checkpoint)
         model.load_state_dict(checkpoint["state_dict"])
+        model = model.to(device)
 
-        if backend == "…
-        …
+        if compile and backend == "torch":
+            sLSTMCellTorch.slstm_forward = torch.compile(sLSTMCellTorch.slstm_forward, mode="max-autotune")
         return model
 
     @classmethod
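The compile hook in from_pretrained rebinds a class attribute once, so that every later call that goes through sLSTMCellTorch.slstm_forward hits the compiled function. A minimal standalone sketch of that pattern (Cell and forward are illustrative names, not part of the package):

```python
import torch


class Cell:
    @staticmethod
    def forward(x: torch.Tensor) -> torch.Tensor:
        # stand-in body for the real sLSTM recurrence
        return torch.tanh(x) * torch.sigmoid(x)


# Rebind the attribute once; callers that reach Cell.forward via the class now
# get the torch.compile-wrapped version. Compilation happens lazily on the
# first call and requires a working torch.compile/inductor setup.
Cell.forward = torch.compile(Cell.forward, mode="max-autotune")

print(Cell.forward(torch.randn(4)))
```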
@@ -76,6 +80,7 @@ def load_model(
     path: str,
     device: str | None = None,
     backend: Literal["torch", "cuda"] | None = None,
+    compile: bool = False,
     hf_kwargs=None,
     ckp_kwargs=None,
 ) -> PretrainedModel:
@@ -85,6 +90,7 @@ def load_model(
         path (str): Hugging Face path to the model (e.g. NX-AI/TiRex)
         device (str, optional): The device on which to load the model (e.g., "cuda:0", "cpu").
         backend (torch | cuda): What backend to use, torch or the custom CUDA kernels. Defaults to cuda when xlstm is installed, else torch.
+        compile (bool, optional): toch.compile the sLSTM cells, only works with the torch backend
         hf_kwargs (dict, optional): Keyword arguments to pass to the Hugging Face Hub download method.
         ckp_kwargs (dict, optional): Keyword arguments to pass when loading the checkpoint.
 
@@ -106,4 +112,6 @@ def load_model(
     if model_cls is None:
         raise ValueError(f"Invalid model id {model_id}")
 
-    return model_cls.from_pretrained(…
+    return model_cls.from_pretrained(
+        path, device=device, backend=backend, compile=compile, hf_kwargs=hf_kwargs, ckp_kwargs=ckp_kwargs
+    )
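A hedged usage sketch for the new flag. The model id comes from the docstring above; the package-root import of load_model is an assumption not confirmed by this diff:

```python
# Assumption: load_model is importable from the tirex package root.
from tirex import load_model

# compile=True only has an effect with the torch backend: at load time it wraps
# sLSTMCellTorch.slstm_forward in torch.compile(..., mode="max-autotune").
model = load_model("NX-AI/TiRex", backend="torch", compile=True, device="cpu")
```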
--- tirex_mirror-2025.10.2/src/tirex/models/slstm/cell.py
+++ tirex_mirror-2025.10.3/src/tirex/models/slstm/cell.py
@@ -43,13 +43,11 @@ class sLSTMCell(nn.Module):
         state = self._get_state(input, state)
 
         if self.backend == "torch":
-            …
+            output, state = self._impl_torch(input, state)
         elif self.backend == "cuda":
-            …
+            output, state = self._impl_cuda(input, state)
 
-        …
-        output = self._permute_output(all_states[0][1:])
-        return output.to(input.dtype), state.to(input.dtype)
+        return self._permute_output(output).to(input.dtype), state.to(input.dtype)
 
     def _impl_torch(self, input: torch.Tensor, state: torch.Tensor) -> torch.Tensor:
         input = input.to(dtype=torch.bfloat16)
@@ -64,7 +62,7 @@ class sLSTMCell(nn.Module):
             .reshape(-1)
         )
 
-        return slstm_forward(input, state, recurrent_kernel, bias)
+        return sLSTMCellTorch.slstm_forward(input, state, recurrent_kernel, bias)
 
     def _impl_cuda(self, input: torch.Tensor, state: torch.Tensor) -> torch.Tensor:
         if input.device.type != "cuda":
@@ -88,7 +86,7 @@ class sLSTMCell(nn.Module):
 
         input = input.permute(0, 1, 3, 2, 4).reshape(input.shape[0], input.shape[1], -1)
 
-        …
+        all_states = self.func.apply(
             False,
             input.contiguous(),
             state.contiguous(),
@@ -96,6 +94,10 @@ class sLSTMCell(nn.Module):
             self._bias_.contiguous(),
         )
 
+        state = all_states[:, -1]
+        output = all_states[0][1:]
+        return output, state
+
     def _get_input(self, x: torch.Tensor) -> torch.Tensor:
         assert x.shape[-1] == self.config.embedding_dim * self.config.num_gates, (
             f"Input size mismatch: Expected input size {self.config.embedding_dim * self.config.num_gates}, but got {input.size(-1)}."
@@ -119,73 +121,60 @@ class sLSTMCell(nn.Module):
         return output.permute(1, 2, 0, 3)
 
 
-…
-    states…
-…
-        .…
-        .…
-…
-    )
-…
-        mnew = iraw
-    else:
-        mnew = torch.max(iraw, logfplusm) # eq 15
-    ogate = torch.sigmoid(oraw) # eq 14
-    igate = torch.minimum(torch.exp(iraw - mnew), torch.ones_like(iraw)) # eq 16
-    fgate = torch.minimum(torch.exp(logfplusm - mnew), torch.ones_like(iraw)) # eq 17
-    zgate = torch.tanh(zraw) # eq 11
-    cnew = fgate * c + igate * zgate # eq 8
-    nnew = fgate * n + igate # eq 9
-    hnew = ogate * cnew / nnew # eq 10
-
-    # y (4, B, H), state (4, B, H)
-    return torch.stack((hnew, cnew, nnew, mnew), dim=0), torch.stack((igate, fgate, zraw, ogate), dim=0)
+class sLSTMCellTorch:
+    @staticmethod
+    def slstm_forward(
+        x: torch.Tensor,  # [S, B, G*I]
+        states: torch.Tensor,  # [4, B, H] only the first is used for recurrence!
+        R: torch.Tensor,  # [K, R*H, H] - K num_heads
+        b: torch.Tensor,  # [T*H]
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        num_gates = 4
+        num_heads = R.shape[0]
+        S, B, _ = x.shape
+        H = R.shape[1] * num_heads
+        assert states.shape == (num_gates, B, H)
+
+        states = states.to(R.dtype).unbind(dim=0)
+        output = []
+        for i in range(S):
+            Ry = (
+                states[0]
+                .reshape(B, num_heads, 1, -1)
+                .matmul(R.unsqueeze(0))
+                .reshape(B, num_heads, num_gates, -1)
+                .transpose(1, 2)
+                .reshape(B, -1)
+            )
+            states = sLSTMCellTorch.slstm_forward_pointwise(
+                x[i].float(), Ry.float(), b.float(), [s.float() for s in states]
+            )
+            states = [s.to(dtype=R.dtype) for s in states]
+            output.append(states[0])
+
+        return torch.stack(output), torch.stack(states)  # (S, B, H), 4 x (B, H)
+
+    @staticmethod
+    def slstm_forward_pointwise(
+        Wx: torch.Tensor,  # dim [B, 4*H]
+        Ry: torch.Tensor,  # dim [B, 4*H]
+        b: torch.Tensor,  # dim [1, 4*H]
+        states: torch.Tensor,  # dim 4 x [B, H]
+    ) -> list[torch.Tensor]:
+        y, c, n, m = states
+
+        raw = Wx + Ry + b
+        iraw, fraw, zraw, oraw = torch.unbind(raw.view(raw.shape[0], 4, -1), dim=1)
+
+        # Equations reference the xlstm paper on page 4: https://arxiv.org/pdf/2405.04517
+        logfplusm = m + F.logsigmoid(fraw) # eq 15
+        mnew = torch.where(torch.all(n == 0.0), iraw, torch.max(iraw, logfplusm)) # eq 15
+        ogate = torch.sigmoid(oraw) # eq 14
+        igate = torch.minimum(torch.exp(iraw - mnew), torch.ones_like(iraw)) # eq 16
+        fgate = torch.minimum(torch.exp(logfplusm - mnew), torch.ones_like(iraw)) # eq 17
+        zgate = torch.tanh(zraw) # eq 11
+        cnew = fgate * c + igate * zgate # eq 8
+        nnew = fgate * n + igate # eq 9
+        hnew = ogate * cnew / nnew # eq 10
+
+        return [hnew, cnew, nnew, mnew]  # 4 x (B, H)
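sLSTMCellTorch.slstm_forward now depends only on its tensor arguments, which is what lets base.py swap it for a torch.compile-wrapped version in one place. A shape sketch consistent with the reshapes above; the head count, head dimension, and sequence length are illustrative, not taken from the model config:

```python
import torch

from tirex.models.slstm.cell import sLSTMCellTorch  # import path as used in base.py

S, B = 3, 2                  # sequence length, batch size (illustrative)
num_heads, head_dim = 2, 8   # illustrative head layout
H = num_heads * head_dim     # hidden size

x = torch.randn(S, B, 4 * H)                        # [S, B, num_gates * H] gate pre-activations
states = torch.zeros(4, B, H)                       # [4, B, H]; only states[0] drives the recurrence
R = torch.randn(num_heads, head_dim, 4 * head_dim)  # per-head recurrent kernel
b = torch.zeros(4 * H)                              # gate bias

output, new_states = sLSTMCellTorch.slstm_forward(x, states, R, b)
print(output.shape, new_states.shape)  # (S, B, H) and (4, B, H)
```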
--- tirex_mirror-2025.10.2/src/tirex/models/tirex.py
+++ tirex_mirror-2025.10.3/src/tirex/models/tirex.py
@@ -179,8 +179,25 @@ class TiRexZero(nn.Module, PretrainedModel, ForecastModel):
         quantile_preds = torch.transpose(quantile_preds, 1, 2) # switch quantile and num_token_dimension
         # quantile_preds: [batch_size, num_quantiles, num_token, output_patch_size]
 
+        quantile_preds = self._forward_model(torch.cat((input_token, input_mask), dim=2))
+
+        quantile_preds = torch.unflatten(
+            quantile_preds, -1, (len(self.config.quantiles), self.config.output_patch_size)
+        )
+        quantile_preds = torch.transpose(quantile_preds, 1, 2) # switch quantile and num_token_dimension
+        # quantile_preds: [batch_size, num_quantiles, num_token, output_patch_size]
         return quantile_preds, hidden_states
 
+    def _forward_model(self, input: torch.Tensor):
+        hidden_states = self.input_patch_embedding(input)
+
+        for block in self.blocks:
+            hidden_states = block(hidden_states)
+
+        hidden_states = self.out_norm(hidden_states)
+
+        return self.output_patch_embedding(hidden_states)
+
     def _interpolate_quantiles(self, predictions: torch.Tensor, quantile_levels: list[float]):
         training_quantile_levels = self.config.quantiles
         if min(quantile_levels) < min(training_quantile_levels) or max(quantile_levels) > max(training_quantile_levels):
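For readers tracing the unflatten/transpose step in the new forward path, a standalone shape walk-through; the dimension sizes are illustrative, not taken from the model config:

```python
import torch

B, T, Q, P = 2, 5, 9, 32         # batch, num_token, num_quantiles, output_patch_size
flat = torch.randn(B, T, Q * P)  # one flat prediction vector per token

preds = torch.unflatten(flat, -1, (Q, P))  # [B, T, Q, P]
preds = torch.transpose(preds, 1, 2)       # [B, Q, T, P]: quantile and token dims switched
assert preds.shape == (B, Q, T, P)
```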