torchrl-nightly 2025.7.19__cp312-cp312-macosx_10_13_universal2.whl → 2025.7.21__cp312-cp312-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchrl/_extension.py +9 -5
- torchrl/_torchrl.cpython-312-darwin.so +0 -0
- torchrl/objectives/sac.py +152 -8
- torchrl/version.py +3 -2
- {torchrl_nightly-2025.7.19.dist-info → torchrl_nightly-2025.7.21.dist-info}/METADATA +1 -1
- {torchrl_nightly-2025.7.19.dist-info → torchrl_nightly-2025.7.21.dist-info}/RECORD +9 -9
- {torchrl_nightly-2025.7.19.dist-info → torchrl_nightly-2025.7.21.dist-info}/WHEEL +0 -0
- {torchrl_nightly-2025.7.19.dist-info → torchrl_nightly-2025.7.21.dist-info}/licenses/LICENSE +0 -0
- {torchrl_nightly-2025.7.19.dist-info → torchrl_nightly-2025.7.21.dist-info}/top_level.txt +0 -0
torchrl/_extension.py
CHANGED
@@ -10,9 +10,10 @@ import warnings
 from packaging.version import parse
 
 try:
-    from .version import __version__
+    from .version import __version__, pytorch_version
 except ImportError:
     __version__ = None
+    pytorch_version = "unknown"
 
 
 def is_module_available(*modules: str) -> bool:
@@ -47,13 +48,16 @@ if _is_nightly(__version__):
         " - make sure ninja and cmake were installed\n"
         " - make sure you ran `python setup.py clean && python setup.py develop` and that no error was raised\n"
         " - make sure the version of PyTorch you are using matches the one that was present in your virtual env during "
-        "setup."
+        f"setup. This package was built with PyTorch {pytorch_version}."
     )
 
 else:
     EXTENSION_WARNING = (
         "Failed to import torchrl C++ binaries. Some modules (eg, prioritized replay buffers) may not work with your installation. "
-        "This is likely due to a discrepancy between your package version and the PyTorch version.
-        "
-        "
+        "This is likely due to a discrepancy between your package version and the PyTorch version. "
+        "TorchRL does not tightly pin PyTorch versions to give users freedom, but the trade-off is that C++ extensions like "
+        "prioritized replay buffers can only be used with the PyTorch version they were built against. "
+        f"This package was built with PyTorch {pytorch_version}. "
+        "Workarounds include: (1) upgrading/downgrading PyTorch or TorchRL to compatible versions, "
+        "or (2) making a local install using `pip install git+https://github.com/pytorch/rl.git@<version>`."
    )
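
The expanded warning above records the PyTorch build the wheel was compiled against, now exposed as `pytorch_version` in `torchrl/version.py` (with an `"unknown"` fallback). As a hedged illustration that is not part of this diff, a downstream project could run the same comparison up front; only `torchrl.version.pytorch_version` and its fallback come from this release, the rest is an assumed sketch:

# Sketch only: compare the PyTorch build torchrl was compiled against with the
# locally installed torch; names other than torchrl.version.pytorch_version are
# standard PyTorch APIs, nothing here is taken from the package itself.
import torch

try:
    from torchrl.version import pytorch_version  # added in 2025.7.21
except ImportError:  # older torchrl wheels do not expose it
    pytorch_version = "unknown"

built = pytorch_version.split("+")[0]
installed = torch.__version__.split("+")[0]
if built != "unknown" and built != installed:
    print(
        f"torchrl was built against PyTorch {pytorch_version} but {torch.__version__} "
        "is installed; C++ extensions such as prioritized replay buffers may not import."
    )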
torchrl/_torchrl.cpython-312-darwin.so
CHANGED
Binary file (no diff shown)
torchrl/objectives/sac.py
CHANGED
@@ -687,6 +687,102 @@ class SACLoss(LossModule):
         )
         return self._alpha * log_prob - min_q_logprob, {"log_prob": log_prob.detach()}
 
+    @dispatch
+    def actor_loss(
+        self, tensordict: TensorDictBase
+    ) -> tuple[Tensor, dict[str, Tensor]]:
+        """Compute the actor loss for SAC.
+
+        This method computes the actor loss which encourages the policy to maximize
+        the expected Q-value while maintaining high entropy.
+
+        Args:
+            tensordict (TensorDictBase): A tensordict containing the data needed for
+                computing the actor loss. Should contain the observation and other
+                required keys for the actor network.
+
+        Returns:
+            A tuple containing:
+                - The actor loss tensor
+                - A dictionary with metadata including the log probability of actions
+        """
+        return self._actor_loss(tensordict)
+
+    @dispatch
+    def qvalue_loss(
+        self, tensordict: TensorDictBase
+    ) -> tuple[Tensor, dict[str, Tensor]]:
+        """Compute the Q-value loss for SAC.
+
+        This method computes the Q-value loss which trains the Q-networks to estimate
+        the expected return for state-action pairs.
+
+        Args:
+            tensordict (TensorDictBase): A tensordict containing the data needed for
+                computing the Q-value loss. Should contain the observation, action,
+                reward, done, and terminated keys.
+
+        Returns:
+            A tuple containing:
+                - The Q-value loss tensor
+                - A dictionary with metadata including the TD error
+        """
+        if self._version == 1:
+            return self._qvalue_v1_loss(tensordict)
+        else:
+            return self._qvalue_v2_loss(tensordict)
+
+    @dispatch
+    def value_loss(
+        self, tensordict: TensorDictBase
+    ) -> tuple[Tensor, dict[str, Tensor]]:
+        """Compute the value loss for SAC (version 1 only).
+
+        This method computes the value loss which trains the value network to estimate
+        the expected return for states. This is only used in SAC version 1.
+
+        Args:
+            tensordict (TensorDictBase): A tensordict containing the data needed for
+                computing the value loss. Should contain the observation and other
+                required keys for the value network.
+
+        Returns:
+            A tuple containing:
+                - The value loss tensor
+                - An empty dictionary (no metadata for value loss)
+
+        Raises:
+            RuntimeError: If called on SAC version 2 (which doesn't use a value network)
+        """
+        if self._version != 1:
+            raise RuntimeError(
+                "Value loss is only available in SAC version 1. "
+                "SAC version 2 doesn't use a separate value network."
+            )
+        return self._value_loss(tensordict)
+
+    def alpha_loss(self, log_prob: Tensor) -> Tensor:
+        """Compute the alpha loss for SAC.
+
+        This method computes the alpha loss which adapts the entropy coefficient
+        to maintain the target entropy level.
+
+        Args:
+            log_prob (Tensor): The log probability of actions from the actor network.
+
+        Returns:
+            The alpha loss tensor
+        """
+        return self._alpha_loss(log_prob)
+
+    @property
+    def _alpha(self):
+        if self.min_log_alpha is not None or self.max_log_alpha is not None:
+            self.log_alpha.data.clamp_(self.min_log_alpha, self.max_log_alpha)
+        with torch.no_grad():
+            alpha = self.log_alpha.exp()
+        return alpha
+
     @property
     @_cache_values
     def _cached_target_params_actor_value(self):
@@ -882,14 +978,6 @@
             alpha_loss = torch.zeros_like(log_prob)
         return alpha_loss
 
-    @property
-    def _alpha(self):
-        if self.min_log_alpha is not None or self.max_log_alpha is not None:
-            self.log_alpha.data.clamp_(self.min_log_alpha, self.max_log_alpha)
-        with torch.no_grad():
-            alpha = self.log_alpha.exp()
-        return alpha
-
 
 class DiscreteSACLoss(LossModule):
     """Discrete SAC Loss module.
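
The two hunks above give `SACLoss` public, documented entry points for its individual loss terms (previously only reachable through the private `_actor_loss`, `_qvalue_v*_loss`, `_value_loss` and `_alpha_loss` helpers) and move the `_alpha` property next to them. A minimal usage sketch follows; `loss_module` (an already-constructed `SACLoss`) and `batch` (a sampled TensorDict with the keys listed in the docstrings) are placeholders, not part of the diff:

# Sketch only: `loss_module` is an existing SACLoss, `batch` a sampled
# TensorDict with observation, action, reward, done and terminated entries.
loss_actor, metadata = loss_module.actor_loss(batch)    # metadata carries "log_prob"
loss_qvalue, q_info = loss_module.qvalue_loss(batch)    # metadata carries the TD error
loss_alpha = loss_module.alpha_loss(metadata["log_prob"])

# .mean() is a no-op if the terms are already reduced to scalars.
total = loss_actor.mean() + loss_qvalue.mean() + loss_alpha.mean()
total.backward()

Compared with calling the module's forward() and reading the aggregated loss keys, the per-term methods make it easier to step separate optimizers or to log each component on its own.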
@@ -1352,6 +1440,48 @@ class DiscreteSACLoss(LossModule):
         target_value = self.value_estimator.value_estimate(tensordict).squeeze(-1)
         return target_value
 
+    @dispatch
+    def actor_loss(
+        self, tensordict: TensorDictBase
+    ) -> tuple[Tensor, dict[str, Tensor]]:
+        """Compute the actor loss for discrete SAC.
+
+        This method computes the actor loss which encourages the policy to maximize
+        the expected Q-value while maintaining high entropy for discrete actions.
+
+        Args:
+            tensordict (TensorDictBase): A tensordict containing the data needed for
+                computing the actor loss. Should contain the observation and other
+                required keys for the actor network.
+
+        Returns:
+            A tuple containing:
+                - The actor loss tensor
+                - A dictionary with metadata including the log probability of actions
+        """
+        return self._actor_loss(tensordict)
+
+    @dispatch
+    def qvalue_loss(
+        self, tensordict: TensorDictBase
+    ) -> tuple[Tensor, dict[str, Tensor]]:
+        """Compute the Q-value loss for discrete SAC.
+
+        This method computes the Q-value loss which trains the Q-networks to estimate
+        the expected return for state-action pairs in discrete action spaces.
+
+        Args:
+            tensordict (TensorDictBase): A tensordict containing the data needed for
+                computing the Q-value loss. Should contain the observation, action,
+                reward, done, and terminated keys.
+
+        Returns:
+            A tuple containing:
+                - The Q-value loss tensor
+                - A dictionary with metadata including the TD error
+        """
+        return self._value_loss(tensordict)
+
     def _value_loss(
         self, tensordict: TensorDictBase
     ) -> tuple[Tensor, dict[str, Tensor]]:
@@ -1427,6 +1557,20 @@
             alpha_loss = torch.zeros_like(log_prob)
         return alpha_loss
 
+    def alpha_loss(self, log_prob: Tensor) -> Tensor:
+        """Compute the alpha loss for discrete SAC.
+
+        This method computes the alpha loss which adapts the entropy coefficient
+        to maintain the target entropy level for discrete actions.
+
+        Args:
+            log_prob (Tensor): The log probability of actions from the actor network.
+
+        Returns:
+            The alpha loss tensor
+        """
+        return self._alpha_loss(log_prob)
+
     @property
     def _alpha(self):
         if self.min_log_alpha is not None or self.max_log_alpha is not None:
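
`DiscreteSACLoss` gains the same public decomposition, which allows each loss term to drive its own optimizer, for example when the actor and the Q-networks are updated at different frequencies. A hedged sketch under assumed names (`loss_module`, `batch`, and the three optimizers are placeholders; the `"log_prob"` metadata key mirrors the continuous-action case above):

# Sketch only: all names below are placeholders for an existing DiscreteSACLoss,
# a sampled batch, and three torch.optim optimizers over the respective parameters.
loss_actor, metadata = loss_module.actor_loss(batch)
optim_actor.zero_grad()
loss_actor.mean().backward()
optim_actor.step()

loss_q, _ = loss_module.qvalue_loss(batch)
optim_critic.zero_grad()
loss_q.mean().backward()
optim_critic.step()

loss_alpha = loss_module.alpha_loss(metadata["log_prob"])
optim_alpha.zero_grad()
loss_alpha.mean().backward()
optim_alpha.step()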
torchrl/version.py
CHANGED
@@ -1,2 +1,3 @@
-__version__ = '2025.7.19'
-git_version = '
+__version__ = '2025.7.21'
+git_version = '851a0410a7c72ad656ab3712395450ae482ce3f6'
+pytorch_version = '2.9.0.dev20250721'
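
With the new `pytorch_version` field the wheel is self-describing. A small hedged sketch, not taken from the package, of printing the three fields for a bug report alongside the locally installed torch build:

# Sketch only: reads the attributes shown in the diff above.
import torch
import torchrl.version as trl_version

print(f"torchrl {trl_version.__version__} (git {trl_version.git_version[:8]})")
print(f"built against PyTorch {trl_version.pytorch_version}, running {torch.__version__}")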
{torchrl_nightly-2025.7.19.dist-info → torchrl_nightly-2025.7.21.dist-info}/RECORD
RENAMED
@@ -2,10 +2,10 @@ build_tools/__init__.py,sha256=D9ECsap08rcUtHyaR111nTjArlKkUjQwP8usXwXAAVQ,179
 build_tools/setup_helpers/__init__.py,sha256=7l8TvVqxKezgzKCLuRv20mvGLloprFVZYm8CWS8a6CU,276
 build_tools/setup_helpers/extension.py,sha256=4-PDLr-pw40bJnd9SfxnTaSjUyuXU_Tg8yOg69Kl0o4,5914
 torchrl/__init__.py,sha256=mhDBx2UIuBKc0gmi8dVNHokQ6tCbIovruZmyAxcSsy8,2938
-torchrl/_extension.py,sha256=
-torchrl/_torchrl.cpython-312-darwin.so,sha256=
+torchrl/_extension.py,sha256=YmBlmR6Kt0P1rFLSqA9IbkQlhHuJlzySY-ZlUpckxBY,2736
+torchrl/_torchrl.cpython-312-darwin.so,sha256=D48AeTXfz_agfVAfGuv00nykEj2aTD_tIxcyYUhDbF8,1692224
 torchrl/_utils.py,sha256=Cw5EG6x5oSZF1iE3YCs1a32VUKp0rTXIs2u67q9zKUI,41078
-torchrl/version.py,sha256=
+torchrl/version.py,sha256=r9rIVz69C9oNJra3D2dU2cL9iEKutdEWy1fSsSIEcVI,121
 torchrl/collectors/__init__.py,sha256=hJ3JD6shRku0BL6SzJQq44FZ5Q1RGR8LealFyU3FRn4,799
 torchrl/collectors/collectors.py,sha256=HpaW-y0bQOaOql8_7VyEPJ084CulrVwn6iBpGYoHyH4,178287
 torchrl/collectors/utils.py,sha256=MlXrkYuDmV0Em-tVNQiLL32FWgPNDgceYYG_GgpiviA,11320
@@ -191,7 +191,7 @@ torchrl/objectives/iql.py,sha256=1jvlSznWke6NZSwfuYyHVnVBE7Cz3q169GnCRC7iel4,429
 torchrl/objectives/ppo.py,sha256=0soC2aiCOFNM5hCL20-99LX_NZi6XIXDmG2IkGEHSek,76082
 torchrl/objectives/redq.py,sha256=4usM-nG2UWujeL-VEqzf7-uOwRFx6itkKCeitKuJhtw,28507
 torchrl/objectives/reinforce.py,sha256=ySXLp5C-OOUYayqjrf4taQmL8LgRvMgPCgHDsle8JDc,22339
-torchrl/objectives/sac.py,sha256=
+torchrl/objectives/sac.py,sha256=21AR3GC9tcvXAlWE4m2abp1UZgDKyLegEDiD8-EjJbQ,69232
 torchrl/objectives/td3.py,sha256=RnlkGzBBTY0KrfRKytsFbNyoVUy2HLfwSL4_9YQRep8,23190
 torchrl/objectives/td3_bc.py,sha256=jHGwCzPuCbN37zAxsiDQIe92yR1UE7rjcnJoy8b_NjE,25950
 torchrl/objectives/utils.py,sha256=qDP_ZCjsJyPQwBqGf5Vzrdw6HtNyWqQeAEb2QpvwW1Q,24739
@@ -223,8 +223,8 @@ torchrl/trainers/helpers/losses.py,sha256=sHlJqjh02t8cKN73X35Azd_OoWGurohLuviB8Y
 torchrl/trainers/helpers/models.py,sha256=ihTERG2c96E8cS3Tnul6a_ys6iDEEJmHh05p9blQTW8,21807
 torchrl/trainers/helpers/replay_buffer.py,sha256=ZUZHOa0TILyeWJ3iahzTJ6UvMl_0FdxuZfJEja94Bn8,2001
 torchrl/trainers/helpers/trainers.py,sha256=j6B5XA7_FFHMQeOIQwjNcO0CGE_4mZKUC9_jH_iqqh4,12071
-torchrl_nightly-2025.7.
-torchrl_nightly-2025.7.
-torchrl_nightly-2025.7.
-torchrl_nightly-2025.7.
-torchrl_nightly-2025.7.
+torchrl_nightly-2025.7.21.dist-info/licenses/LICENSE,sha256=xdjS4_xk-IwnLuIFCvTYTl9Y8aXRejqpmke3dGam_nI,1098
+torchrl_nightly-2025.7.21.dist-info/METADATA,sha256=fmyatCEgcxBi17W8uVg1hZ-9fslugseRebMHAvwZq8I,42990
+torchrl_nightly-2025.7.21.dist-info/WHEEL,sha256=9_3tTSxMJq-dgdzMiScNvtT5eTBVd3l6RgHS7HwTzpA,115
+torchrl_nightly-2025.7.21.dist-info/top_level.txt,sha256=JeTJ1jV7QJwLcUS1nr21aPn_wb-XlAZ9c-z_EH472JA,20
+torchrl_nightly-2025.7.21.dist-info/RECORD,,
{torchrl_nightly-2025.7.19.dist-info → torchrl_nightly-2025.7.21.dist-info}/WHEEL
RENAMED
File without changes

{torchrl_nightly-2025.7.19.dist-info → torchrl_nightly-2025.7.21.dist-info}/licenses/LICENSE
RENAMED
File without changes

{torchrl_nightly-2025.7.19.dist-info → torchrl_nightly-2025.7.21.dist-info}/top_level.txt
RENAMED
File without changes