torchrl-nightly 2025.7.15__cp313-cp313-macosx_10_13_universal2.whl → 2025.7.16__cp313-cp313-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchrl/_torchrl.cpython-313-darwin.so +0 -0
- torchrl/data/tensor_specs.py +34 -9
- torchrl/envs/transforms/transforms.py +0 -1
- torchrl/modules/llm/policies/common.py +28 -0
- torchrl/objectives/a2c.py +32 -13
- torchrl/objectives/ppo.py +50 -32
- torchrl/trainers/helpers/losses.py +2 -2
- torchrl/version.py +2 -2
- {torchrl_nightly-2025.7.15.dist-info → torchrl_nightly-2025.7.16.dist-info}/METADATA +1 -1
- {torchrl_nightly-2025.7.15.dist-info → torchrl_nightly-2025.7.16.dist-info}/RECORD +13 -13
- {torchrl_nightly-2025.7.15.dist-info → torchrl_nightly-2025.7.16.dist-info}/WHEEL +0 -0
- {torchrl_nightly-2025.7.15.dist-info → torchrl_nightly-2025.7.16.dist-info}/licenses/LICENSE +0 -0
- {torchrl_nightly-2025.7.15.dist-info → torchrl_nightly-2025.7.16.dist-info}/top_level.txt +0 -0
torchrl/_torchrl.cpython-313-darwin.so
CHANGED
Binary file
torchrl/data/tensor_specs.py
CHANGED
@@ -4449,12 +4449,18 @@ class Binary(Categorical):
             f"shape of the {self.__class__.__name__} spec in expand()."
         )
         return self.__class__(
-            n=self.shape[-1], shape=shape, device=self.device, dtype=self.dtype
+            n=self.shape[-1] if len(self.shape) > 0 else None,
+            shape=shape,
+            device=self.device,
+            dtype=self.dtype,
         )

     def _reshape(self, shape):
         return self.__class__(
-            n=self.shape[-1], shape=shape, device=self.device, dtype=self.dtype
+            n=self.shape[-1] if len(self.shape) > 0 else None,
+            shape=shape,
+            device=self.device,
+            dtype=self.dtype,
         )

     def _unflatten(self, dim, sizes):
@@ -4464,7 +4470,10 @@ class Binary(Categorical):
             .shape
         )
         return self.__class__(
-            n=self.shape[-1], shape=shape, device=self.device, dtype=self.dtype
+            n=self.shape[-1] if len(self.shape) > 0 else None,
+            shape=shape,
+            device=self.device,
+            dtype=self.dtype,
         )

     def squeeze(self, dim=None):
@@ -4472,13 +4481,19 @@ class Binary(Categorical):
         if shape is None:
             return self
         return self.__class__(
-            n=self.shape[-1], shape=shape, device=self.device, dtype=self.dtype
+            n=self.shape[-1] if len(self.shape) > 0 else None,
+            shape=shape,
+            device=self.device,
+            dtype=self.dtype,
        )

     def unsqueeze(self, dim: int):
         shape = _unsqueezed_shape(self.shape, dim)
         return self.__class__(
-            n=self.shape[-1], shape=shape, device=self.device, dtype=self.dtype
+            n=self.shape[-1] if len(self.shape) > 0 else None,
+            shape=shape,
+            device=self.device,
+            dtype=self.dtype,
         )

     def unbind(self, dim: int = 0):
@@ -4495,7 +4510,10 @@ class Binary(Categorical):
         shape = tuple(s for i, s in enumerate(self.shape) if i != dim)
         return tuple(
             self.__class__(
-                n=self.shape[-1], shape=shape, device=self.device, dtype=self.dtype
+                n=self.shape[-1] if len(self.shape) > 0 else None,
+                shape=shape,
+                device=self.device,
+                dtype=self.dtype,
             )
             for i in range(self.shape[dim])
         )
@@ -4512,12 +4530,15 @@ class Binary(Categorical):
         if dest_device == self.device and dest_dtype == self.dtype:
             return self
         return self.__class__(
-            n=self.shape[-1], shape=self.shape, device=dest_device, dtype=dest_dtype
+            n=self.shape[-1] if len(self.shape) > 0 else None,
+            shape=self.shape,
+            device=dest_device,
+            dtype=dest_dtype,
         )

     def clone(self) -> Binary:
         return self.__class__(
-            n=self.shape[-1],
+            n=self.shape[-1] if len(self.shape) > 0 else None,
             shape=self.shape,
             device=self.device,
             dtype=self.dtype,
@@ -4528,6 +4549,8 @@ class Binary(Categorical):

         The last dimension of the spec (length n of the binary vector) cannot be indexed.
         """
+        if not len(self.shape):
+            raise ValueError("Cannot index a Binary spec with an empty shape")
         indexed_shape = _shape_indexing(self.shape[:-1], idx)
         return self.__class__(
             n=self.shape[-1],
@@ -5533,8 +5556,10 @@ class Composite(TensorSpec):
         sub_str = [
             indent(f"{k}: {str(item)}", 4 * " ") for k, item in self._specs.items()
         ]
+        if len(sub_str) == 0:
+            return f"{self.__class__.__name__}(device={self._device}, shape={self.shape}, data_cls={self.data_cls})"
         sub_str = ",\n".join(sub_str)
-        return f"{self.__class__.__name__}(\n{sub_str},\n device={self._device},\n shape={self.shape})"
+        return f"{self.__class__.__name__}(\n{sub_str},\n device={self._device},\n shape={self.shape},\n data_cls={self.data_cls})"

     def type_check(
         self,
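Taken together, the Binary hunks guard every self.shape[-1] access with an empty-shape check, so a scalar-shaped spec passes n=None instead of raising an IndexError inside expand, _reshape, _unflatten, squeeze, unsqueeze, unbind, to and clone, and indexing such a spec now fails with an explicit ValueError; the Composite hunk gives an empty Composite a compact one-line repr. A minimal sanity-check sketch of the touched surface, assuming the public torchrl.data.Binary and Composite APIs of this nightly (the empty-shape path itself is only noted in comments):

    import torch
    from torchrl.data import Binary, Composite

    # A 4-bit binary spec with a non-default dtype.
    spec = Binary(n=4, dtype=torch.bool)

    # Shape ops rebuild the spec with its device/dtype; with this patch, `n` is
    # only read from shape[-1] when the shape is non-empty (otherwise None).
    expanded = spec.expand(2, 4)
    assert expanded.shape == torch.Size([2, 4])
    assert expanded.dtype == torch.bool
    assert spec.clone().dtype == torch.bool

    # Composite.__repr__: an empty Composite now prints on a single line
    # (including its data_cls) instead of an empty multi-line block.
    print(Composite())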
torchrl/envs/transforms/transforms.py
CHANGED
@@ -1211,7 +1211,6 @@ but got an object of type {type(transform)}."""
         if tensordict is not None:
            # We must avoid modifying the original tensordict so a shallow copy is necessary.
            # We just select the input data and reset signal, which is all we need.
-            self.transform.transform_input_spec(self.base_env.input_spec.unlock_())
             tensordict = tensordict.select(
                 *self.reset_keys, *self.state_spec.keys(True, True), strict=False
             )
torchrl/modules/llm/policies/common.py
CHANGED
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 from __future__ import annotations

+import warnings
 import weakref
 from typing import Any, Literal, overload

@@ -171,6 +172,33 @@ class ChatHistory(TensorClass["nocast"]):
         step_mdp_static=True,
     )

+    def __post_init__(self):
+        # Check that all history objects have one more batch dimension than the ChatHistory object
+        if self.prompt is not None:
+            if self.prompt.batch_dims != self.batch_dims + 1:
+                warnings.warn(
+                    "Prompt history should have one more batch dimension than the ChatHistory object to handle multi-turn conversations, "
+                    f"got {self.prompt.batch_dims} and {self.batch_dims}. "
+                    "The batch dimension of the ChatHistory object will be unsqueezed along the last dimension."
+                )
+                self.prompt = self.prompt.unsqueeze(-1)
+        if self.response is not None:
+            if self.response.batch_dims != self.batch_dims + 1:
+                warnings.warn(
+                    "Response history should have one more batch dimension than the ChatHistory object to handle multi-turn conversations, "
+                    f"got {self.response.batch_dims} and {self.batch_dims}. "
+                    "The batch dimension of the ChatHistory object will be unsqueezed along the last dimension."
+                )
+                self.response = self.response.unsqueeze(-1)
+        if self.full is not None:
+            if self.full.batch_dims != self.batch_dims + 1:
+                warnings.warn(
+                    "Full history should have one more batch dimension than the ChatHistory object to handle multi-turn conversations, "
+                    f"got {self.full.batch_dims} and {self.batch_dims}. "
+                    "The batch dimension of the ChatHistory object will be unsqueezed along the last dimension."
+                )
+                self.full = self.full.unsqueeze(-1)
+

 class LogProbs(TensorClass["nocast"]):
     """A log-probability container.
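The new __post_init__ enforces a simple convention: each history field (prompt, response, full) is expected to carry exactly one more batch dimension than the ChatHistory container (the extra axis indexes conversation turns); anything else is warned about and unsqueezed along the last dimension. The check boils down to the following standalone pattern on tensordict-like objects; this is a sketch of the convention only, and ensure_extra_batch_dim is a made-up helper name, not part of the ChatHistory API:

    import warnings

    import torch
    from tensordict import TensorDict


    def ensure_extra_batch_dim(container_batch_dims: int, history: TensorDict) -> TensorDict:
        """Warn and unsqueeze when `history` lacks the extra turn dimension."""
        if history.batch_dims != container_batch_dims + 1:
            warnings.warn(
                "history should have one more batch dimension than its container; "
                "unsqueezing along the last dimension."
            )
            history = history.unsqueeze(-1)
        return history


    # One extra batch dim (the turn axis): accepted unchanged.
    ok = ensure_extra_batch_dim(0, TensorDict({"tokens": torch.zeros(3, 5)}, batch_size=[3]))
    assert ok.batch_dims == 1

    # Same number of batch dims as the container: warned about and unsqueezed.
    fixed = ensure_extra_batch_dim(0, TensorDict({"tokens": torch.zeros(5)}, batch_size=[]))
    assert fixed.batch_dims == 1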
torchrl/objectives/a2c.py
CHANGED
@@ -70,7 +70,7 @@ class A2CLoss(LossModule):
             samples will be used to compute this estimate.
             Defaults to ``1``.
         entropy_coeff (:obj:`float`): the weight of the entropy loss. Defaults to `0.01``.
-        critic_coef (:obj:`float`): the weight of the critic loss. Defaults to ``1.0``. If ``None``, the critic
+        critic_coeff (:obj:`float`): the weight of the critic loss. Defaults to ``1.0``. If ``None``, the critic
             loss won't be included and the in-keys will miss the critic inputs.
         loss_critic_type (str): loss function for the value discrepancy.
             Can be one of "l1", "l2" or "smooth_l1". Defaults to ``"smooth_l1"``.
@@ -156,7 +156,7 @@ class A2CLoss(LossModule):
         the expected keyword arguments are:
         ``["action", "next_reward", "next_done", "next_terminated"]`` + in_keys of the actor and critic.
         The return value is a tuple of tensors in the following order:
-        ``["loss_objective"]`` + ``["loss_critic"]`` if critic_coef is not None + ``["entropy", "loss_entropy"]`` if entropy_bonus is True and critic_coef is not None
+        ``["loss_objective"]`` + ``["loss_critic"]`` if critic_coeff is not None + ``["entropy", "loss_entropy"]`` if entropy_bonus is True and critic_coeff is not None

     Examples:
         >>> import torch
@@ -277,8 +277,8 @@ class A2CLoss(LossModule):
         *,
         entropy_bonus: bool = True,
         samples_mc_entropy: int = 1,
-        entropy_coeff: float = 0.01,
-        critic_coef: float = 1.0,
+        entropy_coeff: float | None = None,
+        critic_coeff: float = 1.0,
         loss_critic_type: str = "smooth_l1",
         gamma: float | None = None,
         separate_losses: bool = False,
@@ -291,13 +291,32 @@ class A2CLoss(LossModule):
         clip_value: float | None = None,
         **kwargs,
     ):
+        # Handle deprecated entropy_coef argument
         if "entropy_coef" in kwargs:
+            if entropy_coeff is not None:  # Check if entropy_coeff was explicitly set
+                raise ValueError(
+                    "Cannot specify both 'entropy_coef' and 'entropy_coeff'"
+                )
             warnings.warn(
                 "'entropy_coef' is deprecated and will be removed in torchrl v0.11. Please use 'entropy_coeff' instead.",
                 DeprecationWarning,
             )
             entropy_coeff = kwargs.pop("entropy_coef")

+        # Set default value if None
+        if entropy_coeff is None:
+            entropy_coeff = 0.01
+
+        # Handle deprecated critic_coef argument
+        if "critic_coef" in kwargs:
+            if critic_coeff != 1.0:  # Check if critic_coeff was explicitly set
+                raise ValueError("Cannot specify both 'critic_coef' and 'critic_coeff'")
+            warnings.warn(
+                "'critic_coef' is deprecated and will be removed in torchrl v0.11. Please use 'critic_coeff' instead.",
+                DeprecationWarning,
+            )
+            critic_coeff = kwargs.pop("critic_coef")
+
         if actor is not None:
             actor_network = actor
             del actor
@@ -349,12 +368,12 @@ class A2CLoss(LossModule):
         self.register_buffer(
             "entropy_coeff", torch.as_tensor(entropy_coeff, device=device)
         )
-        if critic_coef is not None:
+        if critic_coeff is not None:
             self.register_buffer(
-                "critic_coef", torch.as_tensor(critic_coef, device=device)
+                "critic_coeff", torch.as_tensor(critic_coeff, device=device)
             )
         else:
-            self.critic_coef = None
+            self.critic_coeff = None

         if gamma is not None:
             raise TypeError(_GAMMA_LMBDA_DEPREC_ERROR)
@@ -399,7 +418,7 @@ class A2CLoss(LossModule):
             *self.actor_network.in_keys,
             *[("next", key) for key in self.actor_network.in_keys],
         ]
-        if self.critic_coef is not None:
+        if self.critic_coeff is not None:
             keys.extend(self.critic_network.in_keys)
         return list(set(keys))

@@ -407,7 +426,7 @@ class A2CLoss(LossModule):
     def out_keys(self):
         if self._out_keys is None:
             outs = ["loss_objective"]
-            if self.critic_coef is not None:
+            if self.critic_coeff is not None:
                 outs.append("loss_critic")
             if self.entropy_bonus:
                 outs.append("entropy")
@@ -478,7 +497,7 @@ class A2CLoss(LossModule):
         return log_prob, dist

     def loss_critic(self, tensordict: TensorDictBase) -> tuple[torch.Tensor, float]:
-        """Returns the loss value of the critic, multiplied by ``critic_coef`` if it is not ``None``.
+        """Returns the loss value of the critic, multiplied by ``critic_coeff`` if it is not ``None``.

         Returns the loss and the clip-fraction.

@@ -539,8 +558,8 @@ class A2CLoss(LossModule):
                 "target_actor_network_params",
                 "target_critic_network_params",
             )
-        if self.critic_coef is not None:
-            return self.critic_coef * loss_value, clip_fraction
+        if self.critic_coeff is not None:
+            return self.critic_coeff * loss_value, clip_fraction
         return loss_value, clip_fraction

     @property
@@ -568,7 +587,7 @@ class A2CLoss(LossModule):
             entropy = self.get_entropy_bonus(dist)
             td_out.set("entropy", entropy.detach().mean()) # for logging
             td_out.set("loss_entropy", -self.entropy_coeff * entropy)
-        if self.critic_coef is not None:
+        if self.critic_coeff is not None:
             loss_critic, value_clip_fraction = self.loss_critic(tensordict)
             td_out.set("loss_critic", loss_critic)
             if value_clip_fraction is not None:
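In practice the A2C change is the critic_coef -> critic_coeff rename with a deprecation shim: the old keyword still works (emitting a DeprecationWarning), mixing the two spellings raises a ValueError, and entropy_coeff now defaults to None, resolved to 0.01 inside the constructor. A hedged sketch against the documented constructor; the toy actor/critic setup mirrors the A2CLoss docstring example and the sizes are arbitrary:

    import warnings

    import torch
    from tensordict.nn import TensorDictModule
    from torch import nn
    from torchrl.data import Bounded
    from torchrl.modules import NormalParamExtractor, ProbabilisticActor, TanhNormal, ValueOperator
    from torchrl.objectives import A2CLoss

    n_obs, n_act = 3, 4
    actor = ProbabilisticActor(
        TensorDictModule(
            nn.Sequential(nn.Linear(n_obs, 2 * n_act), NormalParamExtractor()),
            in_keys=["observation"],
            out_keys=["loc", "scale"],
        ),
        in_keys=["loc", "scale"],
        spec=Bounded(-torch.ones(n_act), torch.ones(n_act), (n_act,)),
        distribution_class=TanhNormal,
    )
    critic = ValueOperator(nn.Linear(n_obs, 1), in_keys=["observation"])

    # New spellings are the supported ones.
    loss = A2CLoss(actor, critic, critic_coeff=0.5, entropy_coeff=0.01)

    # The old spelling still works but emits a DeprecationWarning.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        A2CLoss(actor, critic, critic_coef=0.5)
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

    # Mixing both spellings is rejected outright.
    try:
        A2CLoss(actor, critic, critic_coef=0.5, critic_coeff=0.5)
    except ValueError:
        pass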
torchrl/objectives/ppo.py
CHANGED
@@ -102,13 +102,13 @@ class PPOLoss(LossModule):
             Defaults to ``1``.
         entropy_coeff: scalar | Mapping[str, scalar], optional): entropy multiplier when computing the total loss.
             * **Scalar**: one value applied to the summed entropy of every action head.
-            * **Mapping** ``{head_name: coef}`` gives an individual coefficient for each action-head's entropy.
+            * **Mapping** ``{head_name: coeff}`` gives an individual coefficient for each action-head's entropy.
             Defaults to ``0.01``.
         log_explained_variance (bool, optional): if ``True``, the explained variance of the critic
             predictions w.r.t. value targets will be computed and logged as ``"explained_variance"``.
             This can help monitor critic quality during training. Best possible score is 1.0, lower values are worse. Defaults to ``True``.
-        critic_coef (scalar, optional): critic loss multiplier when computing the total
-            loss. Defaults to ``1.0``. Set ``critic_coef`` to ``None`` to exclude the value
+        critic_coeff (scalar, optional): critic loss multiplier when computing the total
+            loss. Defaults to ``1.0``. Set ``critic_coeff`` to ``None`` to exclude the value
             loss from the forward outputs.
         loss_critic_type (str, optional): loss function for the value discrepancy.
             Can be one of "l1", "l2" or "smooth_l1". Defaults to ``"smooth_l1"``.
@@ -239,7 +239,7 @@ class PPOLoss(LossModule):
         the expected keyword arguments are:
         ``["action", "sample_log_prob", "next_reward", "next_done", "next_terminated"]`` + in_keys of the actor and value network.
         The return value is a tuple of tensors in the following order:
-        ``["loss_objective"]`` + ``["entropy", "loss_entropy"]`` if entropy_bonus is set + ``"loss_critic"`` if critic_coef is not ``None``.
+        ``["loss_objective"]`` + ``["entropy", "loss_entropy"]`` if entropy_bonus is set + ``"loss_critic"`` if critic_coeff is not ``None``.
         The output keys can also be filtered using :meth:`PPOLoss.select_out_keys` method.

     Examples:
@@ -351,9 +351,9 @@ class PPOLoss(LossModule):
         *,
         entropy_bonus: bool = True,
         samples_mc_entropy: int = 1,
-        entropy_coeff: float | Mapping[str, float] = 0.01,
+        entropy_coeff: float | Mapping[str, float] | None = None,
         log_explained_variance: bool = True,
-        critic_coef: float | None = None,
+        critic_coeff: float | None = None,
         loss_critic_type: str = "smooth_l1",
         normalize_advantage: bool = False,
         normalize_advantage_exclude_dims: tuple[int] = (),
@@ -377,13 +377,23 @@ class PPOLoss(LossModule):
             critic_network = critic
             del critic

-        if critic_coef is None and critic_network is not None:
-            critic_coef = 1.0
-        elif critic_coef in (None, 0) and critic_network is not None:
-            critic_coef = None
+        # Handle deprecated critic_coef argument
+        if "critic_coef" in kwargs:
+            if critic_coeff is not None:
+                raise ValueError("Cannot specify both 'critic_coef' and 'critic_coeff'")
+            warnings.warn(
+                "'critic_coef' is deprecated and will be removed in torchrl v0.11. Please use 'critic_coeff' instead.",
+                DeprecationWarning,
+            )
+            critic_coeff = kwargs.pop("critic_coef")
+
+        if critic_coeff is None and critic_network is not None:
+            critic_coeff = 1.0
+        elif critic_coeff in (None, 0) and critic_network is not None:
+            critic_coeff = None

         if actor_network is None or (
-            critic_network is None and critic_coef not in (None, 0.0)
+            critic_network is None and critic_coeff not in (None, 0.0)
         ):
             raise TypeError(
                 "Missing positional arguments actor_network or critic_network."
@@ -431,13 +441,21 @@ class PPOLoss(LossModule):
                 torch, "get_default_device", lambda: torch.device("cpu")
             )()

-        # Handle deprecated
-        if "
+        # Handle deprecated entropy_coef argument
+        if "entropy_coef" in kwargs:
+            if entropy_coeff is not None:  # Check if entropy_coeff was explicitly set
+                raise ValueError(
+                    "Cannot specify both 'entropy_coef' and 'entropy_coeff'"
+                )
             warnings.warn(
-                "'
+                "'entropy_coef' is deprecated and will be removed in torchrl v0.11. Please use 'entropy_coeff' instead.",
                 DeprecationWarning,
             )
-            entropy_coeff = kwargs.pop("
+            entropy_coeff = kwargs.pop("entropy_coef")
+
+        # Set default value if None
+        if entropy_coeff is None:
+            entropy_coeff = 0.01

         if isinstance(entropy_coeff, Mapping):
             # Store the mapping for per-head coefficients
@@ -457,13 +475,13 @@ class PPOLoss(LossModule):
             self._entropy_coeff_map = None
         else:
             raise TypeError("entropy_coeff must be a float or a Mapping[str, float]")
-        if critic_coef is not None:
+        if critic_coeff is not None:
             self.register_buffer(
-                "critic_coef", torch.tensor(critic_coef, device=device)
+                "critic_coeff", torch.tensor(critic_coeff, device=device)
             )
         else:
-            self.critic_coef = None
-        self._has_critic = bool(self.critic_coef is not None and self.critic_coef > 0)
+            self.critic_coeff = None
+        self._has_critic = bool(self.critic_coeff is not None and self.critic_coeff > 0)
         self.loss_critic_type = loss_critic_type
         self.normalize_advantage = normalize_advantage
         self.normalize_advantage_exclude_dims = normalize_advantage_exclude_dims
@@ -692,7 +710,7 @@ class PPOLoss(LossModule):
     def loss_critic(
         self, tensordict: TensorDictBase
     ) -> tuple[torch.Tensor | TensorDict, ...]:
-        """Returns the critic loss multiplied by ``critic_coef``, if it is not ``None``."""
+        """Returns the critic loss multiplied by ``critic_coeff``, if it is not ``None``."""
         # TODO: if the advantage is gathered by forward, this introduces an
         # overhead that we could easily reduce.
         if self.separate_losses:
@@ -766,7 +784,7 @@ class PPOLoss(LossModule):
                 "target_critic_network_params",
             )
         if self._has_critic:
-            return self.critic_coef * loss_value, clip_fraction, explained_variance
+            return self.critic_coeff * loss_value, clip_fraction, explained_variance
         return loss_value, clip_fraction, explained_variance

     @property
@@ -954,10 +972,10 @@ class ClipPPOLoss(PPOLoss):
             Defaults to ``1``.
         entropy_coeff: (scalar | Mapping[str, scalar], optional): entropy multiplier when computing the total loss.
             * **Scalar**: one value applied to the summed entropy of every action head.
-            * **Mapping** ``{head_name: coef}`` gives an individual coefficient for each action-head's entropy.
+            * **Mapping** ``{head_name: coeff}`` gives an individual coefficient for each action-head's entropy.
             Defaults to ``0.01``.
-        critic_coef (scalar, optional): critic loss multiplier when computing the total
-            loss. Defaults to ``1.0``. Set ``critic_coef`` to ``None`` to exclude the value
+        critic_coeff (scalar, optional): critic loss multiplier when computing the total
+            loss. Defaults to ``1.0``. Set ``critic_coeff`` to ``None`` to exclude the value
             loss from the forward outputs.
         loss_critic_type (str, optional): loss function for the value discrepancy.
             Can be one of "l1", "l2" or "smooth_l1". Defaults to ``"smooth_l1"``.
@@ -1057,8 +1075,8 @@ class ClipPPOLoss(PPOLoss):
         clip_epsilon: float = 0.2,
         entropy_bonus: bool = True,
         samples_mc_entropy: int = 1,
-        entropy_coeff: float | Mapping[str, float] = 0.01,
-        critic_coef: float | None = None,
+        entropy_coeff: float | Mapping[str, float] | None = None,
+        critic_coeff: float | None = None,
         loss_critic_type: str = "smooth_l1",
         normalize_advantage: bool = False,
         normalize_advantage_exclude_dims: tuple[int] = (),
@@ -1079,7 +1097,7 @@ class ClipPPOLoss(PPOLoss):
             entropy_bonus=entropy_bonus,
             samples_mc_entropy=samples_mc_entropy,
             entropy_coeff=entropy_coeff,
-            critic_coef=critic_coef,
+            critic_coeff=critic_coeff,
             loss_critic_type=loss_critic_type,
             normalize_advantage=normalize_advantage,
             normalize_advantage_exclude_dims=normalize_advantage_exclude_dims,
@@ -1247,9 +1265,9 @@ class KLPENPPOLoss(PPOLoss):
             Defaults to ``1``.
         entropy_coeff: scalar | Mapping[str, scalar], optional): entropy multiplier when computing the total loss.
             * **Scalar**: one value applied to the summed entropy of every action head.
-            * **Mapping** ``{head_name: coef}`` gives an individual coefficient for each action-head's entropy.
+            * **Mapping** ``{head_name: coeff}`` gives an individual coefficient for each action-head's entropy.
             Defaults to ``0.01``.
-        critic_coef (scalar, optional): critic loss multiplier when computing the total
+        critic_coeff (scalar, optional): critic loss multiplier when computing the total
             loss. Defaults to ``1.0``.
         loss_critic_type (str, optional): loss function for the value discrepancy.
             Can be one of "l1", "l2" or "smooth_l1". Defaults to ``"smooth_l1"``.
@@ -1351,8 +1369,8 @@ class KLPENPPOLoss(PPOLoss):
         samples_mc_kl: int = 1,
         entropy_bonus: bool = True,
         samples_mc_entropy: int = 1,
-        entropy_coeff: float | Mapping[str, float] = 0.01,
-        critic_coef: float | None = None,
+        entropy_coeff: float | Mapping[str, float] | None = None,
+        critic_coeff: float | None = None,
         loss_critic_type: str = "smooth_l1",
         normalize_advantage: bool = False,
         normalize_advantage_exclude_dims: tuple[int] = (),
@@ -1369,7 +1387,7 @@ class KLPENPPOLoss(PPOLoss):
             entropy_bonus=entropy_bonus,
             samples_mc_entropy=samples_mc_entropy,
             entropy_coeff=entropy_coeff,
-            critic_coef=critic_coef,
+            critic_coeff=critic_coeff,
             loss_critic_type=loss_critic_type,
             normalize_advantage=normalize_advantage,
             normalize_advantage_exclude_dims=normalize_advantage_exclude_dims,
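The PPO family gets the same rename plus a shim for the old keyword, and the construction-time resolution shown above maps an unset critic_coeff (with a critic network present) to the historical 1.0 default. A hedged sketch of that behavior, reusing the same kind of toy actor/critic as the A2C example; module names and sizes are arbitrary:

    import torch
    from tensordict.nn import TensorDictModule
    from torch import nn
    from torchrl.data import Bounded
    from torchrl.modules import NormalParamExtractor, ProbabilisticActor, TanhNormal, ValueOperator
    from torchrl.objectives import ClipPPOLoss

    n_obs, n_act = 3, 4
    actor = ProbabilisticActor(
        TensorDictModule(
            nn.Sequential(nn.Linear(n_obs, 2 * n_act), NormalParamExtractor()),
            in_keys=["observation"],
            out_keys=["loc", "scale"],
        ),
        in_keys=["loc", "scale"],
        spec=Bounded(-torch.ones(n_act), torch.ones(n_act), (n_act,)),
        distribution_class=TanhNormal,
    )
    critic = ValueOperator(nn.Linear(n_obs, 1), in_keys=["observation"])

    # Leaving critic_coeff unset (None) with a critic network resolves to 1.0.
    loss = ClipPPOLoss(actor, critic)
    assert float(loss.critic_coeff) == 1.0

    # An explicit value is stored as the `critic_coeff` buffer and scales the
    # critic term returned by loss_critic() during forward().
    loss_half = ClipPPOLoss(actor, critic, critic_coeff=0.5, entropy_coeff=0.01)
    assert float(loss_half.critic_coeff) == 0.5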
torchrl/trainers/helpers/losses.py
CHANGED
@@ -86,7 +86,7 @@ class A2CLossConfig:
     # Decay factor for return computation. Default=0.99.
     entropy_coeff: float = 1e-3
     # Entropy factor for the A2C loss
-    critic_coef: float = 1.0
+    critic_coeff: float = 1.0
     # Critic factor for the A2C loss
     critic_loss_function: str = "smooth_l1"
     # loss function for the value network. Either one of l1, l2 or smooth_l1 (default).
@@ -112,7 +112,7 @@ class PPOLossConfig:
     # Number of samples to use for a Monte-Carlo estimate if the policy distribution has not closed formula.
     loss_function: str = "smooth_l1"
     # loss function for the value network. Either one of l1, l2 or smooth_l1 (default).
-    critic_coef: float = 1.0
+    critic_coeff: float = 1.0
     # Critic loss multiplier when computing the total loss.

     # ClipPPOLoss parameters:
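The helper configs follow the rename as well. Assuming A2CLossConfig and PPOLossConfig remain plain dataclasses, as the other helper configs in torchrl.trainers.helpers are, the new field spelling is used directly:

    from torchrl.trainers.helpers.losses import A2CLossConfig, PPOLossConfig

    # Field names now match the loss-module keyword arguments.
    a2c_cfg = A2CLossConfig(entropy_coeff=1e-3, critic_coeff=1.0)
    ppo_cfg = PPOLossConfig(critic_coeff=0.5)
    assert a2c_cfg.critic_coeff == 1.0 and ppo_cfg.critic_coeff == 0.5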
torchrl/version.py
CHANGED
@@ -1,2 +1,2 @@
-__version__ = '2025.7.15'
-git_version = '
+__version__ = '2025.7.16'
+git_version = '361a8da6edc77979e17409cf19396230d18c18a9'
{torchrl_nightly-2025.7.15.dist-info → torchrl_nightly-2025.7.16.dist-info}/RECORD
RENAMED
@@ -3,9 +3,9 @@ build_tools/setup_helpers/__init__.py,sha256=7l8TvVqxKezgzKCLuRv20mvGLloprFVZYm8
 build_tools/setup_helpers/extension.py,sha256=4-PDLr-pw40bJnd9SfxnTaSjUyuXU_Tg8yOg69Kl0o4,5914
 torchrl/__init__.py,sha256=mhDBx2UIuBKc0gmi8dVNHokQ6tCbIovruZmyAxcSsy8,2938
 torchrl/_extension.py,sha256=z7wQ8i1iYWYcnygq_j0nq9sT-koY13tfHhTLNbMk17Q,2353
-torchrl/_torchrl.cpython-313-darwin.so,sha256=
+torchrl/_torchrl.cpython-313-darwin.so,sha256=hKxRdhDxuVm-1ogc0Pnu7F8xo6djoCREE7psO-strBc,1692464
 torchrl/_utils.py,sha256=Cw5EG6x5oSZF1iE3YCs1a32VUKp0rTXIs2u67q9zKUI,41078
-torchrl/version.py,sha256=
+torchrl/version.py,sha256=aShA8Hert0ILH-D5pqVnCE_n2SCgvZPjiPpxsO1x4eM,83
 torchrl/collectors/__init__.py,sha256=hJ3JD6shRku0BL6SzJQq44FZ5Q1RGR8LealFyU3FRn4,799
 torchrl/collectors/collectors.py,sha256=WoeR-MAfzcLiy8EHPWQ3uknm_jTWjA9Wi45CODG8NZI,177782
 torchrl/collectors/utils.py,sha256=MlXrkYuDmV0Em-tVNQiLL32FWgPNDgceYYG_GgpiviA,11320
@@ -25,7 +25,7 @@ torchrl/collectors/llm/weight_update/__init__.py,sha256=bKjvD7yZG5VnHgvYc4EmKI1s
 torchrl/collectors/llm/weight_update/vllm.py,sha256=slKUmrIo4eL6R4J1oEnmlP6Q7Zer09p92JU8zbIHFUM,11515
 torchrl/data/__init__.py,sha256=oowsio6ZUOZnJV8JV43xgs17B37XO1yKAYIQPdk8yt0,4819
 torchrl/data/rlhf.py,sha256=JUmdYBWgkN229DwpXuDrhy9ddjduNvU2kyHzHR6MoA0,963
-torchrl/data/tensor_specs.py,sha256=
+torchrl/data/tensor_specs.py,sha256=RlMckj6PJo9MQMzneHzbcVe9xUyMB_n7pnSz0jytB9s,253907
 torchrl/data/utils.py,sha256=attuNwzfgjszyp0lJSrV06f2peX3r0qTjRZWEwfl6Yg,12108
 torchrl/data/datasets/__init__.py,sha256=NQpXsHecbZmza8AocX9mkqQQNkdFzeUrMTZoi6hbbU4,733
 torchrl/data/datasets/atari_dqn.py,sha256=3ij6-UGfKev-QJuUEhZEEmn_3yL210CqKJALaFvlc5M,40739
@@ -131,7 +131,7 @@ torchrl/envs/transforms/llm.py,sha256=rQDzuut807wvFpSPCm5tynt8-cMKTgVKVjSVu9D99P
 torchrl/envs/transforms/r3m.py,sha256=sdTVLpnxHfzFVo5rO8WnXf2uUg9cr4LBOLBsWaFgGT8,13478
 torchrl/envs/transforms/rb_transforms.py,sha256=6ohnKXHHAEh2Hz3Seaw6eDrcFMu-1IVQrT7RVywh3YQ,7447
 torchrl/envs/transforms/rlhf.py,sha256=lOVXYqQaoDfm4_n77Dxw_wjicBpMtDvavKmBIK2N3lU,628
-torchrl/envs/transforms/transforms.py,sha256=
+torchrl/envs/transforms/transforms.py,sha256=cDv_NxElzTOW8qQO-2krvOBmlKVGPOKMfqM6XyuLckU,482882
 torchrl/envs/transforms/utils.py,sha256=7ToVFnD4-DkOMtML91g4bqXeY0bZ-gmCaSLxC93oaKM,3264
 torchrl/envs/transforms/vc1.py,sha256=mho5BvdAK-f9hD9t-iah52wT2B06qPmaJO7chrfIOWY,10534
 torchrl/envs/transforms/vecnorm.py,sha256=XahMcWvK3zjOB6EACSZtJ6UMP3yQ2zD9xf87UEB37Eg,34047
@@ -147,7 +147,7 @@ torchrl/modules/llm/utils.py,sha256=gf_F-4bEMwkcI3jLQM7ifB7nsjRctGebB5E2c-AznO0,
 torchrl/modules/llm/backends/__init__.py,sha256=WdVy9EdiAfk8i5zFa49TEkRvcUd0L4Un4v6wqWBy8l8,438
 torchrl/modules/llm/backends/vllm.py,sha256=x57Xop1xd5ZShicsh47ZFmz4VpfZ3eCzVx7k0COvpqQ,9387
 torchrl/modules/llm/policies/__init__.py,sha256=nfZ2mcVuucxnY3WCuzIQrTLIf1yEd36k8-AlvwnSa8Y,545
-torchrl/modules/llm/policies/common.py,sha256=
+torchrl/modules/llm/policies/common.py,sha256=jmWoaR6fgKQ5lKa9HqDVW73K3oySpb2zO3WJlNTz6iQ,38117
 torchrl/modules/llm/policies/transformers_wrapper.py,sha256=HTkubIsbEui2hWqAZ3GwsATI2NGmA0kry1nW5RjnEJ0,74326
 torchrl/modules/llm/policies/vllm_wrapper.py,sha256=u0ITRdVI8pNhpRRMy2yXEh9bK_TkYRUOUEzix2m2aR0,78231
 torchrl/modules/models/__init__.py,sha256=DrOG-7hynjjUh_tc2EqysiUiNMRiDR0WLtZql9TPNcI,1743
@@ -176,7 +176,7 @@ torchrl/modules/utils/__init__.py,sha256=KXaF_xEghKSPsNg0JyfxChK6KWHFRy0lwkL2Rip
 torchrl/modules/utils/mappings.py,sha256=VMYrPxDk1ywgl2l_f6HXZaRsVOKcYR7VF5DNkmi3lHk,362
 torchrl/modules/utils/utils.py,sha256=WPfcE-AoemnrP7Ny4FxJ-_LoQsBnX-y77Zb7MnZjXV0,2916
 torchrl/objectives/__init__.py,sha256=pnprzIXA6E9Ph7isYgNLh4SFTU0pxIQg4oUNcaQ6doc,2148
-torchrl/objectives/a2c.py,sha256=
+torchrl/objectives/a2c.py,sha256=_xdp8D2ErOPyHwpxqPHtUr-EvZw7MqcuhhK9Isnewgo,28791
 torchrl/objectives/common.py,sha256=40inZ0z3bFdQUkXuup3PWP_KmCx1m13cKTksjOp_b6I,28571
 torchrl/objectives/cql.py,sha256=8faIZmA9e65NQ39HAi6torMofr98bkngjtBXm0UbnVM,54925
 torchrl/objectives/crossq.py,sha256=a_vAjET5GG-2U7zZDgMnA0QP1iPCtv2ho6q-XvvLsnc,28858
@@ -188,7 +188,7 @@ torchrl/objectives/dreamer.py,sha256=vIJQN91oPXYnPubDFQpaF5d3fR_WwIYuIVYtoCvw0TY
 torchrl/objectives/functional.py,sha256=ZaglBjEGuOTNGeFA-Ox-ugZVcNegQMUj--KWHDRBmaU,2106
 torchrl/objectives/gail.py,sha256=0m34XmcN-EDk5OfNIo5bKYbKKZfATsYRv4zQe3v2UwA,9576
 torchrl/objectives/iql.py,sha256=1jvlSznWke6NZSwfuYyHVnVBE7Cz3q169GnCRC7iel4,42991
-torchrl/objectives/ppo.py,sha256=
+torchrl/objectives/ppo.py,sha256=0soC2aiCOFNM5hCL20-99LX_NZi6XIXDmG2IkGEHSek,76082
 torchrl/objectives/redq.py,sha256=4usM-nG2UWujeL-VEqzf7-uOwRFx6itkKCeitKuJhtw,28507
 torchrl/objectives/reinforce.py,sha256=ySXLp5C-OOUYayqjrf4taQmL8LgRvMgPCgHDsle8JDc,22339
 torchrl/objectives/sac.py,sha256=Oq9Iq90s9KFbnM4KSRUd2onU1JfW6aW80LWGdtO0CY8,63993
@@ -219,12 +219,12 @@ torchrl/trainers/helpers/__init__.py,sha256=HhDB2Ubq2gZodV-hB6xw4ZgCgwaZKUoZgOfV
 torchrl/trainers/helpers/collectors.py,sha256=NjMMvGWEe4TWkVXzx7AlJ_Qa_AxEzMl6EUmEgUzHkoE,18715
 torchrl/trainers/helpers/envs.py,sha256=1yqJZgz7mc5wa58HmSDGpPQINeDHFZB0_KTgwdKm9QE,22084
 torchrl/trainers/helpers/logger.py,sha256=FtuEiLnK4NmxVVNyEEWaoCu3nG7WbNpHP3UYGQRJmgo,1278
-torchrl/trainers/helpers/losses.py,sha256=
+torchrl/trainers/helpers/losses.py,sha256=sHlJqjh02t8cKN73X35Azd_OoWGurohLuviB8Yeo4JQ,5272
 torchrl/trainers/helpers/models.py,sha256=ihTERG2c96E8cS3Tnul6a_ys6iDEEJmHh05p9blQTW8,21807
 torchrl/trainers/helpers/replay_buffer.py,sha256=ZUZHOa0TILyeWJ3iahzTJ6UvMl_0FdxuZfJEja94Bn8,2001
 torchrl/trainers/helpers/trainers.py,sha256=j6B5XA7_FFHMQeOIQwjNcO0CGE_4mZKUC9_jH_iqqh4,12071
-torchrl_nightly-2025.7.
-torchrl_nightly-2025.7.
-torchrl_nightly-2025.7.
-torchrl_nightly-2025.7.
-torchrl_nightly-2025.7.
+torchrl_nightly-2025.7.16.dist-info/licenses/LICENSE,sha256=xdjS4_xk-IwnLuIFCvTYTl9Y8aXRejqpmke3dGam_nI,1098
+torchrl_nightly-2025.7.16.dist-info/METADATA,sha256=vNEXmcQPsZZGDFx6pCdysVMqIIBiKpH4RpRNO6AMuTs,42990
+torchrl_nightly-2025.7.16.dist-info/WHEEL,sha256=A6iggJuFsuu67bHdjxJADhwSEJmqwgO3xFoNCIwjOxc,115
+torchrl_nightly-2025.7.16.dist-info/top_level.txt,sha256=JeTJ1jV7QJwLcUS1nr21aPn_wb-XlAZ9c-z_EH472JA,20
+torchrl_nightly-2025.7.16.dist-info/RECORD,,
{torchrl_nightly-2025.7.15.dist-info → torchrl_nightly-2025.7.16.dist-info}/WHEEL
RENAMED
File without changes

{torchrl_nightly-2025.7.15.dist-info → torchrl_nightly-2025.7.16.dist-info}/licenses/LICENSE
RENAMED
File without changes

{torchrl_nightly-2025.7.15.dist-info → torchrl_nightly-2025.7.16.dist-info}/top_level.txt
RENAMED
File without changes