torchrl-nightly 2025.6.21__cp312-cp312-macosx_10_13_universal2.whl → 2025.6.23__cp312-cp312-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchrl/_torchrl.cpython-312-darwin.so +0 -0
- torchrl/envs/llm/__init__.py +2 -0
- torchrl/envs/llm/chat.py +14 -1
- torchrl/envs/llm/datasets/gsm8k.py +4 -1
- torchrl/envs/llm/datasets/ifeval.py +3 -0
- torchrl/envs/llm/reward/gsm8k.py +24 -2
- torchrl/envs/llm/transforms/__init__.py +2 -0
- torchrl/envs/llm/transforms/dataloading.py +12 -0
- torchrl/envs/llm/transforms/reason.py +260 -0
- torchrl/envs/transforms/transforms.py +52 -1
- torchrl/objectives/ppo.py +13 -7
- torchrl/version.py +2 -2
- {torchrl_nightly-2025.6.21.dist-info → torchrl_nightly-2025.6.23.dist-info}/METADATA +117 -107
- {torchrl_nightly-2025.6.21.dist-info → torchrl_nightly-2025.6.23.dist-info}/RECORD +17 -16
- {torchrl_nightly-2025.6.21.dist-info → torchrl_nightly-2025.6.23.dist-info}/WHEEL +1 -1
- {torchrl_nightly-2025.6.21.dist-info → torchrl_nightly-2025.6.23.dist-info/licenses}/LICENSE +0 -0
- {torchrl_nightly-2025.6.21.dist-info → torchrl_nightly-2025.6.23.dist-info}/top_level.txt +0 -0
torchrl/_torchrl.cpython-312-darwin.so
CHANGED
Binary file
torchrl/envs/llm/__init__.py
CHANGED
@@ -15,6 +15,7 @@ from .envs import LLMEnv, LLMHashingEnv
 from .libs import make_mlgym, MLGymWrapper
 from .reward import GSM8KRewardParser, IFEvalScoreData, IfEvalScorer
 from .transforms import (
+    AddThinkingPrompt,
     as_nested_tensor,
     as_padded_tensor,
     BrowserTransform,
@@ -33,6 +34,7 @@ __all__ = [
     "ChatEnv",
     "DataLoadingPrimer",
     "DatasetChatEnv",
+    "AddThinkingPrompt",
     "GSM8KEnv",
     "GSM8KPrepareQuestion",
     "GSM8KRewardParser",
torchrl/envs/llm/chat.py
CHANGED
@@ -284,6 +284,7 @@ class DatasetChatEnv(TransformedEnv):
 
     Keyword Args:
         dataset (str): The name of the dataset.
+        shuffle (bool, optional): Whether to shuffle the dataset. Defaults to `True`.
         name (str, optional): name of the dataset configuration.
         split (str, optional): the split to use (usually from `"train"`, `"val"` or `"test"`). Defaults to `None` (no split).
         num_envs (int, optional): The number of environments to create. Defaults to `1`.
@@ -317,6 +318,7 @@ class DatasetChatEnv(TransformedEnv):
         self,
         *,
         dataset: str,
+        shuffle: bool = True,
         name: str | None = None,
         split: Literal["train", "val", "test"] | None = None,
         num_envs: int = 1,
@@ -355,7 +357,7 @@ class DatasetChatEnv(TransformedEnv):
         dataloader = DataLoader(  # noqa: TOR401
             dataset,
             batch_size=batch_size_dl,
-            shuffle=
+            shuffle=shuffle,
             collate_fn=collate_fn,
             generator=generator,
         )
@@ -375,3 +377,14 @@ class DatasetChatEnv(TransformedEnv):
             apply_template=apply_template,
         )
         return super().__init__(env_base, primer)
+
+    def reset_dataloader(self):
+        """Reset the dataloader.
+
+        This is useful when the dataloader is not infinite and we want to reset it.
+
+        Returns:
+            self: The environment itself.
+        """
+        self.transform[0].reset_dataloader()
+        return self
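A minimal usage sketch (not part of the diff) tying the two additions together, assuming `transformers` and `datasets` are installed and that `GSM8KEnv` forwards its keyword arguments to `DatasetChatEnv` as shown in the next file:

from transformers import AutoTokenizer
from torchrl.envs.llm import GSM8KEnv

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B")

# shuffle=False keeps the dataset order deterministic, e.g. for evaluation sweeps.
env = GSM8KEnv(tokenizer=tokenizer, shuffle=False, num_envs=2)
td = env.reset()

# If the (finite) dataloader gets exhausted, rewind it and keep sampling.
env.reset_dataloader()
td = env.reset()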
torchrl/envs/llm/datasets/gsm8k.py
CHANGED
@@ -135,6 +135,7 @@ class GSM8KEnv(DatasetChatEnv):
 
     Keyword Args:
         dataset (str, optional): The name of the dataset. Defaults to `"gsm8k"`.
+        shuffle (bool, optional): Whether to shuffle the dataset. Defaults to `True`.
         num_envs (int, optional): The number of environments to create. Defaults to `1`.
         repeats (int | None, optional): The number of times to repeat each sample from the dataset (mainly for Monte-Carlo
             based value estimation). If `None`, the dataset is not repeated. Defaults to `None`.
@@ -284,12 +285,13 @@ class GSM8KEnv(DatasetChatEnv):
     SYSTEM_PROMPT = """A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
 The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.
 The reasoning process and answer are enclosed within <think></think> and <answer></answer> tags, respectively,
-i.e., <think>reasoning process here</think> <answer>answer here</answer>."""
+i.e., <think>reasoning process here</think> <answer>answer here</answer>. The answer should be a number."""
 
     def __init__(
         self,
         *,
         dataset: str = "gsm8k",
+        shuffle: bool = True,
         num_envs: int = 1,
         repeats: int | None = None,
         batch_size_dl: int = 1,
@@ -307,6 +309,7 @@ i.e., <think>reasoning process here</think> <answer>answer here</answer>."""
         collate_fn = _collate_fn
         super().__init__(
             dataset=dataset,
+            shuffle=shuffle,
             name="main",
             num_envs=num_envs,
             repeats=repeats,
torchrl/envs/llm/datasets/ifeval.py
CHANGED
@@ -41,6 +41,7 @@ class IFEvalEnv(DatasetChatEnv):
 
     Keyword Args:
         dataset (str, optional): The name of the dataset. Defaults to `"google/IFeval"`.
+        shuffle (bool, optional): Whether to shuffle the dataset. Defaults to `True`.
         num_envs (int, optional): The number of environments to create. Defaults to `1`.
         repeats (int | None, optional): The number of times to repeat each sample from the dataset (mainly for Monte-Carlo
             based value estimation). If `None`, the dataset is not repeated. Defaults to `None`.
@@ -146,6 +147,7 @@ You will be assessed by the content of the answer block only, so make sure it co
         self,
         *,
         dataset: str = "google/IFeval",
+        shuffle: bool = True,
         num_envs: int = 1,
         repeats: int | None = None,
         batch_size_dl: int = 1,
@@ -163,6 +165,7 @@ You will be assessed by the content of the answer block only, so make sure it co
         collate_fn = _collate_fn
         super().__init__(
             dataset=dataset,
+            shuffle=shuffle,
             num_envs=num_envs,
             repeats=repeats,
             batch_size_dl=batch_size_dl,
torchrl/envs/llm/reward/gsm8k.py
CHANGED
@@ -20,6 +20,7 @@ class GSM8KRewardParser(Transform):
         in_keys (list of NestedKey): the input keys. Defaults to `["text_response", "answer"]`.
         out_keys (list of NestedKey): the output keys. Defaults to `[ "reward_answer", "reward_think", "reward_right", "reward_contained", "reward", "success"]`.
         eos_token (str): the end of sentence token. Defaults to `tokenizer.eos_token` if not provided.
+        set_done_if_answer (bool): whether to set the done flag to `True` when an answer is present. Defaults to `True`.
 
     """
 
@@ -29,10 +30,18 @@ class GSM8KRewardParser(Transform):
         in_keys: list[NestedKey] | None = None,
         out_keys: list[NestedKey] | None = None,
         eos_token: str | None = None,
+        set_done_if_answer: bool = True,
     ):
         super().__init__()
         self.tokenizer = tokenizer
-        self.eos_token =
+        self.eos_token = (
+            eos_token
+            if eos_token is not None
+            else tokenizer.eos_token
+            if tokenizer is not None
+            else None
+        )
+        self.set_done_if_answer = set_done_if_answer
         if in_keys is None:
             in_keys = ["text_response", "answer"]
         if not isinstance(in_keys, list) or len(in_keys) != 2:
@@ -118,7 +127,20 @@ class GSM8KRewardParser(Transform):
         tds = tds.add(
             next_td_exist, default=torch.zeros((), device=next_tensordict.device)
         )
-
+        next_tensordict = next_tensordict.update(tds)
+        if (
+            self.set_done_if_answer
+            and (reward_answer := (next_tensordict["reward_answer"] > 0)).any()
+        ):
+            done = next_tensordict.get("done")
+            if done is not None:
+                next_tensordict.set("done", reward_answer.view_as(done) | done)
+            terminated = next_tensordict.get("terminated")
+            if terminated is not None:
+                next_tensordict.set(
+                    "terminated", reward_answer.view_as(terminated) | terminated
+                )
+        return next_tensordict
 
     def transform_reward_spec(self, reward_spec: Composite) -> Composite:
         shape = reward_spec.shape + (1, 1)
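The `set_done_if_answer` flag added above ends the episode as soon as a rewarded <answer> block is parsed. A short sketch (not part of the diff; the tokenizer wiring is an assumption) of turning it off so that rollouts keep going, for instance to let AddThinkingPrompt ask for another attempt:

from transformers import AutoTokenizer
from torchrl.envs.llm.reward import GSM8KRewardParser

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B")
# With set_done_if_answer=False the parser still writes reward_answer/reward/success,
# but leaves the done and terminated flags untouched.
parser = GSM8KRewardParser(tokenizer=tokenizer, set_done_if_answer=False)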
torchrl/envs/llm/transforms/__init__.py
CHANGED
@@ -8,6 +8,7 @@ from .dataloading import as_nested_tensor, as_padded_tensor, DataLoadingPrimer
 from .format import TemplateTransform
 from .kl import KLRewardTransform, RetrieveLogProb
 from .policy_version import PolicyVersion
+from .reason import AddThinkingPrompt
 from .tokenizer import Tokenizer
 from .tools import MCPToolTransform, PythonInterpreter
 
@@ -19,6 +20,7 @@ __all__ = [
     "MCPToolTransform",
     "PolicyVersion",
     "PythonInterpreter",
+    "AddThinkingPrompt",
     "TemplateTransform",
     "Tokenizer",
     "as_nested_tensor",
torchrl/envs/llm/transforms/dataloading.py
CHANGED
@@ -447,6 +447,18 @@ class DataLoadingPrimer(TensorDictPrimer):
         )
         self._reset_key = "_reset"
 
+    def reset_dataloader(self):
+        """Reset the dataloader.
+
+        This is useful when the dataloader is not infinite and we want to reset it.
+
+        Returns:
+            self: The transform itself.
+        """
+        self._queue.clear()
+        self.endless_dataloader = self._endless_iter(self.dataloader)
+        return self
+
     @classmethod
     def _endless_iter(self, obj):
         while True:
torchrl/envs/llm/transforms/reason.py
ADDED
@@ -0,0 +1,260 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+import re
+from typing import Callable, Literal
+
+from tensordict import lazy_stack, TensorDictBase
+
+from torchrl.data.llm.chat import History
+from torchrl.envs import Transform
+from torchrl.envs.common import EnvBase
+
+
+class AddThinkingPrompt(Transform):
+    """A transform that adds thinking prompts to encourage the LLM to reconsider its response.
+
+    This transform can either add a new thinking prompt as a separate message or edit the last
+    assistant response to include a thinking prompt before the final answer. This is useful for
+    training LLMs to self-correct and think more carefully when their initial responses are
+    incorrect or incomplete.
+
+    Args:
+        cond (Callable[[TensorDictBase], bool], optional): Condition function that determines
+            when to add the thinking prompt. Takes a tensordict and returns `True` if the prompt
+            should be added.
+        prompt (str, optional): The thinking prompt to add. If None, a default prompt is used.
+            Defaults to `"But wait, let me think about this more carefully..."`.
+        random_prompt (bool, optional): Whether to randomly select from predefined prompts.
+            Defaults to `False`.
+        role (Literal["user", "assistant"], optional): The role for the thinking prompt.
+            If `"assistant"`, the prompt is added to the assistant's response. If `"user"`, it's
+            added as a separate user message. Defaults to `"assistant"`.
+        edit_last_turn (bool, optional): Whether to edit the last assistant response instead
+            of adding a new message. Only works with `role="assistant"`. Defaults to `True`.
+        zero_reward (bool, optional): Whether to zero out the reward when the thinking prompt
+            is added. If `None`, defaults to the value of `edit_last_turn`. Defaults to the same value as `edit_last_turn`.
+        undo_done (bool, optional): Whether to undo the done flag when the thinking prompt
+            is added. Defaults to `True`.
+
+    Examples:
+        >>> from torchrl.envs.llm.transforms import AddThinkingPrompt
+        >>> from torchrl.envs.llm import GSM8KEnv
+        >>> from transformers import AutoTokenizer
+        >>> import torch
+        >>>
+        >>> # Create environment with thinking prompt transform
+        >>> tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B")
+        >>> env = GSM8KEnv(tokenizer=tokenizer, max_steps=10)
+        >>> env = env.append_transform(
+        ...     AddThinkingPrompt(
+        ...         cond=lambda td: td["reward"] < 50,
+        ...         role="assistant",
+        ...         edit_last_turn=True,
+        ...         zero_reward=True,
+        ...         undo_done=True
+        ...     )
+        ... )
+        >>>
+        >>> # Test with wrong answer (low reward)
+        >>> reset = env.reset()
+        >>> wrong_answer = (
+        ...     "<think>Let me solve this step by step. Natalia sold clips to 48 friends in April. "
+        ...     "Then she sold half as many in May. Half of 48 is 24. So in May she sold 24 clips. "
+        ...     "To find the total, I need to add April and May: 48 + 24 = 72. "
+        ...     "Therefore, Natalia sold 72 clips altogether in April and May.</think>"
+        ...     "<answer>322 clips</answer><|im_end|>"
+        ... )
+        >>> reset["text_response"] = [wrong_answer]
+        >>> s = env.step(reset)
+        >>> assert (s["next", "reward"] == 0).all()  # Reward zeroed
+        >>> assert (s["next", "done"] == 0).all()  # Done undone
+        >>> assert s["next", "history"].shape == (1, 3)  # History modified
+        >>>
+        >>> # Test with correct answer (high reward)
+        >>> reset = env.reset()
+        >>> correct_answer = (
+        ...     "<think>Let me solve this step by step. Natalia sold clips to 48 friends in April. "
+        ...     "Then she sold half as many in May. Half of 48 is 24. So in May she sold 24 clips. "
+        ...     "To find the total, I need to add April and May: 48 + 24 = 72. "
+        ...     "Therefore, Natalia sold 72 clips altogether in April and May.</think>"
+        ...     "<answer>72</answer><|im_end|>"
+        ... )
+        >>> reset["text_response"] = [correct_answer]
+        >>> s = env.step(reset)
+        >>> assert (s["next", "reward"] != 0).all()  # Reward not zeroed
+        >>> assert s["next", "done"].all()  # Done remains True
+        >>> assert s["next", "history"].shape == (1, 3)  # History unchanged
+    """
+
+    # Predefined thinking prompts
+    DEFAULT_PROMPTS = [
+        "But wait, let me think about this more carefully...",
+        "Actually, let me reconsider this...",
+        "Let me think about it step by step...",
+        "Wait, I need to double-check my reasoning...",
+        "Actually, let me think about it more carefully...",
+    ]
+
+    def __init__(
+        self,
+        cond: Callable[[TensorDictBase], bool],
+        prompt: str | None = None,
+        random_prompt: bool = False,
+        role: Literal["user", "assistant"] = "assistant",
+        edit_last_turn: bool = True,
+        zero_reward: bool | None = None,
+        undo_done: bool = True,
+    ) -> None:
+        super().__init__()
+
+        # Set the prompt
+        if prompt is None:
+            prompt = self.DEFAULT_PROMPTS[0]
+        self._prompt = prompt
+        self.random_prompt = random_prompt
+
+        # Set condition and role
+        self.cond = cond
+        self.role = role
+
+        # Validate edit_last_turn constraint
+        if edit_last_turn and role != "assistant":
+            raise ValueError("edit_last_turn can only be used with role='assistant'")
+        self.edit_last_turn = edit_last_turn
+
+        # Set zero_reward behavior
+        if zero_reward is None:
+            zero_reward = edit_last_turn
+        self.zero_reward = zero_reward
+        self.undo_done = undo_done
+
+    @property
+    def prompt(self) -> str:
+        if self.random_prompt:
+            import random
+
+            return random.choice(self.DEFAULT_PROMPTS)
+        return self._prompt
+
+    def _step(
+        self, tensordict: TensorDictBase, next_tensordict: TensorDictBase
+    ) -> TensorDictBase:
+        """Process the tensordict and add thinking prompts based on the condition.
+
+        Args:
+            tensordict: The current tensordict
+            next_tensordict: The next tensordict containing the most recent history and reward
+
+        Returns:
+            The modified next_tensordict
+        """
+        print("Reward", next_tensordict["reward"])
+        # Handle batch dimensions
+        if next_tensordict.batch_dims >= 1:
+            ntds = []
+            for td, next_td in zip(tensordict.unbind(0), next_tensordict.unbind(0)):
+                ntds.append(self._step(td, next_td))
+            next_tensordict.update(lazy_stack(ntds))
+            return next_tensordict
+
+        # Check if we should add the thinking prompt
+        if self.cond(next_tensordict):
+            history: History = next_tensordict["history"]
+            last_turn = history[..., -1]
+
+            if self.edit_last_turn:
+                # Edit the last assistant response
+                content = last_turn.content
+                modified_content = self._replace_answer_with_prompt(content)
+
+                # Create new history entry with modified content
+                new_turn = History(
+                    role="assistant",
+                    content=modified_content,
+                    batch_size=last_turn.batch_size,
+                    device=last_turn.device,
+                )
+
+                # Replace the last turn in history
+                history = history[..., :-1].append(new_turn)
+                next_tensordict["history"] = history
+
+            else:
+                # Add a new message
+                prompt = self.prompt
+
+                history = history.append(History(role=self.role, content=prompt))
+                next_tensordict["history"] = history
+
+            if self.undo_done:
+                parent: EnvBase = self.parent
+                if parent is not None:
+                    done_keys = parent.done_keys
+                    for key in done_keys:
+                        done = next_tensordict.get(key)
+                        if done is not None:
+                            next_tensordict.set(key, done.zero_())
+
+            # Zero out reward if requested
+            if self.zero_reward:
+                parent: EnvBase = self.parent
+                if parent is not None:
+                    reward_keys = parent.reward_keys
+                    for key in reward_keys:
+                        reward = next_tensordict.get(key)
+                        if reward is not None:
+                            next_tensordict.set(key, reward.zero_())
+        return next_tensordict
+
+    def _replace_answer_with_prompt(self, content: str) -> str:
+        """Replace the answer section with a thinking prompt.
+
+        This method uses regex to find and replace the <answer>...</answer> section
+        with the thinking prompt, preserving any content before the answer tag.
+
+        Args:
+            content: The original content string
+
+        Returns:
+            The modified content with the answer replaced by the thinking prompt
+        """
+        # Pattern to match <answer>...</answer> with optional EOS token
+        answer_pattern = r"<answer>.*?</answer>(?:\s*<\|im_end\|>)?"
+
+        # Check if there's an answer tag
+        if "<answer>" in content:
+            # Replace the answer section with the thinking prompt
+            prompt = self.prompt
+
+            # Replace the answer section
+            modified_content = re.sub(answer_pattern, prompt, content, flags=re.DOTALL)
+
+            # Clean up any trailing whitespace
+            modified_content = modified_content.rstrip()
+
+        else:
+            # No answer tag found, just append the prompt
+            prompt = self.prompt
+
+            modified_content = content.rstrip() + "\n\n" + prompt
+
+        return modified_content
+
+    def _reset(
+        self, tensordict: TensorDictBase, tensordict_reset: TensorDictBase
+    ) -> TensorDictBase:
+        """Reset the transform state.
+
+        Args:
+            tensordict: The current tensordict
+            tensordict_reset: The reset tensordict
+
+        Returns:
+            The reset tensordict
+        """
+        return tensordict_reset
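Beyond the `edit_last_turn` path exercised in the docstring above, the transform can also append the nudge as a separate user turn. A small sketch, not part of the diff, assuming `env` is the GSM8KEnv built as in the docstring example:

from torchrl.envs.llm.transforms import AddThinkingPrompt

# Append a randomly chosen reconsideration prompt as a new user message whenever the
# parsed reward stays low; reward and done flags are left untouched in this variant.
env = env.append_transform(
    AddThinkingPrompt(
        cond=lambda td: td["reward"] < 50,
        role="user",
        edit_last_turn=False,
        random_prompt=True,
        zero_reward=False,
        undo_done=False,
    )
)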
torchrl/envs/transforms/transforms.py
CHANGED
@@ -738,7 +738,7 @@ class Transform(nn.Module):
         self.__dict__.update(state)
 
     @property
-    def parent(self) ->
+    def parent(self) -> TransformedEnv | None:
         """Returns the parent env of the transform.
 
         The parent env is the env that contains all the transforms up until the current one.
@@ -1249,6 +1249,7 @@ but got an object of type {type(transform)}."""
     def empty_cache(self):
         self.__dict__["_output_spec"] = None
         self.__dict__["_input_spec"] = None
+        self.transform.empty_cache()
         super().empty_cache()
 
     def append_transform(
@@ -1429,6 +1430,50 @@ class Compose(Transform):
         for t in transforms:
             t.set_container(self)
 
+    def pop(self, index: int | None = None) -> Transform:
+        """Pop a transform from the chain.
+
+        Args:
+            index (int, optional): The index of the transform to pop. If None, the last transform is popped.
+
+        Returns:
+            The popped transform.
+        """
+        if index is None:
+            index = len(self.transforms) - 1
+        result = self.transforms.pop(index)
+        parent = self.parent
+        self.empty_cache()
+        if parent is not None:
+            parent.empty_cache()
+        return result
+
+    def __delitem__(self, index: int | slice | list):
+        """Delete a transform in the chain.
+
+        :class:`~torchrl.envs.transforms.Transform` or callable are accepted.
+        """
+        del self.transforms[index]
+        parent = self.parent
+        self.empty_cache()
+        if parent is not None:
+            parent.empty_cache()
+
+    def __setitem__(
+        self,
+        index: int | slice | list,
+        value: Transform | Callable[[TensorDictBase], TensorDictBase],
+    ):
+        """Set a transform in the chain.
+
+        :class:`~torchrl.envs.transforms.Transform` or callable are accepted.
+        """
+        self.transforms[index] = value
+        parent = self.parent
+        self.empty_cache()
+        if parent is not None:
+            parent.empty_cache()
+
     def close(self):
         """Close the transform."""
         for t in self.transforms:
@@ -1594,6 +1639,9 @@ class Compose(Transform):
         else:
             self.transforms.append(transform)
             transform.set_container(self)
+            parent = self.parent
+            if parent is not None:
+                parent.empty_cache()
 
     def set_container(self, container: Transform | EnvBase) -> None:
         self.reset_parent()
@@ -1626,6 +1674,9 @@ class Compose(Transform):
 
         # empty cache of all transforms to reset parents and specs
         self.empty_cache()
+        parent = self.parent
+        if parent is not None:
+            parent.empty_cache()
         if index < 0:
             index = index + len(self.transforms)
         transform.eval()
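The new `Compose.pop`, `__delitem__` and `__setitem__` methods (together with the extra `empty_cache` calls) make transform chains editable in place while keeping the parent env's cached specs consistent. A minimal sketch, not part of the diff, assuming a Gym backend is installed:

from torchrl.envs import Compose, GymEnv, RewardSum, StepCounter, TransformedEnv

env = TransformedEnv(GymEnv("Pendulum-v1"), Compose(StepCounter(), RewardSum()))

last = env.transform.pop()                     # removes and returns RewardSum
env.transform[0] = StepCounter(max_steps=200)  # replace a transform in place
del env.transform[0]                           # drop the remaining transform
# Each mutation invalidates the parent's cached specs, so they are recomputed lazily.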
torchrl/objectives/ppo.py
CHANGED
@@ -752,10 +752,10 @@ class PPOLoss(LossModule):
 
         explained_variance = None
         if self.log_explained_variance:
-            with torch.no_grad():
-                tgt
-                pred
-                eps
+            with torch.no_grad():  # <‑‑ break grad‐flow
+                tgt = target_return.detach()
+                pred = state_value.detach()
+                eps = torch.finfo(tgt.dtype).eps
                 resid = torch.var(tgt - pred, unbiased=False, dim=0)
                 total = torch.var(tgt, unbiased=False, dim=0)
                 explained_variance = 1.0 - resid / (total + eps)
@@ -819,7 +819,9 @@ class PPOLoss(LossModule):
             td_out.set("entropy", entropy.detach().mean())  # for logging
             td_out.set("loss_entropy", self._weighted_loss_entropy(entropy))
         if self._has_critic:
-            loss_critic, value_clip_fraction, explained_variance = self.loss_critic(
+            loss_critic, value_clip_fraction, explained_variance = self.loss_critic(
+                tensordict
+            )
             td_out.set("loss_critic", loss_critic)
             if value_clip_fraction is not None:
                 td_out.set("value_clip_fraction", value_clip_fraction)
@@ -1189,7 +1191,9 @@ class ClipPPOLoss(PPOLoss):
             td_out.set("entropy", entropy.detach().mean())  # for logging
             td_out.set("loss_entropy", self._weighted_loss_entropy(entropy))
         if self._has_critic:
-            loss_critic, value_clip_fraction, explained_variance = self.loss_critic(
+            loss_critic, value_clip_fraction, explained_variance = self.loss_critic(
+                tensordict
+            )
             td_out.set("loss_critic", loss_critic)
             if value_clip_fraction is not None:
                 td_out.set("value_clip_fraction", value_clip_fraction)
@@ -1537,7 +1541,9 @@ class KLPENPPOLoss(PPOLoss):
             td_out.set("entropy", entropy.detach().mean())  # for logging
             td_out.set("loss_entropy", self._weighted_loss_entropy(entropy))
         if self._has_critic:
-            loss_critic, value_clip_fraction, explained_variance = self.loss_critic(
+            loss_critic, value_clip_fraction, explained_variance = self.loss_critic(
+                tensordict_copy
+            )
             td_out.set("loss_critic", loss_critic)
             if value_clip_fraction is not None:
                 td_out.set("value_clip_fraction", value_clip_fraction)
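For reference, the explained-variance statistic gated by `log_explained_variance` can be reproduced outside the loss module; the snippet below mirrors the formula in the hunk above, with stand-in tensors only:

import torch

tgt = torch.randn(64, 1)               # value targets (stand-in data)
pred = tgt + 0.1 * torch.randn(64, 1)  # critic predictions (stand-in data)
eps = torch.finfo(tgt.dtype).eps
resid = torch.var(tgt - pred, unbiased=False, dim=0)
total = torch.var(tgt, unbiased=False, dim=0)
explained_variance = 1.0 - resid / (total + eps)  # approaches 1.0 when the critic tracks returns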
torchrl/version.py
CHANGED
@@ -1,2 +1,2 @@
-__version__ = '2025.6.
-git_version = '
+__version__ = '2025.6.23'
+git_version = 'ed051bc3e5b33d00f64f2a785023bca9a6c72c9b'
{torchrl_nightly-2025.6.21.dist-info → torchrl_nightly-2025.6.23.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: torchrl-nightly
-Version: 2025.6.
+Version: 2025.6.23
 Home-page: https://github.com/pytorch/rl
 Author: torchrl contributors
 Author-email: vmoens@fb.com
@@ -18,119 +18,129 @@ Classifier: License :: OSI Approved :: BSD License
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: torch
+Requires-Dist: torch>=2.1.0
 Requires-Dist: numpy
 Requires-Dist: packaging
 Requires-Dist: cloudpickle
 Requires-Dist: tensordict-nightly
-Provides-Extra: all
-Requires-Dist: accelerate ; extra == 'all'
-Requires-Dist: datasets ; extra == 'all'
-Requires-Dist: dm-meltingpot ; extra == 'all'
-Requires-Dist: dm-control ; extra == 'all'
-Requires-Dist: einops ; extra == 'all'
-Requires-Dist: git ; extra == 'all'
-Requires-Dist: gymnasium <1.0 ; extra == 'all'
-Requires-Dist: gymnasium[atari] ; extra == 'all'
-Requires-Dist: h5py ; extra == 'all'
-Requires-Dist: huggingface-hub ; extra == 'all'
-Requires-Dist: hydra-core >=1.1 ; extra == 'all'
-Requires-Dist: hydra-submitit-launcher ; extra == 'all'
-Requires-Dist: immutabledict ; extra == 'all'
-Requires-Dist: langdetect ; extra == 'all'
-Requires-Dist: minari ; extra == 'all'
-Requires-Dist: moviepy <2.0.0 ; extra == 'all'
-Requires-Dist: mujoco ; extra == 'all'
-Requires-Dist: nltk ; extra == 'all'
-Requires-Dist: open-spiel >=1.5 ; extra == 'all'
-Requires-Dist: pandas ; extra == 'all'
-Requires-Dist: pettingzoo >=1.24.1 ; extra == 'all'
-Requires-Dist: pillow ; extra == 'all'
-Requires-Dist: playwright ; extra == 'all'
-Requires-Dist: protobuf ; extra == 'all'
-Requires-Dist: pytest ; extra == 'all'
-Requires-Dist: pytest-asyncio ; extra == 'all'
-Requires-Dist: pytest-benchmark ; extra == 'all'
-Requires-Dist: pytest-cov ; extra == 'all'
-Requires-Dist: pytest-error-for-skips ; extra == 'all'
-Requires-Dist: pytest-instafail ; extra == 'all'
-Requires-Dist: pytest-mock ; extra == 'all'
-Requires-Dist: pytest-rerunfailures ; extra == 'all'
-Requires-Dist: pyyaml ; extra == 'all'
-Requires-Dist: requests ; extra == 'all'
-Requires-Dist: safetensors ; extra == 'all'
-Requires-Dist: scikit-learn ; extra == 'all'
-Requires-Dist: scipy ; extra == 'all'
-Requires-Dist: sentencepiece ; extra == 'all'
-Requires-Dist: tensorboard ; extra == 'all'
-Requires-Dist: torch >=2.7.0 ; extra == 'all'
-Requires-Dist: torchsnapshot ; extra == 'all'
-Requires-Dist: torchvision ; extra == 'all'
-Requires-Dist: tqdm ; extra == 'all'
-Requires-Dist: transformers ; extra == 'all'
-Requires-Dist: vllm ; extra == 'all'
-Requires-Dist: vmas >=1.2.10 ; extra == 'all'
-Requires-Dist: wandb ; extra == 'all'
 Provides-Extra: atari
-Requires-Dist: gymnasium[atari]
-Provides-Extra:
-Requires-Dist:
-Provides-Extra:
-Requires-Dist:
-Provides-Extra:
-Requires-Dist: gymnasium
-Requires-Dist: mujoco
-Provides-Extra: llm
-Requires-Dist: transformers ; extra == 'llm'
-Requires-Dist: vllm ; extra == 'llm'
-Requires-Dist: playwright ; extra == 'llm'
-Requires-Dist: datasets ; extra == 'llm'
-Requires-Dist: langdetect ; extra == 'llm'
-Requires-Dist: nltk ; extra == 'llm'
-Requires-Dist: immutabledict ; extra == 'llm'
-Requires-Dist: accelerate ; extra == 'llm'
-Requires-Dist: sentencepiece ; extra == 'llm'
-Requires-Dist: protobuf ; extra == 'llm'
-Requires-Dist: einops ; extra == 'llm'
-Requires-Dist: safetensors ; extra == 'llm'
-Provides-Extra: marl
-Requires-Dist: vmas >=1.2.10 ; extra == 'marl'
-Requires-Dist: pettingzoo >=1.24.1 ; extra == 'marl'
-Requires-Dist: dm-meltingpot ; extra == 'marl'
-Provides-Extra: offline-data
-Requires-Dist: huggingface-hub ; extra == 'offline-data'
-Requires-Dist: minari ; extra == 'offline-data'
-Requires-Dist: requests ; extra == 'offline-data'
-Requires-Dist: tqdm ; extra == 'offline-data'
-Requires-Dist: torchvision ; extra == 'offline-data'
-Requires-Dist: scikit-learn ; extra == 'offline-data'
-Requires-Dist: pandas ; extra == 'offline-data'
-Requires-Dist: h5py ; extra == 'offline-data'
-Requires-Dist: pillow ; extra == 'offline-data'
-Provides-Extra: open_spiel
-Requires-Dist: open-spiel >=1.5 ; extra == 'open_spiel'
+Requires-Dist: gymnasium[atari]; extra == "atari"
+Provides-Extra: dm-control
+Requires-Dist: dm_control; extra == "dm-control"
+Provides-Extra: replay-buffer
+Requires-Dist: torch>=2.7.0; extra == "replay-buffer"
+Provides-Extra: gym-continuous
+Requires-Dist: gymnasium<1.0; extra == "gym-continuous"
+Requires-Dist: mujoco; extra == "gym-continuous"
 Provides-Extra: rendering
-Requires-Dist: moviepy
-Provides-Extra: replay_buffer
-Requires-Dist: torch >=2.7.0 ; extra == 'replay_buffer'
+Requires-Dist: moviepy<2.0.0; extra == "rendering"
 Provides-Extra: tests
-Requires-Dist: pytest
-Requires-Dist: pyyaml
-Requires-Dist: pytest-instafail
-Requires-Dist: scipy
-Requires-Dist: pytest-mock
-Requires-Dist: pytest-cov
-Requires-Dist: pytest-asyncio
-Requires-Dist: pytest-benchmark
-Requires-Dist: pytest-rerunfailures
-Requires-Dist: pytest-error-for-skips
+Requires-Dist: pytest; extra == "tests"
+Requires-Dist: pyyaml; extra == "tests"
+Requires-Dist: pytest-instafail; extra == "tests"
+Requires-Dist: scipy; extra == "tests"
+Requires-Dist: pytest-mock; extra == "tests"
+Requires-Dist: pytest-cov; extra == "tests"
+Requires-Dist: pytest-asyncio; extra == "tests"
+Requires-Dist: pytest-benchmark; extra == "tests"
+Requires-Dist: pytest-rerunfailures; extra == "tests"
+Requires-Dist: pytest-error-for-skips; extra == "tests"
 Provides-Extra: utils
-Requires-Dist: tensorboard
-Requires-Dist: wandb
-Requires-Dist: tqdm
-Requires-Dist: hydra-core
-Requires-Dist: hydra-submitit-launcher
-Requires-Dist: git
+Requires-Dist: tensorboard; extra == "utils"
+Requires-Dist: wandb; extra == "utils"
+Requires-Dist: tqdm; extra == "utils"
+Requires-Dist: hydra-core>=1.1; extra == "utils"
+Requires-Dist: hydra-submitit-launcher; extra == "utils"
+Requires-Dist: git; extra == "utils"
+Provides-Extra: checkpointing
+Requires-Dist: torchsnapshot; extra == "checkpointing"
+Provides-Extra: offline-data
+Requires-Dist: huggingface_hub; extra == "offline-data"
+Requires-Dist: minari; extra == "offline-data"
+Requires-Dist: requests; extra == "offline-data"
+Requires-Dist: tqdm; extra == "offline-data"
+Requires-Dist: torchvision; extra == "offline-data"
+Requires-Dist: scikit-learn; extra == "offline-data"
+Requires-Dist: pandas; extra == "offline-data"
+Requires-Dist: h5py; extra == "offline-data"
+Requires-Dist: pillow; extra == "offline-data"
+Provides-Extra: marl
+Requires-Dist: vmas>=1.2.10; extra == "marl"
+Requires-Dist: pettingzoo>=1.24.1; extra == "marl"
+Requires-Dist: dm-meltingpot; extra == "marl"
+Provides-Extra: open-spiel
+Requires-Dist: open_spiel>=1.5; extra == "open-spiel"
+Provides-Extra: llm
+Requires-Dist: transformers; extra == "llm"
+Requires-Dist: vllm; extra == "llm"
+Requires-Dist: playwright; extra == "llm"
+Requires-Dist: datasets; extra == "llm"
+Requires-Dist: langdetect; extra == "llm"
+Requires-Dist: nltk; extra == "llm"
+Requires-Dist: immutabledict; extra == "llm"
+Requires-Dist: accelerate; extra == "llm"
+Requires-Dist: sentencepiece; extra == "llm"
+Requires-Dist: protobuf; extra == "llm"
+Requires-Dist: einops; extra == "llm"
+Requires-Dist: safetensors; extra == "llm"
+Provides-Extra: all
+Requires-Dist: accelerate; extra == "all"
+Requires-Dist: datasets; extra == "all"
+Requires-Dist: dm-meltingpot; extra == "all"
+Requires-Dist: dm_control; extra == "all"
+Requires-Dist: einops; extra == "all"
+Requires-Dist: git; extra == "all"
+Requires-Dist: gymnasium<1.0; extra == "all"
+Requires-Dist: gymnasium[atari]; extra == "all"
+Requires-Dist: h5py; extra == "all"
+Requires-Dist: huggingface_hub; extra == "all"
+Requires-Dist: hydra-core>=1.1; extra == "all"
+Requires-Dist: hydra-submitit-launcher; extra == "all"
+Requires-Dist: immutabledict; extra == "all"
+Requires-Dist: langdetect; extra == "all"
+Requires-Dist: minari; extra == "all"
+Requires-Dist: moviepy<2.0.0; extra == "all"
+Requires-Dist: mujoco; extra == "all"
+Requires-Dist: nltk; extra == "all"
+Requires-Dist: open_spiel>=1.5; extra == "all"
+Requires-Dist: pandas; extra == "all"
+Requires-Dist: pettingzoo>=1.24.1; extra == "all"
+Requires-Dist: pillow; extra == "all"
+Requires-Dist: playwright; extra == "all"
+Requires-Dist: protobuf; extra == "all"
+Requires-Dist: pytest; extra == "all"
+Requires-Dist: pytest-asyncio; extra == "all"
+Requires-Dist: pytest-benchmark; extra == "all"
+Requires-Dist: pytest-cov; extra == "all"
+Requires-Dist: pytest-error-for-skips; extra == "all"
+Requires-Dist: pytest-instafail; extra == "all"
+Requires-Dist: pytest-mock; extra == "all"
+Requires-Dist: pytest-rerunfailures; extra == "all"
+Requires-Dist: pyyaml; extra == "all"
+Requires-Dist: requests; extra == "all"
+Requires-Dist: safetensors; extra == "all"
+Requires-Dist: scikit-learn; extra == "all"
+Requires-Dist: scipy; extra == "all"
+Requires-Dist: sentencepiece; extra == "all"
+Requires-Dist: tensorboard; extra == "all"
+Requires-Dist: torch>=2.7.0; extra == "all"
+Requires-Dist: torchsnapshot; extra == "all"
+Requires-Dist: torchvision; extra == "all"
+Requires-Dist: tqdm; extra == "all"
+Requires-Dist: transformers; extra == "all"
+Requires-Dist: vllm; extra == "all"
+Requires-Dist: vmas>=1.2.10; extra == "all"
+Requires-Dist: wandb; extra == "all"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-dist
 
 [](https://github.com/pytorch/rl/actions/workflows/test-linux.yml)
 [](https://pytorch.org/rl/)
{torchrl_nightly-2025.6.21.dist-info → torchrl_nightly-2025.6.23.dist-info}/RECORD
RENAMED
@@ -3,9 +3,9 @@ build_tools/setup_helpers/__init__.py,sha256=7l8TvVqxKezgzKCLuRv20mvGLloprFVZYm8
 build_tools/setup_helpers/extension.py,sha256=4-PDLr-pw40bJnd9SfxnTaSjUyuXU_Tg8yOg69Kl0o4,5914
 torchrl/__init__.py,sha256=mhDBx2UIuBKc0gmi8dVNHokQ6tCbIovruZmyAxcSsy8,2938
 torchrl/_extension.py,sha256=z7wQ8i1iYWYcnygq_j0nq9sT-koY13tfHhTLNbMk17Q,2353
-torchrl/_torchrl.cpython-312-darwin.so,sha256=
+torchrl/_torchrl.cpython-312-darwin.so,sha256=2G08KUB1lgllQaOiK_aiC-u0bm9sPqvrm5fF8LPT_bc,1691072
 torchrl/_utils.py,sha256=Cw5EG6x5oSZF1iE3YCs1a32VUKp0rTXIs2u67q9zKUI,41078
-torchrl/version.py,sha256=
+torchrl/version.py,sha256=4YeSUDGHrB3YeBHYcVaU3pmlvqaCzwjkSwnDvVAiGUQ,83
 torchrl/collectors/__init__.py,sha256=hJ3JD6shRku0BL6SzJQq44FZ5Q1RGR8LealFyU3FRn4,799
 torchrl/collectors/collectors.py,sha256=CdTerIwhCTr6n5OoJLNad0bNQ5OLliPZFWkU18QBKSA,177625
 torchrl/collectors/utils.py,sha256=MlXrkYuDmV0Em-tVNQiLL32FWgPNDgceYYG_GgpiviA,11320
@@ -97,28 +97,29 @@ torchrl/envs/libs/smacv2.py,sha256=i0TRHuZ9S9v0NfufPgQAcTlvAjf6JKv8hHvOzjSgsaw,2
 torchrl/envs/libs/unity_mlagents.py,sha256=Z3qSU0H3o2NXbS2lNvQ7OmYxkr3AWAMyRHfxeCtNZrk,49667
 torchrl/envs/libs/utils.py,sha256=RgiR16KJWFEtQim44-AIcHByGTq_NrtpjWoYIC13aYA,5207
 torchrl/envs/libs/vmas.py,sha256=a71_jU4r627hFXcMsT5wNSb4TMpyd3punLdOF3Cc8O0,36297
-torchrl/envs/llm/__init__.py,sha256=
-torchrl/envs/llm/chat.py,sha256=
+torchrl/envs/llm/__init__.py,sha256=HGpJZYZHR3tJVZ0EKq-Zh2r715JSH_H82PNa1z8F9V0,1313
+torchrl/envs/llm/chat.py,sha256=2j1S1-_EC52_RpIdN4Gy6_mkxBG8aXL8Yo93SQ2YRIM,18201
 torchrl/envs/llm/envs.py,sha256=Er-ahjgvtYG4LB7_EWOMbdobiUV5DOHPBQYkVTu80r4,34677
 torchrl/envs/llm/datasets/__init__.py,sha256=FFethtv8unJWzphGLPQVC5QD9NMdaygEjx25O1DHHZk,473
-torchrl/envs/llm/datasets/gsm8k.py,sha256=
-torchrl/envs/llm/datasets/ifeval.py,sha256=
+torchrl/envs/llm/datasets/gsm8k.py,sha256=pAOWJh8ArCvTdOKWmr7bQb4o6Hqpoq6PjS0h9HAaRDE,15475
+torchrl/envs/llm/datasets/ifeval.py,sha256=dzvSgOgqVxogFq0rC8O1SMqPfjQD8NEAGCGg5LnmXiU,8472
 torchrl/envs/llm/libs/__init__.py,sha256=vhEm5Fhz1sLWt107zfZLy5pzGmfQi0fNBGazTq1m7dU,266
 torchrl/envs/llm/libs/mlgym.py,sha256=ECnkrNoPV73L1fIO05SlTTXuTSNOM2pdX6aJcEYJVlo,31372
 torchrl/envs/llm/reward/__init__.py,sha256=a-Xsye29z2LugO1cOCFM2FNsqNwEp-5XwQk4saVQlu8,370
-torchrl/envs/llm/reward/gsm8k.py,sha256=
+torchrl/envs/llm/reward/gsm8k.py,sha256=GYd0l_YRaIiivZBLRGjhJeQiFj6jm-BUh9T3pEze3a8,8760
 torchrl/envs/llm/reward/ifeval/__init__.py,sha256=g5NtrwfwqK22hRcoIdz8-KWBh5Ogre9J-Bf3uGWE9Pg,314
 torchrl/envs/llm/reward/ifeval/_instructions.py,sha256=rAoTdwG42smCLJgwW7kAwJrNonjIS6OwdohDE70oMOA,61696
 torchrl/envs/llm/reward/ifeval/_instructions_main.py,sha256=CofKXvG0J2H-1ZXP1fL6UZI8ArNCIO2w5R_37drRIW8,4117
 torchrl/envs/llm/reward/ifeval/_instructions_registry.py,sha256=3_guc8LZ0mWQc-n6E4cQgYMgZRYa6xfgvXgrze9aO_w,3814
 torchrl/envs/llm/reward/ifeval/_instructions_util.py,sha256=aA3fupO8MvqBCqD7Y_Qk6y32toWF1lZGAflWON1ruXM,26042
 torchrl/envs/llm/reward/ifeval/_scorer.py,sha256=zJHBgaGlluEv6czsI6ZtLqArV_J_W9zY7UPAJhT5YIo,14563
-torchrl/envs/llm/transforms/__init__.py,sha256=
+torchrl/envs/llm/transforms/__init__.py,sha256=PNwdol9ItWXPfzyKcf4Id7Yu6oKFFAtta2J78ksSrf0,851
 torchrl/envs/llm/transforms/browser.py,sha256=zF7jHHHrdpxUCjFFtiYK-vhw-p1YqsqwP8_b4SiK0Rs,10423
-torchrl/envs/llm/transforms/dataloading.py,sha256=
+torchrl/envs/llm/transforms/dataloading.py,sha256=4P-e5yjUdxRtfaOmMxtNRisJLLtCqurAhWAqV7GiXHI,24872
 torchrl/envs/llm/transforms/format.py,sha256=ESn0S9k5G4FQPBICq9h6ZsLKXZqiU71tYW8UnW4rgLI,2519
 torchrl/envs/llm/transforms/kl.py,sha256=N68378chSx54X5a7YLJzIV6d870H5xrBb5-qWqzpX1U,22744
 torchrl/envs/llm/transforms/policy_version.py,sha256=by2TjsZLwVjQbq7ggBoAco2Iq_2aEYgyxh9asTXL1vk,6893
+torchrl/envs/llm/transforms/reason.py,sha256=Q3LRbl7QmatRfAt7bOjOw_aLuZJgqRZvmKwT67cWX7s,10561
 torchrl/envs/llm/transforms/tokenizer.py,sha256=CcuKRu33YnyDgLtQtyxTGDFC6iI3b3fUA6Nb1Lnh7h8,13953
 torchrl/envs/llm/transforms/tools.py,sha256=I-HR0zjH4tFMp9xPH556H5Q5JqmqXdsAXwElAR93e5U,29498
 torchrl/envs/model_based/__init__.py,sha256=AkgZvTP5AerIg6ZwXfCfk3bnSr01hlwZWDiRd3UjBE4,331
@@ -131,7 +132,7 @@ torchrl/envs/transforms/llm.py,sha256=rQDzuut807wvFpSPCm5tynt8-cMKTgVKVjSVu9D99P
 torchrl/envs/transforms/r3m.py,sha256=sdTVLpnxHfzFVo5rO8WnXf2uUg9cr4LBOLBsWaFgGT8,13478
 torchrl/envs/transforms/rb_transforms.py,sha256=pxtL1VHvzEq6djuWsccLu4P-tnbAKsavemLGyt80I6c,7448
 torchrl/envs/transforms/rlhf.py,sha256=lOVXYqQaoDfm4_n77Dxw_wjicBpMtDvavKmBIK2N3lU,628
-torchrl/envs/transforms/transforms.py,sha256=
+torchrl/envs/transforms/transforms.py,sha256=8aXDl-NfugfqlBK-FcPBKYuDU-oIXeabW3uIXZ6QMik,481272
 torchrl/envs/transforms/utils.py,sha256=VXGH69Jxdmnw5eP9L3uM8ronQA5aIbT-Ktpjn5Frds0,2058
 torchrl/envs/transforms/vc1.py,sha256=mho5BvdAK-f9hD9t-iah52wT2B06qPmaJO7chrfIOWY,10534
 torchrl/envs/transforms/vecnorm.py,sha256=XahMcWvK3zjOB6EACSZtJ6UMP3yQ2zD9xf87UEB37Eg,34047
@@ -188,7 +189,7 @@ torchrl/objectives/dreamer.py,sha256=vIJQN91oPXYnPubDFQpaF5d3fR_WwIYuIVYtoCvw0TY
 torchrl/objectives/functional.py,sha256=ZaglBjEGuOTNGeFA-Ox-ugZVcNegQMUj--KWHDRBmaU,2106
 torchrl/objectives/gail.py,sha256=0m34XmcN-EDk5OfNIo5bKYbKKZfATsYRv4zQe3v2UwA,9576
 torchrl/objectives/iql.py,sha256=1jvlSznWke6NZSwfuYyHVnVBE7Cz3q169GnCRC7iel4,42991
-torchrl/objectives/ppo.py,sha256=
+torchrl/objectives/ppo.py,sha256=4fzV-DSFSGv0VHrI0YCk0EBUB35gkuWyo3j_4KhSoqE,75340
 torchrl/objectives/redq.py,sha256=4usM-nG2UWujeL-VEqzf7-uOwRFx6itkKCeitKuJhtw,28507
 torchrl/objectives/reinforce.py,sha256=ySXLp5C-OOUYayqjrf4taQmL8LgRvMgPCgHDsle8JDc,22339
 torchrl/objectives/sac.py,sha256=Oq9Iq90s9KFbnM4KSRUd2onU1JfW6aW80LWGdtO0CY8,63993
@@ -223,8 +224,8 @@ torchrl/trainers/helpers/losses.py,sha256=qH-2YJwMtDAYAPXTTYy3cOPiq4ILC6xTjfnGUU
 torchrl/trainers/helpers/models.py,sha256=ihTERG2c96E8cS3Tnul6a_ys6iDEEJmHh05p9blQTW8,21807
 torchrl/trainers/helpers/replay_buffer.py,sha256=ZUZHOa0TILyeWJ3iahzTJ6UvMl_0FdxuZfJEja94Bn8,2001
 torchrl/trainers/helpers/trainers.py,sha256=j6B5XA7_FFHMQeOIQwjNcO0CGE_4mZKUC9_jH_iqqh4,12071
-torchrl_nightly-2025.6.
-torchrl_nightly-2025.6.
-torchrl_nightly-2025.6.
-torchrl_nightly-2025.6.
-torchrl_nightly-2025.6.
+torchrl_nightly-2025.6.23.dist-info/licenses/LICENSE,sha256=xdjS4_xk-IwnLuIFCvTYTl9Y8aXRejqpmke3dGam_nI,1098
+torchrl_nightly-2025.6.23.dist-info/METADATA,sha256=Akc3RKlo_nIxX-wV8Dm44Cgrmskwm90hWXd6FV5xJe0,39131
+torchrl_nightly-2025.6.23.dist-info/WHEEL,sha256=9_3tTSxMJq-dgdzMiScNvtT5eTBVd3l6RgHS7HwTzpA,115
+torchrl_nightly-2025.6.23.dist-info/top_level.txt,sha256=JeTJ1jV7QJwLcUS1nr21aPn_wb-XlAZ9c-z_EH472JA,20
+torchrl_nightly-2025.6.23.dist-info/RECORD,,
{torchrl_nightly-2025.6.21.dist-info → torchrl_nightly-2025.6.23.dist-info/licenses}/LICENSE
RENAMED
File without changes