torchrl-nightly 2025.7.16__cp310-cp310-win_amd64.whl → 2025.7.17__cp310-cp310-win_amd64.whl

This diff compares publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the packages exactly as they appear in those registries.
torchrl/_torchrl.cp310-win_amd64.pyd CHANGED (binary file; contents not shown)
torchrl/collectors/collectors.py CHANGED
@@ -686,6 +686,10 @@ class SyncDataCollector(DataCollectorBase):
  policy = RandomPolicy(env.full_action_spec)
  elif policy_factory is not None:
  raise TypeError("policy_factory cannot be used with policy argument.")
+ # If the underlying policy has a state_dict, we keep a reference to the policy and
+ # do all policy weight saving/loading through it
+ if hasattr(policy, "state_dict"):
+ self._policy_w_state_dict = policy

  if trust_policy is None:
  trust_policy = isinstance(policy, (RandomPolicy, CudaGraphModule))
@@ -1686,8 +1690,8 @@ class SyncDataCollector(DataCollectorBase):
  else:
  env_state_dict = OrderedDict()

- if hasattr(self.policy, "state_dict"):
- policy_state_dict = self.policy.state_dict()
+ if hasattr(self, "_policy_w_state_dict"):
+ policy_state_dict = self._policy_w_state_dict.state_dict()
  state_dict = OrderedDict(
  policy_state_dict=policy_state_dict,
  env_state_dict=env_state_dict,
@@ -1711,7 +1715,13 @@ class SyncDataCollector(DataCollectorBase):
  if strict or "env_state_dict" in state_dict:
  self.env.load_state_dict(state_dict["env_state_dict"], **kwargs)
  if strict or "policy_state_dict" in state_dict:
- self.policy.load_state_dict(state_dict["policy_state_dict"], **kwargs)
+ if not hasattr(self, "_policy_w_state_dict"):
+ raise ValueError(
+ "Underlying policy does not have state_dict to load policy_state_dict into."
+ )
+ self._policy_w_state_dict.load_state_dict(
+ state_dict["policy_state_dict"], **kwargs
+ )
  self._frames = state_dict["frames"]
  self._iter = state_dict["iter"]

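The hunks above change SyncDataCollector so that policy weights are saved and loaded through a `_policy_w_state_dict` reference, captured only when the policy actually exposes `state_dict()`. A minimal, self-contained sketch of that pattern (a toy class for illustration only, not the torchrl collector API):

    from collections import OrderedDict
    import torch.nn as nn

    class TinyCollector:
        # Mirrors the diff: keep a policy reference only if it can provide a state_dict,
        # and route all weight saving/loading through that reference.
        def __init__(self, policy):
            if hasattr(policy, "state_dict"):
                self._policy_w_state_dict = policy

        def state_dict(self):
            sd = OrderedDict()
            if hasattr(self, "_policy_w_state_dict"):
                sd["policy_state_dict"] = self._policy_w_state_dict.state_dict()
            return sd

        def load_state_dict(self, sd):
            if "policy_state_dict" in sd:
                if not hasattr(self, "_policy_w_state_dict"):
                    raise ValueError("Underlying policy has no state_dict to load into.")
                self._policy_w_state_dict.load_state_dict(sd["policy_state_dict"])

    collector = TinyCollector(nn.Linear(4, 2))
    collector.load_state_dict(collector.state_dict())  # round-trips the policy weights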
torchrl/modules/distributions/discrete.py CHANGED
@@ -352,7 +352,7 @@ class MaskedCategorical(D.Categorical):
  logits = self.logits
  if logits.ndim > 2:
  # Bring channels in 2nd dim
- logits = logits.transpose(-1, 1)
+ logits = logits.permute(0, -1, *range(1, logits.ndim - 1))
  original_value_shape = None
  if logits.ndim == 1 and value.ndim >= 1:
  if value.ndim >= 2:
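The permute above moves the last (category) dimension into position 1, the channels-second layout that losses such as `torch.nn.functional.cross_entropy` expect for >2D logits, while keeping the remaining dimensions in their original order; `transpose(-1, 1)` instead swaps the last dimension with dimension 1, reordering the trailing dims. A standalone illustration in plain PyTorch (shapes are arbitrary examples):

    import torch

    logits = torch.randn(2, 3, 4, 5)  # (batch, d1, d2, num_categories)
    via_transpose = logits.transpose(-1, 1)                          # (2, 5, 4, 3): d1/d2 swapped
    via_permute = logits.permute(0, -1, *range(1, logits.ndim - 1))  # (2, 5, 3, 4): d1/d2 preserved
    print(via_transpose.shape, via_permute.shape)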
torchrl/modules/llm/policies/common.py CHANGED
@@ -9,8 +9,8 @@ import weakref
  from typing import Any, Literal, overload

  import torch
- from tensordict import NestedKey, TensorDictBase
- from tensordict.nn import TensorDictModuleBase, TensorDictSequential
+ from tensordict import lazy_stack, NestedKey, TensorDictBase
+ from tensordict.nn import TensorDictModuleBase
  from tensordict.tensorclass import TensorClass
  from tensordict.utils import _zip_strict
  from torch import distributions as D
@@ -175,29 +175,35 @@ class ChatHistory(TensorClass["nocast"]):
  def __post_init__(self):
  # Check that all history objects have one more batch dimension than the ChatHistory object
  if self.prompt is not None:
- if self.prompt.batch_dims != self.batch_dims + 1:
+ if getattr(self.prompt, "batch_dims", None) == self.batch_dims:
  warnings.warn(
  "Prompt history should have one more batch dimension than the ChatHistory object to handle multi-turn conversations, "
  f"got {self.prompt.batch_dims} and {self.batch_dims}. "
  "The batch dimension of the ChatHistory object will be unsqueezed along the last dimension."
  )
- self.prompt = self.prompt.unsqueeze(-1)
+ self.prompt = lazy_stack(
+ [self.prompt], -1
+ ) # equivalent to unsqueeze(-1) but make sure it's a lazy stack
  if self.response is not None:
- if self.response.batch_dims != self.batch_dims + 1:
+ if getattr(self.response, "batch_dims", None) == self.batch_dims:
  warnings.warn(
  "Response history should have one more batch dimension than the ChatHistory object to handle multi-turn conversations, "
  f"got {self.response.batch_dims} and {self.batch_dims}. "
  "The batch dimension of the ChatHistory object will be unsqueezed along the last dimension."
  )
- self.response = self.response.unsqueeze(-1)
+ self.response = lazy_stack(
+ [self.response], -1
+ ) # equivalent to unsqueeze(-1) but make sure it's a lazy stack
  if self.full is not None:
- if self.full.batch_dims != self.batch_dims + 1:
+ if getattr(self.full, "batch_dims", None) == self.batch_dims:
  warnings.warn(
  "Full history should have one more batch dimension than the ChatHistory object to handle multi-turn conversations, "
  f"got {self.full.batch_dims} and {self.batch_dims}. "
  "The batch dimension of the ChatHistory object will be unsqueezed along the last dimension."
  )
- self.full = self.full.unsqueeze(-1)
+ self.full = lazy_stack(
+ [self.full], -1
+ ) # equivalent to unsqueeze(-1) but make sure it's a lazy stack


  class LogProbs(TensorClass["nocast"]):
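In the ChatHistory hunks above, `unsqueeze(-1)` is replaced by `lazy_stack([obj], -1)`, which adds the same trailing batch dimension but guarantees the result is a LazyStackedTensorDict. A small sketch using the tensordict API imported in this diff (the key and shapes are made up for illustration):

    import torch
    from tensordict import LazyStackedTensorDict, TensorDict, lazy_stack

    td = TensorDict({"tokens": torch.zeros(3, 5)}, batch_size=[3])
    stacked = lazy_stack([td], -1)   # trailing batch dim added: batch_size becomes [3, 1]
    assert isinstance(stacked, LazyStackedTensorDict)
    print(stacked.batch_size)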
@@ -482,7 +488,7 @@ class LLMWrapperBase(TensorDictModuleBase):
  "You can create a new version of this wrapper using the `get_new_version` method."
  )

- td_out = self(tensordict.copy())
+ td_out = self.forward(tensordict.copy(), logits_only=True)

  # Get logits/log-probs
  if as_padded_tensor is None:
@@ -557,7 +563,7 @@ class LLMWrapperBase(TensorDictModuleBase):
  "get_dist_with_prompt_mask is not implemented for generate=True. "
  "You can create a new version of this wrapper using the `get_new_version` method."
  )
- td_out = self(tensordict.copy())
+ td_out = self.forward(tensordict.copy(), logits_only=True)

  # Try to get prompt tokens first
  if self.pad_output:
@@ -668,7 +674,7 @@ class LLMWrapperBase(TensorDictModuleBase):
  "get_dist_with_assistant_mask is not implemented for generate=True. "
  "You can create a new version of this wrapper using the `get_new_version` method."
  )
- td_out = self(tensordict.copy())
+ td_out = self.forward(tensordict.copy(), logits_only=True)
  # Update the tokens key to reflect the tokenized history when querying the log-probs
  tensordict.update(
  td_out,
@@ -737,7 +743,7 @@ class LLMWrapperBase(TensorDictModuleBase):
  "get_dist_with_attention_mask is not implemented for generate=True. "
  "You can create a new version of this wrapper using the `get_new_version` method."
  )
- td_out = self(tensordict.copy())
+ td_out = self.forward(tensordict.copy(), logits_only=True)
  if self.pad_output:
  logits = td_out.get(logits_key)
  attention_mask = td_out.get(attention_mask_key)
@@ -794,7 +800,7 @@ class LLMWrapperBase(TensorDictModuleBase):
  "get_dist_with_custom_mask is not implemented for generate=True. "
  "You can create a new version of this wrapper using the `get_new_version` method."
  )
- td_out = self(tensordict.copy())
+ td_out = self.forward(tensordict.copy(), logits_only=True)
  if self.pad_output:
  logits = td_out.get(logits_key)
  else:
@@ -841,8 +847,24 @@ class LLMWrapperBase(TensorDictModuleBase):
  """
  return self._get_dist_with_attention_mask(tensordict, **kwargs)

- # Sampling is taken care of by the sub-modules
- forward = TensorDictSequential.forward
+ def forward(
+ self,
+ tensordict: TensorDictBase,
+ *,
+ tensordict_out: TensorDictBase | None = None,
+ logits_only: bool = False,
+ **kwargs,
+ ) -> TensorDictBase: # noqa: D417
+ """Forward pass for the LLM policy.
+
+ Args:
+ tensordict (TensorDictBase): The input tensordict.
+
+ Keyword Args:
+ tensordict_out (TensorDictBase | None): The output tensordict.
+ logits_only (bool): Whether to return only the logits. Only effective if generate=False. Defaults to `False`.
+ """
+ raise NotImplementedError

  def _check_padded(self, val: torch.Tensor) -> torch.Tensor:
  """Check that a value is a padded tensor."""
torchrl/modules/llm/policies/transformers_wrapper.py CHANGED
@@ -13,6 +13,7 @@ from typing import Literal
  import torch
  from tensordict import (
  lazy_stack,
+ LazyStackedTensorDict,
  MetaData,
  NonTensorStack,
  set_list_to_stack,
@@ -468,19 +469,32 @@ class TransformersWrapper(LLMWrapperBase):
  def forward(
  self,
  tensordict: TensorDictBase,
+ *,
  tensordict_out: TensorDictBase | None = None,
+ logits_only: bool = False,
  **kwargs,
  ) -> TensorDictBase:
+ tensordict_orig = tensordict
  if not tensordict.ndim:
+ if tensordict_out is not None:
+ raise ValueError(
+ "tensordict_out must not be provided when tensordict.ndim == 0. If this is needed, "
+ "please submit an issue on github."
+ )
  # unsqueeze - squeeze the input
- try:
- return self(lazy_stack([tensordict])).squeeze(0)
- except Exception as e:
- raise RuntimeError(
- f"Unsqueeze/squeeze failed. Inputs to {type(self).__name__} should ideally be 1 dimensional."
- ) from e
+ return self.forward(lazy_stack([tensordict]), logits_only=logits_only)[0]
  elif tensordict.ndim > 1:
- return self(tensordict.reshape(-1)).view(tensordict.shape)
+ if tensordict_out is not None:
+ raise ValueError(
+ "tensordict_out must not be provided when tensordict.ndim > 1. If this is needed, "
+ "please submit an issue on github."
+ )
+ return self.forward(tensordict.reshape(-1), logits_only=logits_only).view(
+ tensordict.shape
+ )
+
+ if not isinstance(tensordict, LazyStackedTensorDict):
+ tensordict = tensordict.to_lazystack(0)

  _source_device = None
  if self._device:
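The rewritten forward above normalizes the batch dimensionality before doing any work: a 0-dimensional input is lazily stacked to 1-D and indexed back, an input with more than one batch dimension is flattened and reshaped afterwards, and the remaining path coerces the input to a lazy stack. A standalone sketch of that control-flow pattern on a plain tensor (illustrative only, not the wrapper itself):

    import torch

    def normalized_forward(x: torch.Tensor) -> torch.Tensor:
        if x.ndim == 0:
            return normalized_forward(x.unsqueeze(0))[0]            # promote to 1-D, then index back
        if x.ndim > 1:
            return normalized_forward(x.reshape(-1)).view(x.shape)  # flatten, process, restore shape
        return x * 2  # stand-in for the real per-batch computation

    print(normalized_forward(torch.arange(6).reshape(2, 3)))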
@@ -517,17 +531,23 @@ class TransformersWrapper(LLMWrapperBase):
  if self.generate:
  out = self._from_transformers_generate_history(tensordict, cfg, out)
  else:
- out = self._from_transformers_logprobs_history(tensordict, cfg, out)
+ out = self._from_transformers_logprobs_history(
+ tensordict, cfg, out, logits_only=logits_only
+ )
  elif self.input_mode == "text":
  if self.generate:
  out = self._from_transformers_generate_text(tensordict, cfg, out)
  else:
- out = self._from_transformers_logprobs_text(tensordict, cfg, out)
+ out = self._from_transformers_logprobs_text(
+ tensordict, cfg, out, logits_only=logits_only
+ )
  elif self.input_mode == "tokens":
  if self.generate:
  out = self._from_transformers_generate_tokens(tensordict, cfg, out)
  else:
- out = self._from_transformers_logprobs_tokens(tensordict, cfg, out)
+ out = self._from_transformers_logprobs_tokens(
+ tensordict, cfg, out, logits_only=logits_only
+ )

  if _source_device:
  out = out.to(_source_device)
@@ -535,7 +555,7 @@ class TransformersWrapper(LLMWrapperBase):
  if tensordict_out is None:
  if self.inplace is True:
  # The output is the input
- tensordict_out = tensordict
+ tensordict_out = tensordict_orig
  elif self.inplace is False:
  # The output is the new structure
  tensordict_out = out
@@ -690,7 +710,7 @@ class TransformersWrapper(LLMWrapperBase):
  result.set(self.history_key, history_chat)
  return result

- def _from_transformers_logprobs_history(self, td, cfg, out):
+ def _from_transformers_logprobs_history(self, td, cfg, out, logits_only=False):
  """Compute log-probs from history input."""
  from torchrl.data.llm import History

@@ -731,7 +751,9 @@ class TransformersWrapper(LLMWrapperBase):
  raise ValueError(
  f"Expected TensorDictBase for history input, got {type(response_tokens)}"
  )
- result = self._logprobs_from_history_tokens(response_tokens, cfg, out)
+ result = self._logprobs_from_history_tokens(
+ response_tokens, cfg, out, logits_only=logits_only
+ )
  text_result = Text._from_tensordict(result.empty())
  result.set(self.text_key, text_result)
  result[self.text_key, "full"] = text_full
@@ -952,7 +974,9 @@ class TransformersWrapper(LLMWrapperBase):
  result = result.to(cast)
  return result

- def _logprobs_from_history_tokens(self, response_tokens, cfg, out):
+ def _logprobs_from_history_tokens(
+ self, response_tokens, cfg, out, logits_only=False
+ ):
  """Compute log-probs from history tokens."""
  pad_val = self.tokenizer.pad_token_id

@@ -996,6 +1020,7 @@ class TransformersWrapper(LLMWrapperBase):
  tokens_full_padded,
  attention_mask_full_padded,
  pad_val,
+ logits_only=logits_only,
  )

  # Build output TensorClass objects
@@ -1051,19 +1076,20 @@ class TransformersWrapper(LLMWrapperBase):
  tokens_obj.padded = MetaData(self.pad_output)
  out.set(self.tokens_key, tokens_obj)

- log_probs_obj = LogProbs._from_tensordict(
- TensorDict(batch_size=out.batch_size).to_lazystack(0)
- )
- if self.pad_output:
- log_probs_obj.full = log_probs_full_padded
- else:
- log_probs_full_unpadded = _unpad_tensors(
- log_probs_full_padded, attention_mask_full_padded, as_nested=False
+ if not logits_only:
+ log_probs_obj = LogProbs._from_tensordict(
+ TensorDict(batch_size=out.batch_size).to_lazystack(0)
  )
- log_probs_obj.full = log_probs_full_unpadded
- log_probs_obj.response = None
- log_probs_obj.padded = MetaData(self.pad_output)
- out.set(self.log_probs_key, log_probs_obj)
+ if self.pad_output:
+ log_probs_obj.full = log_probs_full_padded
+ else:
+ log_probs_full_unpadded = _unpad_tensors(
+ log_probs_full_padded, attention_mask_full_padded, as_nested=False
+ )
+ log_probs_obj.full = log_probs_full_unpadded
+ log_probs_obj.response = None
+ log_probs_obj.padded = MetaData(self.pad_output)
+ out.set(self.log_probs_key, log_probs_obj)

  # Add logits to output if we're in a get_dist call
  if self._in_get_dist_call:
@@ -1095,7 +1121,7 @@ class TransformersWrapper(LLMWrapperBase):
  raise ValueError(f"Expected list of text for text input, got {type(text)}")
  return self._generate_from_text(text, cfg, out)

- def _from_transformers_logprobs_text(self, td, cfg, out):
+ def _from_transformers_logprobs_text(self, td, cfg, out, logits_only=False):
  """Compute log-probs from text input."""
  # Validate input
  if self.input_key not in td:
@@ -1168,6 +1194,7 @@ class TransformersWrapper(LLMWrapperBase):
  input_ids_full_padded,
  attention_mask_full_padded,
  self.tokenizer.pad_token_id,
+ logits_only=logits_only,
  )

  # Build output TensorClass objects
@@ -1212,19 +1239,20 @@ class TransformersWrapper(LLMWrapperBase):
  masks_obj.padded = MetaData(self.pad_output)
  out.set(self.masks_key, masks_obj)

- log_probs_obj = LogProbs._from_tensordict(
- TensorDict(batch_size=out.batch_size).to_lazystack(0)
- )
- if self.pad_output:
- log_probs_obj.full = log_probs_full_padded
- else:
- log_probs_full_unpadded = _unpad_tensors(
- log_probs_full_padded, attention_mask_full_padded, as_nested=False
+ if not logits_only:
+ log_probs_obj = LogProbs._from_tensordict(
+ TensorDict(batch_size=out.batch_size).to_lazystack(0)
  )
- log_probs_obj.full = log_probs_full_unpadded
- log_probs_obj.response = None
- log_probs_obj.padded = MetaData(self.pad_output)
- out.set(self.log_probs_key, log_probs_obj)
+ if self.pad_output:
+ log_probs_obj.full = log_probs_full_padded
+ else:
+ log_probs_full_unpadded = _unpad_tensors(
+ log_probs_full_padded, attention_mask_full_padded, as_nested=False
+ )
+ log_probs_obj.full = log_probs_full_unpadded
+ log_probs_obj.response = None
+ log_probs_obj.padded = MetaData(self.pad_output)
+ out.set(self.log_probs_key, log_probs_obj)

  # Add logits to output if we're in a get_dist call
  if self._in_get_dist_call:
@@ -1416,7 +1444,11 @@ class TransformersWrapper(LLMWrapperBase):
  return out

  def _from_transformers_logprobs_tokens(
- self, td: TensorDictBase, cfg: dict | None, out: TensorDictBase
+ self,
+ td: TensorDictBase,
+ cfg: dict | None,
+ out: TensorDictBase,
+ logits_only=False,
  ) -> TensorDictBase:
  """Compute log-probs from tokens input."""
  # Validate input
@@ -1470,6 +1502,7 @@ class TransformersWrapper(LLMWrapperBase):
  input_ids_full_padded,
  attention_mask_full_padded,
  self.tokenizer.pad_token_id,
+ logits_only=logits_only,
  )

  # Build output TensorClass objects
@@ -1514,19 +1547,20 @@ class TransformersWrapper(LLMWrapperBase):
  masks_obj.padded = MetaData(self.pad_output)
  out.set(self.masks_key, masks_obj)

- log_probs_obj = LogProbs._from_tensordict(
- TensorDict(batch_size=out.batch_size).to_lazystack(0)
- )
- if self.pad_output:
- log_probs_obj.full = log_probs_full_padded
- else:
- log_probs_full_unpadded = _unpad_tensors(
- log_probs_full_padded, attention_mask_full_padded, as_nested=False
+ if not logits_only:
+ log_probs_obj = LogProbs._from_tensordict(
+ TensorDict(batch_size=out.batch_size).to_lazystack(0)
  )
- log_probs_obj.full = log_probs_full_unpadded
- log_probs_obj.response = None
- log_probs_obj.padded = MetaData(self.pad_output)
- out.set(self.log_probs_key, log_probs_obj)
+ if self.pad_output:
+ log_probs_obj.full = log_probs_full_padded
+ else:
+ log_probs_full_unpadded = _unpad_tensors(
+ log_probs_full_padded, attention_mask_full_padded, as_nested=False
+ )
+ log_probs_obj.full = log_probs_full_unpadded
+ log_probs_obj.response = None
+ log_probs_obj.padded = MetaData(self.pad_output)
+ out.set(self.log_probs_key, log_probs_obj)

  # Add logits to output if we're in a get_dist call
  if self._in_get_dist_call:
@@ -1567,7 +1601,7 @@ class TransformersWrapper(LLMWrapperBase):
  return log_probs, logits

  def _compute_log_probs_from_model_output(
- self, model_output, input_ids, attention_mask, pad_val
+ self, model_output, input_ids, attention_mask, pad_val, logits_only=False
  ):
  """Compute log-probs from model output without modifying original tensors.

@@ -1576,6 +1610,7 @@ class TransformersWrapper(LLMWrapperBase):
  input_ids: Original input token ids
  attention_mask: Original attention mask
  pad_val: Padding token value to ignore in loss computation
+ logits_only: Whether to return only the logits.

  Returns:
  tuple: (log_probs, shifted_logits) where log_probs are the computed log probabilities
@@ -1600,6 +1635,8 @@ class TransformersWrapper(LLMWrapperBase):
  raise ValueError(
  f"The logits shape {shifted_logits.shape} does not match the input ids shape {shifted_input_ids.shape}"
  )
+ if logits_only:
+ return None, shifted_logits

  # Compute log-probs
  td = TensorDict(
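With `logits_only=True`, `_compute_log_probs_from_model_output` now stops right after the shape check and returns `(None, shifted_logits)`, skipping the per-token log-probability computation that the `get_dist*` helpers do not need. A rough sketch of the kind of computation being skipped, in plain PyTorch (shapes and names are illustrative, not the torchrl implementation):

    import torch
    import torch.nn.functional as F

    logits = torch.randn(2, 6, 32)             # (batch, seq, vocab) from a causal LM
    input_ids = torch.randint(0, 32, (2, 6))

    shifted_logits = logits[:, :-1]            # logits at position t predict token t+1
    shifted_targets = input_ids[:, 1:]
    log_probs = torch.gather(
        F.log_softmax(shifted_logits, dim=-1), -1, shifted_targets.unsqueeze(-1)
    ).squeeze(-1)                              # (batch, seq-1) per-token log-probs
    print(log_probs.shape)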
torchrl/modules/llm/policies/vllm_wrapper.py CHANGED
@@ -11,6 +11,7 @@ from typing import Any, Literal
  import torch
  from tensordict import (
  lazy_stack,
+ LazyStackedTensorDict,
  MetaData,
  NonTensorStack,
  set_list_to_stack,
@@ -500,19 +501,32 @@ class vLLMWrapper(LLMWrapperBase):
  def forward(
  self,
  tensordict: TensorDictBase,
+ *,
  tensordict_out: TensorDictBase | None = None,
+ logits_only: bool = False,
  **kwargs,
  ) -> TensorDictBase:
+ tensordict_orig = tensordict
  if not tensordict.ndim:
+ if tensordict_out is not None:
+ raise ValueError(
+ "tensordict_out must not be provided when tensordict.ndim == 0. If this is needed, "
+ "please submit an issue on github."
+ )
  # unsqueeze - squeeze the input
- try:
- return self(lazy_stack([tensordict])).squeeze(0)
- except Exception as e:
- raise RuntimeError(
- f"Unsqueeze/squeeze failed. Inputs to {type(self).__name__} should ideally be 1 dimensional."
- ) from e
+ return self.forward(lazy_stack([tensordict]), logits_only=logits_only)[0]
  elif tensordict.ndim > 1:
- return self(tensordict.reshape(-1)).view(tensordict.shape)
+ if tensordict_out is not None:
+ raise ValueError(
+ "tensordict_out must not be provided when tensordict.ndim > 1. If this is needed, "
+ "please submit an issue on github."
+ )
+ return self.forward(tensordict.reshape(-1), logits_only=logits_only).view(
+ tensordict.shape
+ )
+
+ if not isinstance(tensordict, LazyStackedTensorDict):
+ tensordict = tensordict.to_lazystack(0)

  _source_device = None
  if self._device:
@@ -567,7 +581,7 @@ class vLLMWrapper(LLMWrapperBase):
  if tensordict_out is None:
  if self.inplace is True:
  # The output is the input
- tensordict_out = tensordict
+ tensordict_out = tensordict_orig
  elif self.inplace is False:
  # The output is the new structure
  tensordict_out = out
@@ -1242,12 +1256,14 @@ class vLLMWrapper(LLMWrapperBase):

  generate_kwargs = {"sampling_params": sampling_params}
  args = ()
+ empirical_attention_mask = None

  if tokens_prompt_unpadded is None:
  # TODO: To be on the safe side, we may do this even in the unpadded case since we're not sure
  # the user passed an unpadded tensor in the first place.
+ empirical_attention_mask = tokens_prompt_padded != self.padding_value
  tokens_prompt_list = self._to_list(
- tokens_prompt_padded, tokens_prompt_padded != self.padding_value
+ tokens_prompt_padded, empirical_attention_mask
  )
  else:
  tokens_prompt_list = self._to_list(tokens_prompt_unpadded, None)
@@ -1365,6 +1381,22 @@ class vLLMWrapper(LLMWrapperBase):
  padding_value=self.padding_value,
  padding_side="right",
  )
+ if (
+ prompt_logprobs_padded.shape[-1]
+ != tokens_prompt_padded.shape[-1]
+ ):
+ tshape = tokens_prompt_padded.shape
+ oshape = prompt_logprobs_padded.shape
+ # it could be that the input was padded already - padding again then
+ prompt_logprobs_padded = torch.cat(
+ [
+ prompt_logprobs_padded.new_zeros(
+ tshape[:-1] + (tshape[-1] - oshape[-1],)
+ ),
+ prompt_logprobs_padded,
+ ],
+ -1,
+ )
  else:
  prompt_logprobs_list = request_output_tc.get(
  "prompt_logprobs",
@@ -1490,26 +1522,21 @@ class vLLMWrapper(LLMWrapperBase):

  request_output_tc = _RequestOutput_tc.from_request_output(tokens_out_stuct)

+ # For unpadded case, extract from each sequence
+ log_probs_full_unpadded = request_output_tc.get("prompt_logprobs", as_list=True)
+
  # Extract log-probs from prompt_logprobs
  if self.pad_output:
  # For padded case, use all prompt_logprobs
- log_probs_full_padded = request_output_tc.get(
- "prompt_logprobs",
- as_padded_tensor=True,
- padding_value=0,
- padding_side="left",
+ if attention_mask_full_padded is not None:
+ attention_mask_full_padded = tokens_full_padded != self.padding_value
+ log_probs_full_padded = torch.zeros_like(
+ tokens_full_padded, dtype=torch.get_default_dtype()
  )
-
- # Mask out padding
- attention_mask_full_padded = tokens_full_padded != self.padding_value
- log_probs_full_padded = torch.where(
- attention_mask_full_padded, log_probs_full_padded, 0.0
+ log_probs_full_padded[attention_mask_full_padded] = torch.cat(
+ log_probs_full_unpadded, -1
  )
  else:
- # For unpadded case, extract from each sequence
- log_probs_full_unpadded = request_output_tc.get(
- "prompt_logprobs", as_list=True
- )
  self._check_not_padded(log_probs_full_unpadded)

  assistant_mask_full_padded = None
torchrl/version.py CHANGED
@@ -1,2 +1,2 @@
- __version__ = '2025.7.16'
- git_version = '361a8da6edc77979e17409cf19396230d18c18a9'
+ __version__ = '2025.7.17'
+ git_version = 'bec8f0382b9694a87c04385f55fc9f8f3ee1724f'
torchrl_nightly-2025.7.16.dist-info/METADATA → torchrl_nightly-2025.7.17.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: torchrl-nightly
- Version: 2025.7.16
+ Version: 2025.7.17
  Summary: UNKNOWN
  Home-page: https://github.com/pytorch/rl
  Author: torchrl contributors
torchrl_nightly-2025.7.16.dist-info/RECORD → torchrl_nightly-2025.7.17.dist-info/RECORD
@@ -3,11 +3,11 @@ build_tools/setup_helpers/__init__.py,sha256=l9zlK7Nm5bT7P_onQx-hZeIGzKKyCFm1PFk
  build_tools/setup_helpers/extension.py,sha256=ihV8jz8kqOvpqzuD006XqF1oNX5ukKGlwIOJRb1Vd-o,6075
  torchrl/__init__.py,sha256=76lKYwYKmAKORhyVt2tURmYAIRTifxxO3gWsskrHAXU,3054
  torchrl/_extension.py,sha256=x6Nqj2brF3VhlEwxmNA2fYbmpxq1HHGrHMnP0YnQwdc,2412
- torchrl/_torchrl.cp310-win_amd64.pyd,sha256=MHnLXT4hRARJhDr5PwHXmfL7xW3wUX83LHClN4P4Kmo,440832
+ torchrl/_torchrl.cp310-win_amd64.pyd,sha256=EmXEHv8OHR8lomhJKYRoDdRNcjSZrLKyQdUKYT1-BR4,440832
  torchrl/_utils.py,sha256=2N35rdD65U1khMi5gVIz8-nMjlZsoVq0kCiQftVRSxw,42297
- torchrl/version.py,sha256=8tM5vPhf-adaEbP-CudaaDTPQwY79ENoxHk7DIqTeDk,85
+ torchrl/version.py,sha256=hd0Oai-1iVD9JdvzREmm-B6pS3vtNIorROtxZuyZf7A,85
  torchrl/collectors/__init__.py,sha256=LzTyfxmkNGPSa5-3rS5unQK7HfT5ZEdr2NV291rAOlU,832
- torchrl/collectors/collectors.py,sha256=i-7ANxLstwaj4ruTkxFvp4YV42oHm_9M95_uPJZZock,181631
+ torchrl/collectors/collectors.py,sha256=UbXtDMHrXVQ-cd95TBpss2SIbNFKze2HTPYjFz2cPNQ,182146
  torchrl/collectors/utils.py,sha256=aBmBLpphhfplqQjRCyn1jtWWJ-Wtc7TWvM0rOBN8SsE,11579
  torchrl/collectors/weight_update.py,sha256=Ydq5nJSTV3Q1uqLtJ_1Nj1JB5rwHwrG5StaLxymWFV4,21572
  torchrl/collectors/distributed/__init__.py,sha256=cKDWdNlwx2LoJkTwf-DKUXbq3Y-0Z1DctPYPcdgOSU0,730
@@ -139,7 +139,7 @@ torchrl/envs/transforms/vip.py,sha256=r8Ni0hAYY1gispLj0TXV2VIedrgC4eW3hAhJBv47Q7
  torchrl/modules/__init__.py,sha256=TuJj3WUlvilYY39nUH-ykXkyprTxjq9NLW0QQXADqJk,4343
  torchrl/modules/distributions/__init__.py,sha256=Evkiz96ZPs7VUZp2n03h9kd7rmUCEEvMVl2f7RhzMhQ,1670
  torchrl/modules/distributions/continuous.py,sha256=tahVKeI_uFgnVmskJ-_NsXhWsSR8sr7FJ_281rCq4LE,26434
- torchrl/modules/distributions/discrete.py,sha256=HSuaJ0O71eBlToGDh31FRxGDO62k_Gf7iT9qgJjna84,36462
+ torchrl/modules/distributions/discrete.py,sha256=QXDv-nllK6i1tXj0KiP6TnHDjrI05YRp05D3mBin6Pc,36488
  torchrl/modules/distributions/truncated_normal.py,sha256=l5G3TePasl7q12DjwisyQC_E0OfZZo2g_HzBhZREVxc,6122
  torchrl/modules/distributions/utils.py,sha256=q4AFDKFpacRhrl4rjJ54UhxQzjOcj_SKlz0UIcZlUVc,7796
  torchrl/modules/llm/__init__.py,sha256=_gH2JzO4sXWYIyDtPaGvrPJCBCGCRA5T0SXZtETeeoQ,775
@@ -147,9 +147,9 @@ torchrl/modules/llm/utils.py,sha256=b2s9ngHwXnNbLggygU3-ScNwk0MWICketq2pZBshGqM,
  torchrl/modules/llm/backends/__init__.py,sha256=ABKK4mJeRtoLXEqfnMvIuiovs7VJoCxnDDo6QYvPMVk,457
  torchrl/modules/llm/backends/vllm.py,sha256=5P78jEtAIytgYHzEkOrg-wwqh1ryhiMVy4M_AxNQ9JQ,9649
  torchrl/modules/llm/policies/__init__.py,sha256=x5gk4ja20-yjsPHY0F_Ymw1G7u4mCDULwxnTAaRTJN8,567
- torchrl/modules/llm/policies/common.py,sha256=Aev4EKEogWFr4C7wPqFZ8lmk041ZzrtoFx3QVVverc0,39068
- torchrl/modules/llm/policies/transformers_wrapper.py,sha256=EgXlpxue2K4cAUCabrCKgTLhFRosg-OcakuITstL2Zw,76137
- torchrl/modules/llm/policies/vllm_wrapper.py,sha256=tNil8XybcGQaVBW5q81MDimXjuLYZTUbBbVG4jNYPuc,80114
+ torchrl/modules/llm/policies/common.py,sha256=qFc1Di76qFjTvf38_FfpVKZsz4d4Nva2tOFk9F9vUOM,40085
+ torchrl/modules/llm/policies/transformers_wrapper.py,sha256=G4nZbtqcEch1BD3URWfn0pwiZtNF7O1f6P5qpotTJVc,77625
+ torchrl/modules/llm/policies/vllm_wrapper.py,sha256=WRB1t-7_CcXn0JmQpRSUikLhgPe6CoasHYfIlgOXx-Q,81542
  torchrl/modules/models/__init__.py,sha256=Y1XTkBOB5EMj6IaMru6V3CDwFLnkUtxzsHcqzeqq_4Y,1829
  torchrl/modules/models/batchrenorm.py,sha256=bR4ZhaJ5E1cSK5o8L2dNX5KVLIb-bgrYxcq6yhx0I1A,4869
  torchrl/modules/models/decision_transformer.py,sha256=ANFTOm3k9_3Uv1vKGdXumRy3meBPnDdT8HqhVvJ2RCo,6783
@@ -223,8 +223,8 @@ torchrl/trainers/helpers/losses.py,sha256=HwrovwbMOhY-5-hlOz-YHclKnoJhMijVjDNuAT
  torchrl/trainers/helpers/models.py,sha256=VujBq9H92sEzpCtU1iTrJQNlwvyOO-Rho4bzsMonX6s,22465
  torchrl/trainers/helpers/replay_buffer.py,sha256=RaZqXnHimmadiibvDBcLbtIhpPaVMTPhYMOBvX4v3CA,2060
  torchrl/trainers/helpers/trainers.py,sha256=hB1FtHtP-S0PBQ4LF6WPy37caaLpacyaLThj1BNl5Ho,12372
- torchrl_nightly-2025.7.16.dist-info/LICENSE,sha256=PGO-oZsq4EzhE1-WQS2xGiEF3UCVb9YawfQ09cIMV_8,1119
- torchrl_nightly-2025.7.16.dist-info/METADATA,sha256=35ji9dwgAOpYAOrjBU-SAq54qS_DfvkVXzrs0I0xQGQ,44000
- torchrl_nightly-2025.7.16.dist-info/WHEEL,sha256=NVXpD7b4Gxps0cd2ds5rr5TG8W4ApEwx_i5J99qMZ5E,102
- torchrl_nightly-2025.7.16.dist-info/top_level.txt,sha256=JeTJ1jV7QJwLcUS1nr21aPn_wb-XlAZ9c-z_EH472JA,20
- torchrl_nightly-2025.7.16.dist-info/RECORD,,
+ torchrl_nightly-2025.7.17.dist-info/LICENSE,sha256=PGO-oZsq4EzhE1-WQS2xGiEF3UCVb9YawfQ09cIMV_8,1119
+ torchrl_nightly-2025.7.17.dist-info/METADATA,sha256=SaETkAw3q6ZUIazIqAwcLeC4WvSoghlUNoSQlhQ4nXQ,44000
+ torchrl_nightly-2025.7.17.dist-info/WHEEL,sha256=NVXpD7b4Gxps0cd2ds5rr5TG8W4ApEwx_i5J99qMZ5E,102
+ torchrl_nightly-2025.7.17.dist-info/top_level.txt,sha256=JeTJ1jV7QJwLcUS1nr21aPn_wb-XlAZ9c-z_EH472JA,20
+ torchrl_nightly-2025.7.17.dist-info/RECORD,,