torchrl-nightly 2025.7.16__cp312-cp312-macosx_10_13_universal2.whl → 2025.7.18__cp312-cp312-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchrl/_torchrl.cpython-312-darwin.so +0 -0
- torchrl/collectors/collectors.py +13 -3
- torchrl/data/llm/history.py +36 -0
- torchrl/modules/distributions/discrete.py +1 -1
- torchrl/modules/llm/policies/common.py +37 -15
- torchrl/modules/llm/policies/transformers_wrapper.py +90 -53
- torchrl/modules/llm/policies/vllm_wrapper.py +50 -23
- torchrl/version.py +2 -2
- {torchrl_nightly-2025.7.16.dist-info → torchrl_nightly-2025.7.18.dist-info}/METADATA +1 -1
- {torchrl_nightly-2025.7.16.dist-info → torchrl_nightly-2025.7.18.dist-info}/RECORD +13 -13
- {torchrl_nightly-2025.7.16.dist-info → torchrl_nightly-2025.7.18.dist-info}/WHEEL +0 -0
- {torchrl_nightly-2025.7.16.dist-info → torchrl_nightly-2025.7.18.dist-info}/licenses/LICENSE +0 -0
- {torchrl_nightly-2025.7.16.dist-info → torchrl_nightly-2025.7.18.dist-info}/top_level.txt +0 -0
torchrl/_torchrl.cpython-312-darwin.so
CHANGED
Binary file
torchrl/collectors/collectors.py
CHANGED
@@ -686,6 +686,10 @@ class SyncDataCollector(DataCollectorBase):
             policy = RandomPolicy(env.full_action_spec)
         elif policy_factory is not None:
             raise TypeError("policy_factory cannot be used with policy argument.")
+        # If the underlying policy has a state_dict, we keep a reference to the policy and
+        # do all policy weight saving/loading through it
+        if hasattr(policy, "state_dict"):
+            self._policy_w_state_dict = policy
 
         if trust_policy is None:
             trust_policy = isinstance(policy, (RandomPolicy, CudaGraphModule))
@@ -1686,8 +1690,8 @@ class SyncDataCollector(DataCollectorBase):
         else:
             env_state_dict = OrderedDict()
 
-        if hasattr(self
-        policy_state_dict = self.
+        if hasattr(self, "_policy_w_state_dict"):
+            policy_state_dict = self._policy_w_state_dict.state_dict()
         state_dict = OrderedDict(
             policy_state_dict=policy_state_dict,
             env_state_dict=env_state_dict,
@@ -1711,7 +1715,13 @@ class SyncDataCollector(DataCollectorBase):
         if strict or "env_state_dict" in state_dict:
             self.env.load_state_dict(state_dict["env_state_dict"], **kwargs)
         if strict or "policy_state_dict" in state_dict:
-            self
+            if not hasattr(self, "_policy_w_state_dict"):
+                raise ValueError(
+                    "Underlying policy does not have state_dict to load policy_state_dict into."
+                )
+            self._policy_w_state_dict.load_state_dict(
+                state_dict["policy_state_dict"], **kwargs
+            )
         self._frames = state_dict["frames"]
         self._iter = state_dict["iter"]
 
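The collector now keeps a direct reference to any policy that exposes a state_dict and routes weight saving/loading through it. A minimal sketch of that round trip, using a hypothetical TinyCollector rather than the real SyncDataCollector:

import copy
from collections import OrderedDict

import torch
from torch import nn


class TinyCollector:
    """Hypothetical stand-in for SyncDataCollector, reduced to the state_dict logic."""

    def __init__(self, policy):
        # Keep a reference only if the policy can actually produce a state_dict.
        if hasattr(policy, "state_dict"):
            self._policy_w_state_dict = policy

    def state_dict(self) -> OrderedDict:
        policy_state_dict = OrderedDict()
        if hasattr(self, "_policy_w_state_dict"):
            policy_state_dict = self._policy_w_state_dict.state_dict()
        return OrderedDict(policy_state_dict=policy_state_dict)

    def load_state_dict(self, state_dict) -> None:
        if not hasattr(self, "_policy_w_state_dict"):
            raise ValueError(
                "Underlying policy does not have state_dict to load policy_state_dict into."
            )
        self._policy_w_state_dict.load_state_dict(state_dict["policy_state_dict"])


policy = nn.Linear(4, 2)
collector = TinyCollector(policy)
snapshot = copy.deepcopy(collector.state_dict())  # clone, since state_dict holds live tensors
with torch.no_grad():
    policy.weight.zero_()
collector.load_state_dict(snapshot)  # weights come back from the snapshot
assert policy.weight.abs().sum() > 0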
torchrl/data/llm/history.py
CHANGED
@@ -713,6 +713,42 @@ class History(TensorClass["nocast"]):
         | transformers.AutoProcessor # noqa: F821
         | None = None,
     ) -> History:
+        r"""Inverts a chat template into a History object.
+
+        Args:
+            text (str | list[str]): The chat template to invert.
+            chat_template_name (str, optional): The name of the chat template to use.
+            tokenizer (transformers.AutoTokenizer | transformers.AutoProcessor, optional): The tokenizer to use.
+
+        Returns:
+            History: The inverted History object.
+
+        Examples:
+            >>> from torchrl.data.llm.history import History
+            >>> from transformers import AutoTokenizer
+            >>> tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
+            >>> text = "<|im_start|>system\nYou are a helpful assistant.\n<|im_end|>\n<|im_start|>user\nWrite a python script that gives the capital of France or Germany.\n<|im_end|>\n<|im_start|>assistant\n<think>The capital of France is Paris, the capital of Germany is Berlin.</think>\n<answer><python>\n"
+            >>> history = History.from_text(text, tokenizer=tokenizer)
+            >>> print(history)
+            History(
+                content=NonTensorStack(
+                    ['You are a helpful assistant.', 'Write a python s...,
+                    batch_size=torch.Size([3]),
+                    device=None),
+                is_complete=NonTensorStack(
+                    [True, True, False],
+                    batch_size=torch.Size([3]),
+                    device=None),
+                role=NonTensorStack(
+                    ['system', 'user', 'assistant'],
+                    batch_size=torch.Size([3]),
+                    device=None),
+                tool_calls=None,
+                tool_responses=None,
+                batch_size=torch.Size([3]),
+                device=None,
+                is_shared=False)
+        """
         if chat_template_name is None:
             if chat_template is not None:
                 # TODO: find best match given template
torchrl/modules/distributions/discrete.py
CHANGED
@@ -352,7 +352,7 @@ class MaskedCategorical(D.Categorical):
         logits = self.logits
         if logits.ndim > 2:
             # Bring channels in 2nd dim
-            logits = logits.
+            logits = logits.permute(0, -1, *range(1, logits.ndim - 1))
         original_value_shape = None
         if logits.ndim == 1 and value.ndim >= 1:
             if value.ndim >= 2:
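The one-line fix above moves the class dimension of the logits from the last position to dim 1 before the cross-entropy call. A small standalone illustration (not torchrl code) of why that permute is needed:

import torch

# Distribution-style logits carry the class dim last, i.e. (N, d1, ..., dk, C),
# while torch.nn.functional.cross_entropy expects it in dim 1, i.e. (N, C, d1, ..., dk).
logits = torch.randn(4, 5, 7, 10)        # (batch, d1, d2, num_classes)
value = torch.randint(10, (4, 5, 7))     # class indices with shape (batch, d1, d2)

# Bring the class dim from last position to dim 1, keeping the remaining dims in order.
logits_c_first = logits.permute(0, -1, *range(1, logits.ndim - 1))  # (4, 10, 5, 7)

loss = torch.nn.functional.cross_entropy(logits_c_first, value, reduction="none")
print(loss.shape)  # torch.Size([4, 5, 7])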
torchrl/modules/llm/policies/common.py
CHANGED
@@ -9,8 +9,8 @@ import weakref
 from typing import Any, Literal, overload
 
 import torch
-from tensordict import NestedKey, TensorDictBase
-from tensordict.nn import TensorDictModuleBase
+from tensordict import lazy_stack, NestedKey, TensorDictBase
+from tensordict.nn import TensorDictModuleBase
 from tensordict.tensorclass import TensorClass
 from tensordict.utils import _zip_strict
 from torch import distributions as D
@@ -175,29 +175,35 @@ class ChatHistory(TensorClass["nocast"]):
     def __post_init__(self):
         # Check that all history objects have one more batch dimension than the ChatHistory object
         if self.prompt is not None:
-            if self.prompt
+            if getattr(self.prompt, "batch_dims", None) == self.batch_dims:
                 warnings.warn(
                     "Prompt history should have one more batch dimension than the ChatHistory object to handle multi-turn conversations, "
                     f"got {self.prompt.batch_dims} and {self.batch_dims}. "
                     "The batch dimension of the ChatHistory object will be unsqueezed along the last dimension."
                 )
-                self.prompt =
+                self.prompt = lazy_stack(
+                    [self.prompt], -1
+                )  # equivalent to unsqueeze(-1) but make sure it's a lazy stack
         if self.response is not None:
-            if self.response
+            if getattr(self.response, "batch_dims", None) == self.batch_dims:
                 warnings.warn(
                     "Response history should have one more batch dimension than the ChatHistory object to handle multi-turn conversations, "
                     f"got {self.response.batch_dims} and {self.batch_dims}. "
                     "The batch dimension of the ChatHistory object will be unsqueezed along the last dimension."
                 )
-                self.response =
+                self.response = lazy_stack(
+                    [self.response], -1
+                )  # equivalent to unsqueeze(-1) but make sure it's a lazy stack
         if self.full is not None:
-            if self.full
+            if getattr(self.full, "batch_dims", None) == self.batch_dims:
                 warnings.warn(
                     "Full history should have one more batch dimension than the ChatHistory object to handle multi-turn conversations, "
                     f"got {self.full.batch_dims} and {self.batch_dims}. "
                     "The batch dimension of the ChatHistory object will be unsqueezed along the last dimension."
                 )
-                self.full =
+                self.full = lazy_stack(
+                    [self.full], -1
+                )  # equivalent to unsqueeze(-1) but make sure it's a lazy stack
 
 
 class LogProbs(TensorClass["nocast"]):
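The __post_init__ change replaces a plain unsqueeze with lazy_stack([obj], -1). A minimal sketch of the difference, assuming tensordict's lazy_stack accepts a negative stacking dim here (as the diff itself relies on):

import torch
from tensordict import TensorDict, lazy_stack

td = TensorDict({"tokens": torch.arange(6).view(3, 2)}, batch_size=[3])

# Stacking a single tensordict along a new last dim adds one batch dimension,
# like unsqueeze(-1), but the result stays a lazy (non-consolidated) stack.
stacked = lazy_stack([td], -1)
print(stacked.batch_size)        # torch.Size([3, 1])
print(type(stacked).__name__)    # LazyStackedTensorDict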
@@ -482,7 +488,7 @@ class LLMWrapperBase(TensorDictModuleBase):
                 "You can create a new version of this wrapper using the `get_new_version` method."
             )
 
-        td_out = self(tensordict.copy())
+        td_out = self.forward(tensordict.copy(), logits_only=True)
 
         # Get logits/log-probs
         if as_padded_tensor is None:
@@ -557,7 +563,7 @@ class LLMWrapperBase(TensorDictModuleBase):
                 "get_dist_with_prompt_mask is not implemented for generate=True. "
                 "You can create a new version of this wrapper using the `get_new_version` method."
             )
-        td_out = self(tensordict.copy())
+        td_out = self.forward(tensordict.copy(), logits_only=True)
 
         # Try to get prompt tokens first
         if self.pad_output:
@@ -668,7 +674,7 @@ class LLMWrapperBase(TensorDictModuleBase):
                 "get_dist_with_assistant_mask is not implemented for generate=True. "
                 "You can create a new version of this wrapper using the `get_new_version` method."
            )
-        td_out = self(tensordict.copy())
+        td_out = self.forward(tensordict.copy(), logits_only=True)
         # Update the tokens key to reflect the tokenized history when querying the log-probs
         tensordict.update(
             td_out,
@@ -737,7 +743,7 @@ class LLMWrapperBase(TensorDictModuleBase):
                 "get_dist_with_attention_mask is not implemented for generate=True. "
                 "You can create a new version of this wrapper using the `get_new_version` method."
             )
-        td_out = self(tensordict.copy())
+        td_out = self.forward(tensordict.copy(), logits_only=True)
         if self.pad_output:
             logits = td_out.get(logits_key)
             attention_mask = td_out.get(attention_mask_key)
@@ -794,7 +800,7 @@ class LLMWrapperBase(TensorDictModuleBase):
                 "get_dist_with_custom_mask is not implemented for generate=True. "
                 "You can create a new version of this wrapper using the `get_new_version` method."
             )
-        td_out = self(tensordict.copy())
+        td_out = self.forward(tensordict.copy(), logits_only=True)
         if self.pad_output:
             logits = td_out.get(logits_key)
         else:
@@ -841,8 +847,24 @@
         """
         return self._get_dist_with_attention_mask(tensordict, **kwargs)
 
-
-
+    def forward(
+        self,
+        tensordict: TensorDictBase,
+        *,
+        tensordict_out: TensorDictBase | None = None,
+        logits_only: bool = False,
+        **kwargs,
+    ) -> TensorDictBase:  # noqa: D417
+        """Forward pass for the LLM policy.
+
+        Args:
+            tensordict (TensorDictBase): The input tensordict.
+
+        Keyword Args:
+            tensordict_out (TensorDictBase | None): The output tensordict.
+            logits_only (bool): Whether to return only the logits. Only effective if generate=False. Defaults to `False`.
+        """
+        raise NotImplementedError
 
     def _check_padded(self, val: torch.Tensor) -> torch.Tensor:
         """Check that a value is a padded tensor."""
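The new abstract forward adds a logits_only keyword that the get_dist* helpers above now pass. A minimal sketch of that contract with a hypothetical TinyWrapper (not one of the real wrappers):

import torch
from tensordict import TensorDict
from torch import distributions as D


class TinyWrapper:
    vocab_size = 16  # assumption for the sketch

    def forward(self, td: TensorDict, *, tensordict_out=None, logits_only: bool = False, **kwargs):
        # Stand-in "model call": produce logits; compute log-probs only when asked to.
        logits = torch.randn(*td["tokens"].shape, self.vocab_size)
        out = td.copy().set("logits", logits)
        if not logits_only:
            log_probs = logits.log_softmax(-1).gather(-1, td["tokens"].unsqueeze(-1)).squeeze(-1)
            out.set("log_probs", log_probs)
        return out

    def _get_dist(self, td: TensorDict) -> D.Categorical:
        td_out = self.forward(td.copy(), logits_only=True)  # mirrors the diff
        return D.Categorical(logits=td_out.get("logits"))


td = TensorDict({"tokens": torch.randint(16, (2, 5))}, batch_size=[2])
print(TinyWrapper()._get_dist(td).sample().shape)  # torch.Size([2, 5])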
torchrl/modules/llm/policies/transformers_wrapper.py
CHANGED
@@ -13,6 +13,7 @@ from typing import Literal
 import torch
 from tensordict import (
     lazy_stack,
+    LazyStackedTensorDict,
     MetaData,
     NonTensorStack,
     set_list_to_stack,
@@ -468,19 +469,32 @@ class TransformersWrapper(LLMWrapperBase):
     def forward(
         self,
         tensordict: TensorDictBase,
+        *,
         tensordict_out: TensorDictBase | None = None,
+        logits_only: bool = False,
         **kwargs,
     ) -> TensorDictBase:
+        tensordict_orig = tensordict
         if not tensordict.ndim:
+            if tensordict_out is not None:
+                raise ValueError(
+                    "tensordict_out must not be provided when tensordict.ndim == 0. If this is needed, "
+                    "please submit an issue on github."
+                )
             # unsqueeze - squeeze the input
-
-            return self(lazy_stack([tensordict])).squeeze(0)
-        except Exception as e:
-            raise RuntimeError(
-                f"Unsqueeze/squeeze failed. Inputs to {type(self).__name__} should ideally be 1 dimensional."
-            ) from e
+            return self.forward(lazy_stack([tensordict]), logits_only=logits_only)[0]
         elif tensordict.ndim > 1:
-
+            if tensordict_out is not None:
+                raise ValueError(
+                    "tensordict_out must not be provided when tensordict.ndim > 1. If this is needed, "
+                    "please submit an issue on github."
+                )
+            return self.forward(tensordict.reshape(-1), logits_only=logits_only).view(
+                tensordict.shape
+            )
+
+        if not isinstance(tensordict, LazyStackedTensorDict):
+            tensordict = tensordict.to_lazystack(0)
 
         _source_device = None
         if self._device:
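The rewritten forward no longer relies on a try/except around squeeze/unsqueeze; it dispatches on the batch dimensionality instead. A minimal standalone sketch of that dispatch (hypothetical forward/_forward_flat helpers, not the wrapper itself):

import torch
from tensordict import TensorDict, lazy_stack


def _forward_flat(td):
    # Stand-in for the real 1-d code path.
    return td.set("out", td["x"] * 2)


def forward(td):
    if not td.ndim:
        # 0-d input: stack into a 1-element batch, process, index back out.
        return forward(lazy_stack([td]))[0]
    if td.ndim > 1:
        # >1-d input: flatten the batch dims, process, then restore the shape.
        return forward(td.reshape(-1)).view(td.shape)
    return _forward_flat(td)


td = TensorDict({"x": torch.ones(2, 3)}, batch_size=[2, 3])
print(forward(td)["out"].shape)  # torch.Size([2, 3])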
@@ -517,17 +531,23 @@ class TransformersWrapper(LLMWrapperBase):
             if self.generate:
                 out = self._from_transformers_generate_history(tensordict, cfg, out)
             else:
-                out = self._from_transformers_logprobs_history(
+                out = self._from_transformers_logprobs_history(
+                    tensordict, cfg, out, logits_only=logits_only
+                )
         elif self.input_mode == "text":
             if self.generate:
                 out = self._from_transformers_generate_text(tensordict, cfg, out)
             else:
-                out = self._from_transformers_logprobs_text(
+                out = self._from_transformers_logprobs_text(
+                    tensordict, cfg, out, logits_only=logits_only
+                )
         elif self.input_mode == "tokens":
             if self.generate:
                 out = self._from_transformers_generate_tokens(tensordict, cfg, out)
             else:
-                out = self._from_transformers_logprobs_tokens(
+                out = self._from_transformers_logprobs_tokens(
+                    tensordict, cfg, out, logits_only=logits_only
+                )
 
         if _source_device:
             out = out.to(_source_device)
@@ -535,7 +555,7 @@ class TransformersWrapper(LLMWrapperBase):
         if tensordict_out is None:
             if self.inplace is True:
                 # The output is the input
-                tensordict_out =
+                tensordict_out = tensordict_orig
             elif self.inplace is False:
                 # The output is the new structure
                 tensordict_out = out
@@ -690,7 +710,7 @@ class TransformersWrapper(LLMWrapperBase):
         result.set(self.history_key, history_chat)
         return result
 
-    def _from_transformers_logprobs_history(self, td, cfg, out):
+    def _from_transformers_logprobs_history(self, td, cfg, out, logits_only=False):
         """Compute log-probs from history input."""
         from torchrl.data.llm import History
 
@@ -731,7 +751,9 @@ class TransformersWrapper(LLMWrapperBase):
             raise ValueError(
                 f"Expected TensorDictBase for history input, got {type(response_tokens)}"
             )
-        result = self._logprobs_from_history_tokens(
+        result = self._logprobs_from_history_tokens(
+            response_tokens, cfg, out, logits_only=logits_only
+        )
         text_result = Text._from_tensordict(result.empty())
         result.set(self.text_key, text_result)
         result[self.text_key, "full"] = text_full
@@ -952,7 +974,9 @@ class TransformersWrapper(LLMWrapperBase):
             result = result.to(cast)
         return result
 
-    def _logprobs_from_history_tokens(
+    def _logprobs_from_history_tokens(
+        self, response_tokens, cfg, out, logits_only=False
+    ):
         """Compute log-probs from history tokens."""
         pad_val = self.tokenizer.pad_token_id
 
@@ -996,6 +1020,7 @@ class TransformersWrapper(LLMWrapperBase):
             tokens_full_padded,
             attention_mask_full_padded,
             pad_val,
+            logits_only=logits_only,
         )
 
         # Build output TensorClass objects
@@ -1051,19 +1076,20 @@ class TransformersWrapper(LLMWrapperBase):
         tokens_obj.padded = MetaData(self.pad_output)
         out.set(self.tokens_key, tokens_obj)
 
-
-
-
-        if self.pad_output:
-            log_probs_obj.full = log_probs_full_padded
-        else:
-            log_probs_full_unpadded = _unpad_tensors(
-                log_probs_full_padded, attention_mask_full_padded, as_nested=False
+        if not logits_only:
+            log_probs_obj = LogProbs._from_tensordict(
+                TensorDict(batch_size=out.batch_size).to_lazystack(0)
             )
-
-
-
-
+            if self.pad_output:
+                log_probs_obj.full = log_probs_full_padded
+            else:
+                log_probs_full_unpadded = _unpad_tensors(
+                    log_probs_full_padded, attention_mask_full_padded, as_nested=False
+                )
+                log_probs_obj.full = log_probs_full_unpadded
+            log_probs_obj.response = None
+            log_probs_obj.padded = MetaData(self.pad_output)
+            out.set(self.log_probs_key, log_probs_obj)
 
         # Add logits to output if we're in a get_dist call
         if self._in_get_dist_call:
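With logits_only=True, the whole LogProbs construction above is skipped. For the unpadded branch, _unpad_tensors is a torchrl-internal helper; a rough sketch (not that helper) of what unpadding with an attention mask amounts to:

import torch

log_probs_full_padded = torch.tensor([[0.1, 0.2, 0.0], [0.3, 0.0, 0.0]])
attention_mask_full_padded = torch.tensor([[True, True, False], [True, False, False]])

# Keep, per row, only the positions the mask marks as real tokens.
log_probs_full_unpadded = [
    lp[mask] for lp, mask in zip(log_probs_full_padded, attention_mask_full_padded)
]
print([t.tolist() for t in log_probs_full_unpadded])  # [[0.1, 0.2], [0.3]]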
@@ -1095,7 +1121,7 @@ class TransformersWrapper(LLMWrapperBase):
             raise ValueError(f"Expected list of text for text input, got {type(text)}")
         return self._generate_from_text(text, cfg, out)
 
-    def _from_transformers_logprobs_text(self, td, cfg, out):
+    def _from_transformers_logprobs_text(self, td, cfg, out, logits_only=False):
         """Compute log-probs from text input."""
         # Validate input
         if self.input_key not in td:
@@ -1168,6 +1194,7 @@ class TransformersWrapper(LLMWrapperBase):
             input_ids_full_padded,
             attention_mask_full_padded,
             self.tokenizer.pad_token_id,
+            logits_only=logits_only,
         )
 
         # Build output TensorClass objects
@@ -1212,19 +1239,20 @@ class TransformersWrapper(LLMWrapperBase):
         masks_obj.padded = MetaData(self.pad_output)
         out.set(self.masks_key, masks_obj)
 
-
-
-
-        if self.pad_output:
-            log_probs_obj.full = log_probs_full_padded
-        else:
-            log_probs_full_unpadded = _unpad_tensors(
-                log_probs_full_padded, attention_mask_full_padded, as_nested=False
+        if not logits_only:
+            log_probs_obj = LogProbs._from_tensordict(
+                TensorDict(batch_size=out.batch_size).to_lazystack(0)
            )
-
-
-
-
+            if self.pad_output:
+                log_probs_obj.full = log_probs_full_padded
+            else:
+                log_probs_full_unpadded = _unpad_tensors(
+                    log_probs_full_padded, attention_mask_full_padded, as_nested=False
+                )
+                log_probs_obj.full = log_probs_full_unpadded
+            log_probs_obj.response = None
+            log_probs_obj.padded = MetaData(self.pad_output)
+            out.set(self.log_probs_key, log_probs_obj)
 
         # Add logits to output if we're in a get_dist call
         if self._in_get_dist_call:
@@ -1416,7 +1444,11 @@ class TransformersWrapper(LLMWrapperBase):
         return out
 
     def _from_transformers_logprobs_tokens(
-        self,
+        self,
+        td: TensorDictBase,
+        cfg: dict | None,
+        out: TensorDictBase,
+        logits_only=False,
     ) -> TensorDictBase:
         """Compute log-probs from tokens input."""
         # Validate input
@@ -1470,6 +1502,7 @@ class TransformersWrapper(LLMWrapperBase):
             input_ids_full_padded,
             attention_mask_full_padded,
             self.tokenizer.pad_token_id,
+            logits_only=logits_only,
         )
 
         # Build output TensorClass objects
@@ -1514,19 +1547,20 @@ class TransformersWrapper(LLMWrapperBase):
         masks_obj.padded = MetaData(self.pad_output)
         out.set(self.masks_key, masks_obj)
 
-
-
-
-        if self.pad_output:
-            log_probs_obj.full = log_probs_full_padded
-        else:
-            log_probs_full_unpadded = _unpad_tensors(
-                log_probs_full_padded, attention_mask_full_padded, as_nested=False
+        if not logits_only:
+            log_probs_obj = LogProbs._from_tensordict(
+                TensorDict(batch_size=out.batch_size).to_lazystack(0)
             )
-
-
-
-
+            if self.pad_output:
+                log_probs_obj.full = log_probs_full_padded
+            else:
+                log_probs_full_unpadded = _unpad_tensors(
+                    log_probs_full_padded, attention_mask_full_padded, as_nested=False
+                )
+                log_probs_obj.full = log_probs_full_unpadded
+            log_probs_obj.response = None
+            log_probs_obj.padded = MetaData(self.pad_output)
+            out.set(self.log_probs_key, log_probs_obj)
 
         # Add logits to output if we're in a get_dist call
         if self._in_get_dist_call:
@@ -1567,7 +1601,7 @@ class TransformersWrapper(LLMWrapperBase):
         return log_probs, logits
 
     def _compute_log_probs_from_model_output(
-        self, model_output, input_ids, attention_mask, pad_val
+        self, model_output, input_ids, attention_mask, pad_val, logits_only=False
     ):
         """Compute log-probs from model output without modifying original tensors.
 
@@ -1576,6 +1610,7 @@ class TransformersWrapper(LLMWrapperBase):
            input_ids: Original input token ids
            attention_mask: Original attention mask
            pad_val: Padding token value to ignore in loss computation
+           logits_only: Whether to return only the logits.
 
         Returns:
            tuple: (log_probs, shifted_logits) where log_probs are the computed log probabilities
@@ -1600,6 +1635,8 @@ class TransformersWrapper(LLMWrapperBase):
             raise ValueError(
                 f"The logits shape {shifted_logits.shape} does not match the input ids shape {shifted_input_ids.shape}"
             )
+        if logits_only:
+            return None, shifted_logits
 
         # Compute log-probs
         td = TensorDict(
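A rough standalone sketch of the shape logic behind _compute_log_probs_from_model_output and the new early return (hypothetical, simplified helper rather than the real method): logits are shifted so that position i predicts token i+1, and when only the logits are needed the per-token log-prob gather is skipped entirely.

import torch


def compute_log_probs_from_logits(logits, input_ids, logits_only=False):
    shifted_logits = logits[:, :-1]        # predictions for tokens 1..T-1
    shifted_input_ids = input_ids[:, 1:]   # the tokens those positions predict
    if logits_only:
        return None, shifted_logits
    log_probs = (
        shifted_logits.log_softmax(-1)
        .gather(-1, shifted_input_ids.unsqueeze(-1))
        .squeeze(-1)
    )
    return log_probs, shifted_logits


logits = torch.randn(2, 6, 32)
input_ids = torch.randint(32, (2, 6))
log_probs, shifted = compute_log_probs_from_logits(logits, input_ids)
print(log_probs.shape, shifted.shape)  # torch.Size([2, 5]) torch.Size([2, 5, 32])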
torchrl/modules/llm/policies/vllm_wrapper.py
CHANGED
@@ -11,6 +11,7 @@ from typing import Any, Literal
 import torch
 from tensordict import (
     lazy_stack,
+    LazyStackedTensorDict,
     MetaData,
     NonTensorStack,
     set_list_to_stack,
@@ -500,19 +501,32 @@ class vLLMWrapper(LLMWrapperBase):
     def forward(
         self,
         tensordict: TensorDictBase,
+        *,
         tensordict_out: TensorDictBase | None = None,
+        logits_only: bool = False,
         **kwargs,
     ) -> TensorDictBase:
+        tensordict_orig = tensordict
         if not tensordict.ndim:
+            if tensordict_out is not None:
+                raise ValueError(
+                    "tensordict_out must not be provided when tensordict.ndim == 0. If this is needed, "
+                    "please submit an issue on github."
+                )
             # unsqueeze - squeeze the input
-
-            return self(lazy_stack([tensordict])).squeeze(0)
-        except Exception as e:
-            raise RuntimeError(
-                f"Unsqueeze/squeeze failed. Inputs to {type(self).__name__} should ideally be 1 dimensional."
-            ) from e
+            return self.forward(lazy_stack([tensordict]), logits_only=logits_only)[0]
         elif tensordict.ndim > 1:
-
+            if tensordict_out is not None:
+                raise ValueError(
+                    "tensordict_out must not be provided when tensordict.ndim > 1. If this is needed, "
+                    "please submit an issue on github."
+                )
+            return self.forward(tensordict.reshape(-1), logits_only=logits_only).view(
+                tensordict.shape
+            )
+
+        if not isinstance(tensordict, LazyStackedTensorDict):
+            tensordict = tensordict.to_lazystack(0)
 
         _source_device = None
         if self._device:
@@ -567,7 +581,7 @@ class vLLMWrapper(LLMWrapperBase):
         if tensordict_out is None:
             if self.inplace is True:
                 # The output is the input
-                tensordict_out =
+                tensordict_out = tensordict_orig
             elif self.inplace is False:
                 # The output is the new structure
                 tensordict_out = out
@@ -1242,12 +1256,14 @@ class vLLMWrapper(LLMWrapperBase):
 
         generate_kwargs = {"sampling_params": sampling_params}
         args = ()
+        empirical_attention_mask = None
 
         if tokens_prompt_unpadded is None:
             # TODO: To be on the safe side, we may do this even in the unpadded case since we're not sure
             # the user passed an unpadded tensor in the first place.
+            empirical_attention_mask = tokens_prompt_padded != self.padding_value
             tokens_prompt_list = self._to_list(
-                tokens_prompt_padded,
+                tokens_prompt_padded, empirical_attention_mask
             )
         else:
             tokens_prompt_list = self._to_list(tokens_prompt_unpadded, None)
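A small sketch of the "empirical attention mask" idea used above: when only a padded token tensor is available, positions equal to the padding value are treated as padding, and the mask recovers per-row token lists.

import torch

padding_value = 0
tokens_prompt_padded = torch.tensor([[11, 12, 0, 0], [21, 22, 23, 0]])
empirical_attention_mask = tokens_prompt_padded != padding_value

tokens_prompt_list = [
    row[mask].tolist()
    for row, mask in zip(tokens_prompt_padded, empirical_attention_mask)
]
print(tokens_prompt_list)  # [[11, 12], [21, 22, 23]]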
@@ -1365,6 +1381,22 @@ class vLLMWrapper(LLMWrapperBase):
                 padding_value=self.padding_value,
                 padding_side="right",
             )
+            if (
+                prompt_logprobs_padded.shape[-1]
+                != tokens_prompt_padded.shape[-1]
+            ):
+                tshape = tokens_prompt_padded.shape
+                oshape = prompt_logprobs_padded.shape
+                # it could be that the input was padded already - padding again then
+                prompt_logprobs_padded = torch.cat(
+                    [
+                        prompt_logprobs_padded.new_zeros(
+                            tshape[:-1] + (tshape[-1] - oshape[-1],)
+                        ),
+                        prompt_logprobs_padded,
+                    ],
+                    -1,
+                )
         else:
             prompt_logprobs_list = request_output_tc.get(
                 "prompt_logprobs",
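A small sketch of the re-padding step above: if vLLM returns prompt log-probs shorter than the (already padded) prompt tokens, zeros are prepended on the left so both tensors line up position by position.

import torch

tokens_prompt_padded = torch.zeros(2, 6)     # padded prompt, length 6
prompt_logprobs_padded = torch.randn(2, 4)   # log-probs only for 4 positions

tshape, oshape = tokens_prompt_padded.shape, prompt_logprobs_padded.shape
if oshape[-1] != tshape[-1]:
    prompt_logprobs_padded = torch.cat(
        [
            prompt_logprobs_padded.new_zeros(tshape[:-1] + (tshape[-1] - oshape[-1],)),
            prompt_logprobs_padded,
        ],
        -1,
    )
print(prompt_logprobs_padded.shape)  # torch.Size([2, 6])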
@@ -1490,26 +1522,21 @@ class vLLMWrapper(LLMWrapperBase):
 
         request_output_tc = _RequestOutput_tc.from_request_output(tokens_out_stuct)
 
+        # For unpadded case, extract from each sequence
+        log_probs_full_unpadded = request_output_tc.get("prompt_logprobs", as_list=True)
+
         # Extract log-probs from prompt_logprobs
         if self.pad_output:
             # For padded case, use all prompt_logprobs
-
-
-
-
-                padding_side="left",
+            if attention_mask_full_padded is not None:
+                attention_mask_full_padded = tokens_full_padded != self.padding_value
+            log_probs_full_padded = torch.zeros_like(
+                tokens_full_padded, dtype=torch.get_default_dtype()
             )
-
-
-            attention_mask_full_padded = tokens_full_padded != self.padding_value
-            log_probs_full_padded = torch.where(
-                attention_mask_full_padded, log_probs_full_padded, 0.0
+            log_probs_full_padded[attention_mask_full_padded] = torch.cat(
+                log_probs_full_unpadded, -1
             )
         else:
-            # For unpadded case, extract from each sequence
-            log_probs_full_unpadded = request_output_tc.get(
-                "prompt_logprobs", as_list=True
-            )
             self._check_not_padded(log_probs_full_unpadded)
 
         assistant_mask_full_padded = None
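A small sketch of the padded-case assembly above: the per-sequence (unpadded) prompt log-probs are concatenated and written into a zero tensor at the positions where the padded token tensor holds real, non-padding tokens.

import torch

padding_value = 0
tokens_full_padded = torch.tensor([[5, 6, 7, 0], [8, 9, 0, 0]])
log_probs_full_unpadded = [torch.tensor([-0.1, -0.2, -0.3]), torch.tensor([-0.4, -0.5])]

attention_mask_full_padded = tokens_full_padded != padding_value
log_probs_full_padded = torch.zeros_like(tokens_full_padded, dtype=torch.get_default_dtype())
log_probs_full_padded[attention_mask_full_padded] = torch.cat(log_probs_full_unpadded, -1)
print(log_probs_full_padded)
# tensor([[-0.1000, -0.2000, -0.3000,  0.0000],
#         [-0.4000, -0.5000,  0.0000,  0.0000]])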
torchrl/version.py
CHANGED
@@ -1,2 +1,2 @@
-__version__ = '2025.7.16'
-git_version = '
+__version__ = '2025.7.18'
+git_version = '4001d9cb73cea4498b0fdfe420effc58a5a336be'
{torchrl_nightly-2025.7.16.dist-info → torchrl_nightly-2025.7.18.dist-info}/RECORD
RENAMED
@@ -3,11 +3,11 @@ build_tools/setup_helpers/__init__.py,sha256=7l8TvVqxKezgzKCLuRv20mvGLloprFVZYm8
 build_tools/setup_helpers/extension.py,sha256=4-PDLr-pw40bJnd9SfxnTaSjUyuXU_Tg8yOg69Kl0o4,5914
 torchrl/__init__.py,sha256=mhDBx2UIuBKc0gmi8dVNHokQ6tCbIovruZmyAxcSsy8,2938
 torchrl/_extension.py,sha256=z7wQ8i1iYWYcnygq_j0nq9sT-koY13tfHhTLNbMk17Q,2353
-torchrl/_torchrl.cpython-312-darwin.so,sha256=
+torchrl/_torchrl.cpython-312-darwin.so,sha256=k_jisocYRQ0Z1X52Pu2ym9-02BXMnLrD7q0fOpElam8,1692224
 torchrl/_utils.py,sha256=Cw5EG6x5oSZF1iE3YCs1a32VUKp0rTXIs2u67q9zKUI,41078
-torchrl/version.py,sha256=
+torchrl/version.py,sha256=MHs4CxNjQupYI_f84bY7dOAAfPSU9yN6TOyxiS7tS8c,83
 torchrl/collectors/__init__.py,sha256=hJ3JD6shRku0BL6SzJQq44FZ5Q1RGR8LealFyU3FRn4,799
-torchrl/collectors/collectors.py,sha256=
+torchrl/collectors/collectors.py,sha256=HpaW-y0bQOaOql8_7VyEPJ084CulrVwn6iBpGYoHyH4,178287
 torchrl/collectors/utils.py,sha256=MlXrkYuDmV0Em-tVNQiLL32FWgPNDgceYYG_GgpiviA,11320
 torchrl/collectors/weight_update.py,sha256=nSIfs8ALsfggLoC2ylg1oOAqdGku1tt4e-50JCZJBww,21073
 torchrl/collectors/distributed/__init__.py,sha256=_24P0ALFunLhL-ls7EsssGUhJkZ_m3nw7krfMTwPqS0,705
@@ -41,7 +41,7 @@ torchrl/data/datasets/utils.py,sha256=nAFDTlBIPyEoPoJC-Hc_fcOhzE7UZQE4BwKxq15Vhv
 torchrl/data/datasets/vd4rl.py,sha256=z90MqrxKzod8TPGK0uzkC6vw5wQIE4cgrDAC4e72jyk,18262
 torchrl/data/llm/__init__.py,sha256=B4Ekok-w5PMiWcfmAGXaseaN6hWdNOr4WebeLrHfBVQ,975
 torchrl/data/llm/dataset.py,sha256=t-41hAzQcjrdoKwpHIMbcrT7pRcQ7DHl2a1-lr6E7W4,20703
-torchrl/data/llm/history.py,sha256=
+torchrl/data/llm/history.py,sha256=l9JSxIO5eLUFwHH5IZkANSrByYa8BGmtxMlNXYf2fbs,59640
 torchrl/data/llm/prompt.py,sha256=bg5LzJfwOq5Ns72KQMciIprMWAmDDinzdopwdopU04c,8380
 torchrl/data/llm/reward.py,sha256=FbPchNXG3smJV9NCbB5Yk4grsCa2Se4KZ_tojVLKWQM,8404
 torchrl/data/llm/topk.py,sha256=mYXCgJS4TuEVLZfTNccQd6kmC858AAh2Ygy0q_K1hlY,8365
@@ -139,7 +139,7 @@ torchrl/envs/transforms/vip.py,sha256=kmygbenw75rEYsKRq4X1hzEH_CRe1406NZZ8Hg2R_V
 torchrl/modules/__init__.py,sha256=XlAO0hulhDQNcKhbu3cFi8KJOHXNiAgmXeTfny0WBqE,4157
 torchrl/modules/distributions/__init__.py,sha256=RDFoYD9IX1FhwXk5R4M8khq42gdTOcVnUnKHfWCTZBQ,1597
 torchrl/modules/distributions/continuous.py,sha256=VPBugDuavJmyZ-RzemyLIFA02UCMLsm-rzBQrKcTlIA,25667
-torchrl/modules/distributions/discrete.py,sha256=
+torchrl/modules/distributions/discrete.py,sha256=7UE6X8LeTZkaTRFvKNcFSOoug_tOcD_u-FOh-39ZSC4,35581
 torchrl/modules/distributions/truncated_normal.py,sha256=-qM8vwxTzv3VsWphZwcueDQpHQ67IRnkDFKlTDkQQnY,5937
 torchrl/modules/distributions/utils.py,sha256=kXRvNHeKUePIgKgn7DnKqbhQ6ImFGgkFVRxITX2dwNU,7567
 torchrl/modules/llm/__init__.py,sha256=BTkn-8QKp_8sW_NTKP02yoWSJUsX0XL6L9chTJl6epc,737
@@ -147,9 +147,9 @@ torchrl/modules/llm/utils.py,sha256=gf_F-4bEMwkcI3jLQM7ifB7nsjRctGebB5E2c-AznO0,
 torchrl/modules/llm/backends/__init__.py,sha256=WdVy9EdiAfk8i5zFa49TEkRvcUd0L4Un4v6wqWBy8l8,438
 torchrl/modules/llm/backends/vllm.py,sha256=x57Xop1xd5ZShicsh47ZFmz4VpfZ3eCzVx7k0COvpqQ,9387
 torchrl/modules/llm/policies/__init__.py,sha256=nfZ2mcVuucxnY3WCuzIQrTLIf1yEd36k8-AlvwnSa8Y,545
-torchrl/modules/llm/policies/common.py,sha256=
-torchrl/modules/llm/policies/transformers_wrapper.py,sha256=
-torchrl/modules/llm/policies/vllm_wrapper.py,sha256=
+torchrl/modules/llm/policies/common.py,sha256=Kvn1cJQbp1EZtxWpAQ50TzZkwVtLAmryqiBHH2nK_wM,39112
+torchrl/modules/llm/policies/transformers_wrapper.py,sha256=oi-2KALM0pkH-u-Kd6WlnxfH9eGV2GzBqM410ANpPeM,75777
+torchrl/modules/llm/policies/vllm_wrapper.py,sha256=ReBvi2M9IAiwwBAR7GpDLSQhX0aC-dXPnHYb082Q0To,79632
 torchrl/modules/models/__init__.py,sha256=DrOG-7hynjjUh_tc2EqysiUiNMRiDR0WLtZql9TPNcI,1743
 torchrl/modules/models/batchrenorm.py,sha256=TojpTUluIcFdTSemIVRLGtB2O5q54mRHy3vJP6DuI5I,4750
 torchrl/modules/models/decision_transformer.py,sha256=Lttf_wZMNqXbB_vpxMYgEp18gEzOvm3NvMnxQkHkH4M,6604
@@ -223,8 +223,8 @@ torchrl/trainers/helpers/losses.py,sha256=sHlJqjh02t8cKN73X35Azd_OoWGurohLuviB8Y
 torchrl/trainers/helpers/models.py,sha256=ihTERG2c96E8cS3Tnul6a_ys6iDEEJmHh05p9blQTW8,21807
 torchrl/trainers/helpers/replay_buffer.py,sha256=ZUZHOa0TILyeWJ3iahzTJ6UvMl_0FdxuZfJEja94Bn8,2001
 torchrl/trainers/helpers/trainers.py,sha256=j6B5XA7_FFHMQeOIQwjNcO0CGE_4mZKUC9_jH_iqqh4,12071
-torchrl_nightly-2025.7.
-torchrl_nightly-2025.7.
-torchrl_nightly-2025.7.
-torchrl_nightly-2025.7.
-torchrl_nightly-2025.7.
+torchrl_nightly-2025.7.18.dist-info/licenses/LICENSE,sha256=xdjS4_xk-IwnLuIFCvTYTl9Y8aXRejqpmke3dGam_nI,1098
+torchrl_nightly-2025.7.18.dist-info/METADATA,sha256=K_Nmn84sw1xeD28lqIPqdhLjdaFchSMXuG2vjAajTn0,42990
+torchrl_nightly-2025.7.18.dist-info/WHEEL,sha256=9_3tTSxMJq-dgdzMiScNvtT5eTBVd3l6RgHS7HwTzpA,115
+torchrl_nightly-2025.7.18.dist-info/top_level.txt,sha256=JeTJ1jV7QJwLcUS1nr21aPn_wb-XlAZ9c-z_EH472JA,20
+torchrl_nightly-2025.7.18.dist-info/RECORD,,
{torchrl_nightly-2025.7.16.dist-info → torchrl_nightly-2025.7.18.dist-info}/WHEEL
RENAMED
File without changes
{torchrl_nightly-2025.7.16.dist-info → torchrl_nightly-2025.7.18.dist-info}/licenses/LICENSE
RENAMED
File without changes
{torchrl_nightly-2025.7.16.dist-info → torchrl_nightly-2025.7.18.dist-info}/top_level.txt
RENAMED
File without changes