torchrl-0.10.0-cp39-cp39-win_amd64.whl → torchrl-0.10.1-cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/test_llm.py +5 -0
- torchrl/__init__.py +4 -1
- torchrl/_torchrl.cp39-win_amd64.pyd +0 -0
- torchrl/_utils.py +3 -1
- torchrl/collectors/collectors.py +11 -10
- torchrl/collectors/distributed/generic.py +3 -3
- torchrl/collectors/distributed/ray.py +10 -4
- torchrl/collectors/distributed/rpc.py +3 -3
- torchrl/collectors/distributed/sync.py +3 -3
- torchrl/data/map/tree.py +2 -2
- torchrl/data/tensor_specs.py +191 -8
- torchrl/envs/batched_envs.py +1 -1
- torchrl/envs/common.py +1 -1
- torchrl/envs/custom/llm.py +3 -3
- torchrl/envs/llm/envs.py +3 -3
- torchrl/envs/transforms/transforms.py +2 -2
- torchrl/modules/distributions/discrete.py +1 -1
- torchrl/modules/llm/backends/vllm/vllm_async.py +1 -1
- torchrl/modules/llm/policies/transformers_wrapper.py +2 -1
- torchrl/modules/llm/policies/vllm_wrapper.py +1 -0
- torchrl/objectives/a2c.py +3 -3
- torchrl/objectives/cql.py +2 -2
- torchrl/objectives/crossq.py +2 -2
- torchrl/objectives/ddpg.py +1 -1
- torchrl/objectives/decision_transformer.py +2 -2
- torchrl/objectives/deprecated.py +2 -2
- torchrl/objectives/dqn.py +4 -4
- torchrl/objectives/gail.py +1 -1
- torchrl/objectives/iql.py +4 -4
- torchrl/objectives/multiagent/qmixer.py +1 -1
- torchrl/objectives/redq.py +2 -2
- torchrl/objectives/reinforce.py +3 -3
- torchrl/objectives/sac.py +5 -5
- torchrl/objectives/td3.py +2 -2
- torchrl/objectives/td3_bc.py +2 -2
- torchrl/record/loggers/wandb.py +3 -3
- {torchrl-0.10.0.dist-info → torchrl-0.10.1.dist-info}/METADATA +1 -1
- {torchrl-0.10.0.dist-info → torchrl-0.10.1.dist-info}/RECORD +42 -41
- torchrl-0.10.1.dist-info/entry_points.txt +2 -0
- {torchrl-0.10.0.dist-info → torchrl-0.10.1.dist-info}/LICENSE +0 -0
- {torchrl-0.10.0.dist-info → torchrl-0.10.1.dist-info}/WHEEL +0 -0
- {torchrl-0.10.0.dist-info → torchrl-0.10.1.dist-info}/top_level.txt +0 -0
benchmarks/test_llm.py
CHANGED
@@ -16,6 +16,11 @@ from torchrl.modules.llm.policies.transformers_wrapper import TransformersWrapper
 
 _has_transformers = importlib.import_module("transformers") is not None
 
+# Skip all these tests if gpu is not available
+pytestmark = pytest.mark.skipif(
+    not torch.cuda.is_available(), reason="GPU not available"
+)
+
 
 @pytest.fixture(scope="module")
 def transformers_wrapper():
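A module-level `pytestmark` applies its marker to every test collected from the file, which is what makes the four added lines above skip the whole benchmark module on CPU-only machines. A minimal standalone sketch (hypothetical test module, not part of the wheel):

    import pytest
    import torch

    # Applies to every test in this module.
    pytestmark = pytest.mark.skipif(
        not torch.cuda.is_available(), reason="GPU not available"
    )

    def test_needs_gpu():
        # Never runs on CPU-only machines; pytest reports it as skipped.
        assert torch.zeros(1, device="cuda").is_cuda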
torchrl/__init__.py
CHANGED
@@ -27,7 +27,10 @@ from ._extension import _init_extension
 try:
     from .version import __version__
 except ImportError:
-    __version__ = None
+    try:
+        from ._version import __version__
+    except ImportError:
+        __version__ = "0.0.0+unknown"
 
 try:
     from torch.compiler import is_dynamo_compiling
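The nested fallback keeps `torchrl.__version__` importable even when no version file was generated. A standalone sketch of the same chain for a hypothetical package `mypkg` (that `_version.py` is a build-time artifact is an assumption):

    def resolve_version() -> str:
        try:
            from mypkg.version import __version__  # written by the release build (assumption)
        except ImportError:
            try:
                from mypkg._version import __version__  # dev/source-build artifact (assumption)
            except ImportError:
                __version__ = "0.0.0+unknown"  # sentinel when neither file exists
        return __version__

    print(resolve_version())  # "0.0.0+unknown" if mypkg is absent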
torchrl/_torchrl.cp39-win_amd64.pyd
CHANGED
Binary file
torchrl/_utils.py
CHANGED
@@ -410,7 +410,9 @@ def accept_remote_rref_udf_invocation(decorated_class):
     """Class decorator that applies `accept_remote_rref_invocation` to all public methods."""
     # ignores private methods
     for name in dir(decorated_class):
-        method = getattr(decorated_class, name)
+        method = getattr(decorated_class, name, None)
+        if method is None:
+            continue
         if callable(method) and not name.startswith("_"):
             setattr(decorated_class, name, accept_remote_rref_invocation(method))
     return decorated_class
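The three-argument `getattr` matters because `dir()` can advertise names that do not resolve to attributes, in which case the two-argument form raises `AttributeError` mid-loop. A minimal illustration (the metaclass forcing a phantom name is contrived):

    class Meta(type):
        def __dir__(cls):
            # dir() trusts __dir__, so it can list names with no backing attribute
            return [*super().__dir__(), "phantom"]

    class Decorated(metaclass=Meta):
        def method(self):
            return 42

    for name in dir(Decorated):
        method = getattr(Decorated, name, None)  # None default skips "phantom"
        if method is None:
            continue
        # ... wrap public callables here, as the decorator above does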
torchrl/collectors/collectors.py
CHANGED
@@ -283,12 +283,13 @@ class DataCollectorBase(IterableDataset, metaclass=abc.ABCMeta):
     ) -> None:
         """Shuts down the collector when started asynchronously with the `start` method.
 
-
+        Args:
             timeout (float, optional): The maximum time to wait for the collector to shutdown.
             close_env (bool, optional): If True, the collector will close the contained environment.
                 Defaults to `True`.
 
         .. seealso:: :meth:`~.start`
+
         """
         return self.shutdown(timeout=timeout, close_env=close_env)
 
@@ -440,7 +441,7 @@ class SyncDataCollector(DataCollectorBase):
         - In all other cases an attempt to wrap it will be undergone as such: ``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
 
     .. note:: If the policy needs to be passed as a policy factory (e.g., in case it mustn't be serialized /
-        pickled directly), the
+        pickled directly), the ``policy_factory`` should be used instead.
 
     Keyword Args:
         policy_factory (Callable[[], Callable], optional): a callable that returns
@@ -1784,7 +1785,7 @@ class _MultiDataCollector(DataCollectorBase):
         ``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
 
     .. note:: If the policy needs to be passed as a policy factory (e.g., in case it mustn't be serialized /
-        pickled directly), the
+        pickled directly), the ``policy_factory`` should be used instead.
 
     Keyword Args:
         policy_factory (Callable[[], Callable], list of Callable[[], Callable], optional): a callable
@@ -2749,8 +2750,8 @@ class MultiSyncDataCollector(_MultiDataCollector):
         ...     if i == 2:
         ...         print(data)
         ...         break
-
-
+        ... collector.shutdown()
+        ... del collector
         TensorDict(
             fields={
                 action: Tensor(shape=torch.Size([200, 1]), device=cpu, dtype=torch.float32, is_shared=False),
@@ -3130,8 +3131,8 @@ class MultiaSyncDataCollector(_MultiDataCollector):
         ...     if i == 2:
         ...         print(data)
         ...         break
-        ...
-        ...
+        ... collector.shutdown()
+        ... del collector
        TensorDict(
            fields={
                action: Tensor(shape=torch.Size([200, 1]), device=cpu, dtype=torch.float32, is_shared=False),
@@ -3366,7 +3367,7 @@ class aSyncDataCollector(MultiaSyncDataCollector):
         - In all other cases an attempt to wrap it will be undergone as such: ``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
 
     .. note:: If the policy needs to be passed as a policy factory (e.g., in case it mustn't be serialized /
-        pickled directly), the
+        pickled directly), the ``policy_factory`` should be used instead.
 
     Keyword Args:
         policy_factory (Callable[[], Callable], optional): a callable that returns
@@ -3380,8 +3381,8 @@ class aSyncDataCollector(MultiaSyncDataCollector):
             total number of frames returned by the collector
             during its lifespan. If the ``total_frames`` is not divisible by
             ``frames_per_batch``, an exception is raised.
-
-
+            Endless collectors can be created by passing ``total_frames=-1``.
+            Defaults to ``-1`` (never ending collector).
         device (int, str or torch.device, optional): The generic device of the
             collector. The ``device`` args fills any non-specified device: if
             ``device`` is not ``None`` and any of ``storing_device``, ``policy_device`` or
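The docstrings above now state the `total_frames=-1` default explicitly. A sketch of an endless collector stopped by hand (assumes gymnasium is installed; passing `policy=None` to fall back to random actions is an assumption about the collector's defaults):

    from torchrl.collectors import SyncDataCollector
    from torchrl.envs import GymEnv

    collector = SyncDataCollector(
        GymEnv("Pendulum-v1"),
        policy=None,           # assumption: None falls back to a random policy
        frames_per_batch=64,
        total_frames=-1,       # endless: iterate until we break out
    )
    for i, batch in enumerate(collector):
        if i == 2:
            break
    collector.shutdown()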
torchrl/collectors/distributed/generic.py
CHANGED
@@ -282,7 +282,7 @@ class DistributedDataCollector(DataCollectorBase):
         - In all other cases an attempt to wrap it will be undergone as such: ``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
 
     .. note:: If the policy needs to be passed as a policy factory (e.g., in case it mustn't be serialized /
-        pickled directly), the
+        pickled directly), the ``policy_factory`` should be used instead.
 
     Keyword Args:
         policy_factory (Callable[[], Callable], list of Callable[[], Callable], optional): a callable
@@ -296,8 +296,8 @@ class DistributedDataCollector(DataCollectorBase):
             number of frames returned by the collector
             during its lifespan. If the ``total_frames`` is not divisible by
             ``frames_per_batch``, an exception is raised.
-
-
+            Endless collectors can be created by passing ``total_frames=-1``.
+            Defaults to ``-1`` (endless collector).
         device (int, str or torch.device, optional): The generic device of the
             collector. The ``device`` args fills any non-specified device: if
             ``device`` is not ``None`` and any of ``storing_device``, ``policy_device`` or

torchrl/collectors/distributed/ray.py
CHANGED
@@ -131,7 +131,7 @@ class RayCollector(DataCollectorBase):
         - In all other cases an attempt to wrap it will be undergone as such: ``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
 
     .. note:: If the policy needs to be passed as a policy factory (e.g., in case it mustn't be serialized /
-        pickled directly), the
+        pickled directly), the ``policy_factory`` should be used instead.
 
     Keyword Args:
         policy_factory (Callable[[], Callable], list of Callable[[], Callable], optional): a callable
@@ -263,6 +263,10 @@ class RayCollector(DataCollectorBase):
             If not provided, a :class:`~torchrl.collectors.RayWeightUpdater` will be used by default, leveraging
             Ray's distributed capabilities.
             Consider using a constructor if the updater needs to be serialized.
+        use_env_creator (bool, optional): if ``True``, the environment constructor functions will be wrapped
+            in :class:`~torchrl.envs.EnvCreator`. This is useful for multiprocessed settings where shared memory
+            needs to be managed, but Ray has its own object storage mechanism, so this is typically not needed.
+            Defaults to ``False``.
 
     Examples:
         >>> from torch import nn
@@ -326,6 +330,7 @@ class RayCollector(DataCollectorBase):
         weight_updater: WeightUpdaterBase
         | Callable[[], WeightUpdaterBase]
         | None = None,
+        use_env_creator: bool = False,
     ):
         self.frames_per_batch = frames_per_batch
         if remote_configs is None:
@@ -400,9 +405,10 @@ class RayCollector(DataCollectorBase):
         create_env_fn, collector_kwargs, remote_configs = out_lists
         num_collectors = len(create_env_fn)
 
-        for i in range(len(create_env_fn)):
-            if not isinstance(create_env_fn[i], (EnvBase, EnvCreator)):
-                create_env_fn[i] = EnvCreator(create_env_fn[i])
+        if use_env_creator:
+            for i in range(len(create_env_fn)):
+                if not isinstance(create_env_fn[i], (EnvBase, EnvCreator)):
+                    create_env_fn[i] = EnvCreator(create_env_fn[i])
 
         # If ray available, try to connect to an existing Ray cluster or start one and connect to it.
         if not _has_ray:

torchrl/collectors/distributed/rpc.py
CHANGED
@@ -121,7 +121,7 @@ class RPCDataCollector(DataCollectorBase):
         - In all other cases an attempt to wrap it will be undergone as such: ``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
 
     .. note:: If the policy needs to be passed as a policy factory (e.g., in case it mustn't be serialized /
-        pickled directly), the
+        pickled directly), the ``policy_factory`` should be used instead.
 
     Keyword Args:
         policy_factory (Callable[[], Callable], list of Callable[[], Callable], optional): a callable
@@ -135,8 +135,8 @@ class RPCDataCollector(DataCollectorBase):
             number of frames returned by the collector
             during its lifespan. If the ``total_frames`` is not divisible by
             ``frames_per_batch``, an exception is raised.
-
-
+            Endless collectors can be created by passing ``total_frames=-1``.
+            Defaults to ``-1`` (endless collector).
         device (int, str or torch.device, optional): The generic device of the
             collector. The ``device`` args fills any non-specified device: if
             ``device`` is not ``None`` and any of ``storing_device``, ``policy_device`` or

torchrl/collectors/distributed/sync.py
CHANGED
@@ -158,7 +158,7 @@ class DistributedSyncDataCollector(DataCollectorBase):
         - In all other cases an attempt to wrap it will be undergone as such: ``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
 
     .. note:: If the policy needs to be passed as a policy factory (e.g., in case it mustn't be serialized /
-        pickled directly), the
+        pickled directly), the ``policy_factory`` should be used instead.
 
     Keyword Args:
         policy_factory (Callable[[], Callable], list of Callable[[], Callable], optional): a callable
@@ -172,8 +172,8 @@ class DistributedSyncDataCollector(DataCollectorBase):
             number of frames returned by the collector
             during its lifespan. If the ``total_frames`` is not divisible by
             ``frames_per_batch``, an exception is raised.
-
-
+            Endless collectors can be created by passing ``total_frames=-1``.
+            Defaults to ``-1`` (endless collector).
         device (int, str or torch.device, optional): The generic device of the
             collector. The ``device`` args fills any non-specified device: if
             ``device`` is not ``None`` and any of ``storing_device``, ``policy_device`` or
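A sketch of the new `use_env_creator` flag on `RayCollector` (assumes `ray` and gymnasium are installed and that Ray may start a local cluster; the `policy=None` fallback is an assumption):

    from torchrl.collectors.distributed import RayCollector
    from torchrl.envs import GymEnv

    collector = RayCollector(
        [lambda: GymEnv("Pendulum-v1")],  # env constructor functions
        policy=None,                      # assumption: None -> random policy
        frames_per_batch=64,
        total_frames=128,
        use_env_creator=True,             # new in 0.10.1: wrap constructors in EnvCreator
    )
    for batch in collector:
        pass
    collector.shutdown()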
torchrl/data/map/tree.py
CHANGED
@@ -610,7 +610,7 @@ class Tree(TensorClass["nocast"]):
         This function can pull out information from each of the nodes in a tree,
         so it can be useful for debugging. The nodes are listed line-by-line.
         Each line contains the path to the node, followed by the string
-        representation of that node generated with
+        representation of that node generated with ``node_format_fn``. Each
         line is indented according to number of steps in the path required to
         get to the corresponding node.
 
@@ -1370,7 +1370,7 @@ class MCTSForest:
         This function can pull out information from each of the nodes in a tree,
         so it can be useful for debugging. The nodes are listed line-by-line.
         Each line contains the path to the node, followed by the string
-        representation of that node generated with
+        representation of that node generated with ``node_format_fn``. Each
         line is indented according to number of steps in the path required to
         get to the corresponding node.
 
torchrl/data/tensor_specs.py
CHANGED
@@ -13,7 +13,7 @@ import math
 import warnings
 import weakref
 from collections.abc import Callable, Iterable, Mapping, Sequence
-from copy import deepcopy
+from copy import copy, deepcopy
 from dataclasses import dataclass, field
 from functools import wraps
 from textwrap import indent
@@ -5095,6 +5095,7 @@ class Composite(TensorSpec):
 
     shape: torch.Size
     domain: str = "composite"
+    _td_dim_names: list[str] | None = None
 
     SPEC_HANDLED_FUNCTIONS = {}
 
@@ -5111,6 +5112,7 @@ class Composite(TensorSpec):
         device: torch.device | None = None,
         data_cls: type | None = None,
         step_mdp_static: bool = False,
+        names: Sequence[str] | None = None,
         **kwargs,
     ):
         # For compatibility with TensorDict
@@ -5126,6 +5128,12 @@ class Composite(TensorSpec):
         self._specs = {}
         self.step_mdp_static = step_mdp_static
 
+        # Initialize names
+        if names is not None:
+            self._td_dim_names = list(names)
+        else:
+            self._td_dim_names = None
+
         _device = (
             _make_ordinal_device(torch.device(device)) if device is not None else device
         )
@@ -5142,6 +5150,8 @@ class Composite(TensorSpec):
         )
         for k, item in argdict.items():
             if isinstance(item, dict):
+                # Create nested Composite with appropriate names
+                # Note: nested specs will get their names propagated later in the names setter
                 item = Composite(item, shape=shape, device=_device)
             self[k] = item
         for k, item in kwargs.items():
@@ -5150,6 +5160,10 @@ class Composite(TensorSpec):
         self.encode = self._encode_eager
         self._encode_memo_dict = {}
 
+        # Propagate names to nested specs if names were provided
+        if names is not None:
+            self._propagate_names_to_nested()
+
     def memoize_encode(self, mode: bool = True) -> None:
         super().memoize_encode(mode=mode)
         for spec in self._specs.values():
@@ -5354,6 +5368,127 @@ class Composite(TensorSpec):
             spec.clear_device_()
         return self
 
+    def _has_names(self):
+        """Returns True if names are set for this Composite."""
+        return self._td_dim_names is not None
+
+    def _erase_names(self):
+        """Erases the names of this Composite."""
+        self._td_dim_names = None
+
+    def _propagate_names_to_nested(self):
+        """Propagates names to nested Composite specs."""
+        if not self._has_names():
+            return
+        for spec in self._specs.values():
+            if isinstance(spec, Composite):
+                # For nested specs, we need to propagate the names
+                # The nested spec should have the same leading dimensions
+                if spec.ndim >= self.ndim:
+                    nested_names = list(self.names) + [None] * (spec.ndim - self.ndim)
+                    spec.names = nested_names
+
+    @property
+    def names(self):
+        """Returns the names of the dimensions of this Composite."""
+        names = self._td_dim_names
+        if names is None:
+            return [None for _ in range(self.ndim)]
+        # Return a copy but don't use copy to make dynamo happy
+        return list(names)
+
+    @names.setter
+    def names(self, value):
+        """Sets the names of the dimensions of this Composite."""
+        if value is None:
+            self._td_dim_names = None
+            return
+        if len(value) != self.ndim:
+            raise ValueError(
+                f"Expected {self.ndim} names, but got {len(value)} names: {value}"
+            )
+        self._td_dim_names = list(value)
+        # Propagate names to nested Composite specs
+        for spec in self._specs.values():
+            if isinstance(spec, Composite):
+                # For nested specs, we need to propagate the names
+                # The nested spec should have the same leading dimensions
+                if spec.ndim >= self.ndim:
+                    nested_names = list(value) + [None] * (spec.ndim - self.ndim)
+                    spec.names = nested_names
+
+    def refine_names(self, *names):
+        """Refines the dimension names of self according to names.
+
+        Refining is a special case of renaming that "lifts" unnamed dimensions.
+        A None dim can be refined to have any name; a named dim can only be
+        refined to have the same name.
+
+        Because named specs can coexist with unnamed specs, refining names
+        gives a nice way to write named-spec-aware code that works with both
+        named and unnamed specs.
+
+        names may contain up to one Ellipsis (...). The Ellipsis is expanded
+        greedily; it is expanded in-place to fill names to the same length as
+        self.ndim using names from the corresponding indices of self.names.
+
+        Returns: the same composite spec with dimensions named according to the input.
+
+        Examples:
+            >>> spec = Composite({}, shape=[3, 4, 5, 6])
+            >>> spec_refined = spec.refine_names(None, None, None, "d")
+            >>> assert spec_refined.names == [None, None, None, "d"]
+            >>> spec_refined = spec.refine_names("a", None, None, "d")
+            >>> assert spec_refined.names == ["a", None, None, "d"]
+
+        """
+        # replace ellipsis if any
+        names_copy = copy(names)
+        if any(name is Ellipsis for name in names):
+            ellipsis_name = [None for _ in range(self.ndim - len(names) + 1)]
+            names = []
+            for name in names_copy:
+                if name is Ellipsis:
+                    names += ellipsis_name
+                else:
+                    names.append(name)
+
+        # check that the names that are set are either None or identical
+        curr_names = self.names
+        for i, name in enumerate(names):
+            if curr_names[i] is None:
+                continue
+            if curr_names[i] == name:
+                continue
+            else:
+                raise RuntimeError(
+                    f"refine_names: cannot coerce Composite names {self.names} with {names_copy}."
+                )
+        self.names = names
+        return self
+
+    def _get_names_idx(self, idx):
+        """Helper method to get names after indexing."""
+        if not self._has_names():
+            return None
+
+        names = copy(self.names)
+        if isinstance(idx, (int, slice)):
+            # Single dimension indexing
+            if isinstance(idx, int):
+                names.pop(idx)
+            else:
+                # For slice, we keep the names but adjust for the slice
                pass
+        elif isinstance(idx, tuple):
+            # Multi-dimensional indexing
+            for i, sub_idx in enumerate(idx):
+                if isinstance(sub_idx, int):
+                    # Remove the dimension
+                    names.pop(i)
+                # For slices, we keep the name
+        return names
+
     def __getitem__(self, idx):
         """Indexes the current Composite based on the provided index."""
         if isinstance(idx, (str, tuple)):
@@ -5393,10 +5528,15 @@ class Composite(TensorSpec):
         except RuntimeError:
             device = self._device
 
+        names = None
+        if self._has_names():
+            names = self._get_names_idx(idx)
+
         return self.__class__(
             indexed_specs,
             shape=indexed_shape,
             device=device,
+            names=names,
         )
 
     def get(self, item, default=NO_DEFAULT):
@@ -5600,16 +5740,22 @@ class Composite(TensorSpec):
         for key, item in self.items():
             if item is not None:
                 _dict[key] = item.rand(shape)
-        if self.data_cls is not None:
-            cls = self.data_cls
+
+        cls = self.data_cls if self.data_cls is not None else TensorDict
+        if cls is not TensorDict:
+            kwargs = {}
+            if self._td_dim_names is not None:
+                warnings.warn(f"names for cls {cls} is not supported for rand.")
         else:
-            cls = TensorDict
+            kwargs = {"names": self._td_dim_names}
+
         # No need to run checks since we know Composite is compliant with
         # TensorDict requirements
         return cls.from_dict(
             _dict,
             batch_size=_size([*shape, *_remove_neg_shapes(self.shape)]),
             device=self.device,
+            **kwargs,
         )
 
     def keys(
@@ -5760,6 +5906,7 @@ class Composite(TensorSpec):
             shape=self.shape,
             data_cls=self.data_cls,
             step_mdp_static=self.step_mdp_static,
+            names=self.names if self._has_names() else None,
         )
         if not isinstance(dest, (str, int, torch.device)):
             raise ValueError(
@@ -5782,6 +5929,7 @@ class Composite(TensorSpec):
             shape=self.shape,
             data_cls=self.data_cls,
             step_mdp_static=self.step_mdp_static,
+            names=self.names if self._has_names() else None,
         )
 
     def clone(self) -> Composite:
@@ -5802,6 +5950,7 @@ class Composite(TensorSpec):
             shape=self.shape,
             data_cls=self.data_cls,
             step_mdp_static=self.step_mdp_static,
+            names=self.names if self._has_names() else None,
         )
 
     def cardinality(self) -> int:
@@ -5874,10 +6023,13 @@ class Composite(TensorSpec):
         except RuntimeError:
             device = self._device
 
-        if self.data_cls is not None:
-            cls = self.data_cls
+        cls = self.data_cls if self.data_cls is not None else TensorDict
+        if cls is not TensorDict:
+            kwargs = {}
+            if self._td_dim_names is not None:
+                warnings.warn(f"names for cls {cls} is not supported for zero.")
         else:
-            cls = TensorDict
+            kwargs = {"names": self._td_dim_names}
 
         return cls.from_dict(
             {
@@ -5887,6 +6039,7 @@ class Composite(TensorSpec):
             },
             batch_size=_size([*shape, *self._safe_shape]),
             device=device,
+            **kwargs,
         )
 
     def __eq__(self, other: object) -> bool:
@@ -5942,12 +6095,17 @@ class Composite(TensorSpec):
             else None
             for key, value in tuple(self.items())
         }
+        names = None
+        if self._has_names():
+            names = [None] * (len(shape) - self.ndim) + self.names
+
         out = Composite(
             specs,
             shape=shape,
             device=device,
             data_cls=self.data_cls,
             step_mdp_static=self.step_mdp_static,
+            names=names,
         )
         return out
 
@@ -5965,12 +6123,21 @@ class Composite(TensorSpec):
         except RuntimeError:
             device = self._device
 
+        names = None
+        if self._has_names():
+            names = copy(self.names)
+            names.pop(dim)
+            # If all names are None after popping, set to None
+            if all(name is None for name in names):
+                names = None
+
         return self.__class__(
             {key: value.squeeze(dim) for key, value in self.items()},
             shape=shape,
             device=device,
             data_cls=self.data_cls,
             step_mdp_static=self.step_mdp_static,
+            names=names,
         )
 
         if self.shape.count(1) == 0:
@@ -5993,6 +6160,11 @@ class Composite(TensorSpec):
         except RuntimeError:
             device = self._device
 
+        names = None
+        if self._has_names():
+            names = copy(self.names)
+            names.insert(dim, None)
+
         return self.__class__(
             {
                 key: value.unsqueeze(dim) if value is not None else None
@@ -6002,6 +6174,7 @@ class Composite(TensorSpec):
             device=device,
             data_cls=self.data_cls,
             step_mdp_static=self.step_mdp_static,
+            names=names,
         )
 
     def unbind(self, dim: int = 0) -> tuple[Composite, ...]:
@@ -6012,8 +6185,17 @@ class Composite(TensorSpec):
             raise ValueError(
                 f"Cannot unbind along dim {orig_dim} with shape {self.shape}."
             )
-        shape = (s for i, s in enumerate(self.shape) if i != dim)
+        shape = tuple(s for i, s in enumerate(self.shape) if i != dim)
         unbound_vals = {key: val.unbind(dim) for key, val in self.items()}
+
+        names = None
+        if self._has_names():
+            names = copy(self.names)
+            names.pop(dim)
+            # If all names are None after popping, set to None
+            if all(name is None for name in names):
+                names = None
+
         return tuple(
             self.__class__(
                 {key: val[i] for key, val in unbound_vals.items()},
@@ -6021,6 +6203,7 @@ class Composite(TensorSpec):
                 device=self.device,
                 data_cls=self.data_cls,
                 step_mdp_static=self.step_mdp_static,
+                names=names,
             )
             for i in range(self.shape[dim])
         )
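Taken together, the hunks above thread dimension names through construction, indexing, `expand`, `squeeze`/`unsqueeze`, `unbind`, `clone`, and `rand`/`zero`. A short sketch of the resulting behavior (using `Unbounded` for brevity; the assertions mirror the setter and `expand` logic shown above):

    from torchrl.data import Composite, Unbounded

    spec = Composite(
        observation=Unbounded(shape=(4, 3)),
        shape=(4,),
        names=["time"],               # new in 0.10.1
    )
    assert spec.names == ["time"]
    spec = spec.refine_names("time")  # no-op: dimension already carries that name
    expanded = spec.expand(2, 4)      # new leading dim is unnamed
    assert expanded.names == [None, "time"]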
torchrl/envs/batched_envs.py
CHANGED
@@ -308,7 +308,7 @@ class BatchedEnvBase(EnvBase):
         num_sub_threads: int = 1,
         serial_for_single: bool = False,
         non_blocking: bool = False,
-        mp_start_method: str = None,
+        mp_start_method: str | None = None,
         use_buffers: bool | None = None,
         consolidate: bool = True,
     ):
torchrl/envs/common.py
CHANGED
@@ -2267,7 +2267,7 @@ class EnvBase(nn.Module, metaclass=_EnvPostInit):
         entry_point: Callable | None = None,
         transform: Transform | None = None,  # noqa: F821
         info_keys: list[NestedKey] | None = None,
-        backend: str = None,
+        backend: str | None = None,
         to_numpy: bool = False,
         reward_threshold: float | None = None,
         nondeterministic: bool = False,
torchrl/envs/custom/llm.py
CHANGED
@@ -28,10 +28,10 @@ class LLMHashingEnv(EnvBase):
     The primary goal of this environment is to identify token chains using a hashing function.
     This allows the data to be stored in a :class:`~torchrl.data.MCTSForest` using nothing but hashes as node
     identifiers, or easily prune repeated token chains in a data structure.
-    The following figure gives an overview of this workflow:
 
-    .. figure
-
+    .. The following figure gives an overview of this workflow:
+    .. .. figure:: /_static/img/rollout-llm.png
+    ..    :alt: Data collection loop with our LLM environment.
 
     Args:
         vocab_size (int): The size of the vocabulary. Can be omitted if the tokenizer is passed.
torchrl/envs/llm/envs.py
CHANGED
@@ -601,10 +601,10 @@ class LLMHashingEnv(EnvBase):
     The primary goal of this environment is to identify token chains using a hashing function.
     This allows the data to be stored in a :class:`~torchrl.data.MCTSForest` using nothing but hashes as node
     identifiers, or easily prune repeated token chains in a data structure.
-    The following figure gives an overview of this workflow:
 
-    .. figure
-
+    .. The following figure gives an overview of this workflow:
+    .. .. figure:: /_static/img/rollout-llm.png
+    ..    :alt: Data collection loop with our LLM environment.
 
     Args:
         vocab_size (int): The size of the vocabulary. Can be omitted if the tokenizer is passed.

torchrl/envs/transforms/transforms.py
CHANGED
@@ -5423,8 +5423,8 @@ class Hash(UnaryTransform):
         """Look up the input that was given for a particular hash output.
 
         This feature is only available if, during initialization, either the
-
-
+        ``repertoire`` argument was given or both the ``in_keys_inv`` and
+        ``out_keys_inv`` arguments were given.
 
         Args:
             hash_tensor (Tensor): The hash output.

torchrl/modules/distributions/discrete.py
CHANGED
@@ -622,7 +622,7 @@ class Ordinal(D.Categorical):
     not impose any notion of proximity or ordering over its support's atoms.
     The `Ordinal` distribution explicitly encodes those concepts, which is
     useful for learning discrete sampling from continuous sets. See §5 of
-    `Tang & Agrawal, 2020<https://arxiv.org/pdf/1901.10500.pdf>`_ for details.
+    `Tang & Agrawal, 2020 <https://arxiv.org/pdf/1901.10500.pdf>`_ for details.
 
     .. note::
         This class is mostly useful when you want to learn a distribution over

torchrl/modules/llm/backends/vllm/vllm_async.py
CHANGED
@@ -526,7 +526,7 @@ class AsyncVLLM(RLvLLMEngine):
     See `this issue <https://github.com/vllm-project/vllm/issues/8268>`_ for more details.
 
     Example:
-        >>> from torchrl.modules.llm
+        >>> from torchrl.modules.llm import AsyncVLLM
        >>> from vllm import SamplingParams
        >>>
        >>> # Simple usage - single GPU, single replica

torchrl/modules/llm/policies/transformers_wrapper.py
CHANGED
@@ -172,6 +172,7 @@ class TransformersWrapper(LLMWrapperBase):
 
     Input Keys:
         The input key depends on both `input_mode` and `generate`:
+
         - If `input_mode="history"` and `generate=True`: `input_key` (defaults to `("history", "prompt")`)
         - If `input_mode="history"` and `generate=False`: `input_key` (defaults to `("history", "full")`)
         - If `input_mode="text"` and `generate=True`: `input_key` (defaults to `("text", "prompt")`)
@@ -2460,7 +2461,7 @@ class RemoteTransformersWrapper:
         model,
         max_concurrency: int = 16,
         validate_model: bool = True,
-        actor_name: str = None,
+        actor_name: str | None = None,
         num_gpus: int = 1,
         num_cpus: int = 1,
         **kwargs,

torchrl/modules/llm/policies/vllm_wrapper.py
CHANGED
@@ -194,6 +194,7 @@ class vLLMWrapper(LLMWrapperBase):
 
     Input Keys:
         The input key depends on both `input_mode` and `generate`:
+
         - If `input_mode="history"` and `generate=True`: `input_key` (defaults to `("history", "prompt")`)
         - If `input_mode="history"` and `generate=False`: `input_key` (defaults to `("history", "full")`)
         - If `input_mode="text"` and `generate=True`: `input_key` (defaults to `("text", "prompt")`)
torchrl/objectives/a2c.py
CHANGED
@@ -282,12 +282,12 @@ class A2CLoss(LossModule):
         loss_critic_type: str = "smooth_l1",
         gamma: float | None = None,
         separate_losses: bool = False,
-        advantage_key: str = None,
-        value_target_key: str = None,
+        advantage_key: str | None = None,
+        value_target_key: str | None = None,
         functional: bool = True,
         actor: ProbabilisticTensorDictSequential = None,
         critic: ProbabilisticTensorDictSequential = None,
-        reduction: str = None,
+        reduction: str | None = None,
         clip_value: float | None = None,
         **kwargs,
     ):
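The recurring annotation change in this and the following objective modules is purely a typing fix: `reduction: str = None` declares a `str` while defaulting to `None`, which strict type checkers reject. The runtime defaulting is unchanged, as this sketch of the shared pattern shows:

    def resolve_reduction(reduction: str | None = None) -> str:
        # PEP 604 syntax makes the None default explicit for type checkers.
        if reduction is None:
            reduction = "mean"
        return reduction

    assert resolve_reduction() == "mean"
    assert resolve_reduction("sum") == "sum"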
torchrl/objectives/cql.py
CHANGED
@@ -291,7 +291,7 @@ class CQLLoss(LossModule):
         num_random: int = 10,
         with_lagrange: bool = False,
         lagrange_thresh: float = 0.0,
-        reduction: str = None,
+        reduction: str | None = None,
         deactivate_vmap: bool = False,
     ) -> None:
         self._out_keys = None
@@ -1100,7 +1100,7 @@ class DiscreteCQLLoss(LossModule):
         delay_value: bool = True,
         gamma: float | None = None,
         action_space=None,
-        reduction: str = None,
+        reduction: str | None = None,
     ) -> None:
         self._in_keys = None
         if reduction is None:
torchrl/objectives/crossq.py
CHANGED
@@ -266,9 +266,9 @@ class CrossQLoss(LossModule):
         action_spec=None,
         fixed_alpha: bool = False,
         target_entropy: str | float = "auto",
-        priority_key: str = None,
+        priority_key: str | None = None,
         separate_losses: bool = False,
-        reduction: str = None,
+        reduction: str | None = None,
         deactivate_vmap: bool = False,
     ) -> None:
         self._in_keys = None
torchrl/objectives/ddpg.py
CHANGED
torchrl/objectives/decision_transformer.py
CHANGED
@@ -85,7 +85,7 @@ class OnlineDTLoss(LossModule):
         fixed_alpha: bool = False,
         target_entropy: str | float = "auto",
         samples_mc_entropy: int = 1,
-        reduction: str = None,
+        reduction: str | None = None,
     ) -> None:
         self._in_keys = None
         self._out_keys = None
@@ -296,7 +296,7 @@ class DTLoss(LossModule):
         actor_network: ProbabilisticActor,
         *,
         loss_function: str = "l2",
-        reduction: str = None,
+        reduction: str | None = None,
         device: torch.device | None = None,
     ) -> None:
         self._in_keys = None
torchrl/objectives/deprecated.py
CHANGED
@@ -163,9 +163,9 @@ class REDQLoss_deprecated(LossModule):
         delay_qvalue: bool = True,
         gSDE: bool = False,
         gamma: float | None = None,
-        priority_key: str = None,
+        priority_key: str | None = None,
         separate_losses: bool = False,
-        reduction: str = None,
+        reduction: str | None = None,
         deactivate_vmap: bool = False,
     ):
         self._in_keys = None
torchrl/objectives/dqn.py
CHANGED
@@ -179,8 +179,8 @@ class DQNLoss(LossModule):
         double_dqn: bool = False,
         gamma: float | None = None,
         action_space: str | TensorSpec = None,
-        priority_key: str = None,
-        reduction: str = None,
+        priority_key: str | None = None,
+        reduction: str | None = None,
     ) -> None:
         if reduction is None:
             reduction = "mean"
@@ -455,8 +455,8 @@ class DistributionalDQNLoss(LossModule):
         *,
         gamma: float,
         delay_value: bool = True,
-        priority_key: str = None,
-        reduction: str = None,
+        priority_key: str | None = None,
+        reduction: str | None = None,
     ):
         if reduction is None:
             reduction = "mean"
torchrl/objectives/gail.py
CHANGED
torchrl/objectives/iql.py
CHANGED
@@ -266,9 +266,9 @@ class IQLLoss(LossModule):
         temperature: float = 1.0,
         expectile: float = 0.5,
         gamma: float | None = None,
-        priority_key: str = None,
+        priority_key: str | None = None,
         separate_losses: bool = False,
-        reduction: str = None,
+        reduction: str | None = None,
         deactivate_vmap: bool = False,
     ) -> None:
         self._in_keys = None
@@ -785,9 +785,9 @@ class DiscreteIQLLoss(IQLLoss):
         temperature: float = 1.0,
         expectile: float = 0.5,
         gamma: float | None = None,
-        priority_key: str = None,
+        priority_key: str | None = None,
         separate_losses: bool = False,
-        reduction: str = None,
+        reduction: str | None = None,
     ) -> None:
         self._in_keys = None
         self._out_keys = None

torchrl/objectives/multiagent/qmixer.py
CHANGED
@@ -195,7 +195,7 @@ class QMixerLoss(LossModule):
         delay_value: bool = True,
         gamma: float | None = None,
         action_space: str | TensorSpec = None,
-        priority_key: str = None,
+        priority_key: str | None = None,
     ) -> None:
         super().__init__()
         self._in_keys = None
torchrl/objectives/redq.py
CHANGED
@@ -279,9 +279,9 @@ class REDQLoss(LossModule):
         delay_qvalue: bool = True,
         gSDE: bool = False,
         gamma: float | None = None,
-        priority_key: str = None,
+        priority_key: str | None = None,
         separate_losses: bool = False,
-        reduction: str = None,
+        reduction: str | None = None,
         deactivate_vmap: bool = False,
     ):
         if reduction is None:
torchrl/objectives/reinforce.py
CHANGED
@@ -249,13 +249,13 @@ class ReinforceLoss(LossModule):
         delay_value: bool = False,
         loss_critic_type: str = "smooth_l1",
         gamma: float | None = None,
-        advantage_key: str = None,
-        value_target_key: str = None,
+        advantage_key: str | None = None,
+        value_target_key: str | None = None,
         separate_losses: bool = False,
         functional: bool = True,
         actor: ProbabilisticTensorDictSequential = None,
         critic: ProbabilisticTensorDictSequential = None,
-        reduction: str = None,
+        reduction: str | None = None,
         clip_value: float | None = None,
     ) -> None:
         if actor is not None:
torchrl/objectives/sac.py
CHANGED
@@ -325,16 +325,16 @@ class SACLoss(LossModule):
         alpha_init: float = 1.0,
         min_alpha: float | None = None,
         max_alpha: float | None = None,
-        action_spec=None,
+        action_spec: TensorSpec | None = None,
         fixed_alpha: bool = False,
         target_entropy: str | float = "auto",
         delay_actor: bool = False,
         delay_qvalue: bool = True,
         delay_value: bool = True,
         gamma: float | None = None,
-        priority_key: str = None,
+        priority_key: str | None = None,
         separate_losses: bool = False,
-        reduction: str = None,
+        reduction: str | None = None,
         skip_done_states: bool = False,
         deactivate_vmap: bool = False,
     ) -> None:
@@ -1195,9 +1195,9 @@ class DiscreteSACLoss(LossModule):
         target_entropy_weight: float = 0.98,
         target_entropy: str | Number = "auto",
         delay_qvalue: bool = True,
-        priority_key: str = None,
+        priority_key: str | None = None,
         separate_losses: bool = False,
-        reduction: str = None,
+        reduction: str | None = None,
         skip_done_states: bool = False,
         deactivate_vmap: bool = False,
     ):
torchrl/objectives/td3.py
CHANGED
@@ -236,9 +236,9 @@ class TD3Loss(LossModule):
         delay_actor: bool = True,
         delay_qvalue: bool = True,
         gamma: float | None = None,
-        priority_key: str = None,
+        priority_key: str | None = None,
         separate_losses: bool = False,
-        reduction: str = None,
+        reduction: str | None = None,
         deactivate_vmap: bool = False,
     ) -> None:
         if reduction is None:
torchrl/objectives/td3_bc.py
CHANGED
@@ -251,9 +251,9 @@ class TD3BCLoss(LossModule):
         loss_function: str = "smooth_l1",
         delay_actor: bool = True,
         delay_qvalue: bool = True,
-        priority_key: str = None,
+        priority_key: str | None = None,
         separate_losses: bool = False,
-        reduction: str = None,
+        reduction: str | None = None,
         deactivate_vmap: bool = False,
     ) -> None:
         if reduction is None:
torchrl/record/loggers/wandb.py
CHANGED
@@ -52,9 +52,9 @@ class WandbLogger(Logger):
         self,
         exp_name: str,
         offline: bool = False,
-        save_dir: str = None,
-        id: str = None,
-        project: str = None,
+        save_dir: str | None = None,
+        id: str | None = None,
+        project: str | None = None,
         *,
         video_fps: int = 32,
         **kwargs,
{torchrl-0.10.0.dist-info → torchrl-0.10.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: torchrl
-Version: 0.10.0
+Version: 0.10.1
 Summary: A modular, primitive-first, python-first PyTorch library for Reinforcement Learning
 Author-email: torchrl contributors <vmoens@fb.com>
 Maintainer-email: torchrl contributors <vmoens@fb.com>
{torchrl-0.10.0.dist-info → torchrl-0.10.1.dist-info}/RECORD
CHANGED
@@ -4,7 +4,7 @@ benchmarks\requirements.txt,sha256=zq-bWlShbTeSnj-Ud4NDgGngvWR_jdpnTAlMahTjr3k,7
 benchmarks\test_collectors_benchmark.py,sha256=-8MQHzK7ItL4eQRuE8V40WP3EJVXktwDtgzGN23rJy8,6912
 benchmarks\test_compressed_storage_benchmark.py,sha256=8NjVwRGKxi5CSuq9O9xLzM6MYG8R3a1afBjaXtGQjPY,6074
 benchmarks\test_envs_benchmark.py,sha256=ntXAMgLSKtasErZso_lsM_F2ae6gnuBS-Ow0tI4OPoM,3653
-benchmarks\test_llm.py,sha256=
+benchmarks\test_llm.py,sha256=kHONY3smLA1JbyCBbSSeX8XSiapD6SiFxNKwikMf2-I,4038
 benchmarks\test_objectives_benchmarks.py,sha256=EipTFZNKV_VcofGHTyIQiKVQv45k_u1QHrAMgncNB8I,34608
 benchmarks\test_replaybuffer_benchmark.py,sha256=3VoDtjg0h3q76em1Ptq75jh_qfeHplYR4d3e1vTFsRc,8274
 benchmarks\ecosystem\gym_env_throughput.py,sha256=_QDRDLoq0LfGHNWV_o0E9s_k-YUuvcyWR0v3-w-Sjig,14282
@@ -92,18 +92,18 @@ sota-implementations\td3\utils.py,sha256=AU18AXsfPXnsOOB7wKgo_5pe1wFobquJywnI0ay
 sota-implementations\td3_bc\td3_bc.py,sha256=GUGzbGkaR7MDbmhvMCs69Ewd-iCVjaQCEt6dxcSE8yw,5927
 sota-implementations\td3_bc\utils.py,sha256=ZMMYehJP6KZaiiwTRlJRFKC528M49QhfEb_RAxaBj-U,6951
 torchrl\_extension.py,sha256=5DqUOUHZJPLqcSvGztUUBy4rJFAzznuJCAu5c84OJo8,2981
-torchrl\_torchrl.cp39-win_amd64.pyd,sha256=
-torchrl\_utils.py,sha256=
-torchrl\__init__.py,sha256=
-torchrl\collectors\collectors.py,sha256=
+torchrl\_torchrl.cp39-win_amd64.pyd,sha256=yPDxj_5ErX7tV2rp4VD2LeYx0pY9DVj0zFEETqCDaFc,418816
+torchrl\_utils.py,sha256=2CYX3KGAendGu5VCE1r_oRr_rGFCHd6jIUa1GMASLg4,32554
+torchrl\__init__.py,sha256=KINiNwlkd5Qj3ts2XIGQpk8P10aaPERlR1iLegQgtpM,3190
+torchrl\collectors\collectors.py,sha256=BdkinVV0S9rhd_ypGalsgm-5eOFBqurR2h0jozT8SQE,183779
 torchrl\collectors\utils.py,sha256=DLHoLf9MvjAmJssFuG1GE9wDpjIuWgmbmMrnphg7mwc,11588
 torchrl\collectors\weight_update.py,sha256=S9aSLYV6MtNMk23TM9ZY5nsk72s7bUJ2wnslh7k8sSI,24504
 torchrl\collectors\__init__.py,sha256=jdZJgqPB15BBKHgd46Md1FYy6GaQjGJBIsQ2mAo8W_E,898
 torchrl\collectors\distributed\default_configs.py,sha256=Kvbn84NRz9l7mBk_681P9I4AQ4hyeNPBfzLqydiFijQ,895
-torchrl\collectors\distributed\generic.py,sha256=
-torchrl\collectors\distributed\ray.py,sha256=
-torchrl\collectors\distributed\rpc.py,sha256=
-torchrl\collectors\distributed\sync.py,sha256=
+torchrl\collectors\distributed\generic.py,sha256=tlxFJEYeaYJXML6kUgvykwsXj5nuZGsKWUKWAnOMpYM,46521
+torchrl\collectors\distributed\ray.py,sha256=kFuamafzMlZL7uT5ALzgm8vVb5GXfcibG7CXN3vqxN8,37082
+torchrl\collectors\distributed\rpc.py,sha256=kJZUX-5bFZGTvjRnYnsXumD9eESAkZIz7Yzn2IVpYvA,40492
+torchrl\collectors\distributed\sync.py,sha256=Tg4dlYGpxrTgILXJAugH8c0ZzrIIdW3vvAxiQF1jnAA,27933
 torchrl\collectors\distributed\utils.py,sha256=eY6M-vLCSzyACHRNBx5bHcieWsZfLg7DfNKGIv0IgHI,6625
 torchrl\collectors\distributed\__init__.py,sha256=cKDWdNlwx2LoJkTwf-DKUXbq3Y-0Z1DctPYPcdgOSU0,730
 torchrl\collectors\llm\base.py,sha256=ELhduZbaELYFpgitPoXB4qPhX2bsunqJgB2pOpwjG4U,22189
@@ -120,7 +120,7 @@ torchrl\csrc\torch_utils.h,sha256=k7gTjLle9wW_TG6GrqqOYIG1MKsWqjHPcoqPDxLys5Y,73
 torchrl\csrc\utils.cpp,sha256=agOkJ0G4ytRsuGmkTXT1kBLGWOxrpkd6LHDVeZymEgA,1690
 torchrl\csrc\utils.h,sha256=bXlPW94HH4UMRDXXbPgfC25SvI_txvAkueBmSex9g7M,1132
 torchrl\data\rlhf.py,sha256=y4KwcYjtlons4czR72LGLqTjfl913EKkP_qXNXO_LC4,1040
-torchrl\data\tensor_specs.py,sha256=
+torchrl\data\tensor_specs.py,sha256=NpRGa6Kz_uvkn8ZjZAguFiIA7B9r2zPeMP91G9BlKoE,268154
 torchrl\data\utils.py,sha256=krn1klWLdfdxy7afevsVecX0KGTuNUSz2D2yNCGW1Hw,12459
 torchrl\data\__init__.py,sha256=SBu_aozTi8iEWZ_EJcWUMqERDXH9i-S3O6hjMb5xjno,5166
 torchrl\data\datasets\atari_dqn.py,sha256=zet-dUhsxIbPNMqcbqksMCBSP9-ZvzIqxsviRaRTJtY,41626
@@ -145,7 +145,7 @@ torchrl\data\llm\__init__.py,sha256=-_UPiaQgHzFVLM_gfiOS0sCGVRwkxZ1_Z5B6C9ThN9o,
 torchrl\data\map\hash.py,sha256=AilOzYQ0KYhCZpVZCm63AhRuXn2P_RLB4PcbIx6qnlA,7446
 torchrl\data\map\query.py,sha256=CfAC9XJh7KpdCJNgqfJ5CUCi7BWqekgCMlNVSIF60To,8125
 torchrl\data\map\tdstorage.py,sha256=HzdR7M8Fjt9543vqdEpy0QQ8tHgKLCqWYCA6P02_2dM,15143
-torchrl\data\map\tree.py,sha256=
+torchrl\data\map\tree.py,sha256=EhYin_p5qRfb1mrfD_1mkYelyNupq4upMRaRPIvPLUY,60782
 torchrl\data\map\utils.py,sha256=fEjqCzaE4Vqjb8OzUvnClmLxVMooqeFOBMs7wroYvxs,3022
 torchrl\data\map\__init__.py,sha256=bON0vqCksU7FPoWNqiNcdl60t7yWUh9SdLhNtglj7jI,576
 torchrl\data\postprocs\postprocs.py,sha256=dpXOKWlhdKy4Um7HdzRKe42PJ_Q1jHC7AX5plR9AIiw,15509
@@ -160,15 +160,15 @@ torchrl\data\replay_buffers\utils.py,sha256=RPKS5C5U2GDPJQ1zSiawEi4LIwWuazJfcgQJ
 torchrl\data\replay_buffers\writers.py,sha256=WVChK3QPVb2Ehoqn3U_c8W2FJ5nU57hqCb7XTthrUgU,29488
 torchrl\data\replay_buffers\__init__.py,sha256=RcJSEXHz6zt1gQSFhEaKwQHUhZkYa2x-buNSDkNAslE,2595
 torchrl\envs\async_envs.py,sha256=5ao4aEdETWTaTHoUru5GZOzH5uvNtGGmHxnniSk4Wdc,44125
-torchrl\envs\batched_envs.py,sha256=
-torchrl\envs\common.py,sha256=
+torchrl\envs\batched_envs.py,sha256=hhlNbzE0h1euxAWLgbxClTRPXySIDKKec15Sqlk3bjs,121388
+torchrl\envs\common.py,sha256=vZgFUaunYISFYoYCZ2OlgDWek4aWWq8g61FArECUTLs,174230
 torchrl\envs\env_creator.py,sha256=AAuZNNgvm_jX2_014AWWvNI5EQCmerU8ICCIAvv3PiM,10329
 torchrl\envs\gym_like.py,sha256=dky7JLsHAVnTdLimf4KAZGsPP104SFLD4fVzlmyAYh8,32381
 torchrl\envs\utils.py,sha256=YTCNO6XyCfxm9njCFafrDh1nnZ7wTPYH8uPAeCtG4mI,74861
 torchrl\envs\vec_envs.py,sha256=B3lrPCVk4jRIXy0V0berwktZInHpx_UBABTcPUlA1Lw,377
 torchrl\envs\__init__.py,sha256=2eVr8StUSMiNd-IoD5BQAFFuV10pAtO926b6QzRzB_M,6082
 torchrl\envs\custom\chess.py,sha256=IiudT29mY6Yvau6CIXB5678d8kvkCoBvLh9FoQeSiM0,25285
-torchrl\envs\custom\llm.py,sha256=
+torchrl\envs\custom\llm.py,sha256=FnZltPFKQUOH49qlvABk9dth2d2ZRkO_tiA0kiu6LKs,8869
 torchrl\envs\custom\pendulum.py,sha256=8sBgT8DvHrg5-YSOduC8GAHM9v7SXXfd8BzHvQ3wHOU,18617
 torchrl\envs\custom\san_moves.txt,sha256=AMStL2XCAnEbO6UZEYfDSCp5zRO2811gPEIzOWbmmRY,217492
 torchrl\envs\custom\tictactoeenv.py,sha256=voszQ7rPl7PbCBB6BQ8OELCi5US5YFm5gb-CLOkAIRM,12547
@@ -194,7 +194,7 @@ torchrl\envs\libs\vmas.py,sha256=giTORg2AqYzyjrazdD94fD2dNYwX7qe5TFnr-E1mjIg,371
 torchrl\envs\libs\_gym_utils.py,sha256=JYCNtWW4gAYwLq4k87ZdwLtDR_mRSyWQOi4seuXllOI,13150
 torchrl\envs\libs\__init__.py,sha256=tNiqWDxI-PQ2sxer7atuaSmKN_35pUpDIVFor-GIPfg,1821
 torchrl\envs\llm\chat.py,sha256=vCtRIhlw_8jQ55KL8x8-7N9dXxb83z68MirCD2jqt6E,32529
-torchrl\envs\llm\envs.py,sha256=
+torchrl\envs\llm\envs.py,sha256=z73Urni2ZCTPlQvMyHaxTzkvog7TwWqBI-c-GRd5TEk,35810
 torchrl\envs\llm\__init__.py,sha256=mxvPV4WD_jViongRHTYScOHwwGPjUjKhch90jA11IuY,1504
 torchrl\envs\llm\datasets\gsm8k.py,sha256=sja-7uzYSRd2CUYI6pe3SWq3YlKWWVmLqoNSFuw5tW0,17040
 torchrl\envs\llm\datasets\ifeval.py,sha256=9uSTySm3PKDIsgX7axHI1b-Z2IZmcNzUIACAIoDXuQM,12373
@@ -231,7 +231,7 @@ torchrl\envs\transforms\llm.py,sha256=V2ZY8-QY27GCpGY5i0UrryohQclybyL7aZwU9glc7w
 torchrl\envs\transforms\r3m.py,sha256=3B-JB3GHh3s1Af69WZ3wl3BU8SP0g_QmuH8IPztXRbQ,13850
 torchrl\envs\transforms\rb_transforms.py,sha256=eoJVEOv2ckVHth7nBgRaULW4TICf7YoQHcbWn9n1Cns,7661
 torchrl\envs\transforms\rlhf.py,sha256=DlAgMrLWVFkUQ3inpFgfHDkGjJKmFwhxiRGV3FWGZK8,691
-torchrl\envs\transforms\transforms.py,sha256=
+torchrl\envs\transforms\transforms.py,sha256=OqABOhEw36yzPc_oYEhMkMkj0PBbCIsGc9igrx3V-8Q,499488
 torchrl\envs\transforms\utils.py,sha256=V7YAV2BcJWvhC6aUV9LcwOodZFPbKmsltyR747OdRTU,3358
 torchrl\envs\transforms\vc1.py,sha256=snXdONyRKkyMiaW-bT7SwDJUQVb5GWr1mqY1W78Ohn0,10841
 torchrl\envs\transforms\vecnorm.py,sha256=udY-bdOhm-Aqjpt-STQT_mT6Ee50j5XH7v70gmyoKKk,34915
@@ -239,7 +239,7 @@ torchrl\envs\transforms\vip.py,sha256=r8Ni0hAYY1gispLj0TXV2VIedrgC4eW3hAhJBv47Q7
 torchrl\envs\transforms\__init__.py,sha256=d0p0afpcykAcBU6HENDJxtq91UEkooY69wbFgyOFIxE,3281
 torchrl\modules\__init__.py,sha256=TuJj3WUlvilYY39nUH-ykXkyprTxjq9NLW0QQXADqJk,4343
 torchrl\modules\distributions\continuous.py,sha256=Q9T8okHY625AGthEet5WOWfT0DTTSBeqWbIOB3lqPlE,26443
-torchrl\modules\distributions\discrete.py,sha256=
+torchrl\modules\distributions\discrete.py,sha256=A4-LMggbd0RBGsM3XmaQIyp7tzncdJpUHwMUzGQE9CM,36500
 torchrl\modules\distributions\truncated_normal.py,sha256=l5G3TePasl7q12DjwisyQC_E0OfZZo2g_HzBhZREVxc,6122
 torchrl\modules\distributions\utils.py,sha256=q4AFDKFpacRhrl4rjJ54UhxQzjOcj_SKlz0UIcZlUVc,7796
 torchrl\modules\distributions\__init__.py,sha256=Evkiz96ZPs7VUZp2n03h9kd7rmUCEEvMVl2f7RhzMhQ,1670
@@ -247,13 +247,13 @@ torchrl\modules\llm\utils.py,sha256=b2s9ngHwXnNbLggygU3-ScNwk0MWICketq2pZBshGqM,
 torchrl\modules\llm\__init__.py,sha256=_jPEt6oFb_R75zcVWlrfl4OMVxEs3Y9zCjVZp8dgg38,1098
 torchrl\modules\llm\backends\__init__.py,sha256=O7RanoHTBR4xLQLUJ2JUG4-zlVZ7PJDx1d2_AezVmaU,1027
 torchrl\modules\llm\backends\vllm\base.py,sha256=KZs36Q0sNveEkHJrub6xD_SzZafAdWz5ZK5ssHphMHM,2149
-torchrl\modules\llm\backends\vllm\vllm_async.py,sha256=
+torchrl\modules\llm\backends\vllm\vllm_async.py,sha256=rj9vyftLJVFjIQqLEFvHw3YhA_jzpYs0C9hQl1LImVs,80766
 torchrl\modules\llm\backends\vllm\vllm_sync.py,sha256=K7P_da0XqAEO9lzniN3gso8IKWgP_S4ueyGncZtMEXU,15958
 torchrl\modules\llm\backends\vllm\vllm_utils.py,sha256=qRwirmMfTvqwr34-AtwcpjMp-InhDj7Iz2ZnEbkTcxE,4320
 torchrl\modules\llm\backends\vllm\__init__.py,sha256=LPqxC7ijHq6kjcA1a0kqGZU_JK_r8oT8BetsU4KKsWY,1816
 torchrl\modules\llm\policies\common.py,sha256=dhw_Q64Nk3WrfLqjfLlLVY5BMSCp1pbjUJT8MHuoEjQ,58104
-torchrl\modules\llm\policies\transformers_wrapper.py,sha256
-torchrl\modules\llm\policies\vllm_wrapper.py,sha256=
+torchrl\modules\llm\policies\transformers_wrapper.py,sha256=HN2qsnFXdCoLHkJsuMp2Kp3wwMN9hEpWMebDHkJ-8xg,114583
+torchrl\modules\llm\policies\vllm_wrapper.py,sha256=uMbis1Coqj7oke2fTwtQZoPnbUgRT-CPrcM4SQWrwlA,95599
 torchrl\modules\llm\policies\__init__.py,sha256=4rOIFNkYAZXMB5WIAomMV0tTZVP_J9mA4uatDpySbk8,628
 torchrl\modules\models\batchrenorm.py,sha256=bR4ZhaJ5E1cSK5o8L2dNX5KVLIb-bgrYxcq6yhx0I1A,4869
 torchrl\modules\models\decision_transformer.py,sha256=ANFTOm3k9_3Uv1vKGdXumRy3meBPnDdT8HqhVvJ2RCo,6783
@@ -281,30 +281,30 @@ torchrl\modules\tensordict_module\__init__.py,sha256=iTz8iCBmxt661GrGJRBfw4tBoTu
 torchrl\modules\utils\mappings.py,sha256=HEPGNHhQrPNU85-Bq0cYm1TZIhSkdEBkLvgrmjFMa4Q,371
 torchrl\modules\utils\utils.py,sha256=Ae2bl6GDxm9kU73WeLi-0ZEsrFt-XTaGqdxdXiX9LSU,3005
 torchrl\modules\utils\__init__.py,sha256=NQ_ko0JAIPY_X5RgBJnZLZXnYSH2q_kuD0tvXGqqY3k,1165
-torchrl\objectives\a2c.py,sha256=
+torchrl\objectives\a2c.py,sha256=Pc-xmumKqLw0ovMzLZ5F0BkovUB-LsUEsGvF2WVwTI4,29470
 torchrl\objectives\common.py,sha256=nslOhX1hi0nYvS8v7ylt5qB40fkQ7k-_XvO2p99ppTM,29274
-torchrl\objectives\cql.py,sha256=
-torchrl\objectives\crossq.py,sha256=
-torchrl\objectives\ddpg.py,sha256=
-torchrl\objectives\decision_transformer.py,sha256
-torchrl\objectives\deprecated.py,sha256=
-torchrl\objectives\dqn.py,sha256=
+torchrl\objectives\cql.py,sha256=a6Fltgf05z_bP3YaXIydYtD4VZNSVhbHgMSxZsynHAg,56266
+torchrl\objectives\crossq.py,sha256=KLG7DGZApOZWAktYYV-HLRh2C070zLAjcUh24xPqkbI,29571
+torchrl\objectives\ddpg.py,sha256=KxZDlsNehydFh7-4oVVouGK6Dz8CO19uEpgHayd53mM,18221
+torchrl\objectives\decision_transformer.py,sha256=vX-Gr8bXSQwq-gyPtFQWcrL_SO5ELuQfmBZm1uWzNzc,13419
+torchrl\objectives\deprecated.py,sha256=jm77VQqK68nZziTlL14VZ3FHIAPVcUv1DUMGW33diBo,21267
+torchrl\objectives\dqn.py,sha256=XhtPvhtNty1Gka9swHIJIF7HOM3huZqk0XTJ8RXVJcw,28831
 torchrl\objectives\dreamer.py,sha256=65EntKqou3auLMYxD1uaKGNyucfktabqaATNT1bExQc,18497
 torchrl\objectives\functional.py,sha256=0Pr_debAMM2bp06HPGVIpLTcyBue4DvcyUJVsaa6AjE,2154
-torchrl\objectives\gail.py,sha256=
-torchrl\objectives\iql.py,sha256=
+torchrl\objectives\gail.py,sha256=MCJ-TE_asCp-NTfSgrqkUx9DWrR1GXth7VqrH46lndA,9855
+torchrl\objectives\iql.py,sha256=CwymtcSV3RDktRVCbsQihST5PFik382-5J5qnRA3U8E,44004
 torchrl\objectives\ppo.py,sha256=l2fGbQ45Zd0mwSijLtHYk3rvaoOR451LPXwcVq1L7Zw,82038
-torchrl\objectives\redq.py,sha256=
-torchrl\objectives\reinforce.py,sha256=
-torchrl\objectives\sac.py,sha256=
-torchrl\objectives\td3.py,sha256=
-torchrl\objectives\td3_bc.py,sha256=
+torchrl\objectives\redq.py,sha256=tLGi5wh8gErf0Ds725n_mwd3iSvpCXSh2w0WFJoqQvY,29191
+torchrl\objectives\reinforce.py,sha256=Fzu-7VBiepxZT_bXX17yc4fyPl7BFjqQerCokVR29E8,22882
+torchrl\objectives\sac.py,sha256=Djj2yGhqu1cglIVoOvq232Q9fkoNq5D1P7z9Flis9GI,70910
+torchrl\objectives\td3.py,sha256=vixzm2sITvLVrojCnIYGopyimvcL8o_dML4nc93-WJY,23757
+torchrl\objectives\td3_bc.py,sha256=oKZm3BRHsg-DK5fzFw1DC6fX8hrzbh8tmCOGvw5a62Q,26572
 torchrl\objectives\utils.py,sha256=CfEk41IWgVpzgZ7jq7rWS7FZbh4ymYsv92Td2rCWFRE,25990
 torchrl\objectives\__init__.py,sha256=Ug1FX1kFbTSz_i51uaDw7pOBIXSUIbH7BE5_8PZNbHM,2245
 torchrl\objectives\llm\grpo.py,sha256=kXcHSA_uPAEqvqRjjLTrzyupqMG9yuiRh_G0AxMAzaQ,25307
 torchrl\objectives\llm\sft.py,sha256=U1jtwZfDYLaU1YzA6edGhRo4t00dHpWbVKQLyyF2f1g,21352
 torchrl\objectives\llm\__init__.py,sha256=tZmIz3rkeclw3MzJoOWEs2gkewjx2USKrKJbWdyiiaQ,406
-torchrl\objectives\multiagent\qmixer.py,sha256=
+torchrl\objectives\multiagent\qmixer.py,sha256=MQST8UvktQLG9Z1b12Fj0RdxloWKj87paxL0DWwSucc,17369
 torchrl\objectives\multiagent\__init__.py,sha256=5uebDe5KrvlzeYV_BSd5vdmfruJQYMeDVVbU4iHErEg,245
 torchrl\objectives\value\advantages.py,sha256=Nz0IANqvV7uAMzghxA6Ta1DdkEfLo2w2Z21wd2w1duo,86865
 torchrl\objectives\value\functional.py,sha256=bgZiXJKuOmqlKdtTzWXvbgab4yT01xik-cTW2YQkTNU,51091
@@ -317,7 +317,7 @@ torchrl\record\loggers\csv.py,sha256=uNFjiPLq7mMr5z2WPyjyr9HGexu4ZkUwbX09FsV1mJ4
 torchrl\record\loggers\mlflow.py,sha256=9N-a5OUJJGwYej0WvTxQPkrazsahhmgof8seMDCnjM0,5098
 torchrl\record\loggers\tensorboard.py,sha256=x1Mo7KE4-iGG5NVToAP-1XceG_F6Vipr26B_1CK9Tg0,5005
 torchrl\record\loggers\utils.py,sha256=rZqyZi-ebLozHh8pbV-7m23W27zXsbnR04Rh-yWMPV8,2432
-torchrl\record\loggers\wandb.py,sha256=
+torchrl\record\loggers\wandb.py,sha256=tkedfdd4RqTPFfe6xnyIhPm8T5a4Lzi9wW0ww1PY4iE,7298
 torchrl\record\loggers\__init__.py,sha256=pa6ttxj0FORHS6MgiYg05iFoABwJ8vqBHn45wkqshT4,568
 torchrl\trainers\trainers.py,sha256=_1SvHfNGwUd8w_YeRJBpJPm4Bvrv-5u7HsyKk5V3FCA,66905
 torchrl\trainers\__init__.py,sha256=LEUdW1zV5jydpMjZqnJ7XZW77MBIVv8dZ1nGv-m89RQ,853
@@ -344,8 +344,9 @@ torchrl\trainers\helpers\models.py,sha256=VujBq9H92sEzpCtU1iTrJQNlwvyOO-Rho4bzsM
 torchrl\trainers\helpers\replay_buffer.py,sha256=RaZqXnHimmadiibvDBcLbtIhpPaVMTPhYMOBvX4v3CA,2060
 torchrl\trainers\helpers\trainers.py,sha256=VVhAXHcutHyVa7kJEo_RtaI9U5h0Hk2qLEnONXFpPQ8,12350
 torchrl\trainers\helpers\__init__.py,sha256=sCBIXQqFQKRrbcNojgPxIh82HpXnXKgA_kMa3uZESSk,1137
-torchrl-0.10.
-torchrl-0.10.
-torchrl-0.10.
-torchrl-0.10.
-torchrl-0.10.
+torchrl-0.10.1.dist-info\entry_points.txt,sha256=kjqZUboF3jzU21uy15NPn2WDfbwGE21Ls0fmfhqhmy4,110
+torchrl-0.10.1.dist-info\LICENSE,sha256=PGO-oZsq4EzhE1-WQS2xGiEF3UCVb9YawfQ09cIMV_8,1119
+torchrl-0.10.1.dist-info\METADATA,sha256=S5rAbED_7qvdSpMbLin0amLt_NkQ1aFqx93zdIPNz2g,50106
+torchrl-0.10.1.dist-info\RECORD,,
+torchrl-0.10.1.dist-info\top_level.txt,sha256=-5FcSdmJ9DwdHF8aOIaofsPbz4Gm8G1eo7r7Sc2CHgE,59
+torchrl-0.10.1.dist-info\WHEEL,sha256=DmZ7B4aiAganfWOCUyjXG_z2uvUu-tkD3rVXMivgyOM,99
{torchrl-0.10.0.dist-info → torchrl-0.10.1.dist-info}/LICENSE
File without changes
{torchrl-0.10.0.dist-info → torchrl-0.10.1.dist-info}/WHEEL
File without changes
{torchrl-0.10.0.dist-info → torchrl-0.10.1.dist-info}/top_level.txt
File without changes