PyPI - torchrl-nightly - Versions diffs - 2025.4.20__cp311-cp311-win_amd64.whl → 2025.4.22__cp311-cp311-win_amd64.whl - Mend

torchrl-nightly 2025.4.20__cp311-cp311-win_amd64.whl → 2025.4.22__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

torchrl/_torchrl.cp311-win_amd64.pyd CHANGED Viewed

Binary file

torchrl/envs/transforms/llm.py CHANGED Viewed

@@ -765,7 +765,7 @@ class KLRewardTransform(Transform):
         kl = curr_log_prob - log_prob
         if reward is None:
             reward = 0
-        next_tensordict.set(self.out_keys[0], reward + self.coef * kl)
+        next_tensordict.set(self.out_keys[0], reward - self.coef * kl)
         return next_tensordict
     def forward(self, tensordict: TensorDictBase) -> TensorDictBase:

torchrl/modules/tensordict_module/exploration.py CHANGED Viewed

@@ -159,23 +159,29 @@ class EGreedyModule(TensorDictModuleBase):
                 action_tensordict = tensordict
                 action_key = self.action_key
-            out = action_tensordict.get(action_key)
+            action = action_tensordict.get(action_key)
             eps = self.eps
-            cond = torch.rand(action_tensordict.shape, device=out.device) < eps
+            device = eps.device
+            action_device = action.device
+            if action_device is not None and action_device != device:
+                raise RuntimeError(
+                    f"Expected action and e-greedy module to be on the same device, but got {action.device=} and e-greedy device={device}."
+                )
+            cond = torch.rand(action_tensordict.shape, device=device) < eps
             # cond = torch.zeros(action_tensordict.shape, device=out.device, dtype=torch.bool).bernoulli_(eps)
-            cond = expand_as_right(cond, out)
+            cond = expand_as_right(cond, action)
             spec = self.spec
             if spec is not None:
                 if isinstance(spec, Composite):
                     spec = spec[self.action_key]
-                if spec.shape != out.shape:
+                if spec.shape != action.shape:
                     # In batched envs if the spec is passed unbatched, the rand() will not
                     # cover all batched dims
                     if (
                         not len(spec.shape)
-                        or out.shape[-len(spec.shape) :] == spec.shape
+                        or action.shape[-len(spec.shape) :] == spec.shape
                     ):
-                        spec = spec.expand(out.shape)
+                        spec = spec.expand(action.shape)
                     else:
                         raise ValueError(
                             "Action spec shape does not match the action shape"
@@ -188,12 +194,12 @@ class EGreedyModule(TensorDictModuleBase):
                         )
                     spec.update_mask(action_mask)
                 r = spec.rand()
-                if r.device != out.device:
-                    r = r.to(out.device)
-                out = torch.where(cond, r, out)
+                if r.device != device:
+                    r = r.to(device)
+                action = torch.where(cond, r, action)
             else:
                 raise RuntimeError("spec must be provided to the exploration wrapper.")
-            action_tensordict.set(action_key, out)
+            action_tensordict.set(action_key, action)
         return tensordict

torchrl/version.py CHANGED Viewed

@@ -1,2 +1,2 @@
-__version__ = '2025.4.20'
-git_version = '3a9f244dea03f117c9e97b49a82c23894ebb3e34'
+__version__ = '2025.4.22'
+git_version = '382430db3c457312366fce4ea42330a656337419'

{torchrl_nightly-2025.4.20.dist-info → torchrl_nightly-2025.4.22.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: torchrl-nightly
-Version: 2025.4.20
+Version: 2025.4.22
 Home-page: https://github.com/pytorch/rl
 Author: torchrl contributors
 Author-email: vmoens@fb.com

{torchrl_nightly-2025.4.20.dist-info → torchrl_nightly-2025.4.22.dist-info}/RECORD RENAMED Viewed

@@ -3,9 +3,9 @@ build_tools/setup_helpers/__init__.py,sha256=l9zlK7Nm5bT7P_onQx-hZeIGzKKyCFm1PFk
 build_tools/setup_helpers/extension.py,sha256=ihV8jz8kqOvpqzuD006XqF1oNX5ukKGlwIOJRb1Vd-o,6075
 torchrl/__init__.py,sha256=jm6WKJgsKzm6zSTABU4LfHfwSxL47cdIHOpvAsSJLa0,2976
 torchrl/_extension.py,sha256=x6Nqj2brF3VhlEwxmNA2fYbmpxq1HHGrHMnP0YnQwdc,2412
-torchrl/_torchrl.cp311-win_amd64.pyd,sha256=No6pS8v9VSjBSfdugbr4HanLkt1gYJFFx61XOr2araQ,454656
+torchrl/_torchrl.cp311-win_amd64.pyd,sha256=lrWSY3skCpa25hPVGF9wDlgA1Uo-w2JlFz2WQu5Cagg,454656
 torchrl/_utils.py,sha256=pCDGUvEmx-z8Ksnu5vfAHrPmqY0W7PzCqEKU7yLVxs0,38436
-torchrl/version.py,sha256=h1Q67zA6LJaBaHpY5XdPo8y4doNJUj8af5IKbAk3f2U,85
+torchrl/version.py,sha256=GQsx-gRVU7gZZCedJSS2cyF3-S8CFcBKQyk6YlNlRIE,85
 torchrl/collectors/__init__.py,sha256=A76nzBSeZT4QmLidVT8IrZ41utvdMFvSc0kOPYgiiPU,904
 torchrl/collectors/collectors.py,sha256=_3a9aHW0tXL5CZVOHaIDx5pFTpZrTQgSZ4BKh4Y6Anw,167253
 torchrl/collectors/llm.py,sha256=7SY9EYrNb8zhYZKIQtOjAOk8VMen-2zeG5wIN-45gVc,16796
@@ -95,7 +95,7 @@ torchrl/envs/model_based/dreamer.py,sha256=ZTT5wCln-YvN2ZL4dCbIEXXwskJEv3M2Mthmd
 torchrl/envs/transforms/__init__.py,sha256=j1iMhwoTS4-6wTGMeQwjDPLZcmCebLyaFpOSD3oVn8g,3374
 torchrl/envs/transforms/functional.py,sha256=U8eTymhG_88oqPHdlQhJTsNHyY0fk9ouIMzr1OSgeyM,1486
 torchrl/envs/transforms/gym_transforms.py,sha256=MS-PpSLLzdUFcvlF-yQkepwyBnmi7m9ivrDb2wSsVjM,10085
-torchrl/envs/transforms/llm.py,sha256=6hEotgHsLwELzjJ-jovFGiJQmi49IgWo5fUO8GOBbzA,38857
+torchrl/envs/transforms/llm.py,sha256=ovwDOcHekQDNu98yBiLoPFoO2MGlKTuT03SblbFgDNU,38857
 torchrl/envs/transforms/r3m.py,sha256=uC1jhCytTvYIjfiPExS9-MgSg0C7aw7r6AI0rba0bxo,13848
 torchrl/envs/transforms/rb_transforms.py,sha256=66zBM9RL_yYzIb4IKbLw9nDGtl_rHdMcOBn8fY7-1KY,7662
 torchrl/envs/transforms/rlhf.py,sha256=6PUb9SvF2cvyHUD_DmxQ4UM35VJ87qHit8PFHLWwLAk,653
@@ -131,7 +131,7 @@ torchrl/modules/planners/mppi.py,sha256=LM7wEsFBipednMcVlBls-vMW_aPHVh1cXDeGHmZm
 torchrl/modules/tensordict_module/__init__.py,sha256=iTz8iCBmxt661GrGJRBfw4tBoTuiSIQLiQxMZCszf24,2383
 torchrl/modules/tensordict_module/actors.py,sha256=bo-C4zQz2uKMQrCOST3Sgym_QDNMFhI1GHMKZF7obzQ,111160
 torchrl/modules/tensordict_module/common.py,sha256=k_Ic-HI0KUI1_kEisugJPnY7MgXKyHgmfpUofYiD1Oc,22235
-torchrl/modules/tensordict_module/exploration.py,sha256=UAf6uQaBla-ryzBDKIKdDy063sXy7vKhny1d7_FuA8E,29942
+torchrl/modules/tensordict_module/exploration.py,sha256=6OpcmmDaKIcL3ktD1bJsqfKJQQHfLnGAE7VNyuCpvM8,30298
 torchrl/modules/tensordict_module/probabilistic.py,sha256=ocx_QN7s0u89Gsn2YAzs21dz02TwJ8VwMI7Nn8X5zrM,16703
 torchrl/modules/tensordict_module/rnn.py,sha256=c2mw6BDnh93bDDx062GX3uesIfctymrJjtLWkBO7Vgs,71635
 torchrl/modules/tensordict_module/sequence.py,sha256=UMyPgl1XhgbL70orZISjM1spQ1NT1bejNpATAlODcu8,6143
@@ -184,8 +184,8 @@ torchrl/trainers/helpers/losses.py,sha256=7ANhLBKMEGhEI5erxggp8UJg8n_IRXHC1V9t6O
 torchrl/trainers/helpers/models.py,sha256=JkIr28JiVAtAago0n1bSBcWl_oxr2XJcFo5WxNugi1E,22463
 torchrl/trainers/helpers/replay_buffer.py,sha256=RaZqXnHimmadiibvDBcLbtIhpPaVMTPhYMOBvX4v3CA,2060
 torchrl/trainers/helpers/trainers.py,sha256=hB1FtHtP-S0PBQ4LF6WPy37caaLpacyaLThj1BNl5Ho,12372
-torchrl_nightly-2025.4.20.dist-info/LICENSE,sha256=PGO-oZsq4EzhE1-WQS2xGiEF3UCVb9YawfQ09cIMV_8,1119
-torchrl_nightly-2025.4.20.dist-info/METADATA,sha256=IkuffiNOSxjaL_CQ0pWS3bZ1LnOjqUtG_EQAQrYiGZ8,40854
-torchrl_nightly-2025.4.20.dist-info/WHEEL,sha256=y4n9_669c4ZQLyT56MHjc_JUbnwtaZfMVMycweN557o,102
-torchrl_nightly-2025.4.20.dist-info/top_level.txt,sha256=JeTJ1jV7QJwLcUS1nr21aPn_wb-XlAZ9c-z_EH472JA,20
-torchrl_nightly-2025.4.20.dist-info/RECORD,,
+torchrl_nightly-2025.4.22.dist-info/LICENSE,sha256=PGO-oZsq4EzhE1-WQS2xGiEF3UCVb9YawfQ09cIMV_8,1119
+torchrl_nightly-2025.4.22.dist-info/METADATA,sha256=2pxCJVBjjNzIwPjpIgIT1eA96xiS229JZdGA3dYq4DI,40854
+torchrl_nightly-2025.4.22.dist-info/WHEEL,sha256=y4n9_669c4ZQLyT56MHjc_JUbnwtaZfMVMycweN557o,102
+torchrl_nightly-2025.4.22.dist-info/top_level.txt,sha256=JeTJ1jV7QJwLcUS1nr21aPn_wb-XlAZ9c-z_EH472JA,20
+torchrl_nightly-2025.4.22.dist-info/RECORD,,

{torchrl_nightly-2025.4.20.dist-info → torchrl_nightly-2025.4.22.dist-info}/LICENSE RENAMED Viewed

File without changes

{torchrl_nightly-2025.4.20.dist-info → torchrl_nightly-2025.4.22.dist-info}/WHEEL RENAMED Viewed

File without changes

{torchrl_nightly-2025.4.20.dist-info → torchrl_nightly-2025.4.22.dist-info}/top_level.txt RENAMED Viewed

File without changes