torchrl 0.11.0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (394)
  1. benchmarks/benchmark_batched_envs.py +104 -0
  2. benchmarks/conftest.py +91 -0
  3. benchmarks/ecosystem/gym_env_throughput.py +321 -0
  4. benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
  5. benchmarks/requirements.txt +7 -0
  6. benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
  7. benchmarks/test_collectors_benchmark.py +240 -0
  8. benchmarks/test_compressed_storage_benchmark.py +145 -0
  9. benchmarks/test_envs_benchmark.py +133 -0
  10. benchmarks/test_llm.py +101 -0
  11. benchmarks/test_non_tensor_env_benchmark.py +70 -0
  12. benchmarks/test_objectives_benchmarks.py +1199 -0
  13. benchmarks/test_replaybuffer_benchmark.py +254 -0
  14. sota-check/README.md +35 -0
  15. sota-implementations/README.md +142 -0
  16. sota-implementations/a2c/README.md +39 -0
  17. sota-implementations/a2c/a2c_atari.py +291 -0
  18. sota-implementations/a2c/a2c_mujoco.py +273 -0
  19. sota-implementations/a2c/utils_atari.py +240 -0
  20. sota-implementations/a2c/utils_mujoco.py +160 -0
  21. sota-implementations/bandits/README.md +7 -0
  22. sota-implementations/bandits/dqn.py +126 -0
  23. sota-implementations/cql/cql_offline.py +198 -0
  24. sota-implementations/cql/cql_online.py +249 -0
  25. sota-implementations/cql/discrete_cql_offline.py +180 -0
  26. sota-implementations/cql/discrete_cql_online.py +227 -0
  27. sota-implementations/cql/utils.py +471 -0
  28. sota-implementations/crossq/crossq.py +271 -0
  29. sota-implementations/crossq/utils.py +320 -0
  30. sota-implementations/ddpg/ddpg.py +231 -0
  31. sota-implementations/ddpg/utils.py +325 -0
  32. sota-implementations/decision_transformer/dt.py +163 -0
  33. sota-implementations/decision_transformer/lamb.py +167 -0
  34. sota-implementations/decision_transformer/online_dt.py +178 -0
  35. sota-implementations/decision_transformer/utils.py +562 -0
  36. sota-implementations/discrete_sac/discrete_sac.py +243 -0
  37. sota-implementations/discrete_sac/utils.py +324 -0
  38. sota-implementations/dqn/README.md +30 -0
  39. sota-implementations/dqn/dqn_atari.py +272 -0
  40. sota-implementations/dqn/dqn_cartpole.py +236 -0
  41. sota-implementations/dqn/utils_atari.py +132 -0
  42. sota-implementations/dqn/utils_cartpole.py +90 -0
  43. sota-implementations/dreamer/README.md +129 -0
  44. sota-implementations/dreamer/dreamer.py +586 -0
  45. sota-implementations/dreamer/dreamer_utils.py +1107 -0
  46. sota-implementations/expert-iteration/README.md +352 -0
  47. sota-implementations/expert-iteration/ei_utils.py +770 -0
  48. sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
  49. sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
  50. sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
  51. sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
  52. sota-implementations/gail/gail.py +327 -0
  53. sota-implementations/gail/gail_utils.py +68 -0
  54. sota-implementations/gail/ppo_utils.py +157 -0
  55. sota-implementations/grpo/README.md +273 -0
  56. sota-implementations/grpo/grpo-async.py +437 -0
  57. sota-implementations/grpo/grpo-sync.py +435 -0
  58. sota-implementations/grpo/grpo_utils.py +843 -0
  59. sota-implementations/grpo/requirements_gsm8k.txt +11 -0
  60. sota-implementations/grpo/requirements_ifeval.txt +16 -0
  61. sota-implementations/impala/README.md +33 -0
  62. sota-implementations/impala/impala_multi_node_ray.py +292 -0
  63. sota-implementations/impala/impala_multi_node_submitit.py +284 -0
  64. sota-implementations/impala/impala_single_node.py +261 -0
  65. sota-implementations/impala/utils.py +184 -0
  66. sota-implementations/iql/discrete_iql.py +230 -0
  67. sota-implementations/iql/iql_offline.py +164 -0
  68. sota-implementations/iql/iql_online.py +225 -0
  69. sota-implementations/iql/utils.py +437 -0
  70. sota-implementations/multiagent/README.md +74 -0
  71. sota-implementations/multiagent/iql.py +237 -0
  72. sota-implementations/multiagent/maddpg_iddpg.py +266 -0
  73. sota-implementations/multiagent/mappo_ippo.py +267 -0
  74. sota-implementations/multiagent/qmix_vdn.py +271 -0
  75. sota-implementations/multiagent/sac.py +337 -0
  76. sota-implementations/multiagent/utils/__init__.py +4 -0
  77. sota-implementations/multiagent/utils/logging.py +151 -0
  78. sota-implementations/multiagent/utils/utils.py +43 -0
  79. sota-implementations/ppo/README.md +29 -0
  80. sota-implementations/ppo/ppo_atari.py +305 -0
  81. sota-implementations/ppo/ppo_mujoco.py +293 -0
  82. sota-implementations/ppo/utils_atari.py +238 -0
  83. sota-implementations/ppo/utils_mujoco.py +152 -0
  84. sota-implementations/ppo_trainer/train.py +21 -0
  85. sota-implementations/redq/README.md +7 -0
  86. sota-implementations/redq/redq.py +199 -0
  87. sota-implementations/redq/utils.py +1060 -0
  88. sota-implementations/sac/sac-async.py +266 -0
  89. sota-implementations/sac/sac.py +239 -0
  90. sota-implementations/sac/utils.py +381 -0
  91. sota-implementations/sac_trainer/train.py +16 -0
  92. sota-implementations/td3/td3.py +254 -0
  93. sota-implementations/td3/utils.py +319 -0
  94. sota-implementations/td3_bc/td3_bc.py +177 -0
  95. sota-implementations/td3_bc/utils.py +251 -0
  96. torchrl/__init__.py +144 -0
  97. torchrl/_extension.py +74 -0
  98. torchrl/_torchrl.cp314-win_amd64.pyd +0 -0
  99. torchrl/_utils.py +1431 -0
  100. torchrl/collectors/__init__.py +48 -0
  101. torchrl/collectors/_base.py +1058 -0
  102. torchrl/collectors/_constants.py +88 -0
  103. torchrl/collectors/_multi_async.py +324 -0
  104. torchrl/collectors/_multi_base.py +1805 -0
  105. torchrl/collectors/_multi_sync.py +464 -0
  106. torchrl/collectors/_runner.py +581 -0
  107. torchrl/collectors/_single.py +2009 -0
  108. torchrl/collectors/_single_async.py +259 -0
  109. torchrl/collectors/collectors.py +62 -0
  110. torchrl/collectors/distributed/__init__.py +32 -0
  111. torchrl/collectors/distributed/default_configs.py +133 -0
  112. torchrl/collectors/distributed/generic.py +1306 -0
  113. torchrl/collectors/distributed/ray.py +1092 -0
  114. torchrl/collectors/distributed/rpc.py +1006 -0
  115. torchrl/collectors/distributed/sync.py +731 -0
  116. torchrl/collectors/distributed/utils.py +160 -0
  117. torchrl/collectors/llm/__init__.py +10 -0
  118. torchrl/collectors/llm/base.py +494 -0
  119. torchrl/collectors/llm/ray_collector.py +275 -0
  120. torchrl/collectors/llm/utils.py +36 -0
  121. torchrl/collectors/llm/weight_update/__init__.py +10 -0
  122. torchrl/collectors/llm/weight_update/vllm.py +348 -0
  123. torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
  124. torchrl/collectors/utils.py +433 -0
  125. torchrl/collectors/weight_update.py +591 -0
  126. torchrl/csrc/numpy_utils.h +38 -0
  127. torchrl/csrc/pybind.cpp +27 -0
  128. torchrl/csrc/segment_tree.h +458 -0
  129. torchrl/csrc/torch_utils.h +34 -0
  130. torchrl/csrc/utils.cpp +48 -0
  131. torchrl/csrc/utils.h +31 -0
  132. torchrl/data/__init__.py +187 -0
  133. torchrl/data/datasets/__init__.py +58 -0
  134. torchrl/data/datasets/atari_dqn.py +878 -0
  135. torchrl/data/datasets/common.py +281 -0
  136. torchrl/data/datasets/d4rl.py +489 -0
  137. torchrl/data/datasets/d4rl_infos.py +187 -0
  138. torchrl/data/datasets/gen_dgrl.py +375 -0
  139. torchrl/data/datasets/minari_data.py +643 -0
  140. torchrl/data/datasets/openml.py +177 -0
  141. torchrl/data/datasets/openx.py +798 -0
  142. torchrl/data/datasets/roboset.py +363 -0
  143. torchrl/data/datasets/utils.py +11 -0
  144. torchrl/data/datasets/vd4rl.py +432 -0
  145. torchrl/data/llm/__init__.py +34 -0
  146. torchrl/data/llm/dataset.py +491 -0
  147. torchrl/data/llm/history.py +1378 -0
  148. torchrl/data/llm/prompt.py +198 -0
  149. torchrl/data/llm/reward.py +225 -0
  150. torchrl/data/llm/topk.py +186 -0
  151. torchrl/data/llm/utils.py +543 -0
  152. torchrl/data/map/__init__.py +21 -0
  153. torchrl/data/map/hash.py +185 -0
  154. torchrl/data/map/query.py +204 -0
  155. torchrl/data/map/tdstorage.py +363 -0
  156. torchrl/data/map/tree.py +1434 -0
  157. torchrl/data/map/utils.py +103 -0
  158. torchrl/data/postprocs/__init__.py +8 -0
  159. torchrl/data/postprocs/postprocs.py +391 -0
  160. torchrl/data/replay_buffers/__init__.py +99 -0
  161. torchrl/data/replay_buffers/checkpointers.py +622 -0
  162. torchrl/data/replay_buffers/ray_buffer.py +292 -0
  163. torchrl/data/replay_buffers/replay_buffers.py +2376 -0
  164. torchrl/data/replay_buffers/samplers.py +2578 -0
  165. torchrl/data/replay_buffers/scheduler.py +265 -0
  166. torchrl/data/replay_buffers/storages.py +2412 -0
  167. torchrl/data/replay_buffers/utils.py +1042 -0
  168. torchrl/data/replay_buffers/writers.py +781 -0
  169. torchrl/data/tensor_specs.py +7101 -0
  170. torchrl/data/utils.py +334 -0
  171. torchrl/envs/__init__.py +265 -0
  172. torchrl/envs/async_envs.py +1105 -0
  173. torchrl/envs/batched_envs.py +3093 -0
  174. torchrl/envs/common.py +4241 -0
  175. torchrl/envs/custom/__init__.py +11 -0
  176. torchrl/envs/custom/chess.py +617 -0
  177. torchrl/envs/custom/llm.py +214 -0
  178. torchrl/envs/custom/pendulum.py +401 -0
  179. torchrl/envs/custom/san_moves.txt +29274 -0
  180. torchrl/envs/custom/tictactoeenv.py +288 -0
  181. torchrl/envs/env_creator.py +263 -0
  182. torchrl/envs/gym_like.py +752 -0
  183. torchrl/envs/libs/__init__.py +68 -0
  184. torchrl/envs/libs/_gym_utils.py +326 -0
  185. torchrl/envs/libs/brax.py +846 -0
  186. torchrl/envs/libs/dm_control.py +544 -0
  187. torchrl/envs/libs/envpool.py +447 -0
  188. torchrl/envs/libs/gym.py +2239 -0
  189. torchrl/envs/libs/habitat.py +138 -0
  190. torchrl/envs/libs/isaac_lab.py +87 -0
  191. torchrl/envs/libs/isaacgym.py +203 -0
  192. torchrl/envs/libs/jax_utils.py +166 -0
  193. torchrl/envs/libs/jumanji.py +963 -0
  194. torchrl/envs/libs/meltingpot.py +599 -0
  195. torchrl/envs/libs/openml.py +153 -0
  196. torchrl/envs/libs/openspiel.py +652 -0
  197. torchrl/envs/libs/pettingzoo.py +1042 -0
  198. torchrl/envs/libs/procgen.py +351 -0
  199. torchrl/envs/libs/robohive.py +429 -0
  200. torchrl/envs/libs/smacv2.py +645 -0
  201. torchrl/envs/libs/unity_mlagents.py +891 -0
  202. torchrl/envs/libs/utils.py +147 -0
  203. torchrl/envs/libs/vmas.py +813 -0
  204. torchrl/envs/llm/__init__.py +63 -0
  205. torchrl/envs/llm/chat.py +730 -0
  206. torchrl/envs/llm/datasets/README.md +4 -0
  207. torchrl/envs/llm/datasets/__init__.py +17 -0
  208. torchrl/envs/llm/datasets/gsm8k.py +353 -0
  209. torchrl/envs/llm/datasets/ifeval.py +274 -0
  210. torchrl/envs/llm/envs.py +789 -0
  211. torchrl/envs/llm/libs/README.md +3 -0
  212. torchrl/envs/llm/libs/__init__.py +8 -0
  213. torchrl/envs/llm/libs/mlgym.py +869 -0
  214. torchrl/envs/llm/reward/__init__.py +10 -0
  215. torchrl/envs/llm/reward/gsm8k.py +324 -0
  216. torchrl/envs/llm/reward/ifeval/README.md +13 -0
  217. torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
  218. torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
  219. torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
  220. torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
  221. torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
  222. torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
  223. torchrl/envs/llm/transforms/__init__.py +55 -0
  224. torchrl/envs/llm/transforms/browser.py +292 -0
  225. torchrl/envs/llm/transforms/dataloading.py +859 -0
  226. torchrl/envs/llm/transforms/format.py +73 -0
  227. torchrl/envs/llm/transforms/kl.py +1544 -0
  228. torchrl/envs/llm/transforms/policy_version.py +189 -0
  229. torchrl/envs/llm/transforms/reason.py +323 -0
  230. torchrl/envs/llm/transforms/tokenizer.py +321 -0
  231. torchrl/envs/llm/transforms/tools.py +1955 -0
  232. torchrl/envs/model_based/__init__.py +9 -0
  233. torchrl/envs/model_based/common.py +180 -0
  234. torchrl/envs/model_based/dreamer.py +112 -0
  235. torchrl/envs/transforms/__init__.py +147 -0
  236. torchrl/envs/transforms/functional.py +48 -0
  237. torchrl/envs/transforms/gym_transforms.py +203 -0
  238. torchrl/envs/transforms/module.py +341 -0
  239. torchrl/envs/transforms/r3m.py +372 -0
  240. torchrl/envs/transforms/ray_service.py +663 -0
  241. torchrl/envs/transforms/rb_transforms.py +214 -0
  242. torchrl/envs/transforms/transforms.py +11835 -0
  243. torchrl/envs/transforms/utils.py +94 -0
  244. torchrl/envs/transforms/vc1.py +307 -0
  245. torchrl/envs/transforms/vecnorm.py +845 -0
  246. torchrl/envs/transforms/vip.py +407 -0
  247. torchrl/envs/utils.py +1718 -0
  248. torchrl/envs/vec_envs.py +11 -0
  249. torchrl/modules/__init__.py +206 -0
  250. torchrl/modules/distributions/__init__.py +73 -0
  251. torchrl/modules/distributions/continuous.py +830 -0
  252. torchrl/modules/distributions/discrete.py +908 -0
  253. torchrl/modules/distributions/truncated_normal.py +187 -0
  254. torchrl/modules/distributions/utils.py +233 -0
  255. torchrl/modules/llm/__init__.py +62 -0
  256. torchrl/modules/llm/backends/__init__.py +65 -0
  257. torchrl/modules/llm/backends/vllm/__init__.py +94 -0
  258. torchrl/modules/llm/backends/vllm/_models.py +46 -0
  259. torchrl/modules/llm/backends/vllm/base.py +72 -0
  260. torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
  261. torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
  262. torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
  263. torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
  264. torchrl/modules/llm/policies/__init__.py +28 -0
  265. torchrl/modules/llm/policies/common.py +1809 -0
  266. torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
  267. torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
  268. torchrl/modules/llm/utils.py +23 -0
  269. torchrl/modules/mcts/__init__.py +21 -0
  270. torchrl/modules/mcts/scores.py +579 -0
  271. torchrl/modules/models/__init__.py +86 -0
  272. torchrl/modules/models/batchrenorm.py +119 -0
  273. torchrl/modules/models/decision_transformer.py +179 -0
  274. torchrl/modules/models/exploration.py +731 -0
  275. torchrl/modules/models/llm.py +156 -0
  276. torchrl/modules/models/model_based.py +596 -0
  277. torchrl/modules/models/models.py +1712 -0
  278. torchrl/modules/models/multiagent.py +1067 -0
  279. torchrl/modules/models/recipes/impala.py +185 -0
  280. torchrl/modules/models/utils.py +162 -0
  281. torchrl/modules/planners/__init__.py +10 -0
  282. torchrl/modules/planners/cem.py +228 -0
  283. torchrl/modules/planners/common.py +73 -0
  284. torchrl/modules/planners/mppi.py +265 -0
  285. torchrl/modules/tensordict_module/__init__.py +89 -0
  286. torchrl/modules/tensordict_module/actors.py +2457 -0
  287. torchrl/modules/tensordict_module/common.py +529 -0
  288. torchrl/modules/tensordict_module/exploration.py +814 -0
  289. torchrl/modules/tensordict_module/probabilistic.py +321 -0
  290. torchrl/modules/tensordict_module/rnn.py +1639 -0
  291. torchrl/modules/tensordict_module/sequence.py +132 -0
  292. torchrl/modules/tensordict_module/world_models.py +34 -0
  293. torchrl/modules/utils/__init__.py +38 -0
  294. torchrl/modules/utils/mappings.py +9 -0
  295. torchrl/modules/utils/utils.py +89 -0
  296. torchrl/objectives/__init__.py +78 -0
  297. torchrl/objectives/a2c.py +659 -0
  298. torchrl/objectives/common.py +753 -0
  299. torchrl/objectives/cql.py +1346 -0
  300. torchrl/objectives/crossq.py +710 -0
  301. torchrl/objectives/ddpg.py +453 -0
  302. torchrl/objectives/decision_transformer.py +371 -0
  303. torchrl/objectives/deprecated.py +516 -0
  304. torchrl/objectives/dqn.py +683 -0
  305. torchrl/objectives/dreamer.py +488 -0
  306. torchrl/objectives/functional.py +48 -0
  307. torchrl/objectives/gail.py +258 -0
  308. torchrl/objectives/iql.py +996 -0
  309. torchrl/objectives/llm/__init__.py +30 -0
  310. torchrl/objectives/llm/grpo.py +846 -0
  311. torchrl/objectives/llm/sft.py +482 -0
  312. torchrl/objectives/multiagent/__init__.py +8 -0
  313. torchrl/objectives/multiagent/qmixer.py +396 -0
  314. torchrl/objectives/ppo.py +1669 -0
  315. torchrl/objectives/redq.py +683 -0
  316. torchrl/objectives/reinforce.py +530 -0
  317. torchrl/objectives/sac.py +1580 -0
  318. torchrl/objectives/td3.py +570 -0
  319. torchrl/objectives/td3_bc.py +625 -0
  320. torchrl/objectives/utils.py +782 -0
  321. torchrl/objectives/value/__init__.py +28 -0
  322. torchrl/objectives/value/advantages.py +1956 -0
  323. torchrl/objectives/value/functional.py +1459 -0
  324. torchrl/objectives/value/utils.py +360 -0
  325. torchrl/record/__init__.py +17 -0
  326. torchrl/record/loggers/__init__.py +23 -0
  327. torchrl/record/loggers/common.py +48 -0
  328. torchrl/record/loggers/csv.py +226 -0
  329. torchrl/record/loggers/mlflow.py +142 -0
  330. torchrl/record/loggers/tensorboard.py +139 -0
  331. torchrl/record/loggers/trackio.py +163 -0
  332. torchrl/record/loggers/utils.py +78 -0
  333. torchrl/record/loggers/wandb.py +214 -0
  334. torchrl/record/recorder.py +554 -0
  335. torchrl/services/__init__.py +79 -0
  336. torchrl/services/base.py +109 -0
  337. torchrl/services/ray_service.py +453 -0
  338. torchrl/testing/__init__.py +107 -0
  339. torchrl/testing/assertions.py +179 -0
  340. torchrl/testing/dist_utils.py +122 -0
  341. torchrl/testing/env_creators.py +227 -0
  342. torchrl/testing/env_helper.py +35 -0
  343. torchrl/testing/gym_helpers.py +156 -0
  344. torchrl/testing/llm_mocks.py +119 -0
  345. torchrl/testing/mocking_classes.py +2720 -0
  346. torchrl/testing/modules.py +295 -0
  347. torchrl/testing/mp_helpers.py +15 -0
  348. torchrl/testing/ray_helpers.py +293 -0
  349. torchrl/testing/utils.py +190 -0
  350. torchrl/trainers/__init__.py +42 -0
  351. torchrl/trainers/algorithms/__init__.py +11 -0
  352. torchrl/trainers/algorithms/configs/__init__.py +705 -0
  353. torchrl/trainers/algorithms/configs/collectors.py +216 -0
  354. torchrl/trainers/algorithms/configs/common.py +41 -0
  355. torchrl/trainers/algorithms/configs/data.py +308 -0
  356. torchrl/trainers/algorithms/configs/envs.py +104 -0
  357. torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
  358. torchrl/trainers/algorithms/configs/logging.py +80 -0
  359. torchrl/trainers/algorithms/configs/modules.py +570 -0
  360. torchrl/trainers/algorithms/configs/objectives.py +177 -0
  361. torchrl/trainers/algorithms/configs/trainers.py +340 -0
  362. torchrl/trainers/algorithms/configs/transforms.py +955 -0
  363. torchrl/trainers/algorithms/configs/utils.py +252 -0
  364. torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
  365. torchrl/trainers/algorithms/configs/weight_update.py +159 -0
  366. torchrl/trainers/algorithms/ppo.py +373 -0
  367. torchrl/trainers/algorithms/sac.py +308 -0
  368. torchrl/trainers/helpers/__init__.py +40 -0
  369. torchrl/trainers/helpers/collectors.py +416 -0
  370. torchrl/trainers/helpers/envs.py +573 -0
  371. torchrl/trainers/helpers/logger.py +33 -0
  372. torchrl/trainers/helpers/losses.py +132 -0
  373. torchrl/trainers/helpers/models.py +658 -0
  374. torchrl/trainers/helpers/replay_buffer.py +59 -0
  375. torchrl/trainers/helpers/trainers.py +301 -0
  376. torchrl/trainers/trainers.py +2052 -0
  377. torchrl/weight_update/__init__.py +33 -0
  378. torchrl/weight_update/_distributed.py +749 -0
  379. torchrl/weight_update/_mp.py +624 -0
  380. torchrl/weight_update/_noupdate.py +102 -0
  381. torchrl/weight_update/_ray.py +1032 -0
  382. torchrl/weight_update/_rpc.py +284 -0
  383. torchrl/weight_update/_shared.py +891 -0
  384. torchrl/weight_update/llm/__init__.py +32 -0
  385. torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
  386. torchrl/weight_update/llm/vllm_nccl.py +710 -0
  387. torchrl/weight_update/utils.py +73 -0
  388. torchrl/weight_update/weight_sync_schemes.py +1244 -0
  389. torchrl-0.11.0.dist-info/LICENSE +21 -0
  390. torchrl-0.11.0.dist-info/METADATA +1307 -0
  391. torchrl-0.11.0.dist-info/RECORD +394 -0
  392. torchrl-0.11.0.dist-info/WHEEL +5 -0
  393. torchrl-0.11.0.dist-info/entry_points.txt +2 -0
  394. torchrl-0.11.0.dist-info/top_level.txt +7 -0
@@ -0,0 +1,203 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ """Gym-specific transforms."""
7
+
8
+ from __future__ import annotations
9
+
10
+ import warnings
11
+
12
+ import torch
13
+ from tensordict import TensorDictBase
14
+ from tensordict.utils import expand_as_right, NestedKey
15
+ from torchrl.data.tensor_specs import Unbounded
16
+
17
+ from torchrl.envs.transforms.transforms import FORWARD_NOT_IMPLEMENTED, Transform
18
+
19
+
20
class EndOfLifeTransform(Transform):
    """Registers the end-of-life signal from a Gym env with a `lives` method.

    Proposed by DeepMind for the DQN and co. It helps value estimation.

    Args:
        eol_key (NestedKey, optional): the key where the end-of-life signal should
            be written. Defaults to ``"end-of-life"``.
        lives_key (NestedKey, optional): the key where the current number of lives
            should be written. Defaults to ``"lives"``.
        done_key (NestedKey, optional): a "done" key in the parent env done_spec,
            where the done value can be retrieved. This key must be unique and its
            shape must match the shape of the end-of-life entry. Defaults to ``"done"``.
        eol_attribute (str, optional): the location of the "lives" in the gym env.
            Defaults to ``"unwrapped.ale.lives"``. Supported attribute types are
            integer/array-like objects or callables that return these values.

    .. note::
        This transform should be used with gym envs that have a ``env.unwrapped.ale.lives``.

    Examples:
        >>> from torchrl.envs.libs.gym import GymEnv
        >>> from torchrl.envs.transforms.transforms import TransformedEnv
        >>> env = GymEnv("ALE/Breakout-v5")
        >>> env.rollout(100)
        TensorDict(
            fields={
                action: Tensor(shape=torch.Size([100, 4]), device=cpu, dtype=torch.int64, is_shared=False),
                done: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                next: TensorDict(
                    fields={
                        done: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                        pixels: Tensor(shape=torch.Size([100, 210, 160, 3]), device=cpu, dtype=torch.uint8, is_shared=False),
                        reward: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.float32, is_shared=False),
                        terminated: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                        truncated: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
                    batch_size=torch.Size([100]),
                    device=cpu,
                    is_shared=False),
                pixels: Tensor(shape=torch.Size([100, 210, 160, 3]), device=cpu, dtype=torch.uint8, is_shared=False),
                terminated: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                truncated: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
            batch_size=torch.Size([100]),
            device=cpu,
            is_shared=False)
        >>> eol_transform = EndOfLifeTransform()
        >>> env = TransformedEnv(env, eol_transform)
        >>> env.rollout(100)
        TensorDict(
            fields={
                action: Tensor(shape=torch.Size([100, 4]), device=cpu, dtype=torch.int64, is_shared=False),
                done: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                eol: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                lives: Tensor(shape=torch.Size([100]), device=cpu, dtype=torch.int64, is_shared=False),
                next: TensorDict(
                    fields={
                        done: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                        end-of-life: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                        lives: Tensor(shape=torch.Size([100]), device=cpu, dtype=torch.int64, is_shared=False),
                        pixels: Tensor(shape=torch.Size([100, 210, 160, 3]), device=cpu, dtype=torch.uint8, is_shared=False),
                        reward: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.float32, is_shared=False),
                        terminated: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                        truncated: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
                    batch_size=torch.Size([100]),
                    device=cpu,
                    is_shared=False),
                pixels: Tensor(shape=torch.Size([100, 210, 160, 3]), device=cpu, dtype=torch.uint8, is_shared=False),
                terminated: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                truncated: Tensor(shape=torch.Size([100, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
            batch_size=torch.Size([100]),
            device=cpu,
            is_shared=False)

    The typical usage of this transform is to replace the "done" state by "end-of-life"
    within the loss module. The end-of-life signal isn't registered within the ``done_spec``
    because it should not instruct the env to reset.

    Examples:
        >>> from torchrl.objectives import DQNLoss
        >>> module = torch.nn.Identity() # used as a placeholder
        >>> loss = DQNLoss(module, action_space="categorical")
        >>> loss.set_keys(done="end-of-life", terminated="end-of-life")
        >>> # equivalently
        >>> eol_transform.register_keys(loss)
    """

    NO_PARENT_ERR = "The {} transform is being executed without a parent env. This is currently not supported."

    def __init__(
        self,
        eol_key: NestedKey = "end-of-life",
        lives_key: NestedKey = "lives",
        done_key: NestedKey = "done",
        eol_attribute="unwrapped.ale.lives",
    ):
        super().__init__(in_keys=[done_key], out_keys=[eol_key, lives_key])
        self.eol_key = eol_key
        self.lives_key = lives_key
        self.done_key = done_key
        # Stored as a path of attribute names ("unwrapped.ale.lives" ->
        # ["unwrapped", "ale", "lives"]) and resolved lazily in _get_lives.
        self.eol_attribute = eol_attribute.split(".")

    def _get_lives(self):
        """Resolves the current ``lives`` value on the base env via ``eol_attribute``.

        Returns either an integer/array-like value or, for list-of-envs setups
        (e.g. SerialEnv), a tensor of values — whatever the attribute chain yields.
        """
        from torchrl.envs.libs.gym import GymWrapper

        base_env = self.parent.base_env
        if not isinstance(base_env, GymWrapper):
            # Best-effort: non-gym envs may still expose the attribute chain,
            # so warn rather than raise.
            warnings.warn(
                f"The base_env is not a gym env. Compatibility of {type(self)} is not guaranteed with "
                f"environment types that do not inherit from GymWrapper.",
                category=UserWarning,
            )
        # getattr falls back on _env by default
        lives = getattr(base_env, self.eol_attribute[0])
        for att in self.eol_attribute[1:]:
            if isinstance(lives, list):
                # For SerialEnv (and who knows Parallel one day)
                lives = [getattr(_lives, att) for _lives in lives]
            else:
                lives = getattr(lives, att)
        # The terminal attribute may be a callable (e.g. ale.lives()) or a
        # list of callables (one per sub-env); call them to get the values.
        if callable(lives):
            lives = lives()
        elif isinstance(lives, list) and all(callable(_lives) for _lives in lives):
            lives = torch.as_tensor([_lives() for _lives in lives])
        return lives

    def _call(self, next_tensordict: TensorDictBase) -> TensorDictBase:
        # All the work happens in _step/_reset; _call is a deliberate no-op.
        return next_tensordict

    def _step(self, tensordict, next_tensordict):
        parent = self.parent
        if parent is None:
            raise RuntimeError(self.NO_PARENT_ERR.format(type(self)))

        lives = self._get_lives()
        # A life was lost if the previous "lives" count exceeds the current one.
        end_of_life = torch.as_tensor(
            tensordict.get(self.lives_key) > lives, device=self.parent.device
        )
        done = next_tensordict.get(self.done_key, None)  # TODO: None soon to be removed
        if done is None:
            # BUGFIX: the lookup above is on ``next_tensordict``, so report its
            # keys (the original message printed ``tensordict``'s keys, which
            # made the error misleading when diagnosing a wrong done_key).
            raise KeyError(
                f"The done value pointed by {self.done_key} cannot be found in tensordict with keys {next_tensordict.keys(True, True)}. "
                f"Make sure to pass the appropriate done_key to the {type(self)} transform."
            )
        # End-of-life is also raised whenever the env itself is done.
        end_of_life = expand_as_right(end_of_life, done) | done
        next_tensordict.set(self.eol_key, end_of_life)
        next_tensordict.set(self.lives_key, lives)
        return next_tensordict

    def _reset(self, tensordict, tensordict_reset):
        parent = self.parent
        if parent is None:
            raise RuntimeError(self.NO_PARENT_ERR.format(type(self)))
        lives = self._get_lives()
        # A fresh episode starts with no end-of-life signal; expand the scalar
        # False to the done entry's shape so specs line up.
        end_of_life = False
        tensordict_reset.set(
            self.eol_key,
            torch.as_tensor(end_of_life).expand(
                parent.full_done_spec[self.done_key].shape
            ),
        )
        tensordict_reset.set(self.lives_key, lives)
        return tensordict_reset

    def transform_observation_spec(self, observation_spec):
        # eol mirrors the done entry; lives is an unbounded int64 per batch element.
        full_done_spec = self.parent.output_spec["full_done_spec"]
        observation_spec[self.eol_key] = full_done_spec[self.done_key].clone()
        observation_spec[self.lives_key] = Unbounded(
            self.parent.batch_size,
            device=self.parent.device,
            dtype=torch.int64,
        )
        return observation_spec

    def register_keys(
        self, loss_or_advantage: torchrl.objectives.common.LossModule  # noqa
    ):
        """Registers the end-of-life key at appropriate places within the loss.

        Args:
            loss_or_advantage (torchrl.objectives.LossModule or torchrl.objectives.value.ValueEstimatorBase): a module to instruct what the end-of-life key is.

        """
        loss_or_advantage.set_keys(done=self.eol_key, terminated=self.eol_key)

    def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
        # This transform only makes sense inside an env pipeline, never as a
        # standalone module over collected data.
        raise RuntimeError(FORWARD_NOT_IMPLEMENTED.format(type(self)))
@@ -0,0 +1,341 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+ from __future__ import annotations
6
+
7
+ from collections.abc import Callable
8
+ from contextlib import nullcontext
9
+ from typing import overload, TYPE_CHECKING
10
+
11
+ import torch
12
+ from tensordict import TensorDictBase
13
+ from tensordict.nn import TensorDictModuleBase
14
+ from torchrl._utils import logger as torchrl_logger
15
+
16
+ from torchrl.data.tensor_specs import TensorSpec
17
+ from torchrl.envs.transforms.ray_service import _RayServiceMetaClass, RayTransform
18
+ from torchrl.envs.transforms.transforms import Transform
19
+
20
+ if TYPE_CHECKING:
21
+ from torchrl.weight_update import WeightSyncScheme
22
+
23
+ __all__ = ["ModuleTransform", "RayModuleTransform"]
24
+
25
+
26
class RayModuleTransform(RayTransform):
    """Ray-based ModuleTransform for distributed processing.

    This transform creates a Ray actor that wraps a ModuleTransform,
    allowing module execution in a separate Ray worker process.

    Args:
        weight_sync_scheme: Optional weight synchronization scheme for updating
            the module's weights from a parent collector. When provided, the scheme
            is initialized on the receiver side (the Ray actor) and can receive
            weight updates via torch.distributed.
        **kwargs: Additional arguments passed to RayTransform and ModuleTransform.

    Example:
        >>> from torchrl.weight_update import RayModuleTransformScheme
        >>> scheme = RayModuleTransformScheme()
        >>> transform = RayModuleTransform(module=my_module, weight_sync_scheme=scheme)
        >>> # The scheme can then be registered with a collector for weight updates
    """

    def __init__(self, *, weight_sync_scheme=None, **kwargs):
        self._weight_sync_scheme = weight_sync_scheme
        super().__init__(**kwargs)

        # After actor is created, initialize the scheme on the receiver side
        if weight_sync_scheme is not None:
            # Store transform reference in the scheme for sender initialization
            weight_sync_scheme._set_transform(self)

            weight_sync_scheme.init_on_sender()

            # Initialize receiver in the actor
            torchrl_logger.debug(
                "Setting up weight sync scheme on sender -- sender will do the remote call"
            )
            weight_sync_scheme.connect()

    @property
    def in_keys(self):
        # Delegate to the remote ModuleTransform actor.
        return self._ray.get(self._actor._getattr.remote("in_keys"))

    @property
    def out_keys(self):
        # Delegate to the remote ModuleTransform actor.
        return self._ray.get(self._actor._getattr.remote("out_keys"))

    def _create_actor(self, **kwargs):
        """Creates (and waits for) the Ray actor wrapping a ModuleTransform.

        Resource options (num_gpus/num_cpus) and the actor name are forwarded
        via ``.options(...)`` only when set, so Ray defaults apply otherwise.
        """
        import ray

        remote = self._ray.remote(ModuleTransform)
        ray_kwargs = {}
        num_gpus = self._num_gpus
        if num_gpus is not None:
            ray_kwargs["num_gpus"] = num_gpus
        num_cpus = self._num_cpus
        if num_cpus is not None:
            ray_kwargs["num_cpus"] = num_cpus
        actor_name = self._actor_name
        if actor_name is not None:
            ray_kwargs["name"] = actor_name
        if ray_kwargs:
            remote = remote.options(**ray_kwargs)
        actor = remote.remote(**kwargs)
        # wait till the actor is ready
        ray.get(actor._ready.remote())
        return actor

    @overload
    def update_weights(self, state_dict: dict[str, torch.Tensor]) -> None:
        ...

    @overload
    def update_weights(self, params: TensorDictBase) -> None:
        ...

    def update_weights(self, *args, **kwargs) -> None:
        """Pushes new weights to the remote module (blocking on the Ray call).

        Accepts either a TensorDict of params or a state_dict, positionally or
        by keyword, depending on ``self._update_weights_method``.

        Raises:
            ValueError: if the required argument is missing or the update
                method is unknown.
        """
        import ray

        if self._update_weights_method == "tensordict":
            # BUGFIX: kwargs.get("params", args[0]) evaluated args[0] eagerly,
            # so a keyword-only call (no positional args) raised IndexError and
            # was misreported as "params must be provided".
            if "params" in kwargs:
                td = kwargs["params"]
            elif args:
                td = args[0]
            else:
                raise ValueError("params must be provided")
            return ray.get(self._actor._update_weights_tensordict.remote(params=td))
        elif self._update_weights_method == "state_dict":
            if "state_dict" in kwargs:
                state_dict = kwargs["state_dict"]
            elif args:
                state_dict = args[0]
            else:
                raise ValueError("state_dict must be provided")
            return ray.get(
                self._actor._update_weights_state_dict.remote(state_dict=state_dict)
            )
        else:
            raise ValueError(
                f"Invalid update_weights_method: {self._update_weights_method}"
            )
121
+
122
+
123
class ModuleTransform(Transform, metaclass=_RayServiceMetaClass):
    """A transform that wraps a module.

    Keyword Args:
        module (TensorDictModuleBase): The module to wrap. Exclusive with `module_factory`. At least one of `module` or `module_factory` must be provided.
        module_factory (Callable[[], TensorDictModuleBase]): The factory to create the module. Exclusive with `module`. At least one of `module` or `module_factory` must be provided.
        no_grad (bool, optional): Whether to use gradient computation. Default is `False`.
        inverse (bool, optional): Whether to use the inverse of the module. Default is `False`.
        device (torch.device, optional): The device to use. Default is `None`.
        use_ray_service (bool, optional): Whether to use Ray service. Default is `False`.
        num_gpus (int, optional): The number of GPUs to use if using Ray. Default is `None`.
        num_cpus (int, optional): The number of CPUs to use if using Ray. Default is `None`.
        actor_name (str, optional): The name of the actor to use. Default is `None`. If an actor name is provided and
            an actor with this name already exists, the existing actor will be used.
        observation_spec_transform (TensorSpec or Callable[[TensorSpec], TensorSpec]): either a new spec for the observation
            after it has been transformed by the module, or a function that modifies the existing spec.
            Defaults to `None` (observation specs remain unchanged).
        done_spec_transform (TensorSpec or Callable[[TensorSpec], TensorSpec]): either a new spec for the done
            after it has been transformed by the module, or a function that modifies the existing spec.
            Defaults to `None` (done specs remain unchanged).
        reward_spec_transform (TensorSpec or Callable[[TensorSpec], TensorSpec]): either a new spec for the reward
            after it has been transformed by the module, or a function that modifies the existing spec.
            Defaults to `None` (reward specs remain unchanged).
        state_spec_transform (TensorSpec or Callable[[TensorSpec], TensorSpec]): either a new spec for the state
            after it has been transformed by the module, or a function that modifies the existing spec.
            Defaults to `None` (state specs remain unchanged).
    """

    _RayServiceClass = RayModuleTransform

    def __init__(
        self,
        *,
        module: TensorDictModuleBase | None = None,
        module_factory: Callable[[], TensorDictModuleBase] | None = None,
        no_grad: bool = False,
        inverse: bool = False,
        device: torch.device | None = None,
        use_ray_service: bool = False,  # noqa
        actor_name: str | None = None,  # noqa
        num_gpus: int | None = None,
        num_cpus: int | None = None,
        observation_spec_transform: TensorSpec
        | Callable[[TensorSpec], TensorSpec]
        | None = None,
        action_spec_transform: TensorSpec
        | Callable[[TensorSpec], TensorSpec]
        | None = None,
        reward_spec_transform: TensorSpec
        | Callable[[TensorSpec], TensorSpec]
        | None = None,
        done_spec_transform: TensorSpec
        | Callable[[TensorSpec], TensorSpec]
        | None = None,
        state_spec_transform: TensorSpec
        | Callable[[TensorSpec], TensorSpec]
        | None = None,
    ):
        # NOTE(review): use_ray_service / actor_name / num_gpus / num_cpus are
        # accepted but unused here -- presumably consumed by _RayServiceMetaClass
        # when dispatching to RayModuleTransform; confirm against the metaclass.
        super().__init__()
        # Exactly one of `module` / `module_factory` may be supplied.
        if module is None and module_factory is None:
            raise ValueError(
                "At least one of `module` or `module_factory` must be provided."
            )
        if module is not None and module_factory is not None:
            raise ValueError(
                "Only one of `module` or `module_factory` must be provided."
            )
        self.module = module if module is not None else module_factory()
        self.no_grad = no_grad
        self.inverse = inverse
        self.device = device
        self.observation_spec_transform = observation_spec_transform
        self.action_spec_transform = action_spec_transform
        self.reward_spec_transform = reward_spec_transform
        self.done_spec_transform = done_spec_transform
        self.state_spec_transform = state_spec_transform

    # Key views are derived from the wrapped module and cannot be overridden.
    @property
    def in_keys(self) -> list[str]:
        return self._in_keys()

    def _in_keys(self):
        return self.module.in_keys if not self.inverse else []

    @in_keys.setter
    def in_keys(self, value: list[str] | None):
        if value is not None:
            raise RuntimeError(f"in_keys {value} cannot be set for ModuleTransform")

    @property
    def out_keys(self) -> list[str]:
        return self._out_keys()

    def _out_keys(self):
        return self.module.out_keys if not self.inverse else []

    @out_keys.setter
    def out_keys(self, value: list[str] | None):
        if value is not None:
            raise RuntimeError(f"out_keys {value} cannot be set for ModuleTransform")

    @property
    def in_keys_inv(self) -> list[str]:
        return self._in_keys_inv()

    def _in_keys_inv(self):
        return self.module.out_keys if self.inverse else []

    @in_keys_inv.setter
    def in_keys_inv(self, value: list[str]):
        if value is not None:
            raise RuntimeError(f"in_keys_inv {value} cannot be set for ModuleTransform")

    @property
    def out_keys_inv(self) -> list[str]:
        return self._out_keys_inv()

    def _out_keys_inv(self):
        return self.module.in_keys if self.inverse else []

    @out_keys_inv.setter
    def out_keys_inv(self, value: list[str] | None):
        if value is not None:
            raise RuntimeError(
                f"out_keys_inv {value} cannot be set for ModuleTransform"
            )

    def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
        return self._call(tensordict)

    def _apply_module(self, tensordict: TensorDictBase) -> TensorDictBase:
        """Runs the wrapped module, optionally without grad and on `self.device`.

        The `.to(device)` context restores the tensordict's original device on
        exit; `nullcontext(tensordict)` keeps the same code path when no device
        is configured.
        """
        with torch.no_grad() if self.no_grad else nullcontext():
            with (
                tensordict.to(self.device)
                if self.device is not None
                else nullcontext(tensordict)
            ) as td:
                return self.module(td)

    def _call(self, tensordict: TensorDictBase) -> TensorDictBase:
        # Forward direction is a no-op for inverse transforms.
        if self.inverse:
            return tensordict
        return self._apply_module(tensordict)

    def _inv_call(self, tensordict: TensorDictBase) -> TensorDictBase:
        # Inverse direction is a no-op for forward transforms.
        if not self.inverse:
            return tensordict
        return self._apply_module(tensordict)

    def _update_weights_tensordict(self, params: TensorDictBase) -> None:
        # In-place weight swap from a TensorDict of parameters.
        params.to_module(self.module)

    def _update_weights_state_dict(self, state_dict: dict[str, torch.Tensor]) -> None:
        # In-place weight swap from a plain state_dict.
        self.module.load_state_dict(state_dict)

    def _init_weight_sync_scheme(self, scheme: WeightSyncScheme, model_id: str) -> None:
        """Initialize weight sync scheme on the receiver side (called in Ray actor).

        This method is called by RayModuleTransform after the actor is created
        to set up the receiver side of the weight synchronization scheme.

        Args:
            scheme: The weight sync scheme instance (e.g., RayModuleTransformScheme).
            model_id: Identifier for the model being synchronized.
        """
        torchrl_logger.debug(f"Initializing weight sync scheme for {model_id=}")
        scheme.init_on_receiver(model_id=model_id, context=self)
        torchrl_logger.debug(f"Setup weight sync scheme for {model_id=}")
        scheme.connect()
        self._weight_sync_scheme = scheme

    def _receive_weights_scheme(self):
        # Blocking receive of a pending weight update via the registered scheme.
        self._weight_sync_scheme.receive()

    @staticmethod
    def _maybe_transform_spec(spec: TensorSpec, spec_transform) -> TensorSpec:
        """Shared spec-rewrite rule: a TensorSpec replaces the spec wholesale,
        a callable rewrites it, and None leaves it untouched."""
        if spec_transform is None:
            return spec
        if isinstance(spec_transform, TensorSpec):
            return spec_transform
        return spec_transform(spec)

    def transform_observation_spec(self, observation_spec: TensorSpec) -> TensorSpec:
        return self._maybe_transform_spec(
            observation_spec, self.observation_spec_transform
        )

    def transform_action_spec(self, action_spec: TensorSpec) -> TensorSpec:
        return self._maybe_transform_spec(action_spec, self.action_spec_transform)

    def transform_reward_spec(self, reward_spec: TensorSpec) -> TensorSpec:
        return self._maybe_transform_spec(reward_spec, self.reward_spec_transform)

    def transform_done_spec(self, done_spec: TensorSpec) -> TensorSpec:
        return self._maybe_transform_spec(done_spec, self.done_spec_transform)

    def transform_state_spec(self, state_spec: TensorSpec) -> TensorSpec:
        return self._maybe_transform_spec(state_spec, self.state_spec_transform)