torchrl-0.11.0-cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- benchmarks/benchmark_batched_envs.py +104 -0
- benchmarks/conftest.py +91 -0
- benchmarks/ecosystem/gym_env_throughput.py +321 -0
- benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
- benchmarks/requirements.txt +7 -0
- benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
- benchmarks/test_collectors_benchmark.py +240 -0
- benchmarks/test_compressed_storage_benchmark.py +145 -0
- benchmarks/test_envs_benchmark.py +133 -0
- benchmarks/test_llm.py +101 -0
- benchmarks/test_non_tensor_env_benchmark.py +70 -0
- benchmarks/test_objectives_benchmarks.py +1199 -0
- benchmarks/test_replaybuffer_benchmark.py +254 -0
- sota-check/README.md +35 -0
- sota-implementations/README.md +142 -0
- sota-implementations/a2c/README.md +39 -0
- sota-implementations/a2c/a2c_atari.py +291 -0
- sota-implementations/a2c/a2c_mujoco.py +273 -0
- sota-implementations/a2c/utils_atari.py +240 -0
- sota-implementations/a2c/utils_mujoco.py +160 -0
- sota-implementations/bandits/README.md +7 -0
- sota-implementations/bandits/dqn.py +126 -0
- sota-implementations/cql/cql_offline.py +198 -0
- sota-implementations/cql/cql_online.py +249 -0
- sota-implementations/cql/discrete_cql_offline.py +180 -0
- sota-implementations/cql/discrete_cql_online.py +227 -0
- sota-implementations/cql/utils.py +471 -0
- sota-implementations/crossq/crossq.py +271 -0
- sota-implementations/crossq/utils.py +320 -0
- sota-implementations/ddpg/ddpg.py +231 -0
- sota-implementations/ddpg/utils.py +325 -0
- sota-implementations/decision_transformer/dt.py +163 -0
- sota-implementations/decision_transformer/lamb.py +167 -0
- sota-implementations/decision_transformer/online_dt.py +178 -0
- sota-implementations/decision_transformer/utils.py +562 -0
- sota-implementations/discrete_sac/discrete_sac.py +243 -0
- sota-implementations/discrete_sac/utils.py +324 -0
- sota-implementations/dqn/README.md +30 -0
- sota-implementations/dqn/dqn_atari.py +272 -0
- sota-implementations/dqn/dqn_cartpole.py +236 -0
- sota-implementations/dqn/utils_atari.py +132 -0
- sota-implementations/dqn/utils_cartpole.py +90 -0
- sota-implementations/dreamer/README.md +129 -0
- sota-implementations/dreamer/dreamer.py +586 -0
- sota-implementations/dreamer/dreamer_utils.py +1107 -0
- sota-implementations/expert-iteration/README.md +352 -0
- sota-implementations/expert-iteration/ei_utils.py +770 -0
- sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
- sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
- sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
- sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
- sota-implementations/gail/gail.py +327 -0
- sota-implementations/gail/gail_utils.py +68 -0
- sota-implementations/gail/ppo_utils.py +157 -0
- sota-implementations/grpo/README.md +273 -0
- sota-implementations/grpo/grpo-async.py +437 -0
- sota-implementations/grpo/grpo-sync.py +435 -0
- sota-implementations/grpo/grpo_utils.py +843 -0
- sota-implementations/grpo/requirements_gsm8k.txt +11 -0
- sota-implementations/grpo/requirements_ifeval.txt +16 -0
- sota-implementations/impala/README.md +33 -0
- sota-implementations/impala/impala_multi_node_ray.py +292 -0
- sota-implementations/impala/impala_multi_node_submitit.py +284 -0
- sota-implementations/impala/impala_single_node.py +261 -0
- sota-implementations/impala/utils.py +184 -0
- sota-implementations/iql/discrete_iql.py +230 -0
- sota-implementations/iql/iql_offline.py +164 -0
- sota-implementations/iql/iql_online.py +225 -0
- sota-implementations/iql/utils.py +437 -0
- sota-implementations/multiagent/README.md +74 -0
- sota-implementations/multiagent/iql.py +237 -0
- sota-implementations/multiagent/maddpg_iddpg.py +266 -0
- sota-implementations/multiagent/mappo_ippo.py +267 -0
- sota-implementations/multiagent/qmix_vdn.py +271 -0
- sota-implementations/multiagent/sac.py +337 -0
- sota-implementations/multiagent/utils/__init__.py +4 -0
- sota-implementations/multiagent/utils/logging.py +151 -0
- sota-implementations/multiagent/utils/utils.py +43 -0
- sota-implementations/ppo/README.md +29 -0
- sota-implementations/ppo/ppo_atari.py +305 -0
- sota-implementations/ppo/ppo_mujoco.py +293 -0
- sota-implementations/ppo/utils_atari.py +238 -0
- sota-implementations/ppo/utils_mujoco.py +152 -0
- sota-implementations/ppo_trainer/train.py +21 -0
- sota-implementations/redq/README.md +7 -0
- sota-implementations/redq/redq.py +199 -0
- sota-implementations/redq/utils.py +1060 -0
- sota-implementations/sac/sac-async.py +266 -0
- sota-implementations/sac/sac.py +239 -0
- sota-implementations/sac/utils.py +381 -0
- sota-implementations/sac_trainer/train.py +16 -0
- sota-implementations/td3/td3.py +254 -0
- sota-implementations/td3/utils.py +319 -0
- sota-implementations/td3_bc/td3_bc.py +177 -0
- sota-implementations/td3_bc/utils.py +251 -0
- torchrl/__init__.py +144 -0
- torchrl/_extension.py +74 -0
- torchrl/_torchrl.cp314-win_amd64.pyd +0 -0
- torchrl/_utils.py +1431 -0
- torchrl/collectors/__init__.py +48 -0
- torchrl/collectors/_base.py +1058 -0
- torchrl/collectors/_constants.py +88 -0
- torchrl/collectors/_multi_async.py +324 -0
- torchrl/collectors/_multi_base.py +1805 -0
- torchrl/collectors/_multi_sync.py +464 -0
- torchrl/collectors/_runner.py +581 -0
- torchrl/collectors/_single.py +2009 -0
- torchrl/collectors/_single_async.py +259 -0
- torchrl/collectors/collectors.py +62 -0
- torchrl/collectors/distributed/__init__.py +32 -0
- torchrl/collectors/distributed/default_configs.py +133 -0
- torchrl/collectors/distributed/generic.py +1306 -0
- torchrl/collectors/distributed/ray.py +1092 -0
- torchrl/collectors/distributed/rpc.py +1006 -0
- torchrl/collectors/distributed/sync.py +731 -0
- torchrl/collectors/distributed/utils.py +160 -0
- torchrl/collectors/llm/__init__.py +10 -0
- torchrl/collectors/llm/base.py +494 -0
- torchrl/collectors/llm/ray_collector.py +275 -0
- torchrl/collectors/llm/utils.py +36 -0
- torchrl/collectors/llm/weight_update/__init__.py +10 -0
- torchrl/collectors/llm/weight_update/vllm.py +348 -0
- torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
- torchrl/collectors/utils.py +433 -0
- torchrl/collectors/weight_update.py +591 -0
- torchrl/csrc/numpy_utils.h +38 -0
- torchrl/csrc/pybind.cpp +27 -0
- torchrl/csrc/segment_tree.h +458 -0
- torchrl/csrc/torch_utils.h +34 -0
- torchrl/csrc/utils.cpp +48 -0
- torchrl/csrc/utils.h +31 -0
- torchrl/data/__init__.py +187 -0
- torchrl/data/datasets/__init__.py +58 -0
- torchrl/data/datasets/atari_dqn.py +878 -0
- torchrl/data/datasets/common.py +281 -0
- torchrl/data/datasets/d4rl.py +489 -0
- torchrl/data/datasets/d4rl_infos.py +187 -0
- torchrl/data/datasets/gen_dgrl.py +375 -0
- torchrl/data/datasets/minari_data.py +643 -0
- torchrl/data/datasets/openml.py +177 -0
- torchrl/data/datasets/openx.py +798 -0
- torchrl/data/datasets/roboset.py +363 -0
- torchrl/data/datasets/utils.py +11 -0
- torchrl/data/datasets/vd4rl.py +432 -0
- torchrl/data/llm/__init__.py +34 -0
- torchrl/data/llm/dataset.py +491 -0
- torchrl/data/llm/history.py +1378 -0
- torchrl/data/llm/prompt.py +198 -0
- torchrl/data/llm/reward.py +225 -0
- torchrl/data/llm/topk.py +186 -0
- torchrl/data/llm/utils.py +543 -0
- torchrl/data/map/__init__.py +21 -0
- torchrl/data/map/hash.py +185 -0
- torchrl/data/map/query.py +204 -0
- torchrl/data/map/tdstorage.py +363 -0
- torchrl/data/map/tree.py +1434 -0
- torchrl/data/map/utils.py +103 -0
- torchrl/data/postprocs/__init__.py +8 -0
- torchrl/data/postprocs/postprocs.py +391 -0
- torchrl/data/replay_buffers/__init__.py +99 -0
- torchrl/data/replay_buffers/checkpointers.py +622 -0
- torchrl/data/replay_buffers/ray_buffer.py +292 -0
- torchrl/data/replay_buffers/replay_buffers.py +2376 -0
- torchrl/data/replay_buffers/samplers.py +2578 -0
- torchrl/data/replay_buffers/scheduler.py +265 -0
- torchrl/data/replay_buffers/storages.py +2412 -0
- torchrl/data/replay_buffers/utils.py +1042 -0
- torchrl/data/replay_buffers/writers.py +781 -0
- torchrl/data/tensor_specs.py +7101 -0
- torchrl/data/utils.py +334 -0
- torchrl/envs/__init__.py +265 -0
- torchrl/envs/async_envs.py +1105 -0
- torchrl/envs/batched_envs.py +3093 -0
- torchrl/envs/common.py +4241 -0
- torchrl/envs/custom/__init__.py +11 -0
- torchrl/envs/custom/chess.py +617 -0
- torchrl/envs/custom/llm.py +214 -0
- torchrl/envs/custom/pendulum.py +401 -0
- torchrl/envs/custom/san_moves.txt +29274 -0
- torchrl/envs/custom/tictactoeenv.py +288 -0
- torchrl/envs/env_creator.py +263 -0
- torchrl/envs/gym_like.py +752 -0
- torchrl/envs/libs/__init__.py +68 -0
- torchrl/envs/libs/_gym_utils.py +326 -0
- torchrl/envs/libs/brax.py +846 -0
- torchrl/envs/libs/dm_control.py +544 -0
- torchrl/envs/libs/envpool.py +447 -0
- torchrl/envs/libs/gym.py +2239 -0
- torchrl/envs/libs/habitat.py +138 -0
- torchrl/envs/libs/isaac_lab.py +87 -0
- torchrl/envs/libs/isaacgym.py +203 -0
- torchrl/envs/libs/jax_utils.py +166 -0
- torchrl/envs/libs/jumanji.py +963 -0
- torchrl/envs/libs/meltingpot.py +599 -0
- torchrl/envs/libs/openml.py +153 -0
- torchrl/envs/libs/openspiel.py +652 -0
- torchrl/envs/libs/pettingzoo.py +1042 -0
- torchrl/envs/libs/procgen.py +351 -0
- torchrl/envs/libs/robohive.py +429 -0
- torchrl/envs/libs/smacv2.py +645 -0
- torchrl/envs/libs/unity_mlagents.py +891 -0
- torchrl/envs/libs/utils.py +147 -0
- torchrl/envs/libs/vmas.py +813 -0
- torchrl/envs/llm/__init__.py +63 -0
- torchrl/envs/llm/chat.py +730 -0
- torchrl/envs/llm/datasets/README.md +4 -0
- torchrl/envs/llm/datasets/__init__.py +17 -0
- torchrl/envs/llm/datasets/gsm8k.py +353 -0
- torchrl/envs/llm/datasets/ifeval.py +274 -0
- torchrl/envs/llm/envs.py +789 -0
- torchrl/envs/llm/libs/README.md +3 -0
- torchrl/envs/llm/libs/__init__.py +8 -0
- torchrl/envs/llm/libs/mlgym.py +869 -0
- torchrl/envs/llm/reward/__init__.py +10 -0
- torchrl/envs/llm/reward/gsm8k.py +324 -0
- torchrl/envs/llm/reward/ifeval/README.md +13 -0
- torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
- torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
- torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
- torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
- torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
- torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
- torchrl/envs/llm/transforms/__init__.py +55 -0
- torchrl/envs/llm/transforms/browser.py +292 -0
- torchrl/envs/llm/transforms/dataloading.py +859 -0
- torchrl/envs/llm/transforms/format.py +73 -0
- torchrl/envs/llm/transforms/kl.py +1544 -0
- torchrl/envs/llm/transforms/policy_version.py +189 -0
- torchrl/envs/llm/transforms/reason.py +323 -0
- torchrl/envs/llm/transforms/tokenizer.py +321 -0
- torchrl/envs/llm/transforms/tools.py +1955 -0
- torchrl/envs/model_based/__init__.py +9 -0
- torchrl/envs/model_based/common.py +180 -0
- torchrl/envs/model_based/dreamer.py +112 -0
- torchrl/envs/transforms/__init__.py +147 -0
- torchrl/envs/transforms/functional.py +48 -0
- torchrl/envs/transforms/gym_transforms.py +203 -0
- torchrl/envs/transforms/module.py +341 -0
- torchrl/envs/transforms/r3m.py +372 -0
- torchrl/envs/transforms/ray_service.py +663 -0
- torchrl/envs/transforms/rb_transforms.py +214 -0
- torchrl/envs/transforms/transforms.py +11835 -0
- torchrl/envs/transforms/utils.py +94 -0
- torchrl/envs/transforms/vc1.py +307 -0
- torchrl/envs/transforms/vecnorm.py +845 -0
- torchrl/envs/transforms/vip.py +407 -0
- torchrl/envs/utils.py +1718 -0
- torchrl/envs/vec_envs.py +11 -0
- torchrl/modules/__init__.py +206 -0
- torchrl/modules/distributions/__init__.py +73 -0
- torchrl/modules/distributions/continuous.py +830 -0
- torchrl/modules/distributions/discrete.py +908 -0
- torchrl/modules/distributions/truncated_normal.py +187 -0
- torchrl/modules/distributions/utils.py +233 -0
- torchrl/modules/llm/__init__.py +62 -0
- torchrl/modules/llm/backends/__init__.py +65 -0
- torchrl/modules/llm/backends/vllm/__init__.py +94 -0
- torchrl/modules/llm/backends/vllm/_models.py +46 -0
- torchrl/modules/llm/backends/vllm/base.py +72 -0
- torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
- torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
- torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
- torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
- torchrl/modules/llm/policies/__init__.py +28 -0
- torchrl/modules/llm/policies/common.py +1809 -0
- torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
- torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
- torchrl/modules/llm/utils.py +23 -0
- torchrl/modules/mcts/__init__.py +21 -0
- torchrl/modules/mcts/scores.py +579 -0
- torchrl/modules/models/__init__.py +86 -0
- torchrl/modules/models/batchrenorm.py +119 -0
- torchrl/modules/models/decision_transformer.py +179 -0
- torchrl/modules/models/exploration.py +731 -0
- torchrl/modules/models/llm.py +156 -0
- torchrl/modules/models/model_based.py +596 -0
- torchrl/modules/models/models.py +1712 -0
- torchrl/modules/models/multiagent.py +1067 -0
- torchrl/modules/models/recipes/impala.py +185 -0
- torchrl/modules/models/utils.py +162 -0
- torchrl/modules/planners/__init__.py +10 -0
- torchrl/modules/planners/cem.py +228 -0
- torchrl/modules/planners/common.py +73 -0
- torchrl/modules/planners/mppi.py +265 -0
- torchrl/modules/tensordict_module/__init__.py +89 -0
- torchrl/modules/tensordict_module/actors.py +2457 -0
- torchrl/modules/tensordict_module/common.py +529 -0
- torchrl/modules/tensordict_module/exploration.py +814 -0
- torchrl/modules/tensordict_module/probabilistic.py +321 -0
- torchrl/modules/tensordict_module/rnn.py +1639 -0
- torchrl/modules/tensordict_module/sequence.py +132 -0
- torchrl/modules/tensordict_module/world_models.py +34 -0
- torchrl/modules/utils/__init__.py +38 -0
- torchrl/modules/utils/mappings.py +9 -0
- torchrl/modules/utils/utils.py +89 -0
- torchrl/objectives/__init__.py +78 -0
- torchrl/objectives/a2c.py +659 -0
- torchrl/objectives/common.py +753 -0
- torchrl/objectives/cql.py +1346 -0
- torchrl/objectives/crossq.py +710 -0
- torchrl/objectives/ddpg.py +453 -0
- torchrl/objectives/decision_transformer.py +371 -0
- torchrl/objectives/deprecated.py +516 -0
- torchrl/objectives/dqn.py +683 -0
- torchrl/objectives/dreamer.py +488 -0
- torchrl/objectives/functional.py +48 -0
- torchrl/objectives/gail.py +258 -0
- torchrl/objectives/iql.py +996 -0
- torchrl/objectives/llm/__init__.py +30 -0
- torchrl/objectives/llm/grpo.py +846 -0
- torchrl/objectives/llm/sft.py +482 -0
- torchrl/objectives/multiagent/__init__.py +8 -0
- torchrl/objectives/multiagent/qmixer.py +396 -0
- torchrl/objectives/ppo.py +1669 -0
- torchrl/objectives/redq.py +683 -0
- torchrl/objectives/reinforce.py +530 -0
- torchrl/objectives/sac.py +1580 -0
- torchrl/objectives/td3.py +570 -0
- torchrl/objectives/td3_bc.py +625 -0
- torchrl/objectives/utils.py +782 -0
- torchrl/objectives/value/__init__.py +28 -0
- torchrl/objectives/value/advantages.py +1956 -0
- torchrl/objectives/value/functional.py +1459 -0
- torchrl/objectives/value/utils.py +360 -0
- torchrl/record/__init__.py +17 -0
- torchrl/record/loggers/__init__.py +23 -0
- torchrl/record/loggers/common.py +48 -0
- torchrl/record/loggers/csv.py +226 -0
- torchrl/record/loggers/mlflow.py +142 -0
- torchrl/record/loggers/tensorboard.py +139 -0
- torchrl/record/loggers/trackio.py +163 -0
- torchrl/record/loggers/utils.py +78 -0
- torchrl/record/loggers/wandb.py +214 -0
- torchrl/record/recorder.py +554 -0
- torchrl/services/__init__.py +79 -0
- torchrl/services/base.py +109 -0
- torchrl/services/ray_service.py +453 -0
- torchrl/testing/__init__.py +107 -0
- torchrl/testing/assertions.py +179 -0
- torchrl/testing/dist_utils.py +122 -0
- torchrl/testing/env_creators.py +227 -0
- torchrl/testing/env_helper.py +35 -0
- torchrl/testing/gym_helpers.py +156 -0
- torchrl/testing/llm_mocks.py +119 -0
- torchrl/testing/mocking_classes.py +2720 -0
- torchrl/testing/modules.py +295 -0
- torchrl/testing/mp_helpers.py +15 -0
- torchrl/testing/ray_helpers.py +293 -0
- torchrl/testing/utils.py +190 -0
- torchrl/trainers/__init__.py +42 -0
- torchrl/trainers/algorithms/__init__.py +11 -0
- torchrl/trainers/algorithms/configs/__init__.py +705 -0
- torchrl/trainers/algorithms/configs/collectors.py +216 -0
- torchrl/trainers/algorithms/configs/common.py +41 -0
- torchrl/trainers/algorithms/configs/data.py +308 -0
- torchrl/trainers/algorithms/configs/envs.py +104 -0
- torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
- torchrl/trainers/algorithms/configs/logging.py +80 -0
- torchrl/trainers/algorithms/configs/modules.py +570 -0
- torchrl/trainers/algorithms/configs/objectives.py +177 -0
- torchrl/trainers/algorithms/configs/trainers.py +340 -0
- torchrl/trainers/algorithms/configs/transforms.py +955 -0
- torchrl/trainers/algorithms/configs/utils.py +252 -0
- torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
- torchrl/trainers/algorithms/configs/weight_update.py +159 -0
- torchrl/trainers/algorithms/ppo.py +373 -0
- torchrl/trainers/algorithms/sac.py +308 -0
- torchrl/trainers/helpers/__init__.py +40 -0
- torchrl/trainers/helpers/collectors.py +416 -0
- torchrl/trainers/helpers/envs.py +573 -0
- torchrl/trainers/helpers/logger.py +33 -0
- torchrl/trainers/helpers/losses.py +132 -0
- torchrl/trainers/helpers/models.py +658 -0
- torchrl/trainers/helpers/replay_buffer.py +59 -0
- torchrl/trainers/helpers/trainers.py +301 -0
- torchrl/trainers/trainers.py +2052 -0
- torchrl/weight_update/__init__.py +33 -0
- torchrl/weight_update/_distributed.py +749 -0
- torchrl/weight_update/_mp.py +624 -0
- torchrl/weight_update/_noupdate.py +102 -0
- torchrl/weight_update/_ray.py +1032 -0
- torchrl/weight_update/_rpc.py +284 -0
- torchrl/weight_update/_shared.py +891 -0
- torchrl/weight_update/llm/__init__.py +32 -0
- torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
- torchrl/weight_update/llm/vllm_nccl.py +710 -0
- torchrl/weight_update/utils.py +73 -0
- torchrl/weight_update/weight_sync_schemes.py +1244 -0
- torchrl-0.11.0.dist-info/LICENSE +21 -0
- torchrl-0.11.0.dist-info/METADATA +1307 -0
- torchrl-0.11.0.dist-info/RECORD +394 -0
- torchrl-0.11.0.dist-info/WHEEL +5 -0
- torchrl-0.11.0.dist-info/entry_points.txt +2 -0
- torchrl-0.11.0.dist-info/top_level.txt +7 -0
torchrl/envs/model_based/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .common import ModelBasedEnvBase
+from .dreamer import DreamerDecoder, DreamerEnv
+
+__all__ = ["ModelBasedEnvBase", "DreamerDecoder", "DreamerEnv"]
torchrl/envs/model_based/common.py
@@ -0,0 +1,180 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
+import abc
+import warnings
+
+import torch
+from tensordict import TensorDict
+from tensordict.nn import TensorDictModule
+from torchrl.data.utils import DEVICE_TYPING
+from torchrl.envs.common import EnvBase
+
+
+class ModelBasedEnvBase(EnvBase):
+    """Basic environment for Model Based RL sota-implementations.
+
+    Wrapper around the model of the MBRL algorithm.
+    It is meant to give an env framework to a world model (including but not limited to observations, reward, done state and safety constraints models)
+    and to behave as a classical environment.
+
+    This is a base class for other environments and it should not be used directly.
+
+    Example:
+        >>> import torch
+        >>> from tensordict import TensorDict
+        >>> from torchrl.data import Composite, Unbounded
+        >>> class MyMBEnv(ModelBasedEnvBase):
+        ...     def __init__(self, world_model, device="cpu", dtype=None, batch_size=None):
+        ...         super().__init__(world_model, device=device, dtype=dtype, batch_size=batch_size)
+        ...         self.observation_spec = Composite(
+        ...             hidden_observation=Unbounded((4,))
+        ...         )
+        ...         self.state_spec = Composite(
+        ...             hidden_observation=Unbounded((4,)),
+        ...         )
+        ...         self.action_spec = Unbounded((1,))
+        ...         self.reward_spec = Unbounded((1,))
+        ...
+        ...     def _reset(self, tensordict: TensorDict) -> TensorDict:
+        ...         tensordict = TensorDict(
+        ...             batch_size=self.batch_size,
+        ...             device=self.device,
+        ...         )
+        ...         tensordict = tensordict.update(self.state_spec.rand())
+        ...         tensordict = tensordict.update(self.observation_spec.rand())
+        ...         return tensordict
+        >>> # This environment is used as follows:
+        >>> import torch.nn as nn
+        >>> from torchrl.modules import MLP, WorldModelWrapper
+        >>> world_model = WorldModelWrapper(
+        ...     TensorDictModule(
+        ...         MLP(out_features=4, activation_class=nn.ReLU, activate_last_layer=True, depth=0),
+        ...         in_keys=["hidden_observation", "action"],
+        ...         out_keys=["hidden_observation"],
+        ...     ),
+        ...     TensorDictModule(
+        ...         nn.Linear(4, 1),
+        ...         in_keys=["hidden_observation"],
+        ...         out_keys=["reward"],
+        ...     ),
+        ... )
+        >>> env = MyMBEnv(world_model)
+        >>> tensordict = env.rollout(max_steps=10)
+        >>> print(tensordict)
+        TensorDict(
+            fields={
+                action: Tensor(torch.Size([10, 1]), dtype=torch.float32),
+                done: Tensor(torch.Size([10, 1]), dtype=torch.bool),
+                hidden_observation: Tensor(torch.Size([10, 4]), dtype=torch.float32),
+                next: LazyStackedTensorDict(
+                    fields={
+                        hidden_observation: Tensor(torch.Size([10, 4]), dtype=torch.float32)},
+                    batch_size=torch.Size([10]),
+                    device=cpu,
+                    is_shared=False),
+                reward: Tensor(torch.Size([10, 1]), dtype=torch.float32)},
+            batch_size=torch.Size([10]),
+            device=cpu,
+            is_shared=False)
+
+
+    Properties:
+        observation_spec (Composite): sampling spec of the observations;
+        action_spec (TensorSpec): sampling spec of the actions;
+        reward_spec (TensorSpec): sampling spec of the rewards;
+        input_spec (Composite): sampling spec of the inputs;
+        batch_size (torch.Size): batch size to be used by the env. If not set, the env accepts tensordicts of all batch sizes.
+        device (torch.device): device where the env input and output are expected to live.
+
+    Args:
+        world_model (nn.Module): model that generates world states and their corresponding rewards;
+        params (List[torch.Tensor], optional): list of parameters of the world model;
+        buffers (List[torch.Tensor], optional): list of buffers of the world model;
+        device (torch.device, optional): device where the env input and output are expected to live;
+        dtype (torch.dtype, optional): dtype of the env input and output;
+        batch_size (torch.Size, optional): number of environments contained in the instance;
+        run_type_checks (bool, optional): whether to run type checks on the steps of the env.
+
+    Methods:
+        step (TensorDict -> TensorDict): step in the environment
+        reset (TensorDict, optional -> TensorDict): reset the environment
+        set_seed (int -> int): sets the seed of the environment
+        rand_step (TensorDict, optional -> TensorDict): random step given the action spec
+        rollout (Callable, ... -> TensorDict): executes a rollout in the environment with the given policy (or random
+            steps if no policy is provided)
+
+    """
+
+    def __init__(
+        self,
+        world_model: TensorDictModule,
+        params: list[torch.Tensor] | None = None,
+        buffers: list[torch.Tensor] | None = None,
+        device: DEVICE_TYPING = "cpu",
+        batch_size: torch.Size | None = None,
+        run_type_checks: bool = False,
+        allow_done_after_reset: bool = False,
+    ):
+        super().__init__(
+            device=device,
+            batch_size=batch_size,
+            run_type_checks=run_type_checks,
+            allow_done_after_reset=allow_done_after_reset,
+        )
+        self.world_model = world_model.to(self.device)
+        self.world_model_params = params
+        self.world_model_buffers = buffers
+
+    @classmethod
+    def __new__(cls, *args, **kwargs):
+        return super().__new__(
+            cls, *args, _inplace_update=False, _batch_locked=False, **kwargs
+        )
+
+    def set_specs_from_env(self, env: EnvBase):
+        """Sets the specs of the environment from the specs of the given environment."""
+        device = self.device
+        output_spec = env.output_spec.clone()
+        input_spec = env.input_spec.clone()
+        if device is not None:
+            output_spec = output_spec.to(device)
+            input_spec = input_spec.to(device)
+        self.__dict__["_output_spec"] = output_spec
+        self.__dict__["_input_spec"] = input_spec
+        self.empty_cache()
+
+    def _step(
+        self,
+        tensordict: TensorDict,
+    ) -> TensorDict:
+        # the step method must not mutate the input tensordict in place
+        tensordict_out = tensordict.clone(recurse=False)
+        # Compute world state
+        if self.world_model_params is not None:
+            tensordict_out = self.world_model(
+                tensordict_out,
+                params=self.world_model_params,
+                buffers=self.world_model_buffers,
+            )
+        else:
+            tensordict_out = self.world_model(tensordict_out)
+        # done can be missing, it will be filled by `step`
+        # Convert to list for torch.compile compatibility (dynamo can't unpack _CompositeSpecKeysView)
+        keys_to_select = (
+            list(self.observation_spec.keys())
+            + list(self.full_done_spec.keys())
+            + list(self.full_reward_spec.keys())
+        )
+        tensordict_out = tensordict_out.select(*keys_to_select, strict=False)
+        return tensordict_out
+
+    @abc.abstractmethod
+    def _reset(self, tensordict: TensorDict, **kwargs) -> TensorDict:
+        raise NotImplementedError
+
+    def _set_seed(self, seed: int | None) -> None:
+        warnings.warn("Set seed isn't needed for model based environments")
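The docstring example above declares specs by hand; `set_specs_from_env` offers the other route, cloning the input/output specs of a ground-truth environment onto the model-based env. A minimal sketch, assuming the `MyMBEnv` subclass and `world_model` from the docstring example and a working Gym backend (the `Pendulum-v1` choice is an illustrative assumption, not part of this diff):

from torchrl.envs import GymEnv

# Ground-truth env whose specs the model-based env should mimic
# (illustrative env choice, assumed available).
real_env = GymEnv("Pendulum-v1")
mb_env = MyMBEnv(world_model)
# Clone the real env's input/output specs onto the model env; per the
# code above this writes _output_spec/_input_spec and calls empty_cache().
mb_env.set_specs_from_env(real_env)
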
torchrl/envs/model_based/dreamer.py
@@ -0,0 +1,112 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
+import torch
+from tensordict import TensorDict
+from tensordict.nn import TensorDictModule
+from torchrl.data.tensor_specs import Composite
+from torchrl.data.utils import DEVICE_TYPING
+from torchrl.envs.common import EnvBase
+from torchrl.envs.model_based import ModelBasedEnvBase
+from torchrl.envs.transforms.transforms import Transform
+
+
+class DreamerEnv(ModelBasedEnvBase):
+    """Dreamer simulation environment.
+
+    This environment is used for imagination rollouts in Dreamer training.
+    It never terminates (done is always False) since imagination runs for a
+    fixed horizon. The done-checking methods are overridden to avoid CUDA
+    synchronization overhead from Python control flow on CUDA tensors.
+    """
+
+    def __init__(
+        self,
+        world_model: TensorDictModule,
+        prior_shape: tuple[int, ...],
+        belief_shape: tuple[int, ...],
+        obs_decoder: TensorDictModule | None = None,
+        device: DEVICE_TYPING = "cpu",
+        batch_size: torch.Size | None = None,
+    ):
+        super().__init__(
+            world_model,
+            device=device,
+            batch_size=batch_size,
+            # Skip done validation in reset(): imagination never terminates.
+            allow_done_after_reset=True,
+        )
+        self.obs_decoder = obs_decoder
+        self.prior_shape = prior_shape
+        self.belief_shape = belief_shape
+
+    def any_done(self, tensordict) -> bool:
+        """Returns False: imagination rollouts never terminate.
+
+        Overridden to avoid the CUDA sync caused by `done.any()` in the parent class.
+        """
+        return False
+
+    def maybe_reset(self, tensordict):
+        """No-op: imagination rollouts don't need partial resets.
+
+        Overridden to avoid the CUDA sync caused by done checks in the parent class.
+        """
+        return tensordict
+
+    def set_specs_from_env(self, env: EnvBase):
+        """Sets the specs of the environment from the specs of the given environment."""
+        super().set_specs_from_env(env)
+        self.action_spec = self.action_spec.to(self.device)
+        self.state_spec = Composite(
+            state=self.observation_spec["state"],
+            belief=self.observation_spec["belief"],
+            shape=env.batch_size,
+        )
+
+    def _reset(self, tensordict=None, **kwargs) -> TensorDict:
+        batch_size = tensordict.batch_size if tensordict is not None else []
+        device = tensordict.device if tensordict is not None else self.device
+        if tensordict is None:
+            td = self.state_spec.rand(shape=batch_size)
+            # why don't we reuse actions taken at those steps?
+            td.set("action", self.action_spec.rand(shape=batch_size))
+            td[("next", "reward")] = self.reward_spec.rand(shape=batch_size)
+            td.update(self.observation_spec.rand(shape=batch_size))
+            if device is not None:
+                td = td.to(device, non_blocking=True)
+                if torch.cuda.is_available() and device.type == "cpu":
+                    torch.cuda.synchronize()
+                elif torch.backends.mps.is_available():
+                    torch.mps.synchronize()
+        else:
+            td = tensordict.clone()
+        return td
+
+    def decode_obs(self, tensordict: TensorDict, compute_latents=False) -> TensorDict:
+        if self.obs_decoder is None:
+            raise ValueError("No observation decoder provided")
+        if compute_latents:
+            tensordict = self.world_model(tensordict)
+        return self.obs_decoder(tensordict)
+
+
+class DreamerDecoder(Transform):
+    """A transform to record the decoded observations in Dreamer.
+
+    Examples:
+        >>> model_based_env = DreamerEnv(...)
+        >>> model_based_env_eval = model_based_env.append_transform(DreamerDecoder())
+    """
+
+    def _call(self, next_tensordict):
+        return self.parent.base_env.obs_decoder(next_tensordict)
+
+    def _reset(self, tensordict, tensordict_reset):
+        return self._call(tensordict_reset)
+
+    def transform_observation_spec(self, observation_spec):
+        return observation_spec
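To make the `DreamerDecoder` docstring example concrete, here is a hedged wiring sketch; `rssm_world_model`, `pixel_decoder`, the latent shapes and `data_env` are placeholders for objects built by the Dreamer recipe (see sota-implementations/dreamer/), not definitions from this diff:

# Hypothetical wiring of the imagination env used for Dreamer evaluation.
imagination_env = DreamerEnv(
    rssm_world_model,           # assumed trained RSSM transition/reward model
    prior_shape=(30,),          # placeholder stochastic-state shape
    belief_shape=(200,),        # placeholder deterministic-belief shape
    obs_decoder=pixel_decoder,  # assumed latent-to-pixel decoder
)
imagination_env.set_specs_from_env(data_env)  # borrow specs from the data-collection env
# Decode latents back to pixels at every step, e.g. for video logging:
eval_env = imagination_env.append_transform(DreamerDecoder())
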
torchrl/envs/transforms/__init__.py
@@ -0,0 +1,147 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .gym_transforms import EndOfLifeTransform
+from .module import ModuleTransform
+from .r3m import R3MTransform
+from .ray_service import RayTransform
+from .rb_transforms import MultiStepTransform
+from .transforms import (
+    ActionDiscretizer,
+    ActionMask,
+    AutoResetEnv,
+    AutoResetTransform,
+    BatchSizeTransform,
+    BinarizeReward,
+    BurnInTransform,
+    CatFrames,
+    CatTensors,
+    CenterCrop,
+    ClipTransform,
+    Compose,
+    ConditionalPolicySwitch,
+    ConditionalSkip,
+    Crop,
+    DeviceCastTransform,
+    DiscreteActionProjection,
+    DoubleToFloat,
+    DTypeCastTransform,
+    ExcludeTransform,
+    FiniteTensorDictCheck,
+    FlattenObservation,
+    FrameSkipTransform,
+    GrayScale,
+    gSDENoise,
+    Hash,
+    InitTracker,
+    LineariseRewards,
+    MultiAction,
+    NoopResetEnv,
+    ObservationNorm,
+    ObservationTransform,
+    PermuteTransform,
+    PinMemoryTransform,
+    RandomCropTensorDict,
+    RemoveEmptySpecs,
+    RenameTransform,
+    Resize,
+    Reward2GoTransform,
+    RewardClipping,
+    RewardScaling,
+    RewardSum,
+    SelectTransform,
+    SignTransform,
+    SqueezeTransform,
+    Stack,
+    StepCounter,
+    TargetReturn,
+    TensorDictPrimer,
+    TimeMaxPool,
+    Timer,
+    Tokenizer,
+    ToTensorImage,
+    TrajCounter,
+    Transform,
+    TransformedEnv,
+    UnaryTransform,
+    UnsqueezeTransform,
+    VecGymEnvTransform,
+    VecNorm,
+)
+from .vc1 import VC1Transform
+from .vecnorm import VecNormV2
+from .vip import VIPRewardTransform, VIPTransform
+
+__all__ = [
+    "ActionDiscretizer",
+    "ActionMask",
+    "AutoResetEnv",
+    "AutoResetTransform",
+    "BatchSizeTransform",
+    "BinarizeReward",
+    "BurnInTransform",
+    "CatFrames",
+    "CatTensors",
+    "CenterCrop",
+    "ClipTransform",
+    "Compose",
+    "ConditionalPolicySwitch",
+    "ConditionalSkip",
+    "Crop",
+    "DTypeCastTransform",
+    "DeviceCastTransform",
+    "DiscreteActionProjection",
+    "DoubleToFloat",
+    "EndOfLifeTransform",
+    "ExcludeTransform",
+    "FiniteTensorDictCheck",
+    "FlattenObservation",
+    "FrameSkipTransform",
+    "GrayScale",
+    "Hash",
+    "InitTracker",
+    "LineariseRewards",
+    "ModuleTransform",
+    "MultiAction",
+    "MultiStepTransform",
+    "NoopResetEnv",
+    "ObservationNorm",
+    "ObservationTransform",
+    "PermuteTransform",
+    "PinMemoryTransform",
+    "R3MTransform",
+    "RandomCropTensorDict",
+    "RayTransform",
+    "RemoveEmptySpecs",
+    "RenameTransform",
+    "Resize",
+    "Reward2GoTransform",
+    "RewardClipping",
+    "RewardScaling",
+    "RewardSum",
+    "SelectTransform",
+    "SignTransform",
+    "SqueezeTransform",
+    "Stack",
+    "StepCounter",
+    "TargetReturn",
+    "TensorDictPrimer",
+    "TimeMaxPool",
+    "Timer",
+    "ToTensorImage",
+    "Tokenizer",
+    "TrajCounter",
+    "Transform",
+    "TransformedEnv",
+    "UnaryTransform",
+    "UnsqueezeTransform",
+    "VC1Transform",
+    "VIPRewardTransform",
+    "VIPTransform",
+    "VecGymEnvTransform",
+    "VecNorm",
+    "VecNormV2",
+    "gSDENoise",
+]
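The names exported above are typically chained with `Compose` inside a `TransformedEnv`. A short sketch assuming a pixel-rendering Gym env is available (the env id and `from_pixels` setup are illustrative assumptions):

from torchrl.envs import GymEnv
from torchrl.envs.transforms import (
    Compose,
    GrayScale,
    Resize,
    RewardSum,
    StepCounter,
    ToTensorImage,
    TransformedEnv,
)

env = TransformedEnv(
    GymEnv("ALE/Pong-v5", from_pixels=True),  # assumed pixel env
    Compose(
        ToTensorImage(),  # HWC uint8 -> CHW float in [0, 1]
        GrayScale(),      # 3 channels -> 1
        Resize(84, 84),   # downscale frames
        RewardSum(),      # accumulate per-episode return
        StepCounter(),    # count steps per episode
    ),
)
td = env.reset()  # the transformed observation specs apply here
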
torchrl/envs/transforms/functional.py
@@ -0,0 +1,48 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
+from torch import Tensor
+
+
+# copied from torchvision
+def _get_image_num_channels(img: Tensor) -> int:
+    if img.ndim == 2:
+        return 1
+    elif img.ndim > 2:
+        return img.shape[-3]
+
+    raise TypeError(f"Input ndim should be 2 or more. Got {img.ndim}")
+
+
+def _assert_channels(img: Tensor, permitted: list[int]) -> None:
+    c = _get_image_num_channels(img)
+    if c not in permitted:
+        raise TypeError(
+            f"Input image tensor permitted channel values are {permitted}, but found "
+            f"{c} (full shape: {img.shape})"
+        )
+
+
+def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor:
+    """Turns an RGB image into grayscale."""
+    if img.ndim < 3:
+        raise TypeError(
+            "Input image tensor should have at least 3 dimensions, but found "
+            f"{img.ndim}"
+        )
+    _assert_channels(img, [3])
+
+    if num_output_channels not in (1, 3):
+        raise ValueError("num_output_channels should be either 1 or 3")
+
+    r, g, b = img.unbind(dim=-3)
+    l_img = (0.2989 * r + 0.587 * g + 0.114 * b).to(img.dtype)
+    l_img = l_img.unsqueeze(dim=-3)
+
+    if num_output_channels == 3:
+        return l_img.expand(img.shape)
+
+    return l_img
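A quick behavioral check of the helper (shapes are arbitrary; the function operates on dim -3, so leading batch dimensions pass through untouched):

import torch

img = torch.rand(3, 64, 64)  # CHW float image
gray = rgb_to_grayscale(img)
gray3 = rgb_to_grayscale(img, num_output_channels=3)   # luma replicated over 3 channels
batched = rgb_to_grayscale(torch.rand(8, 3, 64, 64))   # batch dim preserved
assert gray.shape == (1, 64, 64)
assert gray3.shape == (3, 64, 64)
assert batched.shape == (8, 1, 64, 64)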