torchrl 0.11.0__cp314-cp314t-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395) hide show
  1. benchmarks/benchmark_batched_envs.py +104 -0
  2. benchmarks/conftest.py +91 -0
  3. benchmarks/ecosystem/gym_env_throughput.py +321 -0
  4. benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
  5. benchmarks/requirements.txt +7 -0
  6. benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
  7. benchmarks/test_collectors_benchmark.py +240 -0
  8. benchmarks/test_compressed_storage_benchmark.py +145 -0
  9. benchmarks/test_envs_benchmark.py +133 -0
  10. benchmarks/test_llm.py +101 -0
  11. benchmarks/test_non_tensor_env_benchmark.py +70 -0
  12. benchmarks/test_objectives_benchmarks.py +1199 -0
  13. benchmarks/test_replaybuffer_benchmark.py +254 -0
  14. sota-check/README.md +35 -0
  15. sota-implementations/README.md +142 -0
  16. sota-implementations/a2c/README.md +39 -0
  17. sota-implementations/a2c/a2c_atari.py +291 -0
  18. sota-implementations/a2c/a2c_mujoco.py +273 -0
  19. sota-implementations/a2c/utils_atari.py +240 -0
  20. sota-implementations/a2c/utils_mujoco.py +160 -0
  21. sota-implementations/bandits/README.md +7 -0
  22. sota-implementations/bandits/dqn.py +126 -0
  23. sota-implementations/cql/cql_offline.py +198 -0
  24. sota-implementations/cql/cql_online.py +249 -0
  25. sota-implementations/cql/discrete_cql_offline.py +180 -0
  26. sota-implementations/cql/discrete_cql_online.py +227 -0
  27. sota-implementations/cql/utils.py +471 -0
  28. sota-implementations/crossq/crossq.py +271 -0
  29. sota-implementations/crossq/utils.py +320 -0
  30. sota-implementations/ddpg/ddpg.py +231 -0
  31. sota-implementations/ddpg/utils.py +325 -0
  32. sota-implementations/decision_transformer/dt.py +163 -0
  33. sota-implementations/decision_transformer/lamb.py +167 -0
  34. sota-implementations/decision_transformer/online_dt.py +178 -0
  35. sota-implementations/decision_transformer/utils.py +562 -0
  36. sota-implementations/discrete_sac/discrete_sac.py +243 -0
  37. sota-implementations/discrete_sac/utils.py +324 -0
  38. sota-implementations/dqn/README.md +30 -0
  39. sota-implementations/dqn/dqn_atari.py +272 -0
  40. sota-implementations/dqn/dqn_cartpole.py +236 -0
  41. sota-implementations/dqn/utils_atari.py +132 -0
  42. sota-implementations/dqn/utils_cartpole.py +90 -0
  43. sota-implementations/dreamer/README.md +129 -0
  44. sota-implementations/dreamer/dreamer.py +586 -0
  45. sota-implementations/dreamer/dreamer_utils.py +1107 -0
  46. sota-implementations/expert-iteration/README.md +352 -0
  47. sota-implementations/expert-iteration/ei_utils.py +770 -0
  48. sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
  49. sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
  50. sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
  51. sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
  52. sota-implementations/gail/gail.py +327 -0
  53. sota-implementations/gail/gail_utils.py +68 -0
  54. sota-implementations/gail/ppo_utils.py +157 -0
  55. sota-implementations/grpo/README.md +273 -0
  56. sota-implementations/grpo/grpo-async.py +437 -0
  57. sota-implementations/grpo/grpo-sync.py +435 -0
  58. sota-implementations/grpo/grpo_utils.py +843 -0
  59. sota-implementations/grpo/requirements_gsm8k.txt +11 -0
  60. sota-implementations/grpo/requirements_ifeval.txt +16 -0
  61. sota-implementations/impala/README.md +33 -0
  62. sota-implementations/impala/impala_multi_node_ray.py +292 -0
  63. sota-implementations/impala/impala_multi_node_submitit.py +284 -0
  64. sota-implementations/impala/impala_single_node.py +261 -0
  65. sota-implementations/impala/utils.py +184 -0
  66. sota-implementations/iql/discrete_iql.py +230 -0
  67. sota-implementations/iql/iql_offline.py +164 -0
  68. sota-implementations/iql/iql_online.py +225 -0
  69. sota-implementations/iql/utils.py +437 -0
  70. sota-implementations/multiagent/README.md +74 -0
  71. sota-implementations/multiagent/iql.py +237 -0
  72. sota-implementations/multiagent/maddpg_iddpg.py +266 -0
  73. sota-implementations/multiagent/mappo_ippo.py +267 -0
  74. sota-implementations/multiagent/qmix_vdn.py +271 -0
  75. sota-implementations/multiagent/sac.py +337 -0
  76. sota-implementations/multiagent/utils/__init__.py +4 -0
  77. sota-implementations/multiagent/utils/logging.py +151 -0
  78. sota-implementations/multiagent/utils/utils.py +43 -0
  79. sota-implementations/ppo/README.md +29 -0
  80. sota-implementations/ppo/ppo_atari.py +305 -0
  81. sota-implementations/ppo/ppo_mujoco.py +293 -0
  82. sota-implementations/ppo/utils_atari.py +238 -0
  83. sota-implementations/ppo/utils_mujoco.py +152 -0
  84. sota-implementations/ppo_trainer/train.py +21 -0
  85. sota-implementations/redq/README.md +7 -0
  86. sota-implementations/redq/redq.py +199 -0
  87. sota-implementations/redq/utils.py +1060 -0
  88. sota-implementations/sac/sac-async.py +266 -0
  89. sota-implementations/sac/sac.py +239 -0
  90. sota-implementations/sac/utils.py +381 -0
  91. sota-implementations/sac_trainer/train.py +16 -0
  92. sota-implementations/td3/td3.py +254 -0
  93. sota-implementations/td3/utils.py +319 -0
  94. sota-implementations/td3_bc/td3_bc.py +177 -0
  95. sota-implementations/td3_bc/utils.py +251 -0
  96. torchrl/.dylibs/libc++.1.0.dylib +0 -0
  97. torchrl/__init__.py +144 -0
  98. torchrl/_extension.py +74 -0
  99. torchrl/_torchrl.cpython-314t-darwin.so +0 -0
  100. torchrl/_utils.py +1431 -0
  101. torchrl/collectors/__init__.py +48 -0
  102. torchrl/collectors/_base.py +1058 -0
  103. torchrl/collectors/_constants.py +88 -0
  104. torchrl/collectors/_multi_async.py +324 -0
  105. torchrl/collectors/_multi_base.py +1805 -0
  106. torchrl/collectors/_multi_sync.py +464 -0
  107. torchrl/collectors/_runner.py +581 -0
  108. torchrl/collectors/_single.py +2009 -0
  109. torchrl/collectors/_single_async.py +259 -0
  110. torchrl/collectors/collectors.py +62 -0
  111. torchrl/collectors/distributed/__init__.py +32 -0
  112. torchrl/collectors/distributed/default_configs.py +133 -0
  113. torchrl/collectors/distributed/generic.py +1306 -0
  114. torchrl/collectors/distributed/ray.py +1092 -0
  115. torchrl/collectors/distributed/rpc.py +1006 -0
  116. torchrl/collectors/distributed/sync.py +731 -0
  117. torchrl/collectors/distributed/utils.py +160 -0
  118. torchrl/collectors/llm/__init__.py +10 -0
  119. torchrl/collectors/llm/base.py +494 -0
  120. torchrl/collectors/llm/ray_collector.py +275 -0
  121. torchrl/collectors/llm/utils.py +36 -0
  122. torchrl/collectors/llm/weight_update/__init__.py +10 -0
  123. torchrl/collectors/llm/weight_update/vllm.py +348 -0
  124. torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
  125. torchrl/collectors/utils.py +433 -0
  126. torchrl/collectors/weight_update.py +591 -0
  127. torchrl/csrc/numpy_utils.h +38 -0
  128. torchrl/csrc/pybind.cpp +27 -0
  129. torchrl/csrc/segment_tree.h +458 -0
  130. torchrl/csrc/torch_utils.h +34 -0
  131. torchrl/csrc/utils.cpp +48 -0
  132. torchrl/csrc/utils.h +31 -0
  133. torchrl/data/__init__.py +187 -0
  134. torchrl/data/datasets/__init__.py +58 -0
  135. torchrl/data/datasets/atari_dqn.py +878 -0
  136. torchrl/data/datasets/common.py +281 -0
  137. torchrl/data/datasets/d4rl.py +489 -0
  138. torchrl/data/datasets/d4rl_infos.py +187 -0
  139. torchrl/data/datasets/gen_dgrl.py +375 -0
  140. torchrl/data/datasets/minari_data.py +643 -0
  141. torchrl/data/datasets/openml.py +177 -0
  142. torchrl/data/datasets/openx.py +798 -0
  143. torchrl/data/datasets/roboset.py +363 -0
  144. torchrl/data/datasets/utils.py +11 -0
  145. torchrl/data/datasets/vd4rl.py +432 -0
  146. torchrl/data/llm/__init__.py +34 -0
  147. torchrl/data/llm/dataset.py +491 -0
  148. torchrl/data/llm/history.py +1378 -0
  149. torchrl/data/llm/prompt.py +198 -0
  150. torchrl/data/llm/reward.py +225 -0
  151. torchrl/data/llm/topk.py +186 -0
  152. torchrl/data/llm/utils.py +543 -0
  153. torchrl/data/map/__init__.py +21 -0
  154. torchrl/data/map/hash.py +185 -0
  155. torchrl/data/map/query.py +204 -0
  156. torchrl/data/map/tdstorage.py +363 -0
  157. torchrl/data/map/tree.py +1434 -0
  158. torchrl/data/map/utils.py +103 -0
  159. torchrl/data/postprocs/__init__.py +8 -0
  160. torchrl/data/postprocs/postprocs.py +391 -0
  161. torchrl/data/replay_buffers/__init__.py +99 -0
  162. torchrl/data/replay_buffers/checkpointers.py +622 -0
  163. torchrl/data/replay_buffers/ray_buffer.py +292 -0
  164. torchrl/data/replay_buffers/replay_buffers.py +2376 -0
  165. torchrl/data/replay_buffers/samplers.py +2578 -0
  166. torchrl/data/replay_buffers/scheduler.py +265 -0
  167. torchrl/data/replay_buffers/storages.py +2412 -0
  168. torchrl/data/replay_buffers/utils.py +1042 -0
  169. torchrl/data/replay_buffers/writers.py +781 -0
  170. torchrl/data/tensor_specs.py +7101 -0
  171. torchrl/data/utils.py +334 -0
  172. torchrl/envs/__init__.py +265 -0
  173. torchrl/envs/async_envs.py +1105 -0
  174. torchrl/envs/batched_envs.py +3093 -0
  175. torchrl/envs/common.py +4241 -0
  176. torchrl/envs/custom/__init__.py +11 -0
  177. torchrl/envs/custom/chess.py +617 -0
  178. torchrl/envs/custom/llm.py +214 -0
  179. torchrl/envs/custom/pendulum.py +401 -0
  180. torchrl/envs/custom/san_moves.txt +29274 -0
  181. torchrl/envs/custom/tictactoeenv.py +288 -0
  182. torchrl/envs/env_creator.py +263 -0
  183. torchrl/envs/gym_like.py +752 -0
  184. torchrl/envs/libs/__init__.py +68 -0
  185. torchrl/envs/libs/_gym_utils.py +326 -0
  186. torchrl/envs/libs/brax.py +846 -0
  187. torchrl/envs/libs/dm_control.py +544 -0
  188. torchrl/envs/libs/envpool.py +447 -0
  189. torchrl/envs/libs/gym.py +2239 -0
  190. torchrl/envs/libs/habitat.py +138 -0
  191. torchrl/envs/libs/isaac_lab.py +87 -0
  192. torchrl/envs/libs/isaacgym.py +203 -0
  193. torchrl/envs/libs/jax_utils.py +166 -0
  194. torchrl/envs/libs/jumanji.py +963 -0
  195. torchrl/envs/libs/meltingpot.py +599 -0
  196. torchrl/envs/libs/openml.py +153 -0
  197. torchrl/envs/libs/openspiel.py +652 -0
  198. torchrl/envs/libs/pettingzoo.py +1042 -0
  199. torchrl/envs/libs/procgen.py +351 -0
  200. torchrl/envs/libs/robohive.py +429 -0
  201. torchrl/envs/libs/smacv2.py +645 -0
  202. torchrl/envs/libs/unity_mlagents.py +891 -0
  203. torchrl/envs/libs/utils.py +147 -0
  204. torchrl/envs/libs/vmas.py +813 -0
  205. torchrl/envs/llm/__init__.py +63 -0
  206. torchrl/envs/llm/chat.py +730 -0
  207. torchrl/envs/llm/datasets/README.md +4 -0
  208. torchrl/envs/llm/datasets/__init__.py +17 -0
  209. torchrl/envs/llm/datasets/gsm8k.py +353 -0
  210. torchrl/envs/llm/datasets/ifeval.py +274 -0
  211. torchrl/envs/llm/envs.py +789 -0
  212. torchrl/envs/llm/libs/README.md +3 -0
  213. torchrl/envs/llm/libs/__init__.py +8 -0
  214. torchrl/envs/llm/libs/mlgym.py +869 -0
  215. torchrl/envs/llm/reward/__init__.py +10 -0
  216. torchrl/envs/llm/reward/gsm8k.py +324 -0
  217. torchrl/envs/llm/reward/ifeval/README.md +13 -0
  218. torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
  219. torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
  220. torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
  221. torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
  222. torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
  223. torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
  224. torchrl/envs/llm/transforms/__init__.py +55 -0
  225. torchrl/envs/llm/transforms/browser.py +292 -0
  226. torchrl/envs/llm/transforms/dataloading.py +859 -0
  227. torchrl/envs/llm/transforms/format.py +73 -0
  228. torchrl/envs/llm/transforms/kl.py +1544 -0
  229. torchrl/envs/llm/transforms/policy_version.py +189 -0
  230. torchrl/envs/llm/transforms/reason.py +323 -0
  231. torchrl/envs/llm/transforms/tokenizer.py +321 -0
  232. torchrl/envs/llm/transforms/tools.py +1955 -0
  233. torchrl/envs/model_based/__init__.py +9 -0
  234. torchrl/envs/model_based/common.py +180 -0
  235. torchrl/envs/model_based/dreamer.py +112 -0
  236. torchrl/envs/transforms/__init__.py +147 -0
  237. torchrl/envs/transforms/functional.py +48 -0
  238. torchrl/envs/transforms/gym_transforms.py +203 -0
  239. torchrl/envs/transforms/module.py +341 -0
  240. torchrl/envs/transforms/r3m.py +372 -0
  241. torchrl/envs/transforms/ray_service.py +663 -0
  242. torchrl/envs/transforms/rb_transforms.py +214 -0
  243. torchrl/envs/transforms/transforms.py +11835 -0
  244. torchrl/envs/transforms/utils.py +94 -0
  245. torchrl/envs/transforms/vc1.py +307 -0
  246. torchrl/envs/transforms/vecnorm.py +845 -0
  247. torchrl/envs/transforms/vip.py +407 -0
  248. torchrl/envs/utils.py +1718 -0
  249. torchrl/envs/vec_envs.py +11 -0
  250. torchrl/modules/__init__.py +206 -0
  251. torchrl/modules/distributions/__init__.py +73 -0
  252. torchrl/modules/distributions/continuous.py +830 -0
  253. torchrl/modules/distributions/discrete.py +908 -0
  254. torchrl/modules/distributions/truncated_normal.py +187 -0
  255. torchrl/modules/distributions/utils.py +233 -0
  256. torchrl/modules/llm/__init__.py +62 -0
  257. torchrl/modules/llm/backends/__init__.py +65 -0
  258. torchrl/modules/llm/backends/vllm/__init__.py +94 -0
  259. torchrl/modules/llm/backends/vllm/_models.py +46 -0
  260. torchrl/modules/llm/backends/vllm/base.py +72 -0
  261. torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
  262. torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
  263. torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
  264. torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
  265. torchrl/modules/llm/policies/__init__.py +28 -0
  266. torchrl/modules/llm/policies/common.py +1809 -0
  267. torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
  268. torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
  269. torchrl/modules/llm/utils.py +23 -0
  270. torchrl/modules/mcts/__init__.py +21 -0
  271. torchrl/modules/mcts/scores.py +579 -0
  272. torchrl/modules/models/__init__.py +86 -0
  273. torchrl/modules/models/batchrenorm.py +119 -0
  274. torchrl/modules/models/decision_transformer.py +179 -0
  275. torchrl/modules/models/exploration.py +731 -0
  276. torchrl/modules/models/llm.py +156 -0
  277. torchrl/modules/models/model_based.py +596 -0
  278. torchrl/modules/models/models.py +1712 -0
  279. torchrl/modules/models/multiagent.py +1067 -0
  280. torchrl/modules/models/recipes/impala.py +185 -0
  281. torchrl/modules/models/utils.py +162 -0
  282. torchrl/modules/planners/__init__.py +10 -0
  283. torchrl/modules/planners/cem.py +228 -0
  284. torchrl/modules/planners/common.py +73 -0
  285. torchrl/modules/planners/mppi.py +265 -0
  286. torchrl/modules/tensordict_module/__init__.py +89 -0
  287. torchrl/modules/tensordict_module/actors.py +2457 -0
  288. torchrl/modules/tensordict_module/common.py +529 -0
  289. torchrl/modules/tensordict_module/exploration.py +814 -0
  290. torchrl/modules/tensordict_module/probabilistic.py +321 -0
  291. torchrl/modules/tensordict_module/rnn.py +1639 -0
  292. torchrl/modules/tensordict_module/sequence.py +132 -0
  293. torchrl/modules/tensordict_module/world_models.py +34 -0
  294. torchrl/modules/utils/__init__.py +38 -0
  295. torchrl/modules/utils/mappings.py +9 -0
  296. torchrl/modules/utils/utils.py +89 -0
  297. torchrl/objectives/__init__.py +78 -0
  298. torchrl/objectives/a2c.py +659 -0
  299. torchrl/objectives/common.py +753 -0
  300. torchrl/objectives/cql.py +1346 -0
  301. torchrl/objectives/crossq.py +710 -0
  302. torchrl/objectives/ddpg.py +453 -0
  303. torchrl/objectives/decision_transformer.py +371 -0
  304. torchrl/objectives/deprecated.py +516 -0
  305. torchrl/objectives/dqn.py +683 -0
  306. torchrl/objectives/dreamer.py +488 -0
  307. torchrl/objectives/functional.py +48 -0
  308. torchrl/objectives/gail.py +258 -0
  309. torchrl/objectives/iql.py +996 -0
  310. torchrl/objectives/llm/__init__.py +30 -0
  311. torchrl/objectives/llm/grpo.py +846 -0
  312. torchrl/objectives/llm/sft.py +482 -0
  313. torchrl/objectives/multiagent/__init__.py +8 -0
  314. torchrl/objectives/multiagent/qmixer.py +396 -0
  315. torchrl/objectives/ppo.py +1669 -0
  316. torchrl/objectives/redq.py +683 -0
  317. torchrl/objectives/reinforce.py +530 -0
  318. torchrl/objectives/sac.py +1580 -0
  319. torchrl/objectives/td3.py +570 -0
  320. torchrl/objectives/td3_bc.py +625 -0
  321. torchrl/objectives/utils.py +782 -0
  322. torchrl/objectives/value/__init__.py +28 -0
  323. torchrl/objectives/value/advantages.py +1956 -0
  324. torchrl/objectives/value/functional.py +1459 -0
  325. torchrl/objectives/value/utils.py +360 -0
  326. torchrl/record/__init__.py +17 -0
  327. torchrl/record/loggers/__init__.py +23 -0
  328. torchrl/record/loggers/common.py +48 -0
  329. torchrl/record/loggers/csv.py +226 -0
  330. torchrl/record/loggers/mlflow.py +142 -0
  331. torchrl/record/loggers/tensorboard.py +139 -0
  332. torchrl/record/loggers/trackio.py +163 -0
  333. torchrl/record/loggers/utils.py +78 -0
  334. torchrl/record/loggers/wandb.py +214 -0
  335. torchrl/record/recorder.py +554 -0
  336. torchrl/services/__init__.py +79 -0
  337. torchrl/services/base.py +109 -0
  338. torchrl/services/ray_service.py +453 -0
  339. torchrl/testing/__init__.py +107 -0
  340. torchrl/testing/assertions.py +179 -0
  341. torchrl/testing/dist_utils.py +122 -0
  342. torchrl/testing/env_creators.py +227 -0
  343. torchrl/testing/env_helper.py +35 -0
  344. torchrl/testing/gym_helpers.py +156 -0
  345. torchrl/testing/llm_mocks.py +119 -0
  346. torchrl/testing/mocking_classes.py +2720 -0
  347. torchrl/testing/modules.py +295 -0
  348. torchrl/testing/mp_helpers.py +15 -0
  349. torchrl/testing/ray_helpers.py +293 -0
  350. torchrl/testing/utils.py +190 -0
  351. torchrl/trainers/__init__.py +42 -0
  352. torchrl/trainers/algorithms/__init__.py +11 -0
  353. torchrl/trainers/algorithms/configs/__init__.py +705 -0
  354. torchrl/trainers/algorithms/configs/collectors.py +216 -0
  355. torchrl/trainers/algorithms/configs/common.py +41 -0
  356. torchrl/trainers/algorithms/configs/data.py +308 -0
  357. torchrl/trainers/algorithms/configs/envs.py +104 -0
  358. torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
  359. torchrl/trainers/algorithms/configs/logging.py +80 -0
  360. torchrl/trainers/algorithms/configs/modules.py +570 -0
  361. torchrl/trainers/algorithms/configs/objectives.py +177 -0
  362. torchrl/trainers/algorithms/configs/trainers.py +340 -0
  363. torchrl/trainers/algorithms/configs/transforms.py +955 -0
  364. torchrl/trainers/algorithms/configs/utils.py +252 -0
  365. torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
  366. torchrl/trainers/algorithms/configs/weight_update.py +159 -0
  367. torchrl/trainers/algorithms/ppo.py +373 -0
  368. torchrl/trainers/algorithms/sac.py +308 -0
  369. torchrl/trainers/helpers/__init__.py +40 -0
  370. torchrl/trainers/helpers/collectors.py +416 -0
  371. torchrl/trainers/helpers/envs.py +573 -0
  372. torchrl/trainers/helpers/logger.py +33 -0
  373. torchrl/trainers/helpers/losses.py +132 -0
  374. torchrl/trainers/helpers/models.py +658 -0
  375. torchrl/trainers/helpers/replay_buffer.py +59 -0
  376. torchrl/trainers/helpers/trainers.py +301 -0
  377. torchrl/trainers/trainers.py +2052 -0
  378. torchrl/weight_update/__init__.py +33 -0
  379. torchrl/weight_update/_distributed.py +749 -0
  380. torchrl/weight_update/_mp.py +624 -0
  381. torchrl/weight_update/_noupdate.py +102 -0
  382. torchrl/weight_update/_ray.py +1032 -0
  383. torchrl/weight_update/_rpc.py +284 -0
  384. torchrl/weight_update/_shared.py +891 -0
  385. torchrl/weight_update/llm/__init__.py +32 -0
  386. torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
  387. torchrl/weight_update/llm/vllm_nccl.py +710 -0
  388. torchrl/weight_update/utils.py +73 -0
  389. torchrl/weight_update/weight_sync_schemes.py +1244 -0
  390. torchrl-0.11.0.dist-info/METADATA +1308 -0
  391. torchrl-0.11.0.dist-info/RECORD +395 -0
  392. torchrl-0.11.0.dist-info/WHEEL +5 -0
  393. torchrl-0.11.0.dist-info/entry_points.txt +2 -0
  394. torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
  395. torchrl-0.11.0.dist-info/top_level.txt +7 -0
@@ -0,0 +1,1712 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+ from __future__ import annotations
6
+
7
+ import dataclasses
8
+ from collections.abc import Callable, Sequence
9
+ from copy import deepcopy
10
+ from numbers import Number
11
+
12
+ import torch
13
+ from torch import nn
14
+
15
+ from torchrl._utils import prod
16
+ from torchrl.data.utils import DEVICE_TYPING
17
+ from torchrl.modules.models.decision_transformer import DecisionTransformer
18
+ from torchrl.modules.models.utils import (
19
+ _find_depth,
20
+ create_on_device,
21
+ LazyMapping,
22
+ SquashDims,
23
+ Squeeze2dLayer,
24
+ SqueezeLayer,
25
+ )
26
+ from torchrl.modules.tensordict_module.common import DistributionalDQNnet # noqa
27
+
28
+
29
+ class MLP(nn.Sequential):
30
+ """A multi-layer perceptron.
31
+
32
+ If MLP receives more than one input, it concatenates them all along the last dimension before passing the
33
+ resulting tensor through the network. This is aimed at allowing for a seamless interface with calls of the type of
34
+
35
+ >>> model(state, action) # compute state-action value
36
+
37
+ In the future, this feature may be moved to the ProbabilisticTDModule, though it would require it to handle
38
+ different cases (vectors, images, ...)
39
+
40
+ Args:
41
+ in_features (int, optional): number of input features;
42
+ out_features (int, torch.Size or equivalent): number of output
43
+ features. If iterable of integers, the output is reshaped to the
44
+ desired shape.
45
+ depth (int, optional): depth of the network. A depth of 0 will produce
46
+ a single linear layer network with the desired input and output size.
47
+ A length of 1 will create 2 linear layers etc. If no depth is indicated,
48
+ the depth information should be contained in the ``num_cells``
49
+ argument (see below). If ``num_cells`` is an iterable and depth is
50
+ indicated, both should match: ``len(num_cells)`` must be equal to
51
+ ``depth``.
52
+ Defaults to ``0`` (no depth - the network contains a single linear layer).
53
+ num_cells (int or sequence of int, optional): number of cells of every
54
+ layer in between the input and output. If an integer is provided,
55
+ every layer will have the same number of cells. If an iterable is provided,
56
+ the linear layers ``out_features`` will match the content of
57
+ ``num_cells``. Defaults to ``32``;
58
+ activation_class (Type[nn.Module] or callable, optional): activation
59
+ class or constructor to be used.
60
+ Defaults to :class:`~torch.nn.Tanh`.
61
+ activation_kwargs (dict or list of dicts, optional): kwargs to be used
62
+ with the activation class. Also accepts a list of kwargs of length
63
+ ``depth + int(activate_last_layer)``.
64
+ norm_class (Type or callable, optional): normalization class or
65
+ constructor, if any.
66
+ norm_kwargs (dict or list of dicts, optional): kwargs to be used with
67
+ the normalization layers. Also accepts a list of kwargs of length
68
+ ``depth + int(activate_last_layer)``.
69
+ dropout (:obj:`float`, optional): dropout probability. Defaults to ``None`` (no
70
+ dropout);
71
+ bias_last_layer (bool): if ``True``, the last Linear layer will have a bias parameter.
72
+ default: True;
73
+ single_bias_last_layer (bool): if ``True``, the last dimension of the bias of the last layer will be a singleton
74
+ dimension.
75
+ default: True;
76
+ layer_class (Type[nn.Module] or callable, optional): class to be used
77
+ for the linear layers;
78
+ layer_kwargs (dict or list of dicts, optional): kwargs for the linear
79
+ layers. Also accepts a list of kwargs of length ``depth + 1``.
80
+ activate_last_layer (bool): whether the MLP output should be activated. This is useful when the MLP output
81
+ is used as the input for another module.
82
+ default: False.
83
+ device (torch.device, optional): device to create the module on.
84
+
85
+ Examples:
86
+ >>> # All of the following examples provide valid, working MLPs
87
+ >>> mlp = MLP(in_features=3, out_features=6, depth=0) # MLP consisting of a single 3 x 6 linear layer
88
+ >>> print(mlp)
89
+ MLP(
90
+ (0): Linear(in_features=3, out_features=6, bias=True)
91
+ )
92
+ >>> mlp = MLP(in_features=3, out_features=6, depth=4, num_cells=32)
93
+ >>> print(mlp)
94
+ MLP(
95
+ (0): Linear(in_features=3, out_features=32, bias=True)
96
+ (1): Tanh()
97
+ (2): Linear(in_features=32, out_features=32, bias=True)
98
+ (3): Tanh()
99
+ (4): Linear(in_features=32, out_features=32, bias=True)
100
+ (5): Tanh()
101
+ (6): Linear(in_features=32, out_features=32, bias=True)
102
+ (7): Tanh()
103
+ (8): Linear(in_features=32, out_features=6, bias=True)
104
+ )
105
+ >>> mlp = MLP(out_features=6, depth=4, num_cells=32) # LazyLinear for the first layer
106
+ >>> print(mlp)
107
+ MLP(
108
+ (0): LazyLinear(in_features=0, out_features=32, bias=True)
109
+ (1): Tanh()
110
+ (2): Linear(in_features=32, out_features=32, bias=True)
111
+ (3): Tanh()
112
+ (4): Linear(in_features=32, out_features=32, bias=True)
113
+ (5): Tanh()
114
+ (6): Linear(in_features=32, out_features=32, bias=True)
115
+ (7): Tanh()
116
+ (8): Linear(in_features=32, out_features=6, bias=True)
117
+ )
118
+ >>> mlp = MLP(out_features=6, num_cells=[32, 33, 34, 35]) # defines the depth by the num_cells arg
119
+ >>> print(mlp)
120
+ MLP(
121
+ (0): LazyLinear(in_features=0, out_features=32, bias=True)
122
+ (1): Tanh()
123
+ (2): Linear(in_features=32, out_features=33, bias=True)
124
+ (3): Tanh()
125
+ (4): Linear(in_features=33, out_features=34, bias=True)
126
+ (5): Tanh()
127
+ (6): Linear(in_features=34, out_features=35, bias=True)
128
+ (7): Tanh()
129
+ (8): Linear(in_features=35, out_features=6, bias=True)
130
+ )
131
+ >>> mlp = MLP(out_features=(6, 7), num_cells=[32, 33, 34, 35]) # returns a view of the output tensor with shape [*, 6, 7]
132
+ >>> print(mlp)
133
+ MLP(
134
+ (0): LazyLinear(in_features=0, out_features=32, bias=True)
135
+ (1): Tanh()
136
+ (2): Linear(in_features=32, out_features=33, bias=True)
137
+ (3): Tanh()
138
+ (4): Linear(in_features=33, out_features=34, bias=True)
139
+ (5): Tanh()
140
+ (6): Linear(in_features=34, out_features=35, bias=True)
141
+ (7): Tanh()
142
+ (8): Linear(in_features=35, out_features=42, bias=True)
143
+ )
144
+ >>> from torchrl.modules import NoisyLinear
145
+ >>> mlp = MLP(out_features=(6, 7), num_cells=[32, 33, 34, 35], layer_class=NoisyLinear) # uses NoisyLinear layers
146
+ >>> print(mlp)
147
+ MLP(
148
+ (0): NoisyLazyLinear(in_features=0, out_features=32, bias=False)
149
+ (1): Tanh()
150
+ (2): NoisyLinear(in_features=32, out_features=33, bias=True)
151
+ (3): Tanh()
152
+ (4): NoisyLinear(in_features=33, out_features=34, bias=True)
153
+ (5): Tanh()
154
+ (6): NoisyLinear(in_features=34, out_features=35, bias=True)
155
+ (7): Tanh()
156
+ (8): NoisyLinear(in_features=35, out_features=42, bias=True)
157
+ )
158
+
159
+ """
160
+
161
+ def __init__(
162
+ self,
163
+ in_features: int | None = None,
164
+ out_features: int | torch.Size | None = None,
165
+ depth: int | None = None,
166
+ num_cells: Sequence[int] | int | None = None,
167
+ activation_class: type[nn.Module] | Callable = nn.Tanh,
168
+ activation_kwargs: dict | list[dict] | None = None,
169
+ norm_class: type[nn.Module] | Callable | None = None,
170
+ norm_kwargs: dict | list[dict] | None = None,
171
+ dropout: float | None = None,
172
+ bias_last_layer: bool = True,
173
+ single_bias_last_layer: bool = False,
174
+ layer_class: type[nn.Module] | Callable = nn.Linear,
175
+ layer_kwargs: dict | None = None,
176
+ activate_last_layer: bool = False,
177
+ device: DEVICE_TYPING | None = None,
178
+ ):
179
+ if out_features is None:
180
+ raise ValueError("out_features must be specified for MLP.")
181
+
182
+ if num_cells is None:
183
+ default_num_cells = 32
184
+ if depth is None:
185
+ num_cells = []
186
+ depth = 0
187
+ else:
188
+ num_cells = [default_num_cells] * depth
189
+
190
+ self.in_features = in_features
191
+
192
+ _out_features_num = out_features
193
+ if not isinstance(out_features, Number):
194
+ _out_features_num = prod(out_features)
195
+ self.out_features = out_features
196
+ self._reshape_out = not isinstance(
197
+ self.out_features, (int, torch.SymInt, Number)
198
+ )
199
+ self._out_features_num = _out_features_num
200
+ self.activation_class = activation_class
201
+ self.norm_class = norm_class
202
+ self.dropout = dropout
203
+ self.bias_last_layer = bias_last_layer
204
+ self.single_bias_last_layer = single_bias_last_layer
205
+ self.layer_class = layer_class
206
+
207
+ self.activation_kwargs = activation_kwargs
208
+ self.norm_kwargs = norm_kwargs
209
+ self.layer_kwargs = layer_kwargs
210
+
211
+ self.activate_last_layer = activate_last_layer
212
+ if single_bias_last_layer:
213
+ raise NotImplementedError
214
+
215
+ if not (isinstance(num_cells, Sequence) or depth is not None):
216
+ raise RuntimeError(
217
+ "If num_cells is provided as an integer, \
218
+ depth must be provided too."
219
+ )
220
+ self.num_cells = (
221
+ list(num_cells) if isinstance(num_cells, Sequence) else [num_cells] * depth
222
+ )
223
+ self.depth = depth if depth is not None else len(self.num_cells)
224
+ if not (len(self.num_cells) == depth or depth is None):
225
+ raise RuntimeError(
226
+ "depth and num_cells length conflict, \
227
+ consider matching or specifying a constant num_cells argument together with a a desired depth"
228
+ )
229
+
230
+ self._activation_kwargs_iter = _iter_maybe_over_single(
231
+ activation_kwargs, n=self.depth + self.activate_last_layer
232
+ )
233
+ self._norm_kwargs_iter = _iter_maybe_over_single(
234
+ norm_kwargs, n=self.depth + self.activate_last_layer
235
+ )
236
+ self._layer_kwargs_iter = _iter_maybe_over_single(
237
+ layer_kwargs, n=self.depth + 1
238
+ )
239
+ layers = self._make_net(device)
240
+ layers = [
241
+ layer if isinstance(layer, nn.Module) else _ExecutableLayer(layer)
242
+ for layer in layers
243
+ ]
244
+ super().__init__(*layers)
245
+
246
def _make_net(self, device: DEVICE_TYPING | None) -> list[nn.Module]:
    """Build the ordered list of linear / dropout / norm / activation modules of the MLP."""
    modules = []
    fan_ins = [self.in_features] + self.num_cells
    fan_outs = self.num_cells + [self._out_features_num]
    for idx, (fan_in, fan_out) in enumerate(zip(fan_ins, fan_outs)):
        kwargs_for_layer = next(self._layer_kwargs_iter)
        # The final linear layer honours ``bias_last_layer``; every other one
        # defaults to a bias unless the user-provided kwargs override it.
        default_bias = self.bias_last_layer if idx == self.depth else True
        use_bias = kwargs_for_layer.pop("bias", default_bias)
        if fan_in is None:
            # Input size unknown: fall back to the lazy counterpart of the layer class.
            try:
                lazy_cls = LazyMapping[self.layer_class]
            except KeyError:
                raise KeyError(
                    f"The lazy version of {self.layer_class.__name__} is not implemented yet. "
                    "Consider providing the input feature dimensions explicitly when creating an MLP module"
                )
            modules.append(
                create_on_device(
                    lazy_cls, device, fan_out, bias=use_bias, **kwargs_for_layer
                )
            )
        else:
            modules.append(
                create_on_device(
                    self.layer_class,
                    device,
                    fan_in,
                    fan_out,
                    bias=use_bias,
                    **kwargs_for_layer,
                )
            )

        is_hidden = idx < self.depth
        if is_hidden or self.activate_last_layer:
            kwargs_for_norm = next(self._norm_kwargs_iter)
            kwargs_for_activation = next(self._activation_kwargs_iter)
            if self.dropout is not None:
                modules.append(create_on_device(nn.Dropout, device, p=self.dropout))
            if self.norm_class is not None:
                modules.append(
                    create_on_device(self.norm_class, device, **kwargs_for_norm)
                )
            modules.append(
                create_on_device(self.activation_class, device, **kwargs_for_activation)
            )

    return modules
294
+
295
def forward(self, *inputs: tuple[torch.Tensor]) -> torch.Tensor:
    """Run the MLP; multiple tensor inputs are concatenated along the last dim."""
    if len(inputs) > 1:
        # Fuse all inputs into a single feature tensor before the first layer.
        inputs = (torch.cat(list(inputs), dim=-1),)

    result = super().forward(*inputs)
    if self._reshape_out:
        # Expand the flat feature dimension back to the requested output shape.
        result = result.view(*result.shape[:-1], *self.out_features)
    return result
303
+
304
+
305
class ConvNet(nn.Sequential):
    """A 2D-convolutional neural network.

    The network stacks ``depth`` convolutional layers, each followed by an
    activation (and optionally a normalization layer), then by default
    flattens the trailing feature dimensions through an aggregator module.

    Args:
        in_features (int, optional): number of input channels. If ``None``, a
            :class:`~torch.nn.LazyConv2d` module is used for the first layer.
        depth (int, optional): depth of the network. If no depth is indicated,
            the depth information should be contained in the ``num_cells``
            argument (see below). If ``num_cells`` is an iterable and ``depth``
            is indicated, both should match: ``len(num_cells)`` must be equal
            to the ``depth``.
        num_cells (int or Sequence of int, optional): number of cells of
            every layer in between the input and output. If an integer is
            provided, every layer will have the same number of cells. If an
            iterable is provided, the layers' ``out_channels`` will match the
            content of ``num_cells``. Defaults to ``[32, 32, 32]``, or
            ``[32] * depth`` if ``depth`` is not ``None``.
        kernel_sizes (int, sequence of int, optional): Kernel size(s) of the
            conv network. If iterable, the length must match the depth,
            defined by the ``num_cells`` or depth arguments.
            Defaults to ``3``.
        strides (int or sequence of int, optional): Stride(s) of the conv
            network. If iterable, the length must match the depth. Defaults
            to ``1``.
        paddings (int or sequence of int, optional): Padding(s) of the conv
            network. If iterable, the length must match the depth. Defaults
            to ``0``.
        activation_class (Type[nn.Module] or callable, optional): activation
            class or constructor to be used. Defaults to
            :class:`~torch.nn.ELU`.
        activation_kwargs (dict or list of dicts, optional): kwargs to be used
            with the activation class. A list of kwargs of length ``depth``
            can also be passed, with one element per layer.
        norm_class (Type or callable, optional): normalization class or
            constructor, if any.
        norm_kwargs (dict or list of dicts, optional): kwargs to be used with
            the normalization layers. A list of kwargs of length ``depth`` can
            also be passed, with one element per layer.
        bias_last_layer (bool): if ``True``, the last conv layer will have a
            bias parameter. Defaults to ``True``.
        aggregator_class (Type[nn.Module] or callable): aggregator class or
            constructor to use at the end of the chain.
            Defaults to :class:`torchrl.modules.utils.models.SquashDims`.
        aggregator_kwargs (dict, optional): kwargs for the
            ``aggregator_class``.
        squeeze_output (bool): whether the output should be squeezed of its
            singleton dimensions. Defaults to ``False``.
        device (torch.device, optional): device to create the module on.

    Examples:
        >>> cnet = ConvNet(in_features=3, num_cells=[32, 33, 34, 35])  # defines the depth by the num_cells arg
        >>> print(cnet)
        ConvNet(
          (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
          (1): ELU(alpha=1.0)
          (2): Conv2d(32, 33, kernel_size=(3, 3), stride=(1, 1))
          (3): ELU(alpha=1.0)
          (4): Conv2d(33, 34, kernel_size=(3, 3), stride=(1, 1))
          (5): ELU(alpha=1.0)
          (6): Conv2d(34, 35, kernel_size=(3, 3), stride=(1, 1))
          (7): ELU(alpha=1.0)
          (8): SquashDims()
        )

    """

    def __init__(
        self,
        in_features: int | None = None,
        depth: int | None = None,
        num_cells: Sequence[int] | int = None,
        kernel_sizes: Sequence[int] | int = 3,
        strides: Sequence[int] | int = 1,
        paddings: Sequence[int] | int = 0,
        activation_class: type[nn.Module] | Callable = nn.ELU,
        activation_kwargs: dict | list[dict] | None = None,
        norm_class: type[nn.Module] | Callable | None = None,
        norm_kwargs: dict | list[dict] | None = None,
        bias_last_layer: bool = True,
        aggregator_class: type[nn.Module] | Callable | None = SquashDims,
        aggregator_kwargs: dict | None = None,
        squeeze_output: bool = False,
        device: DEVICE_TYPING | None = None,
    ):
        if num_cells is None:
            # FIX: honour ``depth`` when only a depth is given (previously the
            # fixed 3-element default conflicted with any ``depth != 3``);
            # this mirrors Conv3dNet's behaviour.
            num_cells = [32, 32, 32] if depth is None else [32] * depth

        self.in_features = in_features
        self.activation_class = activation_class
        self.norm_class = norm_class
        self.bias_last_layer = bias_last_layer
        self.aggregator_class = aggregator_class
        self.aggregator_kwargs = (
            aggregator_kwargs if aggregator_kwargs is not None else {"ndims_in": 3}
        )
        self.squeeze_output = squeeze_output

        self.activation_kwargs = (
            activation_kwargs if activation_kwargs is not None else {}
        )
        self.norm_kwargs = norm_kwargs if norm_kwargs is not None else {}

        depth = _find_depth(depth, num_cells, kernel_sizes, strides, paddings)
        self.depth = depth
        if depth == 0:
            raise ValueError("Null depth is not permitted with ConvNet.")

        for _field, _value in zip(
            ["num_cells", "kernel_sizes", "strides", "paddings"],
            [num_cells, kernel_sizes, strides, paddings],
        ):
            if not isinstance(_value, Sequence):
                # FIX: validate *before* broadcasting, so a scalar with an
                # unknown depth raises a clear error instead of a TypeError
                # from ``[_value] * None``.
                if depth is None:
                    raise RuntimeError(
                        f"If {_field} is provided as an integer, "
                        "depth must be provided too."
                    )
                _value = [_value] * depth
            elif depth is not None and len(_value) != depth:
                raise RuntimeError(
                    f"depth={depth} and {_field}={len(_value)} length conflict, "
                    f"consider matching or specifying a constant {_field} argument together with a desired depth"
                )
            # Store as a list so downstream list concatenations are type-safe.
            setattr(self, _field, list(_value))

        self.out_features = self.num_cells[-1]
        # After validation, the kernel list is the single source of truth.
        self.depth = len(self.kernel_sizes)

        self._activation_kwargs_iter = _iter_maybe_over_single(
            activation_kwargs, n=self.depth
        )
        self._norm_kwargs_iter = _iter_maybe_over_single(norm_kwargs, n=self.depth)

        layers = self._make_net(device)
        layers = [
            layer if isinstance(layer, nn.Module) else _ExecutableLayer(layer)
            for layer in layers
        ]
        super().__init__(*layers)

    def _make_net(self, device: DEVICE_TYPING | None) -> list[nn.Module]:
        """Build the list of conv / activation / norm / aggregator modules."""
        layers = []
        channel_ins = [self.in_features] + list(self.num_cells[:-1])
        channel_outs = list(self.num_cells)
        for i, (_in, _out, _kernel, _stride, _padding) in enumerate(
            zip(channel_ins, channel_outs, self.kernel_sizes, self.strides, self.paddings)
        ):
            # FIX: the *last* conv layer honours ``bias_last_layer``. The
            # previous condition (``i < len(in_features) - 1``) was always
            # true, so ``bias_last_layer=False`` was silently ignored.
            _bias = True if i < self.depth - 1 else self.bias_last_layer
            if _in is not None:
                layers.append(
                    nn.Conv2d(
                        _in,
                        _out,
                        kernel_size=_kernel,
                        stride=_stride,
                        bias=_bias,
                        padding=_padding,
                        device=device,
                    )
                )
            else:
                # Input channel count unknown: defer to a lazy conv module.
                layers.append(
                    nn.LazyConv2d(
                        _out,
                        kernel_size=_kernel,
                        stride=_stride,
                        bias=_bias,
                        padding=_padding,
                        device=device,
                    )
                )

            activation_kwargs = next(self._activation_kwargs_iter)
            layers.append(
                create_on_device(self.activation_class, device, **activation_kwargs)
            )
            if self.norm_class is not None:
                norm_kwargs = next(self._norm_kwargs_iter)
                layers.append(create_on_device(self.norm_class, device, **norm_kwargs))

        if self.aggregator_class is not None:
            layers.append(
                create_on_device(
                    self.aggregator_class, device, **self.aggregator_kwargs
                )
            )

        if self.squeeze_output:
            layers.append(Squeeze2dLayer())
        return layers

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        """Flatten leading batch dims, run the conv stack, restore batch dims.

        The input is expected to end with (C, H, W) dimensions; any number of
        leading batch dimensions is supported.
        """
        *batch, _C, _H, _W = inputs.shape
        if len(batch) > 1:
            inputs = inputs.flatten(0, len(batch) - 1)
        out = super().forward(inputs)
        if len(batch) > 1:
            out = out.unflatten(0, batch)
        return out

    @classmethod
    def default_atari_dqn(cls, num_actions: int):
        """Returns the default DQN as presented in the seminal DQN paper.

        Args:
            num_actions (int): the action space of the atari game.

        Returns:
            nn.Sequential: the DQN convolutional trunk followed by an MLP head.
        """
        cnn = ConvNet(
            activation_class=torch.nn.ReLU,
            num_cells=[32, 64, 64],
            kernel_sizes=[8, 4, 3],
            strides=[4, 2, 1],
        )
        mlp = MLP(
            activation_class=torch.nn.ReLU,
            out_features=num_actions,
            num_cells=[512],
        )
        return nn.Sequential(cnn, mlp)
567
+
568
+
569
# Backwards-compatible alias: ``Conv2dNet`` is simply another name for ``ConvNet``.
Conv2dNet = ConvNet
570
+
571
+
572
class Conv3dNet(nn.Sequential):
    """A 3D-convolutional neural network.

    The network stacks ``depth`` 3D-convolutional layers, each followed by an
    activation (and optionally a normalization layer), then by default
    flattens the trailing feature dimensions through an aggregator module.

    Args:
        in_features (int, optional): number of input channels. A lazy
            implementation that automatically retrieves the input size will be
            used if none is provided.
        depth (int, optional): depth of the network. If no ``depth`` is
            indicated, the depth information should be contained in the
            ``num_cells`` argument (see below). If ``num_cells`` is an
            iterable and ``depth`` is indicated, both should match:
            ``len(num_cells)`` must be equal to the ``depth``.
        num_cells (int or sequence of int, optional): number of cells of every
            layer in between the input and output. If an integer is provided,
            every layer will have the same number of cells and the depth will
            be retrieved from ``depth``. If an iterable is provided, the
            layers' ``out_channels`` will match the content of ``num_cells``.
            Defaults to ``[32, 32, 32]``, or ``[32] * depth`` if ``depth`` is
            not ``None``.
        kernel_sizes (int, sequence of int, optional): Kernel size(s) of the
            conv network. If iterable, the length must match the depth,
            defined by the ``num_cells`` or depth arguments. Defaults to ``3``.
        strides (int or sequence of int): Stride(s) of the conv network.
            If iterable, the length must match the depth. Defaults to ``1``.
        paddings (int or sequence of int): Padding(s) of the conv network.
            If iterable, the length must match the depth. Defaults to ``0``.
        activation_class (Type[nn.Module] or callable): activation class or
            constructor to be used. Defaults to :class:`~torch.nn.ELU`.
        activation_kwargs (dict or list of dicts, optional): kwargs to be used
            with the activation class. A list of kwargs of length ``depth``
            with one element per layer can also be provided.
        norm_class (Type or callable, optional): normalization class, if any.
        norm_kwargs (dict or list of dicts, optional): kwargs to be used with
            the normalization layers. A list of kwargs of length ``depth``
            with one element per layer can also be provided.
        bias_last_layer (bool): if ``True``, the last conv layer will have a
            bias parameter. Defaults to ``True``.
        aggregator_class (Type[nn.Module] or callable): aggregator class or
            constructor to use at the end of the chain. Defaults to
            :class:`~torchrl.modules.models.utils.SquashDims`.
        aggregator_kwargs (dict, optional): kwargs for the ``aggregator_class``
            constructor.
        squeeze_output (bool): whether the output should be squeezed of its
            singleton dimensions. Defaults to ``False``.
        device (torch.device, optional): device to create the module on.

    Examples:
        >>> cnet = Conv3dNet(in_features=3, num_cells=[32, 33, 34, 35])  # defines the depth by the num_cells arg
        >>> print(cnet)
        Conv3dNet(
          (0): Conv3d(3, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1))
          (1): ELU(alpha=1.0)
          (2): Conv3d(32, 33, kernel_size=(3, 3, 3), stride=(1, 1, 1))
          (3): ELU(alpha=1.0)
          (4): Conv3d(33, 34, kernel_size=(3, 3, 3), stride=(1, 1, 1))
          (5): ELU(alpha=1.0)
          (6): Conv3d(34, 35, kernel_size=(3, 3, 3), stride=(1, 1, 1))
          (7): ELU(alpha=1.0)
          (8): SquashDims()
        )

    """

    def __init__(
        self,
        in_features: int | None = None,
        depth: int | None = None,
        num_cells: Sequence[int] | int = None,
        kernel_sizes: Sequence[int] | int = 3,
        strides: Sequence[int] | int = 1,
        paddings: Sequence[int] | int = 0,
        activation_class: type[nn.Module] | Callable = nn.ELU,
        activation_kwargs: dict | list[dict] | None = None,
        norm_class: type[nn.Module] | Callable | None = None,
        norm_kwargs: dict | list[dict] | None = None,
        bias_last_layer: bool = True,
        aggregator_class: type[nn.Module] | Callable | None = SquashDims,
        aggregator_kwargs: dict | None = None,
        squeeze_output: bool = False,
        device: DEVICE_TYPING | None = None,
    ):
        if num_cells is None:
            # Honour ``depth`` when only a depth is given.
            num_cells = [32, 32, 32] if depth is None else [32] * depth

        self.in_features = in_features
        self.activation_class = activation_class
        self.norm_class = norm_class

        self.activation_kwargs = (
            activation_kwargs if activation_kwargs is not None else {}
        )
        self.norm_kwargs = norm_kwargs if norm_kwargs is not None else {}

        self.bias_last_layer = bias_last_layer
        self.aggregator_class = aggregator_class
        self.aggregator_kwargs = (
            aggregator_kwargs if aggregator_kwargs is not None else {"ndims_in": 4}
        )
        self.squeeze_output = squeeze_output

        depth = _find_depth(depth, num_cells, kernel_sizes, strides, paddings)
        self.depth = depth
        if depth == 0:
            raise ValueError("Null depth is not permitted with Conv3dNet.")

        for _field, _value in zip(
            ["num_cells", "kernel_sizes", "strides", "paddings"],
            [num_cells, kernel_sizes, strides, paddings],
        ):
            if not isinstance(_value, Sequence):
                # FIX: a scalar with an unknown depth previously crashed with
                # a TypeError from ``[_value] * None``; raise the same clear
                # error as ConvNet instead.
                if depth is None:
                    raise RuntimeError(
                        f"If {_field} is provided as an integer, "
                        "depth must be provided too."
                    )
                _value = [_value] * depth
            elif depth is not None and len(_value) != depth:
                raise ValueError(
                    f"depth={depth} and {_field}={len(_value)} length conflict, "
                    f"consider matching or specifying a constant {_field} argument together with a desired depth"
                )
            # FIX: store as a list — previously a tuple ``num_cells`` made
            # ``[self.in_features] + self.num_cells[...]`` raise a TypeError
            # (list + tuple) in ``_make_net``.
            setattr(self, _field, list(_value))

        self.out_features = self.num_cells[-1]
        # After validation, the kernel list is the single source of truth.
        self.depth = len(self.kernel_sizes)

        self._activation_kwargs_iter = _iter_maybe_over_single(
            activation_kwargs, n=self.depth
        )
        self._norm_kwargs_iter = _iter_maybe_over_single(norm_kwargs, n=self.depth)

        layers = self._make_net(device)
        layers = [
            layer if isinstance(layer, nn.Module) else _ExecutableLayer(layer)
            for layer in layers
        ]
        super().__init__(*layers)

    def _make_net(self, device: DEVICE_TYPING | None) -> list[nn.Module]:
        """Build the list of conv / activation / norm / aggregator modules."""
        layers = []
        channel_ins = [self.in_features] + list(self.num_cells[:-1])
        channel_outs = list(self.num_cells)
        for i, (_in, _out, _kernel, _stride, _padding) in enumerate(
            zip(channel_ins, channel_outs, self.kernel_sizes, self.strides, self.paddings)
        ):
            # FIX: the *last* conv layer honours ``bias_last_layer``. The
            # previous condition (``i < len(in_features) - 1``) was always
            # true, so ``bias_last_layer=False`` was silently ignored.
            _bias = True if i < self.depth - 1 else self.bias_last_layer
            if _in is not None:
                layers.append(
                    nn.Conv3d(
                        _in,
                        _out,
                        kernel_size=_kernel,
                        stride=_stride,
                        bias=_bias,
                        padding=_padding,
                        device=device,
                    )
                )
            else:
                # Input channel count unknown: defer to a lazy conv module.
                layers.append(
                    nn.LazyConv3d(
                        _out,
                        kernel_size=_kernel,
                        stride=_stride,
                        bias=_bias,
                        padding=_padding,
                        device=device,
                    )
                )

            activation_kwargs = next(self._activation_kwargs_iter)
            layers.append(
                create_on_device(self.activation_class, device, **activation_kwargs)
            )
            if self.norm_class is not None:
                norm_kwargs = next(self._norm_kwargs_iter)
                layers.append(create_on_device(self.norm_class, device, **norm_kwargs))

        if self.aggregator_class is not None:
            layers.append(
                create_on_device(
                    self.aggregator_class, device, **self.aggregator_kwargs
                )
            )

        if self.squeeze_output:
            layers.append(SqueezeLayer((-3, -2, -1)))
        return layers

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        """Flatten leading batch dims, run the conv stack, restore batch dims.

        The input is expected to end with (C, D, H, W) dimensions; any number
        of leading batch dimensions is supported.

        Raises:
            ValueError: if the input has fewer than 4 dimensions.
        """
        try:
            *batch, _C, _D, _H, _W = inputs.shape
        except ValueError as err:
            raise ValueError(
                f"The input value of {self.__class__.__name__} must have at least 4 dimensions, got {inputs.ndim} instead."
            ) from err
        if len(batch) > 1:
            inputs = inputs.flatten(0, len(batch) - 1)
        out = super().forward(inputs)
        if len(batch) > 1:
            out = out.unflatten(0, batch)
        return out
817
+
818
+
819
class DuelingMlpDQNet(nn.Module):
    """Creates a Dueling MLP Q-network.

    Presented in https://arxiv.org/abs/1511.06581

    A shared MLP trunk (``features``) feeds two MLP heads: ``advantage``
    (with ``out_features`` outputs) and ``value`` (with ``out_features_value``
    outputs). The forward pass combines them as
    ``value + advantage - advantage.mean(-1, keepdim=True)``.

    Args:
        out_features (int, torch.Size or equivalent): number of features for the advantage network.
        out_features_value (int): number of features for the value network.
            Defaults to ``1``.
        mlp_kwargs_feature (dict, optional): kwargs for the feature network.
            Defaults to ``{"num_cells": [256, 256], "out_features": 256,
            "activation_class": nn.ELU, "activate_last_layer": True}``.
        mlp_kwargs_output (dict, optional): kwargs for the advantage and value
            networks. Defaults to ``{"depth": 1, "activation_class": nn.ELU,
            "num_cells": 512, "bias_last_layer": True}``.
        device (torch.device, optional): device to create the module on.

    Examples:
        >>> net = DuelingMlpDQNet(out_features=(3, 2))
        >>> net(torch.zeros(1, 5)).shape
        torch.Size([1, 3, 2])
    """

    def __init__(
        self,
        out_features: int | torch.Size,
        out_features_value: int = 1,
        mlp_kwargs_feature: dict | None = None,
        mlp_kwargs_output: dict | None = None,
        device: DEVICE_TYPING | None = None,
    ):
        super().__init__()

        trunk_cfg = {
            "num_cells": [256, 256],
            "out_features": 256,
            "activation_class": nn.ELU,
            "activate_last_layer": True,
        }
        trunk_cfg.update(mlp_kwargs_feature or {})
        self.features = MLP(device=device, **trunk_cfg)

        head_cfg = {
            "depth": 1,
            "activation_class": nn.ELU,
            "num_cells": 512,
            "bias_last_layer": True,
        }
        head_cfg.update(mlp_kwargs_output or {})
        self.out_features = out_features
        self.out_features_value = out_features_value
        self.advantage = MLP(out_features=out_features, device=device, **head_cfg)
        self.value = MLP(out_features=out_features_value, device=device, **head_cfg)

        # Zero every linear/conv bias, matching the reference initialization.
        for sub in self.modules():
            if isinstance(sub, (nn.Conv2d, nn.Linear)) and isinstance(
                sub.bias, torch.Tensor
            ):
                sub.bias.data.zero_()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return dueling Q-values for the input observations."""
        hidden = self.features(x)
        adv = self.advantage(hidden)
        val = self.value(hidden)
        # Mean-subtraction makes the value/advantage decomposition identifiable.
        return val + adv - adv.mean(dim=-1, keepdim=True)
934
+
935
+
936
class DuelingCnnDQNet(nn.Module):
    """Dueling CNN Q-network.

    Presented in https://arxiv.org/abs/1511.06581

    A convolutional trunk (``features``) feeds two MLP heads: ``advantage``
    (with ``out_features`` outputs) and ``value`` (with ``out_features_value``
    outputs). The forward pass combines them as
    ``value + advantage - advantage.mean(-1, keepdim=True)``.

    Args:
        out_features (int): number of features for the advantage network.
        out_features_value (int): number of features for the value network.
            Defaults to ``1``.
        cnn_kwargs (dict or list of dicts, optional): kwargs for the feature
            network. Defaults to ``{"num_cells": [32, 64, 64],
            "strides": [4, 2, 1], "kernel_sizes": [8, 4, 3]}``.
        mlp_kwargs (dict or list of dicts, optional): kwargs for the advantage
            and value networks. Defaults to ``{"depth": 1,
            "activation_class": nn.ELU, "num_cells": 512,
            "bias_last_layer": True}``.
        device (torch.device, optional): device to create the module on.

    Examples:
        >>> net = DuelingCnnDQNet(out_features=20)
        >>> net(torch.zeros(1, 3, 64, 64)).shape
        torch.Size([1, 20])
    """

    def __init__(
        self,
        out_features: int,
        out_features_value: int = 1,
        cnn_kwargs: dict | None = None,
        mlp_kwargs: dict | None = None,
        device: DEVICE_TYPING | None = None,
    ):
        super().__init__()

        trunk_cfg = {
            "num_cells": [32, 64, 64],
            "strides": [4, 2, 1],
            "kernel_sizes": [8, 4, 3],
        }
        trunk_cfg.update(cnn_kwargs or {})
        self.features = ConvNet(device=device, **trunk_cfg)

        head_cfg = {
            "depth": 1,
            "activation_class": nn.ELU,
            "num_cells": 512,
            "bias_last_layer": True,
        }
        head_cfg.update(mlp_kwargs or {})
        self.out_features = out_features
        self.out_features_value = out_features_value
        self.advantage = MLP(out_features=out_features, device=device, **head_cfg)
        self.value = MLP(out_features=out_features_value, device=device, **head_cfg)

        # Zero every linear/conv bias, matching the reference initialization.
        for sub in self.modules():
            if isinstance(sub, (nn.Conv2d, nn.Linear)) and isinstance(
                sub.bias, torch.Tensor
            ):
                sub.bias.data.zero_()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return dueling Q-values for the input pixels."""
        hidden = self.features(x)
        adv = self.advantage(hidden)
        val = self.value(hidden)
        # Mean-subtraction makes the value/advantage decomposition identifiable.
        return val + adv - adv.mean(dim=-1, keepdim=True)
1040
+
1041
+
1042
def ddpg_init_last_layer(
    module: nn.Sequential,
    scale: float = 6e-4,
    device: DEVICE_TYPING | None = None,
) -> None:
    """Initializer for the last layer of DDPG modules.

    Re-initialises the weights (and bias, if present) of the last
    ``nn.Linear`` / ``nn.Conv2d`` layer of ``module`` with uniform noise in
    ``[-scale / 2, scale / 2)``, as presented in "CONTINUOUS CONTROL WITH
    DEEP REINFORCEMENT LEARNING", https://arxiv.org/pdf/1509.02971.pdf

    Args:
        module (nn.Module): an actor or critic to be initialized.
        scale (:obj:`float`, optional): the noise scale. Defaults to ``6e-4``.
        device (torch.device, optional): the device where the noise should be
            created. Defaults to the device of the last layer's weight
            parameter.

    Raises:
        RuntimeError: if ``module`` contains no linear or 2D-conv layer.

    Examples:
        >>> from torchrl.modules.models.models import MLP, ddpg_init_last_layer
        >>> mlp = MLP(in_features=4, out_features=5, num_cells=(10, 10))
        >>> # init the last layer of the MLP
        >>> ddpg_init_last_layer(mlp)

    """
    target = next(
        (
            sub
            for sub in reversed(module)
            if isinstance(sub, (nn.Linear, nn.Conv2d))
        ),
        None,
    )
    if target is None:
        raise RuntimeError("Could not find a nn.Linear / nn.Conv2d to initialize.")

    def _uniform_noise(tensor: torch.Tensor) -> torch.Tensor:
        # Uniform noise in [-scale/2, scale/2), optionally on an explicit device.
        return torch.rand_like(tensor, device=device) * scale - scale / 2

    target.weight.data.copy_(_uniform_noise(target.weight.data))
    if target.bias is not None:
        target.bias.data.copy_(_uniform_noise(target.bias.data))
1079
+
1080
+
1081
class DdpgCnnActor(nn.Module):
    """DDPG Convolutional Actor class.

    Presented in "CONTINUOUS CONTROL WITH DEEP REINFORCEMENT LEARNING",
    https://arxiv.org/pdf/1509.02971.pdf

    The DDPG Convolutional Actor takes as input an observation (some simple
    transformation of the observed pixels) and returns an action vector from
    it, as well as an observation embedding that can be reused for a value
    estimation. It should be trained to maximise the value returned by the
    DDPG Q Value network.

    Args:
        action_dim (int): length of the action vector.
        conv_net_kwargs (dict or list of dicts, optional): kwargs for the
            ConvNet. Defaults to ``{"in_features": None,
            "num_cells": [32, 64, 64], "kernel_sizes": [8, 4, 3],
            "strides": [4, 2, 1], "paddings": [0, 0, 1],
            "activation_class": nn.ELU, "norm_class": None,
            "aggregator_class": SquashDims,
            "aggregator_kwargs": {"ndims_in": 3}, "squeeze_output": True}``.
        mlp_net_kwargs (dict, optional): kwargs for the MLP. Defaults to
            ``{"in_features": None, "out_features": action_dim, "depth": 2,
            "num_cells": 200, "activation_class": nn.ELU,
            "bias_last_layer": True}``.
        use_avg_pooling (bool, optional): if ``True``, a
            :class:`~torch.nn.AdaptiveAvgPool2d` layer is used to aggregate
            the output. Defaults to ``False``.
        device (torch.device, optional): device to create the module on.

    Examples:
        >>> actor = DdpgCnnActor(action_dim=4)
        >>> obs = torch.randn(10, 3, 64, 64)
        >>> action, hidden = actor(obs)
        >>> action.shape
        torch.Size([10, 4])
    """

    def __init__(
        self,
        action_dim: int,
        conv_net_kwargs: dict | None = None,
        mlp_net_kwargs: dict | None = None,
        use_avg_pooling: bool = False,
        device: DEVICE_TYPING | None = None,
    ):
        super().__init__()
        # Pick the aggregation strategy once, then build the default configs.
        if use_avg_pooling:
            aggregator_cls = nn.AdaptiveAvgPool2d
            aggregator_cfg = {"output_size": (1, 1)}
        else:
            aggregator_cls = SquashDims
            aggregator_cfg = {"ndims_in": 3}

        conv_cfg = {
            "in_features": None,
            "num_cells": [32, 64, 64],
            "kernel_sizes": [8, 4, 3],
            "strides": [4, 2, 1],
            "paddings": [0, 0, 1],
            "activation_class": nn.ELU,
            "norm_class": None,
            "aggregator_class": aggregator_cls,
            "aggregator_kwargs": aggregator_cfg,
            "squeeze_output": use_avg_pooling,
        }
        conv_cfg.update(conv_net_kwargs or {})

        mlp_cfg = {
            "in_features": None,
            "out_features": action_dim,
            "depth": 2,
            "num_cells": 200,
            "activation_class": nn.ELU,
            "bias_last_layer": True,
        }
        mlp_cfg.update(mlp_net_kwargs or {})

        self.convnet = ConvNet(device=device, **conv_cfg)
        self.mlp = MLP(device=device, **mlp_cfg)
        # DDPG paper: small uniform init on the final layer of the policy head.
        ddpg_init_last_layer(self.mlp, 6e-4, device=device)

    def forward(self, observation: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Map pixel observations to ``(action, hidden_embedding)``."""
        hidden = self.convnet(observation)
        action = self.mlp(hidden)
        return action, hidden
1205
+
1206
+
1207
class DdpgMlpActor(nn.Module):
    """DDPG Actor class.

    Presented in "CONTINUOUS CONTROL WITH DEEP REINFORCEMENT LEARNING",
    https://arxiv.org/pdf/1509.02971.pdf

    Maps an observation vector to an action vector. It is trained to maximise
    the value returned by the DDPG Q-value network.

    Args:
        action_dim (int): length of the action vector.
        mlp_net_kwargs (dict, optional): kwargs for the underlying MLP.
            Defaults to

            >>> {
            ...     'in_features': None,
            ...     'out_features': action_dim,
            ...     'depth': 2,
            ...     'num_cells': [400, 300],
            ...     'activation_class': nn.ELU,
            ...     'bias_last_layer': True,
            ... }

        device (torch.device, optional): device to create the module on.

    Examples:
        >>> import torch
        >>> from torchrl.modules import DdpgMlpActor
        >>> actor = DdpgMlpActor(action_dim=4)
        >>> obs = torch.zeros(10, 6)
        >>> action = actor(obs)
        >>> print(action.shape)
        torch.Size([10, 4])

    """

    def __init__(
        self,
        action_dim: int,
        mlp_net_kwargs: dict | None = None,
        device: DEVICE_TYPING | None = None,
    ):
        super().__init__()
        net_kwargs = {
            "in_features": None,
            "out_features": action_dim,
            "depth": 2,
            "num_cells": [400, 300],
            "activation_class": nn.ELU,
            "bias_last_layer": True,
        }
        # User-provided kwargs override the defaults.
        net_kwargs.update(mlp_net_kwargs or {})
        self.mlp = MLP(device=device, **net_kwargs)
        # Small uniform init on the output layer, as in the DDPG paper.
        ddpg_init_last_layer(self.mlp, 6e-3, device=device)

    def forward(self, observation: torch.Tensor) -> torch.Tensor:
        """Compute the action for ``observation``."""
        return self.mlp(observation)
1276
+
1277
+
1278
class DdpgCnnQNet(nn.Module):
    """DDPG Convolutional Q-value class.

    Presented in "CONTINUOUS CONTROL WITH DEEP REINFORCEMENT LEARNING",
    https://arxiv.org/pdf/1509.02971.pdf

    The DDPG Q-value network takes as input an observation and an action, and
    returns a scalar from it.

    Args:
        conv_net_kwargs (dict, optional): kwargs for the
            convolutional network.
            Defaults to

            >>> {
            ...     'in_features': None,
            ...     'num_cells': [32, 64, 128],
            ...     'kernel_sizes': [8, 4, 3],
            ...     'strides': [4, 2, 1],
            ...     'paddings': [0, 0, 1],
            ...     'activation_class': nn.ELU,
            ...     'norm_class': None,
            ...     'aggregator_class': nn.AdaptiveAvgPool2d,
            ...     'aggregator_kwargs': {'output_size': (1, 1)},
            ...     'squeeze_output': True,
            ... }

        mlp_net_kwargs (dict, optional): kwargs for MLP.
            Defaults to

            >>> {
            ...     'in_features': None,
            ...     'out_features': 1,
            ...     'depth': 2,
            ...     'num_cells': 200,
            ...     'activation_class': nn.ELU,
            ...     'bias_last_layer': True,
            ... }

        use_avg_pooling (bool, optional): if ``True``, an
            :class:`~torch.nn.AdaptiveAvgPool2d` layer is used to aggregate
            the output. Default is ``True``.
        device (torch.device, optional): device to create the module on.

    Examples:
        >>> from torchrl.modules import DdpgCnnQNet
        >>> import torch
        >>> net = DdpgCnnQNet()
        >>> obs = torch.zeros(1, 3, 64, 64)
        >>> action = torch.zeros(1, 4)
        >>> value = net(obs, action)
        >>> print(value.shape)
        torch.Size([1, 1])

    """

    def __init__(
        self,
        conv_net_kwargs: dict | None = None,
        mlp_net_kwargs: dict | None = None,
        use_avg_pooling: bool = True,
        device: DEVICE_TYPING | None = None,
    ):
        super().__init__()
        # Aggregate the conv feature map either by flattening (SquashDims) or
        # by adaptive average pooling down to a 1x1 spatial map.
        if use_avg_pooling:
            aggregator_class = nn.AdaptiveAvgPool2d
            aggregator_kwargs = {"output_size": (1, 1)}
        else:
            aggregator_class = SquashDims
            aggregator_kwargs = {"ndims_in": 3}
        conv_defaults = {
            "in_features": None,
            "num_cells": [32, 64, 128],
            "kernel_sizes": [8, 4, 3],
            "strides": [4, 2, 1],
            "paddings": [0, 0, 1],
            "activation_class": nn.ELU,
            "norm_class": None,
            "aggregator_class": aggregator_class,
            "aggregator_kwargs": aggregator_kwargs,
            "squeeze_output": use_avg_pooling,
        }
        # User-provided kwargs take precedence over the defaults above.
        conv_defaults.update(conv_net_kwargs or {})
        mlp_defaults = {
            "in_features": None,
            "out_features": 1,
            "depth": 2,
            "num_cells": 200,
            "activation_class": nn.ELU,
            "bias_last_layer": True,
        }
        mlp_defaults.update(mlp_net_kwargs or {})
        self.convnet = ConvNet(device=device, **conv_defaults)
        self.mlp = MLP(device=device, **mlp_defaults)
        # DDPG-style small uniform init on the last MLP layer.
        ddpg_init_last_layer(self.mlp, 6e-4, device=device)

    def forward(self, observation: torch.Tensor, action: torch.Tensor) -> torch.Tensor:
        """Return the scalar Q-value for the (observation, action) pair."""
        # The action is concatenated to the conv embedding before the MLP head.
        hidden = torch.cat([self.convnet(observation), action], -1)
        return self.mlp(hidden)
1399
+
1400
+
1401
class DdpgMlpQNet(nn.Module):
    """DDPG Q-value MLP class.

    Presented in "CONTINUOUS CONTROL WITH DEEP REINFORCEMENT LEARNING",
    https://arxiv.org/pdf/1509.02971.pdf

    Takes an observation and an action and returns a scalar value.
    Because actions are integrated later than observations, two networks are
    created: ``mlp1`` embeds the observation, then the action is concatenated
    to that embedding and fed to ``mlp2``.

    Args:
        mlp_net_kwargs_net1 (dict, optional): kwargs for the first MLP.
            Defaults to

            >>> {
            ...     'in_features': None,
            ...     'out_features': 400,
            ...     'depth': 0,
            ...     'num_cells': [],
            ...     'activation_class': nn.ELU,
            ...     'bias_last_layer': True,
            ...     'activate_last_layer': True,
            ... }

        mlp_net_kwargs_net2 (dict, optional): kwargs for the second MLP.
            Defaults to

            >>> {
            ...     'in_features': None,
            ...     'out_features': 1,
            ...     'num_cells': [300, ],
            ...     'activation_class': nn.ELU,
            ...     'bias_last_layer': True,
            ... }

        device (torch.device, optional): device to create the module on.

    Examples:
        >>> import torch
        >>> from torchrl.modules import DdpgMlpQNet
        >>> net = DdpgMlpQNet()
        >>> obs = torch.zeros(1, 32)
        >>> action = torch.zeros(1, 4)
        >>> value = net(obs, action)
        >>> print(value.shape)
        torch.Size([1, 1])

    """

    def __init__(
        self,
        mlp_net_kwargs_net1: dict | None = None,
        mlp_net_kwargs_net2: dict | None = None,
        device: DEVICE_TYPING | None = None,
    ):
        super().__init__()
        # First stage: observation embedding (single activated linear layer).
        net1_kwargs = {
            "in_features": None,
            "out_features": 400,
            "depth": 0,
            "num_cells": [],
            "activation_class": nn.ELU,
            "bias_last_layer": True,
            "activate_last_layer": True,
        }
        net1_kwargs.update(mlp_net_kwargs_net1 or {})
        self.mlp1 = MLP(device=device, **net1_kwargs)

        # Second stage: maps [embedding, action] to a scalar value.
        net2_kwargs = {
            "in_features": None,
            "out_features": 1,
            "num_cells": [
                300,
            ],
            "activation_class": nn.ELU,
            "bias_last_layer": True,
        }
        net2_kwargs.update(mlp_net_kwargs_net2 or {})
        self.mlp2 = MLP(device=device, **net2_kwargs)
        # DDPG-style small uniform init on the output layer.
        ddpg_init_last_layer(self.mlp2, 6e-3, device=device)

    def forward(self, observation: torch.Tensor, action: torch.Tensor) -> torch.Tensor:
        """Return the scalar Q-value for the (observation, action) pair."""
        obs_embedding = self.mlp1(observation)
        return self.mlp2(torch.cat([obs_embedding, action], -1))
1505
+
1506
+
1507
class OnlineDTActor(nn.Module):
    """Online Decision Transformer Actor class.

    Actor class for the Online Decision Transformer, sampling actions from a
    gaussian distribution, as presented in
    `"Online Decision Transformer" <https://arxiv.org/abs/2202.05607.pdf>`_.

    Returns the mean and standard deviation for the gaussian distribution to
    sample actions from.

    Args:
        state_dim (int): state dimension.
        action_dim (int): action dimension.
        transformer_config (Dict or :class:`DecisionTransformer.DTConfig`):
            config for the GPT2 transformer.
            Defaults to :meth:`default_config`.
        device (torch.device, optional): device to use. Defaults to None.

    Examples:
        >>> model = OnlineDTActor(state_dim=4, action_dim=2,
        ...     transformer_config=OnlineDTActor.default_config())
        >>> observation = torch.randn(32, 10, 4)
        >>> action = torch.randn(32, 10, 2)
        >>> return_to_go = torch.randn(32, 10, 1)
        >>> mu, std = model(observation, action, return_to_go)
        >>> mu.shape
        torch.Size([32, 10, 2])
        >>> std.shape
        torch.Size([32, 10, 2])
    """

    def __init__(
        self,
        state_dim: int,
        action_dim: int,
        transformer_config: dict | DecisionTransformer.DTConfig = None,
        device: DEVICE_TYPING | None = None,
    ):
        super().__init__()
        if transformer_config is None:
            transformer_config = self.default_config()
        # Normalize the config to a plain dict for key access below.
        if isinstance(transformer_config, DecisionTransformer.DTConfig):
            transformer_config = dataclasses.asdict(transformer_config)
        self.transformer = DecisionTransformer(
            state_dim=state_dim,
            action_dim=action_dim,
            config=transformer_config,
            device=device,
        )
        # Two linear heads on top of the transformer embedding: one for the
        # mean, one for the (pre-squash) log standard deviation.
        self.action_layer_mean = nn.Linear(
            transformer_config["n_embd"], action_dim, device=device
        )
        self.action_layer_logstd = nn.Linear(
            transformer_config["n_embd"], action_dim, device=device
        )

        # Bounds applied to the log-std after the tanh squash in forward().
        self.log_std_min, self.log_std_max = -5.0, 2.0

        def weight_init(m):
            """Custom weight init for Conv2D and Linear layers."""
            if isinstance(m, torch.nn.Linear):
                nn.init.orthogonal_(m.weight.data)
                if hasattr(m.bias, "data"):
                    m.bias.data.fill_(0.0)

        self.action_layer_mean.apply(weight_init)
        self.action_layer_logstd.apply(weight_init)

    def forward(
        self,
        observation: torch.Tensor,
        action: torch.Tensor,
        return_to_go: torch.Tensor,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """Return the mean and standard deviation of the action distribution."""
        hidden_state = self.transformer(observation, action, return_to_go)
        mu = self.action_layer_mean(hidden_state)
        log_std = self.action_layer_logstd(hidden_state)

        log_std = torch.tanh(log_std)
        # log_std is the output of tanh so it will be between [-1, 1]
        # map it to be between [log_std_min, log_std_max]
        log_std = self.log_std_min + 0.5 * (self.log_std_max - self.log_std_min) * (
            log_std + 1.0
        )
        std = log_std.exp()

        return mu, std

    @classmethod
    def default_config(cls):
        """Default configuration for :class:`~OnlineDTActor`."""
        return DecisionTransformer.DTConfig(
            n_embd=512,
            n_layer=4,
            n_head=4,
            n_inner=2048,
            activation="relu",
            n_positions=1024,
            resid_pdrop=0.1,
            attn_pdrop=0.1,
        )
1607
+
1608
+
1609
class DTActor(nn.Module):
    """Decision Transformer Actor class.

    Actor class for the Decision Transformer to output deterministic action as
    presented in `"Decision Transformer" <https://arxiv.org/abs/2106.01345>`_.
    Returns the deterministic actions.

    Args:
        state_dim (int): state dimension.
        action_dim (int): action dimension.
        transformer_config (Dict or :class:`DecisionTransformer.DTConfig`, optional):
            config for the GPT2 transformer.
            Defaults to :meth:`~.default_config`.
        device (torch.device, optional): device to use. Defaults to None.

    Examples:
        >>> model = DTActor(state_dim=4, action_dim=2,
        ...     transformer_config=DTActor.default_config())
        >>> observation = torch.randn(32, 10, 4)
        >>> action = torch.randn(32, 10, 2)
        >>> return_to_go = torch.randn(32, 10, 1)
        >>> output = model(observation, action, return_to_go)
        >>> output.shape
        torch.Size([32, 10, 2])

    """

    def __init__(
        self,
        state_dim: int,
        action_dim: int,
        transformer_config: dict | DecisionTransformer.DTConfig = None,
        device: DEVICE_TYPING | None = None,
    ):
        super().__init__()
        if transformer_config is None:
            transformer_config = self.default_config()
        # Normalize the config to a plain dict for key access below.
        if isinstance(transformer_config, DecisionTransformer.DTConfig):
            transformer_config = dataclasses.asdict(transformer_config)
        self.transformer = DecisionTransformer(
            state_dim=state_dim,
            action_dim=action_dim,
            config=transformer_config,
            device=device,
        )
        # Single linear head mapping the transformer embedding to actions.
        self.action_layer = nn.Linear(
            transformer_config["n_embd"], action_dim, device=device
        )

        def weight_init(m):
            """Custom weight init for Conv2D and Linear layers."""
            if isinstance(m, torch.nn.Linear):
                nn.init.orthogonal_(m.weight.data)
                if hasattr(m.bias, "data"):
                    m.bias.data.fill_(0.0)

        self.action_layer.apply(weight_init)

    def forward(
        self,
        observation: torch.Tensor,
        action: torch.Tensor,
        return_to_go: torch.Tensor,
    ) -> torch.Tensor:
        """Return the deterministic action for the given trajectory inputs."""
        hidden_state = self.transformer(observation, action, return_to_go)
        return self.action_layer(hidden_state)

    @classmethod
    def default_config(cls):
        """Default configuration for :class:`~DTActor`."""
        return DecisionTransformer.DTConfig(
            n_embd=512,
            n_layer=4,
            n_head=4,
            n_inner=2048,
            activation="relu",
            n_positions=1024,
            resid_pdrop=0.1,
            attn_pdrop=0.1,
        )
1690
+
1691
+
1692
+ def _iter_maybe_over_single(item: dict | list[dict] | None, n):
1693
+ if item is None:
1694
+ return iter([{} for _ in range(n)])
1695
+ elif isinstance(item, dict):
1696
+ return iter([deepcopy(item) for _ in range(n)])
1697
+ else:
1698
+ return iter([deepcopy(_item) for _item in item])
1699
+
1700
+
1701
+ class _ExecutableLayer(nn.Module):
1702
+ """A thin wrapper around a function to be executed as a module."""
1703
+
1704
+ def __init__(self, func):
1705
+ super().__init__()
1706
+ self.func = func
1707
+
1708
+ def forward(self, *args, **kwargs):
1709
+ return self.func(*args, **kwargs)
1710
+
1711
+ def __repr__(self):
1712
+ return f"{self.__class__.__name__}(func={self.func})"