torchrl-0.11.0-cp314-cp314-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (394)
  1. benchmarks/benchmark_batched_envs.py +104 -0
  2. benchmarks/conftest.py +91 -0
  3. benchmarks/ecosystem/gym_env_throughput.py +321 -0
  4. benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
  5. benchmarks/requirements.txt +7 -0
  6. benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
  7. benchmarks/test_collectors_benchmark.py +240 -0
  8. benchmarks/test_compressed_storage_benchmark.py +145 -0
  9. benchmarks/test_envs_benchmark.py +133 -0
  10. benchmarks/test_llm.py +101 -0
  11. benchmarks/test_non_tensor_env_benchmark.py +70 -0
  12. benchmarks/test_objectives_benchmarks.py +1199 -0
  13. benchmarks/test_replaybuffer_benchmark.py +254 -0
  14. sota-check/README.md +35 -0
  15. sota-implementations/README.md +142 -0
  16. sota-implementations/a2c/README.md +39 -0
  17. sota-implementations/a2c/a2c_atari.py +291 -0
  18. sota-implementations/a2c/a2c_mujoco.py +273 -0
  19. sota-implementations/a2c/utils_atari.py +240 -0
  20. sota-implementations/a2c/utils_mujoco.py +160 -0
  21. sota-implementations/bandits/README.md +7 -0
  22. sota-implementations/bandits/dqn.py +126 -0
  23. sota-implementations/cql/cql_offline.py +198 -0
  24. sota-implementations/cql/cql_online.py +249 -0
  25. sota-implementations/cql/discrete_cql_offline.py +180 -0
  26. sota-implementations/cql/discrete_cql_online.py +227 -0
  27. sota-implementations/cql/utils.py +471 -0
  28. sota-implementations/crossq/crossq.py +271 -0
  29. sota-implementations/crossq/utils.py +320 -0
  30. sota-implementations/ddpg/ddpg.py +231 -0
  31. sota-implementations/ddpg/utils.py +325 -0
  32. sota-implementations/decision_transformer/dt.py +163 -0
  33. sota-implementations/decision_transformer/lamb.py +167 -0
  34. sota-implementations/decision_transformer/online_dt.py +178 -0
  35. sota-implementations/decision_transformer/utils.py +562 -0
  36. sota-implementations/discrete_sac/discrete_sac.py +243 -0
  37. sota-implementations/discrete_sac/utils.py +324 -0
  38. sota-implementations/dqn/README.md +30 -0
  39. sota-implementations/dqn/dqn_atari.py +272 -0
  40. sota-implementations/dqn/dqn_cartpole.py +236 -0
  41. sota-implementations/dqn/utils_atari.py +132 -0
  42. sota-implementations/dqn/utils_cartpole.py +90 -0
  43. sota-implementations/dreamer/README.md +129 -0
  44. sota-implementations/dreamer/dreamer.py +586 -0
  45. sota-implementations/dreamer/dreamer_utils.py +1107 -0
  46. sota-implementations/expert-iteration/README.md +352 -0
  47. sota-implementations/expert-iteration/ei_utils.py +770 -0
  48. sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
  49. sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
  50. sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
  51. sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
  52. sota-implementations/gail/gail.py +327 -0
  53. sota-implementations/gail/gail_utils.py +68 -0
  54. sota-implementations/gail/ppo_utils.py +157 -0
  55. sota-implementations/grpo/README.md +273 -0
  56. sota-implementations/grpo/grpo-async.py +437 -0
  57. sota-implementations/grpo/grpo-sync.py +435 -0
  58. sota-implementations/grpo/grpo_utils.py +843 -0
  59. sota-implementations/grpo/requirements_gsm8k.txt +11 -0
  60. sota-implementations/grpo/requirements_ifeval.txt +16 -0
  61. sota-implementations/impala/README.md +33 -0
  62. sota-implementations/impala/impala_multi_node_ray.py +292 -0
  63. sota-implementations/impala/impala_multi_node_submitit.py +284 -0
  64. sota-implementations/impala/impala_single_node.py +261 -0
  65. sota-implementations/impala/utils.py +184 -0
  66. sota-implementations/iql/discrete_iql.py +230 -0
  67. sota-implementations/iql/iql_offline.py +164 -0
  68. sota-implementations/iql/iql_online.py +225 -0
  69. sota-implementations/iql/utils.py +437 -0
  70. sota-implementations/multiagent/README.md +74 -0
  71. sota-implementations/multiagent/iql.py +237 -0
  72. sota-implementations/multiagent/maddpg_iddpg.py +266 -0
  73. sota-implementations/multiagent/mappo_ippo.py +267 -0
  74. sota-implementations/multiagent/qmix_vdn.py +271 -0
  75. sota-implementations/multiagent/sac.py +337 -0
  76. sota-implementations/multiagent/utils/__init__.py +4 -0
  77. sota-implementations/multiagent/utils/logging.py +151 -0
  78. sota-implementations/multiagent/utils/utils.py +43 -0
  79. sota-implementations/ppo/README.md +29 -0
  80. sota-implementations/ppo/ppo_atari.py +305 -0
  81. sota-implementations/ppo/ppo_mujoco.py +293 -0
  82. sota-implementations/ppo/utils_atari.py +238 -0
  83. sota-implementations/ppo/utils_mujoco.py +152 -0
  84. sota-implementations/ppo_trainer/train.py +21 -0
  85. sota-implementations/redq/README.md +7 -0
  86. sota-implementations/redq/redq.py +199 -0
  87. sota-implementations/redq/utils.py +1060 -0
  88. sota-implementations/sac/sac-async.py +266 -0
  89. sota-implementations/sac/sac.py +239 -0
  90. sota-implementations/sac/utils.py +381 -0
  91. sota-implementations/sac_trainer/train.py +16 -0
  92. sota-implementations/td3/td3.py +254 -0
  93. sota-implementations/td3/utils.py +319 -0
  94. sota-implementations/td3_bc/td3_bc.py +177 -0
  95. sota-implementations/td3_bc/utils.py +251 -0
  96. torchrl/__init__.py +144 -0
  97. torchrl/_extension.py +74 -0
  98. torchrl/_torchrl.cpython-314-aarch64-linux-gnu.so +0 -0
  99. torchrl/_utils.py +1431 -0
  100. torchrl/collectors/__init__.py +48 -0
  101. torchrl/collectors/_base.py +1058 -0
  102. torchrl/collectors/_constants.py +88 -0
  103. torchrl/collectors/_multi_async.py +324 -0
  104. torchrl/collectors/_multi_base.py +1805 -0
  105. torchrl/collectors/_multi_sync.py +464 -0
  106. torchrl/collectors/_runner.py +581 -0
  107. torchrl/collectors/_single.py +2009 -0
  108. torchrl/collectors/_single_async.py +259 -0
  109. torchrl/collectors/collectors.py +62 -0
  110. torchrl/collectors/distributed/__init__.py +32 -0
  111. torchrl/collectors/distributed/default_configs.py +133 -0
  112. torchrl/collectors/distributed/generic.py +1306 -0
  113. torchrl/collectors/distributed/ray.py +1092 -0
  114. torchrl/collectors/distributed/rpc.py +1006 -0
  115. torchrl/collectors/distributed/sync.py +731 -0
  116. torchrl/collectors/distributed/utils.py +160 -0
  117. torchrl/collectors/llm/__init__.py +10 -0
  118. torchrl/collectors/llm/base.py +494 -0
  119. torchrl/collectors/llm/ray_collector.py +275 -0
  120. torchrl/collectors/llm/utils.py +36 -0
  121. torchrl/collectors/llm/weight_update/__init__.py +10 -0
  122. torchrl/collectors/llm/weight_update/vllm.py +348 -0
  123. torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
  124. torchrl/collectors/utils.py +433 -0
  125. torchrl/collectors/weight_update.py +591 -0
  126. torchrl/csrc/numpy_utils.h +38 -0
  127. torchrl/csrc/pybind.cpp +27 -0
  128. torchrl/csrc/segment_tree.h +458 -0
  129. torchrl/csrc/torch_utils.h +34 -0
  130. torchrl/csrc/utils.cpp +48 -0
  131. torchrl/csrc/utils.h +31 -0
  132. torchrl/data/__init__.py +187 -0
  133. torchrl/data/datasets/__init__.py +58 -0
  134. torchrl/data/datasets/atari_dqn.py +878 -0
  135. torchrl/data/datasets/common.py +281 -0
  136. torchrl/data/datasets/d4rl.py +489 -0
  137. torchrl/data/datasets/d4rl_infos.py +187 -0
  138. torchrl/data/datasets/gen_dgrl.py +375 -0
  139. torchrl/data/datasets/minari_data.py +643 -0
  140. torchrl/data/datasets/openml.py +177 -0
  141. torchrl/data/datasets/openx.py +798 -0
  142. torchrl/data/datasets/roboset.py +363 -0
  143. torchrl/data/datasets/utils.py +11 -0
  144. torchrl/data/datasets/vd4rl.py +432 -0
  145. torchrl/data/llm/__init__.py +34 -0
  146. torchrl/data/llm/dataset.py +491 -0
  147. torchrl/data/llm/history.py +1378 -0
  148. torchrl/data/llm/prompt.py +198 -0
  149. torchrl/data/llm/reward.py +225 -0
  150. torchrl/data/llm/topk.py +186 -0
  151. torchrl/data/llm/utils.py +543 -0
  152. torchrl/data/map/__init__.py +21 -0
  153. torchrl/data/map/hash.py +185 -0
  154. torchrl/data/map/query.py +204 -0
  155. torchrl/data/map/tdstorage.py +363 -0
  156. torchrl/data/map/tree.py +1434 -0
  157. torchrl/data/map/utils.py +103 -0
  158. torchrl/data/postprocs/__init__.py +8 -0
  159. torchrl/data/postprocs/postprocs.py +391 -0
  160. torchrl/data/replay_buffers/__init__.py +99 -0
  161. torchrl/data/replay_buffers/checkpointers.py +622 -0
  162. torchrl/data/replay_buffers/ray_buffer.py +292 -0
  163. torchrl/data/replay_buffers/replay_buffers.py +2376 -0
  164. torchrl/data/replay_buffers/samplers.py +2578 -0
  165. torchrl/data/replay_buffers/scheduler.py +265 -0
  166. torchrl/data/replay_buffers/storages.py +2412 -0
  167. torchrl/data/replay_buffers/utils.py +1042 -0
  168. torchrl/data/replay_buffers/writers.py +781 -0
  169. torchrl/data/tensor_specs.py +7101 -0
  170. torchrl/data/utils.py +334 -0
  171. torchrl/envs/__init__.py +265 -0
  172. torchrl/envs/async_envs.py +1105 -0
  173. torchrl/envs/batched_envs.py +3093 -0
  174. torchrl/envs/common.py +4241 -0
  175. torchrl/envs/custom/__init__.py +11 -0
  176. torchrl/envs/custom/chess.py +617 -0
  177. torchrl/envs/custom/llm.py +214 -0
  178. torchrl/envs/custom/pendulum.py +401 -0
  179. torchrl/envs/custom/san_moves.txt +29274 -0
  180. torchrl/envs/custom/tictactoeenv.py +288 -0
  181. torchrl/envs/env_creator.py +263 -0
  182. torchrl/envs/gym_like.py +752 -0
  183. torchrl/envs/libs/__init__.py +68 -0
  184. torchrl/envs/libs/_gym_utils.py +326 -0
  185. torchrl/envs/libs/brax.py +846 -0
  186. torchrl/envs/libs/dm_control.py +544 -0
  187. torchrl/envs/libs/envpool.py +447 -0
  188. torchrl/envs/libs/gym.py +2239 -0
  189. torchrl/envs/libs/habitat.py +138 -0
  190. torchrl/envs/libs/isaac_lab.py +87 -0
  191. torchrl/envs/libs/isaacgym.py +203 -0
  192. torchrl/envs/libs/jax_utils.py +166 -0
  193. torchrl/envs/libs/jumanji.py +963 -0
  194. torchrl/envs/libs/meltingpot.py +599 -0
  195. torchrl/envs/libs/openml.py +153 -0
  196. torchrl/envs/libs/openspiel.py +652 -0
  197. torchrl/envs/libs/pettingzoo.py +1042 -0
  198. torchrl/envs/libs/procgen.py +351 -0
  199. torchrl/envs/libs/robohive.py +429 -0
  200. torchrl/envs/libs/smacv2.py +645 -0
  201. torchrl/envs/libs/unity_mlagents.py +891 -0
  202. torchrl/envs/libs/utils.py +147 -0
  203. torchrl/envs/libs/vmas.py +813 -0
  204. torchrl/envs/llm/__init__.py +63 -0
  205. torchrl/envs/llm/chat.py +730 -0
  206. torchrl/envs/llm/datasets/README.md +4 -0
  207. torchrl/envs/llm/datasets/__init__.py +17 -0
  208. torchrl/envs/llm/datasets/gsm8k.py +353 -0
  209. torchrl/envs/llm/datasets/ifeval.py +274 -0
  210. torchrl/envs/llm/envs.py +789 -0
  211. torchrl/envs/llm/libs/README.md +3 -0
  212. torchrl/envs/llm/libs/__init__.py +8 -0
  213. torchrl/envs/llm/libs/mlgym.py +869 -0
  214. torchrl/envs/llm/reward/__init__.py +10 -0
  215. torchrl/envs/llm/reward/gsm8k.py +324 -0
  216. torchrl/envs/llm/reward/ifeval/README.md +13 -0
  217. torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
  218. torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
  219. torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
  220. torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
  221. torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
  222. torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
  223. torchrl/envs/llm/transforms/__init__.py +55 -0
  224. torchrl/envs/llm/transforms/browser.py +292 -0
  225. torchrl/envs/llm/transforms/dataloading.py +859 -0
  226. torchrl/envs/llm/transforms/format.py +73 -0
  227. torchrl/envs/llm/transforms/kl.py +1544 -0
  228. torchrl/envs/llm/transforms/policy_version.py +189 -0
  229. torchrl/envs/llm/transforms/reason.py +323 -0
  230. torchrl/envs/llm/transforms/tokenizer.py +321 -0
  231. torchrl/envs/llm/transforms/tools.py +1955 -0
  232. torchrl/envs/model_based/__init__.py +9 -0
  233. torchrl/envs/model_based/common.py +180 -0
  234. torchrl/envs/model_based/dreamer.py +112 -0
  235. torchrl/envs/transforms/__init__.py +147 -0
  236. torchrl/envs/transforms/functional.py +48 -0
  237. torchrl/envs/transforms/gym_transforms.py +203 -0
  238. torchrl/envs/transforms/module.py +341 -0
  239. torchrl/envs/transforms/r3m.py +372 -0
  240. torchrl/envs/transforms/ray_service.py +663 -0
  241. torchrl/envs/transforms/rb_transforms.py +214 -0
  242. torchrl/envs/transforms/transforms.py +11835 -0
  243. torchrl/envs/transforms/utils.py +94 -0
  244. torchrl/envs/transforms/vc1.py +307 -0
  245. torchrl/envs/transforms/vecnorm.py +845 -0
  246. torchrl/envs/transforms/vip.py +407 -0
  247. torchrl/envs/utils.py +1718 -0
  248. torchrl/envs/vec_envs.py +11 -0
  249. torchrl/modules/__init__.py +206 -0
  250. torchrl/modules/distributions/__init__.py +73 -0
  251. torchrl/modules/distributions/continuous.py +830 -0
  252. torchrl/modules/distributions/discrete.py +908 -0
  253. torchrl/modules/distributions/truncated_normal.py +187 -0
  254. torchrl/modules/distributions/utils.py +233 -0
  255. torchrl/modules/llm/__init__.py +62 -0
  256. torchrl/modules/llm/backends/__init__.py +65 -0
  257. torchrl/modules/llm/backends/vllm/__init__.py +94 -0
  258. torchrl/modules/llm/backends/vllm/_models.py +46 -0
  259. torchrl/modules/llm/backends/vllm/base.py +72 -0
  260. torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
  261. torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
  262. torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
  263. torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
  264. torchrl/modules/llm/policies/__init__.py +28 -0
  265. torchrl/modules/llm/policies/common.py +1809 -0
  266. torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
  267. torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
  268. torchrl/modules/llm/utils.py +23 -0
  269. torchrl/modules/mcts/__init__.py +21 -0
  270. torchrl/modules/mcts/scores.py +579 -0
  271. torchrl/modules/models/__init__.py +86 -0
  272. torchrl/modules/models/batchrenorm.py +119 -0
  273. torchrl/modules/models/decision_transformer.py +179 -0
  274. torchrl/modules/models/exploration.py +731 -0
  275. torchrl/modules/models/llm.py +156 -0
  276. torchrl/modules/models/model_based.py +596 -0
  277. torchrl/modules/models/models.py +1712 -0
  278. torchrl/modules/models/multiagent.py +1067 -0
  279. torchrl/modules/models/recipes/impala.py +185 -0
  280. torchrl/modules/models/utils.py +162 -0
  281. torchrl/modules/planners/__init__.py +10 -0
  282. torchrl/modules/planners/cem.py +228 -0
  283. torchrl/modules/planners/common.py +73 -0
  284. torchrl/modules/planners/mppi.py +265 -0
  285. torchrl/modules/tensordict_module/__init__.py +89 -0
  286. torchrl/modules/tensordict_module/actors.py +2457 -0
  287. torchrl/modules/tensordict_module/common.py +529 -0
  288. torchrl/modules/tensordict_module/exploration.py +814 -0
  289. torchrl/modules/tensordict_module/probabilistic.py +321 -0
  290. torchrl/modules/tensordict_module/rnn.py +1639 -0
  291. torchrl/modules/tensordict_module/sequence.py +132 -0
  292. torchrl/modules/tensordict_module/world_models.py +34 -0
  293. torchrl/modules/utils/__init__.py +38 -0
  294. torchrl/modules/utils/mappings.py +9 -0
  295. torchrl/modules/utils/utils.py +89 -0
  296. torchrl/objectives/__init__.py +78 -0
  297. torchrl/objectives/a2c.py +659 -0
  298. torchrl/objectives/common.py +753 -0
  299. torchrl/objectives/cql.py +1346 -0
  300. torchrl/objectives/crossq.py +710 -0
  301. torchrl/objectives/ddpg.py +453 -0
  302. torchrl/objectives/decision_transformer.py +371 -0
  303. torchrl/objectives/deprecated.py +516 -0
  304. torchrl/objectives/dqn.py +683 -0
  305. torchrl/objectives/dreamer.py +488 -0
  306. torchrl/objectives/functional.py +48 -0
  307. torchrl/objectives/gail.py +258 -0
  308. torchrl/objectives/iql.py +996 -0
  309. torchrl/objectives/llm/__init__.py +30 -0
  310. torchrl/objectives/llm/grpo.py +846 -0
  311. torchrl/objectives/llm/sft.py +482 -0
  312. torchrl/objectives/multiagent/__init__.py +8 -0
  313. torchrl/objectives/multiagent/qmixer.py +396 -0
  314. torchrl/objectives/ppo.py +1669 -0
  315. torchrl/objectives/redq.py +683 -0
  316. torchrl/objectives/reinforce.py +530 -0
  317. torchrl/objectives/sac.py +1580 -0
  318. torchrl/objectives/td3.py +570 -0
  319. torchrl/objectives/td3_bc.py +625 -0
  320. torchrl/objectives/utils.py +782 -0
  321. torchrl/objectives/value/__init__.py +28 -0
  322. torchrl/objectives/value/advantages.py +1956 -0
  323. torchrl/objectives/value/functional.py +1459 -0
  324. torchrl/objectives/value/utils.py +360 -0
  325. torchrl/record/__init__.py +17 -0
  326. torchrl/record/loggers/__init__.py +23 -0
  327. torchrl/record/loggers/common.py +48 -0
  328. torchrl/record/loggers/csv.py +226 -0
  329. torchrl/record/loggers/mlflow.py +142 -0
  330. torchrl/record/loggers/tensorboard.py +139 -0
  331. torchrl/record/loggers/trackio.py +163 -0
  332. torchrl/record/loggers/utils.py +78 -0
  333. torchrl/record/loggers/wandb.py +214 -0
  334. torchrl/record/recorder.py +554 -0
  335. torchrl/services/__init__.py +79 -0
  336. torchrl/services/base.py +109 -0
  337. torchrl/services/ray_service.py +453 -0
  338. torchrl/testing/__init__.py +107 -0
  339. torchrl/testing/assertions.py +179 -0
  340. torchrl/testing/dist_utils.py +122 -0
  341. torchrl/testing/env_creators.py +227 -0
  342. torchrl/testing/env_helper.py +35 -0
  343. torchrl/testing/gym_helpers.py +156 -0
  344. torchrl/testing/llm_mocks.py +119 -0
  345. torchrl/testing/mocking_classes.py +2720 -0
  346. torchrl/testing/modules.py +295 -0
  347. torchrl/testing/mp_helpers.py +15 -0
  348. torchrl/testing/ray_helpers.py +293 -0
  349. torchrl/testing/utils.py +190 -0
  350. torchrl/trainers/__init__.py +42 -0
  351. torchrl/trainers/algorithms/__init__.py +11 -0
  352. torchrl/trainers/algorithms/configs/__init__.py +705 -0
  353. torchrl/trainers/algorithms/configs/collectors.py +216 -0
  354. torchrl/trainers/algorithms/configs/common.py +41 -0
  355. torchrl/trainers/algorithms/configs/data.py +308 -0
  356. torchrl/trainers/algorithms/configs/envs.py +104 -0
  357. torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
  358. torchrl/trainers/algorithms/configs/logging.py +80 -0
  359. torchrl/trainers/algorithms/configs/modules.py +570 -0
  360. torchrl/trainers/algorithms/configs/objectives.py +177 -0
  361. torchrl/trainers/algorithms/configs/trainers.py +340 -0
  362. torchrl/trainers/algorithms/configs/transforms.py +955 -0
  363. torchrl/trainers/algorithms/configs/utils.py +252 -0
  364. torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
  365. torchrl/trainers/algorithms/configs/weight_update.py +159 -0
  366. torchrl/trainers/algorithms/ppo.py +373 -0
  367. torchrl/trainers/algorithms/sac.py +308 -0
  368. torchrl/trainers/helpers/__init__.py +40 -0
  369. torchrl/trainers/helpers/collectors.py +416 -0
  370. torchrl/trainers/helpers/envs.py +573 -0
  371. torchrl/trainers/helpers/logger.py +33 -0
  372. torchrl/trainers/helpers/losses.py +132 -0
  373. torchrl/trainers/helpers/models.py +658 -0
  374. torchrl/trainers/helpers/replay_buffer.py +59 -0
  375. torchrl/trainers/helpers/trainers.py +301 -0
  376. torchrl/trainers/trainers.py +2052 -0
  377. torchrl/weight_update/__init__.py +33 -0
  378. torchrl/weight_update/_distributed.py +749 -0
  379. torchrl/weight_update/_mp.py +624 -0
  380. torchrl/weight_update/_noupdate.py +102 -0
  381. torchrl/weight_update/_ray.py +1032 -0
  382. torchrl/weight_update/_rpc.py +284 -0
  383. torchrl/weight_update/_shared.py +891 -0
  384. torchrl/weight_update/llm/__init__.py +32 -0
  385. torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
  386. torchrl/weight_update/llm/vllm_nccl.py +710 -0
  387. torchrl/weight_update/utils.py +73 -0
  388. torchrl/weight_update/weight_sync_schemes.py +1244 -0
  389. torchrl-0.11.0.dist-info/METADATA +1308 -0
  390. torchrl-0.11.0.dist-info/RECORD +394 -0
  391. torchrl-0.11.0.dist-info/WHEEL +5 -0
  392. torchrl-0.11.0.dist-info/entry_points.txt +2 -0
  393. torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
  394. torchrl-0.11.0.dist-info/top_level.txt +7 -0
@@ -0,0 +1,1105 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+ from __future__ import annotations
+
+ import abc
+ import multiprocessing
+
+ from collections.abc import Callable, Mapping, Sequence
+ from concurrent.futures import as_completed, ThreadPoolExecutor
+
+ # import queue
+ from multiprocessing import Queue
+ from queue import Empty
+ from typing import Literal
+
+ import torch
+ from tensordict import (
+     lazy_stack,
+     LazyStackedTensorDict,
+     maybe_dense_stack,
+     TensorDict,
+     TensorDictBase,
+ )
+
+ from tensordict.tensorclass import NonTensorData, NonTensorStack
+ from tensordict.utils import _zip_strict, expand_as_right
+
+ from torchrl.data.tensor_specs import NonTensor
+ from torchrl.envs.common import _EnvPostInit, EnvBase
+
+
+ class _AsyncEnvMeta(_EnvPostInit):
+     """A metaclass for asynchronous environment pools that determines the backend implementation to use based on the provided arguments.
+
+     This class is responsible for instantiating the appropriate subclass of `AsyncEnvPool` based on the specified
+     backend, such as threading or multiprocessing.
+     """
+
+     def __call__(cls, *args, **kwargs):
+         backend = kwargs.get("backend", "threading")
+         if cls is AsyncEnvPool:
+             if backend == "threading":
+                 instance: ThreadingAsyncEnvPool = ThreadingAsyncEnvPool(*args, **kwargs)
+             elif backend == "multiprocessing":
+                 instance: ProcessorAsyncEnvPool = ProcessorAsyncEnvPool(*args, **kwargs)
+             elif backend == "asyncio":
+                 raise NotImplementedError
+                 # instance: AsyncioAsyncEnvPool = AsyncioAsyncEnvPool(*args, **kwargs)
+             else:
+                 raise NotImplementedError
+             return instance
+         else:
+             return super().__call__(*args, **kwargs)
+
+
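+ # A minimal sketch of the dispatch behavior implemented by `_AsyncEnvMeta` above:
+ # constructing the abstract `AsyncEnvPool` returns a backend-specific subclass.
+ # (Illustrative only; assumes the Gym environments used in the docstring below
+ # are available.)
+ #
+ #     >>> from functools import partial
+ #     >>> from torchrl.envs import AsyncEnvPool, GymEnv
+ #     >>> pool = AsyncEnvPool([partial(GymEnv, "Pendulum-v1")], backend="threading")
+ #     >>> type(pool).__name__
+ #     'ThreadingAsyncEnvPool'
+ #     >>> pool = AsyncEnvPool([partial(GymEnv, "Pendulum-v1")], backend="multiprocessing")
+ #     >>> type(pool).__name__
+ #     'ProcessorAsyncEnvPool'
+
+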
+ class AsyncEnvPool(EnvBase, metaclass=_AsyncEnvMeta):
+     """A base class for asynchronous environment pools, providing a common interface for managing multiple environments concurrently.
+
+     This class supports different backends for parallel execution, such as threading
+     and multiprocessing, and provides methods for asynchronous stepping and resetting
+     of environments.
+
+     .. note:: This class and its subclasses should work when nested within :class:`~torchrl.envs.TransformedEnv` and
+         batched environments, but users currently cannot use the async features of the base environment when
+         it is nested in these classes. One should prefer nesting transformed envs within an `AsyncEnvPool` instead.
+         If this is not possible, please raise an issue.
+
+     Args:
+         env_makers (Callable[[], EnvBase] | EnvBase | list[EnvBase] | list[Callable[[], EnvBase]]):
+             A callable or list of callables that create environment instances, or
+             environment instances themselves.
+         backend (Literal["threading", "multiprocessing", "asyncio"], optional):
+             The backend to use for parallel execution. Defaults to `"threading"`.
+         stack (Literal["dense", "maybe_dense", "lazy"], optional):
+             The method to use for stacking environment outputs. Defaults to `"dense"`.
+         create_env_kwargs (dict, optional):
+             Keyword arguments to pass to the environment maker. Defaults to `{}`.
+
+     Attributes:
+         min_get (int): Minimum number of environments to process in a batch.
+         env_makers (list): List of environment makers or environments.
+         num_envs (int): Number of environments in the pool.
+         backend (str): Backend used for parallel execution.
+         stack (str): Method used for stacking environment outputs.
+
+     Examples:
+         >>> from functools import partial
+         >>> from torchrl.envs import AsyncEnvPool, GymEnv
+         >>> import torch
+         >>> # Choose backend
+         >>> backend = "threading"
+         >>> env = AsyncEnvPool([partial(GymEnv, "Pendulum-v1"), partial(GymEnv, "CartPole-v1")], stack="lazy", backend=backend)
+         >>> assert env.batch_size == (2,)
+         >>> # Execute a sync reset
+         >>> reset = env.reset()
+         >>> print(reset)
+         LazyStackedTensorDict(
+             fields={
+                 done: Tensor(shape=torch.Size([2, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                 env_index: NonTensorStack(
+                     [0, 1],
+                     batch_size=torch.Size([2]),
+                     device=None),
+                 observation: Tensor(shape=torch.Size([2, 3]), device=cpu, dtype=torch.float32, is_shared=False),
+                 terminated: Tensor(shape=torch.Size([2, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                 truncated: Tensor(shape=torch.Size([2, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
+             exclusive_fields={
+             },
+             batch_size=torch.Size([2]),
+             device=None,
+             is_shared=False,
+             stack_dim=0)
+         >>> # Execute a sync step
+         >>> s = env.rand_step(reset)
+         >>> print(s)
+         LazyStackedTensorDict(
+             fields={
+                 action: Tensor(shape=torch.Size([2, 1]), device=cpu, dtype=torch.float32, is_shared=False),
+                 done: Tensor(shape=torch.Size([2, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                 env_index: NonTensorStack(
+                     [0, 1],
+                     batch_size=torch.Size([2]),
+                     device=None),
+                 next: LazyStackedTensorDict(
+                     fields={
+                         done: Tensor(shape=torch.Size([2, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                         observation: Tensor(shape=torch.Size([2, 3]), device=cpu, dtype=torch.float32, is_shared=False),
+                         reward: Tensor(shape=torch.Size([2, 1]), device=cpu, dtype=torch.float32, is_shared=False),
+                         terminated: Tensor(shape=torch.Size([2, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                         truncated: Tensor(shape=torch.Size([2, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
+                     exclusive_fields={
+                     },
+                     batch_size=torch.Size([2]),
+                     device=None,
+                     is_shared=False,
+                     stack_dim=0),
+                 observation: Tensor(shape=torch.Size([2, 3]), device=cpu, dtype=torch.float32, is_shared=False),
+                 terminated: Tensor(shape=torch.Size([2, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                 truncated: Tensor(shape=torch.Size([2, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
+             exclusive_fields={
+             },
+             batch_size=torch.Size([2]),
+             device=None,
+             is_shared=False,
+             stack_dim=0)
+         >>> s = env.step_mdp(s)
+         >>> # Execute an asynchronous step in env 0
+         >>> s0 = s[0]
+         >>> s0["action"] = torch.randn(1).clamp(-1, 1)
+         >>> # We must tell the env which data this is from
+         >>> s0["env_index"] = 0
+         >>> env.async_step_send(s0)
+         >>> # Receive data
+         >>> s0_result = env.async_step_recv()
+         >>> print('result', s0_result)
+         result LazyStackedTensorDict(
+             fields={
+                 action: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.float32, is_shared=False),
+                 done: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                 env_index: NonTensorStack(
+                     [0],
+                     batch_size=torch.Size([1]),
+                     device=None),
+                 next: LazyStackedTensorDict(
+                     fields={
+                         done: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                         observation: Tensor(shape=torch.Size([1, 3]), device=cpu, dtype=torch.float32, is_shared=False),
+                         reward: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.float32, is_shared=False),
+                         terminated: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                         truncated: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
+                     exclusive_fields={
+                     },
+                     batch_size=torch.Size([1]),
+                     device=None,
+                     is_shared=False,
+                     stack_dim=0),
+                 observation: Tensor(shape=torch.Size([1, 3]), device=cpu, dtype=torch.float32, is_shared=False),
+                 terminated: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                 truncated: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
+             exclusive_fields={
+             },
+             batch_size=torch.Size([1]),
+             device=None,
+             is_shared=False,
+             stack_dim=0)
+         >>> # Close env
+         >>> env.close()
+
+     """
+
+     _env_idx_key = "env_index"
+
+     def __init__(
+         self,
+         env_makers: Callable[[], EnvBase]
+         | EnvBase
+         | list[EnvBase]
+         | list[Callable[[], EnvBase]],
+         *,
+         backend: Literal["threading", "multiprocessing", "asyncio"] = "threading",
+         stack: Literal["dense", "maybe_dense", "lazy"] = "dense",
+         create_env_kwargs: dict | list[dict] | None = None,
+     ) -> None:
+         if not isinstance(env_makers, Sequence):
+             env_makers = [env_makers]
+         self.min_get = 1
+         self.env_makers = env_makers
+         self.num_envs = len(env_makers)
+         self.backend = backend
+         if create_env_kwargs is None:
+             create_env_kwargs = {}
+         if isinstance(create_env_kwargs, Mapping):
+             create_env_kwargs = [create_env_kwargs] * self.num_envs
+         if len(create_env_kwargs) != self.num_envs:
+             raise ValueError(
+                 f"create_env_kwargs must be a dict or a list of dicts with length {self.num_envs}"
+             )
+         self.create_env_kwargs = create_env_kwargs
+
+         self.stack = stack
+         if stack == "dense":
+             self._stack_func = torch.stack
+         elif stack == "maybe_dense":
+             self._stack_func = maybe_dense_stack
+         elif stack == "lazy":
+             self._stack_func = lazy_stack
+         else:
+             raise NotImplementedError
+
+         output_spec, input_spec = self._setup()
+         input_spec["full_state_spec"].set(
+             self._env_idx_key, NonTensor(example_data=0, shape=input_spec.shape)
+         )
+         self.__dict__["_output_spec"] = output_spec
+         self.__dict__["_input_spec"] = input_spec
+         # Use spec shape as batch_size since it correctly includes both the pool dimension
+         # and the child env batch dimensions (e.g., (4, 1) for 4 envs with batch_size=(1,))
+         super().__init__(batch_size=input_spec.shape)
+         self._busy = set()
+
+     @property
+     def env_batch_sizes(self) -> list[torch.Size]:
+         """Returns the batch-sizes of every env."""
+         raise NotImplementedError
+
+     @abc.abstractmethod
+     def _get_child_specs(self) -> list:
+         """Returns the list of child env specs for stacking.
+
+         For ThreadingAsyncEnvPool, returns [env.full_*_spec for env in self.envs].
+         For ProcessorAsyncEnvPool, returns cached specs from setup.
+         """
+         raise NotImplementedError
+
+     # Override spec properties to properly stack child env specs.
+     # This bypasses the problematic StackedComposite.get() behavior that loses
+     # nested keys like full_action_spec when cloning stacked specs.
+
+     @property
+     def full_action_spec(self):
+         child_specs = self._get_child_specs()
+         return torch.stack(
+             [s["input_spec"]["full_action_spec"] for s in child_specs], dim=0
+         )
+
+     @property
+     def full_observation_spec(self):
+         child_specs = self._get_child_specs()
+         return torch.stack(
+             [s["output_spec"]["full_observation_spec"] for s in child_specs], dim=0
+         )
+
+     @property
+     def full_reward_spec(self):
+         child_specs = self._get_child_specs()
+         return torch.stack(
+             [s["output_spec"]["full_reward_spec"] for s in child_specs], dim=0
+         )
+
+     @property
+     def full_done_spec(self):
+         child_specs = self._get_child_specs()
+         return torch.stack(
+             [s["output_spec"]["full_done_spec"] for s in child_specs], dim=0
+         )
+
+     @property
+     def full_state_spec(self):
+         child_specs = self._get_child_specs()
+         specs = torch.stack(
+             [s["input_spec"]["full_state_spec"] for s in child_specs], dim=0
+         )
+         # Add env_index key for async tracking
+         specs.set(self._env_idx_key, NonTensor(example_data=0, shape=specs.shape))
+         return specs
+
+     # TODO: _make_single_env_spec (used by *_unbatched properties) takes spec[0],
+     # which assumes all child envs have identical specs. Should add validation
+     # that child specs match, and error if they differ.
+
+     def _reset(
+         self,
+         tensordict: TensorDictBase | None = None,
+         **kwargs,
+     ) -> TensorDictBase:
+         if self._current_step > 0:
+             raise RuntimeError("Some envs are still processing a step.")
+         if tensordict is None:
+             if self.stack in ("lazy", "maybe_dense"):
+                 tensordict = LazyStackedTensorDict(
+                     *[
+                         TensorDict(batch_size=self.env_batch_sizes[i])
+                         for i in range(self.num_envs)
+                     ]
+                 )
+             else:
+                 tensordict = TensorDict(
+                     batch_size=(self.num_envs,) + self.env_batch_sizes[0]
+                 )
+         env_idx_nt = NonTensorStack(*range(tensordict.shape[0]))
+         while env_idx_nt.batch_dims < tensordict.batch_dims:
+             env_idx_nt = expand_as_right(env_idx_nt, tensordict)
+         tensordict[self._env_idx_key] = env_idx_nt
+         self._async_private_reset_send(tensordict)
+         tensordict = self._async_private_reset_recv(min_get=self.num_envs)
+         return tensordict
+
+     def _step(self, tensordict: TensorDictBase) -> TensorDictBase:
+         if self._current_step > 0:
+             raise RuntimeError("Some envs are still processing a step.")
+         tensordict.set(self._env_idx_key, torch.arange(tensordict.shape[0]))
+         self._async_private_step_send(tensordict)
+         tensordict = self._async_private_step_recv(min_get=self.num_envs)
+         # Using pop instead of del to account for tensorclasses
+         tensordict.pop(self._env_idx_key)
+         return tensordict
+
+     def step_and_maybe_reset(
+         self, tensordict: TensorDictBase
+     ) -> tuple[TensorDictBase, TensorDictBase]:
+         if self._current_step_reset > 0:
+             raise RuntimeError("Some envs are still processing a step.")
+         tensordict.set(self._env_idx_key, torch.arange(tensordict.shape[0]))
+         self.async_step_and_maybe_reset_send(tensordict)
+         tensordict, tensordict_ = self.async_step_and_maybe_reset_recv(
+             min_get=self.num_envs
+         )
+         return tensordict, tensordict_
+
+     def step(self, tensordict: TensorDictBase) -> TensorDictBase:
+         if self._current_step > 0:
+             raise RuntimeError("Some envs are still processing a step.")
+         tensordict.set(self._env_idx_key, torch.arange(tensordict.shape[0]))
+         self.async_step_send(tensordict)
+         tensordict = self.async_step_recv(min_get=self.num_envs)
+         return tensordict
+
+     def reset(
+         self,
+         tensordict: TensorDictBase | None = None,
+         **kwargs,
+     ) -> TensorDictBase:
+         if self._current_step > 0:
+             raise RuntimeError("Some envs are still processing a step.")
+         if tensordict is None:
+             if self.stack in ("lazy", "maybe_dense"):
+                 tensordict = LazyStackedTensorDict(
+                     *[
+                         TensorDict(batch_size=self.env_batch_sizes[i])
+                         for i in range(self.num_envs)
+                     ]
+                 )
+             else:
+                 tensordict = TensorDict(
+                     batch_size=(self.num_envs,) + self.env_batch_sizes[0]
+                 )
+         indices = NonTensorStack(*range(tensordict.shape[0]))
+         if indices.shape != tensordict.shape:
+             indices = expand_as_right(indices, tensordict)
+         tensordict[self._env_idx_key] = indices
+         self.async_reset_send(tensordict)
+         tensordict = self.async_reset_recv(min_get=self.num_envs)
+         return tensordict
+
+     def _sort_results(self, results, *other_results):
+         # Extract env indices from results. When child envs have a batch dimension
+         # (e.g., batch_size=(1,)), r[self._env_idx_key] may be a 1D sequence
+         # instead of a scalar, so we need to handle both cases.
+         idx = []
+         for r in results:
+             env_idx = r[self._env_idx_key]
+             # Handle sequence types (NonTensorStack, etc.) by taking the first element
+             while hasattr(env_idx, "__len__") and not isinstance(env_idx, (str, bytes)):
+                 if len(env_idx) == 1:
+                     env_idx = env_idx[0]
+                 else:
+                     break
+             idx.append(int(env_idx))
+         argsort = torch.argsort(torch.tensor(idx)).tolist()
+         results = [results[i] for i in argsort]
+         if other_results:
+             other_results = [
+                 [other_result[i] for i in argsort] for other_result in other_results
+             ]
+             return results, *other_results, idx
+         return results, idx
+
+     def _set_seed(self, seed: int | None) -> None:
+         raise NotImplementedError
+
+     @abc.abstractmethod
+     def _setup(self):
+         raise NotImplementedError
+
+     def _maybe_make_tensordict(self, tensordict, env_index, make_if_none):
+         if env_index is None:
+             env_idx = tensordict.view(-1)[self._env_idx_key]
+             if isinstance(env_idx, torch.Tensor):
+                 env_idx = env_idx.tolist()
+             if isinstance(env_idx, int):
+                 # If we squeezed a td with shape (1,) and got a NonTensorStack -> NonTensorData, then
+                 # unsqueezed the NonTensorData, we'd still have a NonTensorData with shape (1,).
+                 # This will give us an integer now, but we don't want to unsqueeze the full td because then
+                 # we'd have a td with shape (1, 1).
+                 if tensordict.shape != (1, *self.env_batch_sizes[env_idx]):
+                     tensordict = tensordict.unsqueeze(0)
+                 env_idx = [env_idx]
+         elif isinstance(env_index, int):
+             if make_if_none and tensordict is None:
+                 tensordict = TensorDict(
+                     batch_size=self.env_batch_sizes[env_index], device=self.device
+                 )
+             if self.stack in ("lazy", "maybe_dense"):
+                 tensordict = tensordict.unsqueeze(0)
+             else:
+                 tensordict = lazy_stack([tensordict])
+             tensordict[self._env_idx_key] = NonTensorStack(env_index)
+             env_idx = [env_index]
+         else:
+             if make_if_none and tensordict is None:
+                 if self.stack in ("lazy", "maybe_dense"):
+                     tensordict = lazy_stack(
+                         [TensorDict(device=self.device) for _ in env_index]
+                     )
+                 else:
+                     tensordict = TensorDict(
+                         batch_size=(len(env_index),), device=self.device
+                     )
+             tensordict[self._env_idx_key] = NonTensorStack(*env_index)
+             env_idx = env_index
+         return tensordict, env_idx
+
+     @abc.abstractmethod
+     def async_step_send(
+         self, tensordict: TensorDictBase, env_index: int | list[int] | None = None
+     ) -> None:
+         raise NotImplementedError
+
+     @abc.abstractmethod
+     def async_step_recv(self, min_get: int | None = None) -> TensorDictBase:
+         raise NotImplementedError
+
+     @abc.abstractmethod
+     def async_step_and_maybe_reset_send(
+         self, tensordict: TensorDictBase, env_index: int | list[int] | None = None
+     ) -> None:
+         raise NotImplementedError
+
+     @abc.abstractmethod
+     def async_step_and_maybe_reset_recv(
+         self,
+         min_get: int | None = None,
+         env_index: int | list[int] | None = None,
+     ) -> tuple[TensorDictBase, TensorDictBase]:
+         raise NotImplementedError
+
+     @abc.abstractmethod
+     def async_reset_send(
+         self,
+         tensordict: TensorDictBase | None = None,
+         env_index: int | list[int] | None = None,
+     ) -> None:
+         raise NotImplementedError
+
+     @abc.abstractmethod
+     def async_reset_recv(self, min_get: int | None = None) -> TensorDictBase:
+         raise NotImplementedError
+
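+     # A minimal sketch of the async API declared above, assuming a pool of three
+     # environments (illustrative only): `min_get` lets the caller trade latency
+     # for batch size when gathering results.
+     #
+     #     >>> pool.async_reset_send(env_index=[0, 1, 2])
+     #     >>> td = pool.async_reset_recv(min_get=2)   # blocks until at least 2 envs are done
+     #     >>> td["action"] = torch.randn(td.shape[0], 1).clamp(-1, 1)
+     #     >>> pool.async_step_send(td)                # the "env_index" entry routes each row
+     #     >>> out = pool.async_step_recv(min_get=1)   # return whichever envs finish first
+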
+     def __del__(self):
+         self._maybe_shutdown()
+
+     def _maybe_shutdown(self):
+         try:
+             self.shutdown()
+         except Exception:
+             pass
+
+     @abc.abstractmethod
+     def shutdown(self):
+         raise NotImplementedError
+
+     def close(self, *, raise_if_closed: bool = True):
+         if raise_if_closed:
+             self.shutdown()
+         else:
+             self._maybe_shutdown()
+
+
+ class ProcessorAsyncEnvPool(AsyncEnvPool):
+     """An implementation of `AsyncEnvPool` using multiprocessing for parallel execution of environments.
+
+     This class manages a pool of environments, each running in its own process, and
+     provides methods for asynchronous stepping and resetting of environments using
+     inter-process communication.
+
+     .. note:: This class and its subclasses should work when nested within :class:`~torchrl.envs.TransformedEnv` and
+         batched environments, but users currently cannot use the async features of the base environment when
+         it is nested in these classes. One should prefer nesting transformed envs within an `AsyncEnvPool` instead.
+         If this is not possible, please raise an issue.
+
+     Methods:
+         _setup(): Initializes the multiprocessing queues and processes for each
+             environment.
+         async_step_send(tensordict): Sends a step command to the environments.
+         async_step_recv(min_get): Receives the results of the step command.
+         async_reset_send(tensordict): Sends a reset command to the environments.
+         async_reset_recv(min_get): Receives the results of the reset command.
+         shutdown(): Shuts down all environment processes.
+     """
+
+     def _setup(self):
+         self.step_queue = Queue(maxsize=self.num_envs)
+         self.reset_queue = Queue(maxsize=self.num_envs)
+         self.step_reset_queue = Queue(maxsize=self.num_envs)
+         self.input_queue = [Queue(maxsize=1) for _ in range(self.num_envs)]
+         self.output_queue = [Queue(maxsize=1) for _ in range(self.num_envs)]
+         self._current_reset = 0
+         self._current_step = 0
+         self._current_step_reset = 0
+
+         num_threads = self.num_envs
+         self.threads = []
+         for i in range(num_threads):
+             # thread = threading.Thread(target=_env_exec, kwargs={"i": i, "env_or_factory": self.env_maker[i], "input_queue": self.input_queue[i], "step_queue": self.step_queue, "reset_queue": self.reset_queue})
+             thread = multiprocessing.Process(
+                 target=self._env_exec,
+                 kwargs={
+                     "i": i,
+                     "env_or_factory": self.env_makers[i],
+                     "create_env_kwargs": self.create_env_kwargs[i],
+                     "input_queue": self.input_queue[i],
+                     "output_queue": self.output_queue[i],
+                     "step_reset_queue": self.step_reset_queue,
+                     "step_queue": self.step_queue,
+                     "reset_queue": self.reset_queue,
+                 },
+             )
+             self.threads.append(thread)
+             thread.start()
+         # Get specs from each worker and cache them for _get_child_specs()
+         for i in range(num_threads):
+             self.input_queue[i].put(("get_specs", None))
+         self._child_specs = []
+         for i in range(num_threads):
+             self._child_specs.append(self.output_queue[i].get())
+         specs = torch.stack(list(self._child_specs))
+         output_spec = specs["output_spec"]
+         input_spec = specs["input_spec"]
+         return output_spec, input_spec
+
+     def _get_child_specs(self) -> list:
+         """Returns the cached specs from each child environment process."""
+         return self._child_specs
+
+     @property
+     def env_batch_sizes(self) -> list[torch.Size]:
+         batch_sizes = getattr(self, "_env_batch_sizes", [])
+         if not batch_sizes:
+             for _env_idx in range(self.num_envs):
+                 self.input_queue[_env_idx].put(("batch_size", None))
+                 batch_sizes.append(self.output_queue[_env_idx].get())
+             self._env_batch_sizes = batch_sizes
+         return batch_sizes
+
+     def async_step_send(
+         self, tensordict: TensorDictBase, env_index: int | list[int] | None = None
+     ) -> None:
+         # Put the tds in a queue and ask for env.step
+         tensordict, env_idx = self._maybe_make_tensordict(tensordict, env_index, False)
+
+         if self._busy.intersection(env_idx):
+             raise RuntimeError(
+                 f"Some envs are still processing a step: envs that are busy: {self._busy}, queried: {env_idx}."
+             )
+         self._busy.update(env_idx)
+
+         local_tds = tensordict.unbind(0)
+         for _env_idx, local_td in _zip_strict(env_idx, local_tds):
+             self.input_queue[_env_idx].put(("step", local_td))
+         self._current_step = self._current_step + len(env_idx)
+
+     def async_step_recv(self, min_get: int | None = None) -> TensorDictBase:
+         # Get step results from the queue
+         if min_get is None:
+             min_get = self.min_get
+         if min_get > self._current_step:
+             raise RuntimeError(
+                 f"Cannot await {min_get} steps when only {self._current_step} are being stepped."
+             )
+         r = self._wait_for_one_and_get(self.step_queue, min_get)
+         self._current_step = self._current_step - len(r)
+         r, idx = self._sort_results(r)
+         self._busy.difference_update(idx)
+         return self._stack_func(r)
+
+     def _async_private_step_send(
+         self, tensordict: TensorDictBase, env_index: int | list[int] | None = None
+     ) -> None:
+         # Put the tds in a queue and ask for env._step
+         tensordict, env_idx = self._maybe_make_tensordict(tensordict, env_index, False)
+
+         if self._busy.intersection(env_idx):
+             raise RuntimeError(
+                 f"Some envs are still processing a step: envs that are busy: {self._busy}, queried: {env_idx}."
+             )
+         self._busy.update(env_idx)
+
+         local_tds = tensordict.unbind(0)
+         for _env_idx, local_td in _zip_strict(env_idx, local_tds):
+             self.input_queue[_env_idx].put(("_step", local_td))
+         self._current_step = self._current_step + len(env_idx)
+
+     _async_private_step_recv = async_step_recv
+
+     def async_step_and_maybe_reset_send(
+         self, tensordict: TensorDictBase, env_index: int | list[int] | None = None
+     ) -> None:
+         # Put the tds in a queue and ask for env.step_and_maybe_reset
+         tensordict, env_idx = self._maybe_make_tensordict(tensordict, env_index, False)
+
+         if self._busy.intersection(env_idx):
+             raise RuntimeError(
+                 f"Some envs are still processing a step: envs that are busy: {self._busy}, queried: {env_idx}."
+             )
+         self._busy.update(env_idx)
+         local_tds = tensordict.unbind(0)
+         for _env_idx, local_td in _zip_strict(env_idx, local_tds):
+             self._current_step_reset = self._current_step_reset + 1
+             self.input_queue[_env_idx].put(("step_and_maybe_reset", local_td))
+
+     def async_step_and_maybe_reset_recv(
+         self, min_get: int | None = None
+     ) -> tuple[TensorDictBase, TensorDictBase]:
+         # Get step_and_maybe_reset results from the queue
+         if min_get is None:
+             min_get = self.min_get
+         if min_get > self._current_step_reset:
+             raise RuntimeError(
+                 f"Cannot await {min_get} step_and_maybe_reset when only {self._current_step_reset} are being stepped."
+             )
+         r = self._wait_for_one_and_get(self.step_reset_queue, min_get)
+         self._current_step_reset = self._current_step_reset - len(r)
+         r, r_ = zip(*r)
+         r, r_, idx = self._sort_results(r, r_)
+         self._busy.difference_update(idx)
+         return self._stack_func(r), self._stack_func(r_)
+
+     def async_reset_send(
+         self,
+         tensordict: TensorDictBase | None = None,
+         env_index: int | list[int] | None = None,
+     ) -> None:
+         # Put the tds in a queue and ask for env.reset
+         tensordict, env_idx = self._maybe_make_tensordict(tensordict, env_index, True)
+
+         if self._busy.intersection(env_idx):
+             raise RuntimeError(
+                 f"Some envs are still processing a step: envs that are busy: {self._busy}, queried: {env_idx}."
+             )
+         self._busy.update(env_idx)
+         local_tds = tensordict.unbind(0)
+         for _env_idx, local_td in _zip_strict(env_idx, local_tds):
+             self._current_reset = self._current_reset + 1
+             self.input_queue[_env_idx].put(("reset", local_td))
+
+     def async_reset_recv(self, min_get: int | None = None) -> TensorDictBase:
+         # Get reset results from the queue
+         if min_get is None:
+             min_get = self.min_get
+         if min_get > self._current_reset:
+             raise RuntimeError(
+                 f"Cannot await {min_get} resets when only {self._current_reset} are being reset."
+             )
+         r = self._wait_for_one_and_get(self.reset_queue, min_get)
+         self._current_reset = self._current_reset - len(r)
+         r, idx = self._sort_results(r)
+         self._busy.difference_update(idx)
+         return self._stack_func(r)
+
+     def _async_private_reset_send(
+         self,
+         tensordict: TensorDictBase | None = None,
+         env_index: int | list[int] | None = None,
+     ) -> None:
+         # Put the tds in a queue and ask for env._reset
+         tensordict, env_idx = self._maybe_make_tensordict(tensordict, env_index, True)
+
+         if self._busy.intersection(env_idx):
+             raise RuntimeError(
+                 f"Some envs are still processing a step: envs that are busy: {self._busy}, queried: {env_idx}."
+             )
+         self._busy.update(env_idx)
+         local_tds = tensordict.unbind(0)
+         for _env_idx, local_td in _zip_strict(env_idx, local_tds):
+             self._current_reset = self._current_reset + 1
+             self.input_queue[_env_idx].put(("_reset", local_td))
+
+     _async_private_reset_recv = async_reset_recv
+
+     def _wait_for_one_and_get(self, q, min_get):
+         # Block for at least one item
+         items = [q.get()]
+
+         # Retrieve all other items that are already available
+         try:
+             while True:
+                 item = q.get_nowait()
+                 items.append(item)
+         except Empty:
+             pass
+
+         # Block until at least min_get items have been gathered
+         while len(items) < min_get:
+             item = q.get()
+             items.append(item)
+
+         return items
+
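+     # The helper above follows a "block, drain, top up" pattern: one blocking get
+     # guarantees progress, the non-blocking loop drains whatever is already
+     # available, and further blocking gets run only while fewer than `min_get`
+     # items have been gathered. A self-contained sketch of the same pattern with
+     # a plain queue (illustrative only):
+     #
+     #     >>> import queue
+     #     >>> q = queue.Queue()
+     #     >>> for i in range(3):
+     #     ...     q.put(i)
+     #     >>> items = [q.get()]                     # block for at least one item
+     #     >>> try:
+     #     ...     while True:
+     #     ...         items.append(q.get_nowait())  # drain what is ready
+     #     ... except queue.Empty:
+     #     ...     pass
+     #     >>> items
+     #     [0, 1, 2]
+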
+     def shutdown(self):
+         for env_id in range(self.num_envs):
+             self.input_queue[env_id].put(("shutdown", None))
+
+         for thread in self.threads:
+             thread.join()
+
+     @classmethod
+     def _env_exec(
+         cls,
+         i,
+         env_or_factory,
+         create_env_kwargs,
+         input_queue,
+         output_queue,
+         step_queue,
+         step_reset_queue,
+         reset_queue,
+     ):
+         if not isinstance(env_or_factory, EnvBase):
+             env = env_or_factory(**create_env_kwargs)
+         else:
+             env = env_or_factory
+
+         while True:
+             msg_data = input_queue.get()
+             msg, data = msg_data
+             if msg == "get_specs":
+                 output_queue.put(env.specs)
+             elif msg == "batch_size":
+                 output_queue.put(env.batch_size)
+             elif msg == "reset":
+                 data = env.reset(data.copy())
+                 data.set(cls._env_idx_key, NonTensorData(i))
+                 reset_queue.put(data)
+             elif msg == "_reset":
+                 data = env._reset(data.copy())
+                 data.set(cls._env_idx_key, NonTensorData(i))
+                 reset_queue.put(data)
+             elif msg == "step_and_maybe_reset":
+                 data, data_ = env.step_and_maybe_reset(data.copy())
+                 data.set(cls._env_idx_key, NonTensorData(i))
+                 data_.set(cls._env_idx_key, NonTensorData(i))
+                 step_reset_queue.put((data, data_))
+             elif msg == "step":
+                 data = env.step(data.copy())
+                 data.set(cls._env_idx_key, NonTensorData(i))
+                 step_queue.put(data)
+             elif msg == "_step":
+                 data = env._step(data.copy())
+                 data.set(cls._env_idx_key, NonTensorData(i))
+                 step_queue.put(data)
+             elif msg == "shutdown":
+                 env.close()
+                 break
+             else:
+                 raise RuntimeError(f"Unknown msg {msg} for worker {i}")
+         return
+
+
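+ # The worker loop in `_env_exec` above defines a small message protocol over each
+ # env's input queue: ("get_specs", None), ("batch_size", None), ("reset", td),
+ # ("_reset", td), ("step", td), ("_step", td), ("step_and_maybe_reset", td) and
+ # ("shutdown", None). A minimal sketch of driving one worker by hand through the
+ # queues created in `_setup` (illustrative only; assumes a single batch-less
+ # Pendulum env):
+ #
+ #     >>> pool = AsyncEnvPool([partial(GymEnv, "Pendulum-v1")], backend="multiprocessing")
+ #     >>> pool.input_queue[0].put(("batch_size", None))
+ #     >>> pool.output_queue[0].get()
+ #     torch.Size([])
+ #     >>> pool.shutdown()
+
+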
+ class ThreadingAsyncEnvPool(AsyncEnvPool):
+     """An implementation of `AsyncEnvPool` using threading for parallel execution of environments.
+
+     This class manages a pool of environments, each running in its own thread, and
+     provides methods for asynchronous stepping and resetting of environments using
+     a thread pool executor.
+
+     .. note:: This class and its subclasses should work when nested within :class:`~torchrl.envs.TransformedEnv` and
+         batched environments, but users currently cannot use the async features of the base environment when
+         it is nested in these classes. One should prefer nesting transformed envs within an `AsyncEnvPool` instead.
+         If this is not possible, please raise an issue.
+
+     Methods:
+         _setup(): Initializes the thread pool and environment instances.
+         async_step_send(tensordict): Sends a step command to the environments.
+         async_step_recv(min_get): Receives the results of the step command.
+         async_reset_send(tensordict): Sends a reset command to the environments.
+         async_reset_recv(min_get): Receives the results of the reset command.
+         shutdown(): Shuts down the thread pool.
+     """
+
+     def _setup(self):
+         self._pool = ThreadPoolExecutor(max_workers=self.num_envs)
+         self.envs = [
+             env_factory(**create_env_kwargs)
+             if not isinstance(env_factory, EnvBase)
+             else env_factory
+             for env_factory, create_env_kwargs in zip(
+                 self.env_makers, self.create_env_kwargs
+             )
+         ]
+         self._reset_futures = []
+         self._private_reset_futures = []
+         self._step_futures = []
+         self._private_step_futures = []
+         self._step_and_maybe_reset_futures = []
+         self._current_step = 0
+         self._current_step_reset = 0
+         self._current_reset = 0
+
+         # Get the specs
+         specs = torch.stack([env.specs for env in self.envs])
+         return specs["output_spec"].clone(), specs["input_spec"].clone()
+
+     @property
+     def env_batch_sizes(self) -> list[torch.Size]:
+         return [env.batch_size for env in self.envs]
+
+     def _get_child_specs(self) -> list:
+         """Returns the specs from each child environment."""
+         return [env.specs for env in self.envs]
+
+     @classmethod
+     def _get_specs(cls, env: EnvBase):
+         return env.specs
+
+     @classmethod
+     def _step_func(cls, env_td: tuple[EnvBase, TensorDictBase, int]):
+         env, td, idx = env_td
+         return env.step(td).set(cls._env_idx_key, NonTensorData(idx))
+
+     @classmethod
+     def _private_step_func(cls, env_td: tuple[EnvBase, TensorDictBase, int]):
+         env, td, idx = env_td
+         return env._step(td).set(cls._env_idx_key, NonTensorData(idx))
+
+     @classmethod
+     def _reset_func(cls, env_td: tuple[EnvBase, TensorDictBase, int]):
+         env, td, idx = env_td
+         return env.reset(td).set(cls._env_idx_key, NonTensorData(idx))
+
+     @classmethod
+     def _private_reset_func(cls, env_td: tuple[EnvBase, TensorDictBase, int]):
+         env, td, idx = env_td
+         return env._reset(td).set(cls._env_idx_key, NonTensorData(idx))
+
+     @classmethod
+     def _step_and_maybe_reset_func(cls, env_td: tuple[EnvBase, TensorDictBase, int]):
+         env, td, idx = env_td
+         td, td_ = env.step_and_maybe_reset(td)
+         idx = NonTensorData(idx)
+         return td.set(cls._env_idx_key, idx), td_.set(cls._env_idx_key, idx)
+
+     def async_step_send(
+         self, tensordict: TensorDictBase, env_index: int | list[int] | None = None
+     ) -> None:
+         tensordict, env_idx = self._maybe_make_tensordict(tensordict, env_index, False)
+         if self._busy.intersection(env_idx):
+             raise RuntimeError(
+                 f"Some envs are still processing a step: envs that are busy: {self._busy}, queried: {env_idx}."
+             )
+         self._busy.update(env_idx)
+
+         tds = tensordict.unbind(0)
+         envs = [self.envs[idx] for idx in env_idx]
+         futures = [
+             self._pool.submit(self._step_func, (env, td, idx))
+             for env, td, idx in zip(envs, tds, env_idx)
+         ]
+         self._step_futures.extend(futures)
+         self._current_step = self._current_step + len(futures)
+
+     def async_step_recv(self, min_get: int | None = None) -> TensorDictBase:
+         if min_get is None:
+             min_get = self.min_get
+         if min_get > self._current_step:
+             raise RuntimeError(
+                 f"Cannot await {min_get} steps when only {self._current_step} are being stepped."
+             )
+         results = []
+         futures = self._step_futures
+         completed_futures = []
+         for future in as_completed(futures):
+             results.append(future.result())
+             completed_futures.append(future)
+             self._current_step = self._current_step - 1
+             if len(results) >= min_get and not any(
+                 f.done() for f in futures if f not in completed_futures
+             ):
+                 break
+         self._step_futures = [
+             f for f in self._step_futures if f not in completed_futures
+         ]
+         results, idx = self._sort_results(results)
+         self._busy.difference_update(idx)
+         return self._stack_func(results)
+
+     def _async_private_step_send(
+         self, tensordict: TensorDictBase, env_index: int | list[int] | None = None
+     ) -> None:
+         tensordict, env_idx = self._maybe_make_tensordict(tensordict, env_index, False)
+
+         if self._busy.intersection(env_idx):
+             raise RuntimeError(
+                 f"Some envs are still processing a step: envs that are busy: {self._busy}, queried: {env_idx}."
+             )
+         self._busy.update(env_idx)
+
+         tds = tensordict.unbind(0)
+         envs = [self.envs[idx] for idx in env_idx]
+         futures = [
+             self._pool.submit(self._private_step_func, (env, td, idx))
+             for env, td, idx in zip(envs, tds, env_idx)
+         ]
+         self._private_step_futures.extend(futures)
+         self._current_step = self._current_step + len(futures)
+
+     def _async_private_step_recv(self, min_get: int | None = None) -> TensorDictBase:
+         if min_get is None:
+             min_get = self.min_get
+         if min_get > self._current_step:
+             raise RuntimeError(
+                 f"Cannot await {min_get} steps when only {self._current_step} are being stepped."
+             )
+         results = []
+         futures = self._private_step_futures
+         completed_futures = []
+         for future in as_completed(futures):
+             results.append(future.result())
+             completed_futures.append(future)
+             self._current_step = self._current_step - 1
+             if len(results) >= min_get and not any(
+                 f.done() for f in futures if f not in completed_futures
+             ):
+                 break
+         self._private_step_futures = [
+             f for f in self._private_step_futures if f not in completed_futures
+         ]
+         results, idx = self._sort_results(results)
+         self._busy.difference_update(idx)
+         return self._stack_func(results)
+
+     def async_step_and_maybe_reset_send(
+         self, tensordict: TensorDictBase, env_index: int | list[int] | None = None
+     ) -> None:
+         tensordict, env_idx = self._maybe_make_tensordict(tensordict, env_index, False)
+
+         if self._busy.intersection(env_idx):
+             raise RuntimeError(
+                 f"Some envs are still processing a step: envs that are busy: {self._busy}, queried: {env_idx}."
+             )
+         self._busy.update(env_idx)
+
+         tds = tensordict.unbind(0)
+         envs = [self.envs[idx] for idx in env_idx]
+         futures = [
+             self._pool.submit(self._step_and_maybe_reset_func, (env, td, idx))
+             for env, td, idx in zip(envs, tds, env_idx)
+         ]
+         self._step_and_maybe_reset_futures.extend(futures)
+         self._current_step_reset = self._current_step_reset + len(futures)
+
+     def async_step_and_maybe_reset_recv(
+         self, min_get: int | None = None
+     ) -> tuple[TensorDictBase, TensorDictBase]:
+         if min_get is None:
+             min_get = self.min_get
+         if min_get > self._current_step_reset:
+             raise RuntimeError(
+                 f"Cannot await {min_get} step_and_maybe_reset when only {self._current_step_reset} are being stepped."
+             )
+         results = []
+         futures = self._step_and_maybe_reset_futures
+         completed_futures = []
+         for future in as_completed(futures):
+             results.append(future.result())
+             completed_futures.append(future)
+             self._current_step_reset = self._current_step_reset - 1
+             if len(results) >= min_get and not any(
+                 f.done() for f in futures if f not in completed_futures
+             ):
+                 break
+         self._step_and_maybe_reset_futures = [
+             f for f in self._step_and_maybe_reset_futures if f not in completed_futures
+         ]
+         results, results_ = zip(*results)
+         results, results_, idx = self._sort_results(results, results_)
+         self._busy.difference_update(idx)
+         return self._stack_func(results), self._stack_func(results_)
+
+     def async_reset_send(
+         self,
+         tensordict: TensorDictBase | None = None,
+         env_index: int | list[int] | None = None,
+     ) -> None:
+         tensordict, env_idx = self._maybe_make_tensordict(tensordict, env_index, True)
+
+         if self._busy.intersection(env_idx):
+             raise RuntimeError(
+                 f"Some envs are still processing a step: envs that are busy: {self._busy}, queried: {env_idx}."
+             )
+         self._busy.update(env_idx)
+
+         tds = tensordict.unbind(0)
+         envs = [self.envs[idx] for idx in env_idx]
+         futures = [
+             self._pool.submit(self._reset_func, (env, td, idx))
+             for env, td, idx in zip(envs, tds, env_idx)
+         ]
+         self._current_reset = self._current_reset + len(futures)
+         self._reset_futures.extend(futures)
+
+     def async_reset_recv(self, min_get: int | None = None) -> TensorDictBase:
+         if min_get is None:
+             min_get = self.min_get
+         if min_get > self._current_reset:
+             raise RuntimeError(
+                 f"Cannot await {min_get} resets when only {self._current_reset} are being reset."
+             )
+         results = []
+         futures = self._reset_futures
+         completed_futures = []
+         for future in as_completed(futures):
+             results.append(future.result())
+             completed_futures.append(future)
+             self._current_reset = self._current_reset - 1
+             if len(results) >= min_get and not any(
+                 f.done() for f in futures if f not in completed_futures
+             ):
+                 break
+         self._reset_futures = [
+             f for f in self._reset_futures if f not in completed_futures
+         ]
+         results, idx = self._sort_results(results)
+         self._busy.difference_update(idx)
+         return self._stack_func(results)
+
+     def _async_private_reset_send(
+         self,
+         tensordict: TensorDictBase | None = None,
+         env_index: int | list[int] | None = None,
+     ) -> None:
+         tensordict, env_idx = self._maybe_make_tensordict(tensordict, env_index, True)
+
+         if self._busy.intersection(env_idx):
+             raise RuntimeError(
+                 f"Some envs are still processing a step: envs that are busy: {self._busy}, queried: {env_idx}."
+             )
+         self._busy.update(env_idx)
+
+         tds = tensordict.unbind(0)
+         envs = [self.envs[idx] for idx in env_idx]
+         futures = [
+             self._pool.submit(self._private_reset_func, (env, td, idx))
+             for env, td, idx in zip(envs, tds, env_idx)
+         ]
+         self._current_reset = self._current_reset + len(futures)
+         self._private_reset_futures.extend(futures)
+
+     def _async_private_reset_recv(self, min_get: int | None = None) -> TensorDictBase:
+         if min_get is None:
+             min_get = self.min_get
+         if min_get > self._current_reset:
+             raise RuntimeError(
+                 f"Cannot await {min_get} resets when only {self._current_reset} are being reset."
+             )
+         results = []
+         futures = self._private_reset_futures
+         completed_futures = []
+         for future in as_completed(futures):
+             results.append(future.result())
+             completed_futures.append(future)
+             self._current_reset = self._current_reset - 1
+             if len(results) >= min_get and not any(
+                 f.done() for f in futures if f not in completed_futures
+             ):
+                 break
+         self._private_reset_futures = [
+             f for f in self._private_reset_futures if f not in completed_futures
+         ]
+         results, idx = self._sort_results(results)
+         self._busy.difference_update(idx)
+         return self._stack_func(results)
+
+     def shutdown(self):
+         self._pool.shutdown()
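+
+
+ # End-to-end usage sketch for the threading backend, mirroring the `AsyncEnvPool`
+ # docstring (illustrative only):
+ #
+ #     >>> from functools import partial
+ #     >>> from torchrl.envs import AsyncEnvPool, GymEnv
+ #     >>> pool = AsyncEnvPool(
+ #     ...     [partial(GymEnv, "Pendulum-v1"), partial(GymEnv, "Pendulum-v1")],
+ #     ...     backend="threading",
+ #     ...     stack="lazy",
+ #     ... )
+ #     >>> td = pool.reset()
+ #     >>> td = pool.rand_step(td)
+ #     >>> pool.close()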