PyPI - torchrl - Versions diffs - 0.11.0__cp314-cp314-macosx_11_0_arm64.whl - Mend

torchrl 0.11.0__cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (395) hide show

benchmarks/benchmark_batched_envs.py +104 -0
benchmarks/conftest.py +91 -0
benchmarks/ecosystem/gym_env_throughput.py +321 -0
benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
benchmarks/requirements.txt +7 -0
benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
benchmarks/test_collectors_benchmark.py +240 -0
benchmarks/test_compressed_storage_benchmark.py +145 -0
benchmarks/test_envs_benchmark.py +133 -0
benchmarks/test_llm.py +101 -0
benchmarks/test_non_tensor_env_benchmark.py +70 -0
benchmarks/test_objectives_benchmarks.py +1199 -0
benchmarks/test_replaybuffer_benchmark.py +254 -0
sota-check/README.md +35 -0
sota-implementations/README.md +142 -0
sota-implementations/a2c/README.md +39 -0
sota-implementations/a2c/a2c_atari.py +291 -0
sota-implementations/a2c/a2c_mujoco.py +273 -0
sota-implementations/a2c/utils_atari.py +240 -0
sota-implementations/a2c/utils_mujoco.py +160 -0
sota-implementations/bandits/README.md +7 -0
sota-implementations/bandits/dqn.py +126 -0
sota-implementations/cql/cql_offline.py +198 -0
sota-implementations/cql/cql_online.py +249 -0
sota-implementations/cql/discrete_cql_offline.py +180 -0
sota-implementations/cql/discrete_cql_online.py +227 -0
sota-implementations/cql/utils.py +471 -0
sota-implementations/crossq/crossq.py +271 -0
sota-implementations/crossq/utils.py +320 -0
sota-implementations/ddpg/ddpg.py +231 -0
sota-implementations/ddpg/utils.py +325 -0
sota-implementations/decision_transformer/dt.py +163 -0
sota-implementations/decision_transformer/lamb.py +167 -0
sota-implementations/decision_transformer/online_dt.py +178 -0
sota-implementations/decision_transformer/utils.py +562 -0
sota-implementations/discrete_sac/discrete_sac.py +243 -0
sota-implementations/discrete_sac/utils.py +324 -0
sota-implementations/dqn/README.md +30 -0
sota-implementations/dqn/dqn_atari.py +272 -0
sota-implementations/dqn/dqn_cartpole.py +236 -0
sota-implementations/dqn/utils_atari.py +132 -0
sota-implementations/dqn/utils_cartpole.py +90 -0
sota-implementations/dreamer/README.md +129 -0
sota-implementations/dreamer/dreamer.py +586 -0
sota-implementations/dreamer/dreamer_utils.py +1107 -0
sota-implementations/expert-iteration/README.md +352 -0
sota-implementations/expert-iteration/ei_utils.py +770 -0
sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
sota-implementations/gail/gail.py +327 -0
sota-implementations/gail/gail_utils.py +68 -0
sota-implementations/gail/ppo_utils.py +157 -0
sota-implementations/grpo/README.md +273 -0
sota-implementations/grpo/grpo-async.py +437 -0
sota-implementations/grpo/grpo-sync.py +435 -0
sota-implementations/grpo/grpo_utils.py +843 -0
sota-implementations/grpo/requirements_gsm8k.txt +11 -0
sota-implementations/grpo/requirements_ifeval.txt +16 -0
sota-implementations/impala/README.md +33 -0
sota-implementations/impala/impala_multi_node_ray.py +292 -0
sota-implementations/impala/impala_multi_node_submitit.py +284 -0
sota-implementations/impala/impala_single_node.py +261 -0
sota-implementations/impala/utils.py +184 -0
sota-implementations/iql/discrete_iql.py +230 -0
sota-implementations/iql/iql_offline.py +164 -0
sota-implementations/iql/iql_online.py +225 -0
sota-implementations/iql/utils.py +437 -0
sota-implementations/multiagent/README.md +74 -0
sota-implementations/multiagent/iql.py +237 -0
sota-implementations/multiagent/maddpg_iddpg.py +266 -0
sota-implementations/multiagent/mappo_ippo.py +267 -0
sota-implementations/multiagent/qmix_vdn.py +271 -0
sota-implementations/multiagent/sac.py +337 -0
sota-implementations/multiagent/utils/__init__.py +4 -0
sota-implementations/multiagent/utils/logging.py +151 -0
sota-implementations/multiagent/utils/utils.py +43 -0
sota-implementations/ppo/README.md +29 -0
sota-implementations/ppo/ppo_atari.py +305 -0
sota-implementations/ppo/ppo_mujoco.py +293 -0
sota-implementations/ppo/utils_atari.py +238 -0
sota-implementations/ppo/utils_mujoco.py +152 -0
sota-implementations/ppo_trainer/train.py +21 -0
sota-implementations/redq/README.md +7 -0
sota-implementations/redq/redq.py +199 -0
sota-implementations/redq/utils.py +1060 -0
sota-implementations/sac/sac-async.py +266 -0
sota-implementations/sac/sac.py +239 -0
sota-implementations/sac/utils.py +381 -0
sota-implementations/sac_trainer/train.py +16 -0
sota-implementations/td3/td3.py +254 -0
sota-implementations/td3/utils.py +319 -0
sota-implementations/td3_bc/td3_bc.py +177 -0
sota-implementations/td3_bc/utils.py +251 -0
torchrl/.dylibs/libc++.1.0.dylib +0 -0
torchrl/__init__.py +144 -0
torchrl/_extension.py +74 -0
torchrl/_torchrl.cpython-314-darwin.so +0 -0
torchrl/_utils.py +1431 -0
torchrl/collectors/__init__.py +48 -0
torchrl/collectors/_base.py +1058 -0
torchrl/collectors/_constants.py +88 -0
torchrl/collectors/_multi_async.py +324 -0
torchrl/collectors/_multi_base.py +1805 -0
torchrl/collectors/_multi_sync.py +464 -0
torchrl/collectors/_runner.py +581 -0
torchrl/collectors/_single.py +2009 -0
torchrl/collectors/_single_async.py +259 -0
torchrl/collectors/collectors.py +62 -0
torchrl/collectors/distributed/__init__.py +32 -0
torchrl/collectors/distributed/default_configs.py +133 -0
torchrl/collectors/distributed/generic.py +1306 -0
torchrl/collectors/distributed/ray.py +1092 -0
torchrl/collectors/distributed/rpc.py +1006 -0
torchrl/collectors/distributed/sync.py +731 -0
torchrl/collectors/distributed/utils.py +160 -0
torchrl/collectors/llm/__init__.py +10 -0
torchrl/collectors/llm/base.py +494 -0
torchrl/collectors/llm/ray_collector.py +275 -0
torchrl/collectors/llm/utils.py +36 -0
torchrl/collectors/llm/weight_update/__init__.py +10 -0
torchrl/collectors/llm/weight_update/vllm.py +348 -0
torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
torchrl/collectors/utils.py +433 -0
torchrl/collectors/weight_update.py +591 -0
torchrl/csrc/numpy_utils.h +38 -0
torchrl/csrc/pybind.cpp +27 -0
torchrl/csrc/segment_tree.h +458 -0
torchrl/csrc/torch_utils.h +34 -0
torchrl/csrc/utils.cpp +48 -0
torchrl/csrc/utils.h +31 -0
torchrl/data/__init__.py +187 -0
torchrl/data/datasets/__init__.py +58 -0
torchrl/data/datasets/atari_dqn.py +878 -0
torchrl/data/datasets/common.py +281 -0
torchrl/data/datasets/d4rl.py +489 -0
torchrl/data/datasets/d4rl_infos.py +187 -0
torchrl/data/datasets/gen_dgrl.py +375 -0
torchrl/data/datasets/minari_data.py +643 -0
torchrl/data/datasets/openml.py +177 -0
torchrl/data/datasets/openx.py +798 -0
torchrl/data/datasets/roboset.py +363 -0
torchrl/data/datasets/utils.py +11 -0
torchrl/data/datasets/vd4rl.py +432 -0
torchrl/data/llm/__init__.py +34 -0
torchrl/data/llm/dataset.py +491 -0
torchrl/data/llm/history.py +1378 -0
torchrl/data/llm/prompt.py +198 -0
torchrl/data/llm/reward.py +225 -0
torchrl/data/llm/topk.py +186 -0
torchrl/data/llm/utils.py +543 -0
torchrl/data/map/__init__.py +21 -0
torchrl/data/map/hash.py +185 -0
torchrl/data/map/query.py +204 -0
torchrl/data/map/tdstorage.py +363 -0
torchrl/data/map/tree.py +1434 -0
torchrl/data/map/utils.py +103 -0
torchrl/data/postprocs/__init__.py +8 -0
torchrl/data/postprocs/postprocs.py +391 -0
torchrl/data/replay_buffers/__init__.py +99 -0
torchrl/data/replay_buffers/checkpointers.py +622 -0
torchrl/data/replay_buffers/ray_buffer.py +292 -0
torchrl/data/replay_buffers/replay_buffers.py +2376 -0
torchrl/data/replay_buffers/samplers.py +2578 -0
torchrl/data/replay_buffers/scheduler.py +265 -0
torchrl/data/replay_buffers/storages.py +2412 -0
torchrl/data/replay_buffers/utils.py +1042 -0
torchrl/data/replay_buffers/writers.py +781 -0
torchrl/data/tensor_specs.py +7101 -0
torchrl/data/utils.py +334 -0
torchrl/envs/__init__.py +265 -0
torchrl/envs/async_envs.py +1105 -0
torchrl/envs/batched_envs.py +3093 -0
torchrl/envs/common.py +4241 -0
torchrl/envs/custom/__init__.py +11 -0
torchrl/envs/custom/chess.py +617 -0
torchrl/envs/custom/llm.py +214 -0
torchrl/envs/custom/pendulum.py +401 -0
torchrl/envs/custom/san_moves.txt +29274 -0
torchrl/envs/custom/tictactoeenv.py +288 -0
torchrl/envs/env_creator.py +263 -0
torchrl/envs/gym_like.py +752 -0
torchrl/envs/libs/__init__.py +68 -0
torchrl/envs/libs/_gym_utils.py +326 -0
torchrl/envs/libs/brax.py +846 -0
torchrl/envs/libs/dm_control.py +544 -0
torchrl/envs/libs/envpool.py +447 -0
torchrl/envs/libs/gym.py +2239 -0
torchrl/envs/libs/habitat.py +138 -0
torchrl/envs/libs/isaac_lab.py +87 -0
torchrl/envs/libs/isaacgym.py +203 -0
torchrl/envs/libs/jax_utils.py +166 -0
torchrl/envs/libs/jumanji.py +963 -0
torchrl/envs/libs/meltingpot.py +599 -0
torchrl/envs/libs/openml.py +153 -0
torchrl/envs/libs/openspiel.py +652 -0
torchrl/envs/libs/pettingzoo.py +1042 -0
torchrl/envs/libs/procgen.py +351 -0
torchrl/envs/libs/robohive.py +429 -0
torchrl/envs/libs/smacv2.py +645 -0
torchrl/envs/libs/unity_mlagents.py +891 -0
torchrl/envs/libs/utils.py +147 -0
torchrl/envs/libs/vmas.py +813 -0
torchrl/envs/llm/__init__.py +63 -0
torchrl/envs/llm/chat.py +730 -0
torchrl/envs/llm/datasets/README.md +4 -0
torchrl/envs/llm/datasets/__init__.py +17 -0
torchrl/envs/llm/datasets/gsm8k.py +353 -0
torchrl/envs/llm/datasets/ifeval.py +274 -0
torchrl/envs/llm/envs.py +789 -0
torchrl/envs/llm/libs/README.md +3 -0
torchrl/envs/llm/libs/__init__.py +8 -0
torchrl/envs/llm/libs/mlgym.py +869 -0
torchrl/envs/llm/reward/__init__.py +10 -0
torchrl/envs/llm/reward/gsm8k.py +324 -0
torchrl/envs/llm/reward/ifeval/README.md +13 -0
torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
torchrl/envs/llm/transforms/__init__.py +55 -0
torchrl/envs/llm/transforms/browser.py +292 -0
torchrl/envs/llm/transforms/dataloading.py +859 -0
torchrl/envs/llm/transforms/format.py +73 -0
torchrl/envs/llm/transforms/kl.py +1544 -0
torchrl/envs/llm/transforms/policy_version.py +189 -0
torchrl/envs/llm/transforms/reason.py +323 -0
torchrl/envs/llm/transforms/tokenizer.py +321 -0
torchrl/envs/llm/transforms/tools.py +1955 -0
torchrl/envs/model_based/__init__.py +9 -0
torchrl/envs/model_based/common.py +180 -0
torchrl/envs/model_based/dreamer.py +112 -0
torchrl/envs/transforms/__init__.py +147 -0
torchrl/envs/transforms/functional.py +48 -0
torchrl/envs/transforms/gym_transforms.py +203 -0
torchrl/envs/transforms/module.py +341 -0
torchrl/envs/transforms/r3m.py +372 -0
torchrl/envs/transforms/ray_service.py +663 -0
torchrl/envs/transforms/rb_transforms.py +214 -0
torchrl/envs/transforms/transforms.py +11835 -0
torchrl/envs/transforms/utils.py +94 -0
torchrl/envs/transforms/vc1.py +307 -0
torchrl/envs/transforms/vecnorm.py +845 -0
torchrl/envs/transforms/vip.py +407 -0
torchrl/envs/utils.py +1718 -0
torchrl/envs/vec_envs.py +11 -0
torchrl/modules/__init__.py +206 -0
torchrl/modules/distributions/__init__.py +73 -0
torchrl/modules/distributions/continuous.py +830 -0
torchrl/modules/distributions/discrete.py +908 -0
torchrl/modules/distributions/truncated_normal.py +187 -0
torchrl/modules/distributions/utils.py +233 -0
torchrl/modules/llm/__init__.py +62 -0
torchrl/modules/llm/backends/__init__.py +65 -0
torchrl/modules/llm/backends/vllm/__init__.py +94 -0
torchrl/modules/llm/backends/vllm/_models.py +46 -0
torchrl/modules/llm/backends/vllm/base.py +72 -0
torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
torchrl/modules/llm/policies/__init__.py +28 -0
torchrl/modules/llm/policies/common.py +1809 -0
torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
torchrl/modules/llm/utils.py +23 -0
torchrl/modules/mcts/__init__.py +21 -0
torchrl/modules/mcts/scores.py +579 -0
torchrl/modules/models/__init__.py +86 -0
torchrl/modules/models/batchrenorm.py +119 -0
torchrl/modules/models/decision_transformer.py +179 -0
torchrl/modules/models/exploration.py +731 -0
torchrl/modules/models/llm.py +156 -0
torchrl/modules/models/model_based.py +596 -0
torchrl/modules/models/models.py +1712 -0
torchrl/modules/models/multiagent.py +1067 -0
torchrl/modules/models/recipes/impala.py +185 -0
torchrl/modules/models/utils.py +162 -0
torchrl/modules/planners/__init__.py +10 -0
torchrl/modules/planners/cem.py +228 -0
torchrl/modules/planners/common.py +73 -0
torchrl/modules/planners/mppi.py +265 -0
torchrl/modules/tensordict_module/__init__.py +89 -0
torchrl/modules/tensordict_module/actors.py +2457 -0
torchrl/modules/tensordict_module/common.py +529 -0
torchrl/modules/tensordict_module/exploration.py +814 -0
torchrl/modules/tensordict_module/probabilistic.py +321 -0
torchrl/modules/tensordict_module/rnn.py +1639 -0
torchrl/modules/tensordict_module/sequence.py +132 -0
torchrl/modules/tensordict_module/world_models.py +34 -0
torchrl/modules/utils/__init__.py +38 -0
torchrl/modules/utils/mappings.py +9 -0
torchrl/modules/utils/utils.py +89 -0
torchrl/objectives/__init__.py +78 -0
torchrl/objectives/a2c.py +659 -0
torchrl/objectives/common.py +753 -0
torchrl/objectives/cql.py +1346 -0
torchrl/objectives/crossq.py +710 -0
torchrl/objectives/ddpg.py +453 -0
torchrl/objectives/decision_transformer.py +371 -0
torchrl/objectives/deprecated.py +516 -0
torchrl/objectives/dqn.py +683 -0
torchrl/objectives/dreamer.py +488 -0
torchrl/objectives/functional.py +48 -0
torchrl/objectives/gail.py +258 -0
torchrl/objectives/iql.py +996 -0
torchrl/objectives/llm/__init__.py +30 -0
torchrl/objectives/llm/grpo.py +846 -0
torchrl/objectives/llm/sft.py +482 -0
torchrl/objectives/multiagent/__init__.py +8 -0
torchrl/objectives/multiagent/qmixer.py +396 -0
torchrl/objectives/ppo.py +1669 -0
torchrl/objectives/redq.py +683 -0
torchrl/objectives/reinforce.py +530 -0
torchrl/objectives/sac.py +1580 -0
torchrl/objectives/td3.py +570 -0
torchrl/objectives/td3_bc.py +625 -0
torchrl/objectives/utils.py +782 -0
torchrl/objectives/value/__init__.py +28 -0
torchrl/objectives/value/advantages.py +1956 -0
torchrl/objectives/value/functional.py +1459 -0
torchrl/objectives/value/utils.py +360 -0
torchrl/record/__init__.py +17 -0
torchrl/record/loggers/__init__.py +23 -0
torchrl/record/loggers/common.py +48 -0
torchrl/record/loggers/csv.py +226 -0
torchrl/record/loggers/mlflow.py +142 -0
torchrl/record/loggers/tensorboard.py +139 -0
torchrl/record/loggers/trackio.py +163 -0
torchrl/record/loggers/utils.py +78 -0
torchrl/record/loggers/wandb.py +214 -0
torchrl/record/recorder.py +554 -0
torchrl/services/__init__.py +79 -0
torchrl/services/base.py +109 -0
torchrl/services/ray_service.py +453 -0
torchrl/testing/__init__.py +107 -0
torchrl/testing/assertions.py +179 -0
torchrl/testing/dist_utils.py +122 -0
torchrl/testing/env_creators.py +227 -0
torchrl/testing/env_helper.py +35 -0
torchrl/testing/gym_helpers.py +156 -0
torchrl/testing/llm_mocks.py +119 -0
torchrl/testing/mocking_classes.py +2720 -0
torchrl/testing/modules.py +295 -0
torchrl/testing/mp_helpers.py +15 -0
torchrl/testing/ray_helpers.py +293 -0
torchrl/testing/utils.py +190 -0
torchrl/trainers/__init__.py +42 -0
torchrl/trainers/algorithms/__init__.py +11 -0
torchrl/trainers/algorithms/configs/__init__.py +705 -0
torchrl/trainers/algorithms/configs/collectors.py +216 -0
torchrl/trainers/algorithms/configs/common.py +41 -0
torchrl/trainers/algorithms/configs/data.py +308 -0
torchrl/trainers/algorithms/configs/envs.py +104 -0
torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
torchrl/trainers/algorithms/configs/logging.py +80 -0
torchrl/trainers/algorithms/configs/modules.py +570 -0
torchrl/trainers/algorithms/configs/objectives.py +177 -0
torchrl/trainers/algorithms/configs/trainers.py +340 -0
torchrl/trainers/algorithms/configs/transforms.py +955 -0
torchrl/trainers/algorithms/configs/utils.py +252 -0
torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
torchrl/trainers/algorithms/configs/weight_update.py +159 -0
torchrl/trainers/algorithms/ppo.py +373 -0
torchrl/trainers/algorithms/sac.py +308 -0
torchrl/trainers/helpers/__init__.py +40 -0
torchrl/trainers/helpers/collectors.py +416 -0
torchrl/trainers/helpers/envs.py +573 -0
torchrl/trainers/helpers/logger.py +33 -0
torchrl/trainers/helpers/losses.py +132 -0
torchrl/trainers/helpers/models.py +658 -0
torchrl/trainers/helpers/replay_buffer.py +59 -0
torchrl/trainers/helpers/trainers.py +301 -0
torchrl/trainers/trainers.py +2052 -0
torchrl/weight_update/__init__.py +33 -0
torchrl/weight_update/_distributed.py +749 -0
torchrl/weight_update/_mp.py +624 -0
torchrl/weight_update/_noupdate.py +102 -0
torchrl/weight_update/_ray.py +1032 -0
torchrl/weight_update/_rpc.py +284 -0
torchrl/weight_update/_shared.py +891 -0
torchrl/weight_update/llm/__init__.py +32 -0
torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
torchrl/weight_update/llm/vllm_nccl.py +710 -0
torchrl/weight_update/utils.py +73 -0
torchrl/weight_update/weight_sync_schemes.py +1244 -0
torchrl-0.11.0.dist-info/METADATA +1308 -0
torchrl-0.11.0.dist-info/RECORD +395 -0
torchrl-0.11.0.dist-info/WHEEL +5 -0
torchrl-0.11.0.dist-info/entry_points.txt +2 -0
torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
torchrl-0.11.0.dist-info/top_level.txt +7 -0

torchrl/data/utils.py ADDED Viewed

@@ -0,0 +1,334 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+import functools
+import typing
+from collections.abc import Callable
+from typing import Any, Union
+import cloudpickle
+import numpy as np
+import torch
+from torch import Tensor
+from torchrl.data.tensor_specs import (
+    Binary,
+    Categorical,
+    Composite,
+    MultiCategorical,
+    MultiOneHot,
+    OneHot,
+    Stacked,
+    StackedComposite,
+    TensorSpec,
+)
+# Lookup table from numpy dtypes to the torch dtype of the same kind and width.
+numpy_to_torch_dtype_dict = {
+    np.dtype("bool"): torch.bool,
+    np.dtype("uint8"): torch.uint8,
+    np.dtype("int8"): torch.int8,
+    np.dtype("int16"): torch.int16,
+    np.dtype("int32"): torch.int32,
+    np.dtype("int64"): torch.int64,
+    np.dtype("float16"): torch.float16,
+    np.dtype("float32"): torch.float32,
+    np.dtype("float64"): torch.float64,
+    np.dtype("complex64"): torch.complex64,
+    np.dtype("complex128"): torch.complex128,
+}
+# Reverse lookup (torch dtype -> numpy dtype), derived from the table above.
+torch_to_numpy_dtype_dict = {
+    value: key for key, value in numpy_to_torch_dtype_dict.items()
+}
+# Ways a device can be designated: a torch.device, a string or an int.
+DEVICE_TYPING = Union[torch.device, str, int]
+# Tuple of the member types of DEVICE_TYPING; written out by hand when
+# typing.get_args is not available.
+if hasattr(typing, "get_args"):
+    DEVICE_TYPING_ARGS = typing.get_args(DEVICE_TYPING)
+else:
+    DEVICE_TYPING_ARGS = (torch.device, str, int)
+# Accepted index types.
+INDEX_TYPING = Union[None, int, slice, str, Tensor, list[Any], tuple[Any, ...]]
+# Maps spec classes and their string aliases (underscore and dash spellings)
+# to one canonical action-space name. Note that the canonical name for the
+# multi one-hot case is spelled "mult_one_hot".
+ACTION_SPACE_MAP = {
+    OneHot: "one_hot",
+    MultiOneHot: "mult_one_hot",
+    Binary: "binary",
+    Categorical: "categorical",
+    "one_hot": "one_hot",
+    "one-hot": "one_hot",
+    "mult_one_hot": "mult_one_hot",
+    "mult-one-hot": "mult_one_hot",
+    "multi_one_hot": "mult_one_hot",
+    "multi-one-hot": "mult_one_hot",
+    "binary": "binary",
+    "categorical": "categorical",
+    MultiCategorical: "multi_categorical",
+    "multi_categorical": "multi_categorical",
+    "multi-categorical": "multi_categorical",
+    "multi_discrete": "multi_categorical",
+    "multi-discrete": "multi_categorical",
+}
+def consolidate_spec(
+    spec: Composite,
+    recurse_through_entries: bool = True,
+    recurse_through_stack: bool = True,
+):
+    """Removes exclusive keys from a spec by filling the gaps with 0-shaped specs.
+    The function works on ``spec.clone()`` and returns that clone.
+    Args:
+        spec (Composite): the spec to be consolidated.
+        recurse_through_entries (bool): if True, the function is called recursively on every
+            Composite / StackedComposite entry of the spec. Default is True.
+        recurse_through_stack (bool): if True and the provided spec is a StackedComposite, the
+            function is called recursively on all specs in its list. Default is True.
+    """
+    spec = spec.clone()
+    if not isinstance(spec, (Composite, StackedComposite)):
+        return spec
+    if isinstance(spec, StackedComposite):
+        shared_keys = set(spec.keys())
+        # one set per stacked spec, holding the exclusive keys that spec owns
+        owned_exclusive_keys = [set() for _ in spec._specs]
+        # exclusive key -> every value found for it across the stack
+        examples_by_key = {}
+        for position, member in enumerate(spec._specs):
+            if recurse_through_stack:
+                member = consolidate_spec(
+                    member, recurse_through_entries, recurse_through_stack
+                )
+                spec._specs[position] = member
+            for member_key in member.keys():
+                if member_key in shared_keys:
+                    continue
+                owned_exclusive_keys[position].add(member_key)
+                example = member[member_key]
+                examples_by_key.setdefault(member_key, []).append(example)
+        # give every stacked spec an empty placeholder for the exclusive keys it lacks
+        for member, owned_keys in zip(spec._specs, owned_exclusive_keys):
+            missing_keys = set(examples_by_key.keys()).difference(owned_keys)
+            for missing_key in missing_keys:
+                placeholder = _empty_like_spec(
+                    examples_by_key[missing_key], member.shape
+                )
+                member.set(missing_key, placeholder)
+    if not recurse_through_entries:
+        return spec
+    for key, value in spec.items():
+        if not isinstance(value, (Composite, StackedComposite)):
+            continue
+        consolidated = consolidate_spec(
+            value, recurse_through_entries, recurse_through_stack
+        )
+        spec.set(key, consolidated)
+    return spec
+def _empty_like_spec(specs: list[TensorSpec], shape):
+    """Builds an empty placeholder spec modelled on the first element of ``specs``.
+    Args:
+        specs (list[TensorSpec]): the values found for one exclusive key; they must all
+            be instances of the same class.
+        shape: the shape of the spec that will receive the placeholder.
+    Raises:
+        ValueError: if the elements of ``specs`` are not all of the same class.
+    """
+    if any(other.__class__ != specs[0].__class__ for other in specs[1:]):
+        raise ValueError(
+            "Found same key in lazy specs corresponding to entries with different classes"
+        )
+    template = specs[0]
+    if isinstance(template, (Composite, StackedComposite)):
+        # composite values -> an empty composite spec obtained from the first one
+        return template.empty()
+    if isinstance(template, Stacked):
+        # stacked values -> rebuild a Stacked spec along the same dim, each member
+        # being created from ``shape`` with the stack dim removed
+        stack_dim = template.stack_dim
+        shape = list(shape[:stack_dim]) + list(shape[stack_dim + 1 :])
+        members = [_empty_like_spec(template._specs, shape) for _ in template._specs]
+        return Stacked(*members, dim=stack_dim)
+    # plain TensorSpec values:
+    # - dims on which the examples disagree get size 0;
+    # - if the examples agree on every dim, the first len(shape) dims are kept
+    #   and all following dims get size 0.
+    reference_shape = template.shape
+    spec_shape = [
+        0
+        if any(other.shape[axis] != reference_shape[axis] for other in specs)
+        else size
+        for axis, size in enumerate(reference_shape)
+    ]
+    if 0 not in spec_shape:
+        n_leading = len(shape)
+        spec_shape = [
+            size if axis < n_leading else 0 for axis, size in enumerate(spec_shape)
+        ]
+    # index down to a single element, then expand to the target shape
+    template = template[(0,) * len(template.shape)]
+    return template.expand(spec_shape)
+def check_no_exclusive_keys(spec: TensorSpec, recurse: bool = True):
+    """Tells whether a TensorSpec is free of exclusive keys.
+    Args:
+        spec (TensorSpec): the spec to check
+        recurse (bool): if True, nested specs are checked too. Default is True.
+    Returns:
+        bool: False as soon as a stacked spec whose key set differs from the keys of
+        its StackedComposite is found, True otherwise.
+    """
+    if isinstance(spec, StackedComposite):
+        expected_keys = set(spec.keys())
+        # each member is validated in turn: nested check first, then its own key set
+        return all(
+            (not recurse or check_no_exclusive_keys(member))
+            and set(member.keys()) == expected_keys
+            for member in spec._specs
+        )
+    if isinstance(spec, Composite) and recurse:
+        return all(check_no_exclusive_keys(entry) for entry in spec.values())
+    return True
+def contains_lazy_spec(spec: TensorSpec) -> bool:
+    """Tells whether a spec is, or holds somewhere in its entries, a lazy stacked spec.
+    Args:
+        spec (TensorSpec): the spec to check
+    """
+    if isinstance(spec, (Stacked, StackedComposite)):
+        return True
+    if isinstance(spec, Composite):
+        # a plain composite is lazy as soon as one of its entries is
+        return any(contains_lazy_spec(entry) for entry in spec.values())
+    return False
+class _CloudpickleWrapperMeta(type):
+    """Metaclass that hands back objects which already are instances of the class."""
+    def __call__(cls, obj):
+        # only objects that are not yet instances of ``cls`` go through construction
+        if not isinstance(obj, cls):
+            return super().__call__(obj)
+        return obj
+class CloudpickleWrapper(metaclass=_CloudpickleWrapperMeta):
+    """A wrapper for functions that allow for serialization in multiprocessed settings.
+    The wrapped callable and the keyword arguments given at construction are
+    serialized with ``cloudpickle`` when the wrapper is pickled.
+    """
+    def __init__(self, fn: Callable, **kwargs):
+        if fn.__class__.__name__ == "EnvCreator":
+            raise RuntimeError(
+                "CloudpickleWrapper usage with EnvCreator class is prohibited as it breaks the transmission of shared tensors."
+            )
+        self.fn, self.kwargs = fn, kwargs
+        # mirror the metadata of ``fn.forward`` when it exists, of ``fn`` otherwise
+        wrapped = getattr(fn, "forward", fn)
+        functools.update_wrapper(self, wrapped)
+    def __getstate__(self):
+        payload = (self.fn, self.kwargs)
+        return cloudpickle.dumps(payload)
+    def __setstate__(self, ob: bytes):
+        fn, kwargs = cloudpickle.loads(ob)
+        self.fn = fn
+        self.kwargs = kwargs
+        wrapped = getattr(self.fn, "forward", self.fn)
+        functools.update_wrapper(self, wrapped)
+    def __call__(self, *args, **kwargs) -> Any:
+        # keyword arguments stored at construction win over the ones given at call time
+        merged_kwargs = {**kwargs, **self.kwargs}
+        return self.fn(*args, **merged_kwargs)
+def _process_action_space_spec(action_space, spec):
+    """Reconciles an action space and a spec into a canonical name and a spec.
+    Args:
+        action_space: a TensorSpec, a value accepted by ``_find_action_space``
+            (spec class or string alias), or None.
+        spec: a TensorSpec, possibly a Composite holding an "action" entry, or None.
+    Returns:
+        A tuple ``(action_space, spec)`` where ``action_space`` is the name returned by
+        ``_find_action_space`` and ``spec`` is the Composite that was passed in if there
+        was one, otherwise the leaf spec that was used (``action_space`` itself when it
+        was given as a TensorSpec).
+    Raises:
+        KeyError: if ``spec`` is a Composite without an "action" entry.
+        ValueError: if ``action_space`` is a Composite, if ``action_space`` is a TensorSpec
+            that is not the very same object as the (leaf) spec, if the two do not map to
+            the same name, or if both are None.
+    """
+    original_spec = spec
+    composite_spec = False
+    if isinstance(spec, Composite):
+        # this will break whenever our action is more complex than a single tensor
+        try:
+            if "action" in spec.keys():
+                _key = "action"
+            else:
+                # otherwise take the first nested leaf key whose last element is "action"
+                for _key in spec.keys(True, True):
+                    if isinstance(_key, tuple) and _key[-1] == "action":
+                        break
+                else:
+                    raise KeyError
+            # from here on ``spec`` is the leaf action spec
+            spec = spec[_key]
+            composite_spec = True
+        except KeyError:
+            raise KeyError(
+                "action could not be found in the spec. Make sure "
+                "you pass a spec that is either a native action spec or a composite action spec "
+                "with a leaf 'action' entry. Otherwise, simply remove the spec and use the action_space only."
+            )
+    if action_space is not None:
+        if isinstance(action_space, Composite):
+            raise ValueError("action_space cannot be of type Composite.")
+        # the comparison is by identity: an equal but distinct TensorSpec is rejected
+        if (
+            spec is not None
+            and isinstance(action_space, TensorSpec)
+            and action_space is not spec
+        ):
+            raise ValueError(
+                "Passing an action_space as a TensorSpec and a spec isn't allowed, unless they match."
+            )
+        if isinstance(action_space, TensorSpec):
+            spec = action_space
+        # normalize to the canonical string name
+        action_space = _find_action_space(action_space)
+        # check that the spec and action_space match
+        if spec is not None and _find_action_space(spec) != action_space:
+            raise ValueError(
+                f"The action spec and the action space do not match: got action_space={action_space} and spec={spec}."
+            )
+    elif spec is not None:
+        action_space = _find_action_space(spec)
+    else:
+        raise ValueError(
+            "Neither action_space nor spec was defined. The action space cannot be inferred."
+        )
+    # hand back the composite that was passed in rather than its leaf
+    if composite_spec:
+        spec = original_spec
+    return action_space, spec
+def _find_action_space(action_space) -> str:
+    """Maps a spec, a spec class or a string alias to its canonical action-space name.
+    Args:
+        action_space: a TensorSpec instance (for a Composite, its "action" entry is
+            used), or any key of ``ACTION_SPACE_MAP`` (spec class or string alias).
+    Returns:
+        str: the canonical name stored in ``ACTION_SPACE_MAP``.
+    Raises:
+        KeyError: if a Composite without any "action" entry (root or nested leaf) is passed.
+        ValueError: if the resolved value is not a key of ``ACTION_SPACE_MAP``; this
+            includes unhashable values, which cannot be looked up at all.
+    """
+    if isinstance(action_space, TensorSpec):
+        if isinstance(action_space, Composite):
+            if "action" in action_space.keys():
+                _key = "action"
+            else:
+                # otherwise take the first nested leaf key whose last element is "action"
+                for _key in action_space.keys(True, True):
+                    if isinstance(_key, tuple) and _key[-1] == "action":
+                        break
+                else:
+                    raise KeyError(
+                        "action could not be found in the composite spec: expected an "
+                        "'action' entry, either at the root or as a nested leaf."
+                    )
+            action_space = action_space[_key]
+        # the lookup table is keyed by the exact spec class
+        action_space = type(action_space)
+    try:
+        return ACTION_SPACE_MAP[action_space]
+    except (KeyError, TypeError) as err:
+        # TypeError is raised by the dict lookup for unhashable values (e.g. a list);
+        # report it like any other unsupported action space
+        raise ValueError(
+            f"action_space was not specified/not compatible and could not be retrieved from the value network. Got action_space={action_space}."
+        ) from err

torchrl/envs/__init__.py ADDED Viewed

@@ -0,0 +1,265 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from .async_envs import AsyncEnvPool, ProcessorAsyncEnvPool, ThreadingAsyncEnvPool
+from .batched_envs import ParallelEnv, SerialEnv
+from .common import EnvBase, EnvMetaData, make_tensordict
+from .custom import ChessEnv, LLMHashingEnv, PendulumEnv, TicTacToeEnv
+from .env_creator import env_creator, EnvCreator, get_env_metadata
+from .gym_like import default_info_dict_reader, GymLikeEnv
+from .libs import (
+    BraxEnv,
+    BraxWrapper,
+    DMControlEnv,
+    DMControlWrapper,
+    gym_backend,
+    GymEnv,
+    GymWrapper,
+    HabitatEnv,
+    IsaacGymEnv,
+    IsaacGymWrapper,
+    IsaacLabWrapper,
+    JumanjiEnv,
+    JumanjiWrapper,
+    MeltingpotEnv,
+    MeltingpotWrapper,
+    MOGymEnv,
+    MOGymWrapper,
+    MultiThreadedEnv,
+    MultiThreadedEnvWrapper,
+    OpenMLEnv,
+    OpenSpielEnv,
+    OpenSpielWrapper,
+    PettingZooEnv,
+    PettingZooWrapper,
+    ProcgenEnv,
+    ProcgenWrapper,
+    register_gym_spec_conversion,
+    RoboHiveEnv,
+    set_gym_backend,
+    SMACv2Env,
+    SMACv2Wrapper,
+    UnityMLAgentsEnv,
+    UnityMLAgentsWrapper,
+    VmasEnv,
+    VmasWrapper,
+)
+from .model_based import DreamerDecoder, DreamerEnv, ModelBasedEnvBase
+from .transforms import (
+    ActionDiscretizer,
+    ActionMask,
+    AutoResetEnv,
+    AutoResetTransform,
+    BatchSizeTransform,
+    BinarizeReward,
+    BurnInTransform,
+    CatFrames,
+    CatTensors,
+    CenterCrop,
+    ClipTransform,
+    Compose,
+    ConditionalPolicySwitch,
+    ConditionalSkip,
+    Crop,
+    DeviceCastTransform,
+    DiscreteActionProjection,
+    DoubleToFloat,
+    DTypeCastTransform,
+    EndOfLifeTransform,
+    ExcludeTransform,
+    FiniteTensorDictCheck,
+    FlattenObservation,
+    FrameSkipTransform,
+    GrayScale,
+    gSDENoise,
+    Hash,
+    InitTracker,
+    LineariseRewards,
+    MultiAction,
+    MultiStepTransform,
+    NoopResetEnv,
+    ObservationNorm,
+    ObservationTransform,
+    PermuteTransform,
+    PinMemoryTransform,
+    R3MTransform,
+    RandomCropTensorDict,
+    RemoveEmptySpecs,
+    RenameTransform,
+    Resize,
+    Reward2GoTransform,
+    RewardClipping,
+    RewardScaling,
+    RewardSum,
+    SelectTransform,
+    SignTransform,
+    SqueezeTransform,
+    Stack,
+    StepCounter,
+    TargetReturn,
+    TensorDictPrimer,
+    TimeMaxPool,
+    Timer,
+    Tokenizer,
+    ToTensorImage,
+    TrajCounter,
+    Transform,
+    TransformedEnv,
+    UnaryTransform,
+    UnsqueezeTransform,
+    VC1Transform,
+    VecGymEnvTransform,
+    VecNorm,
+    VecNormV2,
+    VIPRewardTransform,
+    VIPTransform,
+)
+from .utils import (
+    check_env_specs,
+    check_marl_grouping,
+    exploration_type,
+    ExplorationType,
+    get_available_libraries,
+    make_composite_from_td,
+    MarlGroupMapType,
+    set_exploration_type,
+    step_mdp,
+    terminated_or_truncated,
+)
+# Public names exported by this package. Keep the list in plain ASCII sort
+# order (uppercase before lowercase) so additions are easy to place and
+# duplicates are easy to spot.
+__all__ = [
+    "ActionDiscretizer",
+    "ActionMask",
+    "AsyncEnvPool",
+    "AutoResetEnv",
+    "AutoResetTransform",
+    "BatchSizeTransform",
+    "BinarizeReward",
+    "BraxEnv",
+    "BraxWrapper",
+    "BurnInTransform",
+    "CatFrames",
+    "CatTensors",
+    "CenterCrop",
+    "ChessEnv",
+    "ClipTransform",
+    "Compose",
+    "ConditionalPolicySwitch",
+    "ConditionalSkip",
+    "Crop",
+    "DMControlEnv",
+    "DMControlWrapper",
+    "DTypeCastTransform",
+    "DeviceCastTransform",
+    "DiscreteActionProjection",
+    "DoubleToFloat",
+    "DreamerDecoder",
+    "DreamerEnv",
+    "EndOfLifeTransform",
+    "EnvBase",
+    "EnvCreator",
+    "EnvMetaData",
+    "ExcludeTransform",
+    "ExplorationType",
+    "FiniteTensorDictCheck",
+    "FlattenObservation",
+    "FrameSkipTransform",
+    "GrayScale",
+    "GymEnv",
+    "GymLikeEnv",
+    "GymWrapper",
+    "HabitatEnv",
+    "Hash",
+    "InitTracker",
+    "IsaacGymEnv",
+    "IsaacGymWrapper",
+    "IsaacLabWrapper",
+    "JumanjiEnv",
+    "JumanjiWrapper",
+    "LLMHashingEnv",
+    "LineariseRewards",
+    "MOGymEnv",
+    "MOGymWrapper",
+    "MarlGroupMapType",
+    "MeltingpotEnv",
+    "MeltingpotWrapper",
+    "ModelBasedEnvBase",
+    "MultiAction",
+    "MultiStepTransform",
+    "MultiThreadedEnv",
+    "MultiThreadedEnvWrapper",
+    "NoopResetEnv",
+    "ObservationNorm",
+    "ObservationTransform",
+    "OpenMLEnv",
+    "OpenSpielEnv",
+    "OpenSpielWrapper",
+    "ParallelEnv",
+    "PendulumEnv",
+    "PermuteTransform",
+    "PettingZooEnv",
+    "PettingZooWrapper",
+    "PinMemoryTransform",
+    "ProcessorAsyncEnvPool",
+    "ProcgenEnv",
+    "ProcgenWrapper",
+    "R3MTransform",
+    "RandomCropTensorDict",
+    "RemoveEmptySpecs",
+    "RenameTransform",
+    "Resize",
+    "Reward2GoTransform",
+    "RewardClipping",
+    "RewardScaling",
+    "RewardSum",
+    "RoboHiveEnv",
+    "SMACv2Env",
+    "SMACv2Wrapper",
+    "SelectTransform",
+    "SerialEnv",
+    "SignTransform",
+    "SqueezeTransform",
+    "Stack",
+    "StepCounter",
+    "TargetReturn",
+    "TensorDictPrimer",
+    "ThreadingAsyncEnvPool",
+    "TicTacToeEnv",
+    "TimeMaxPool",
+    "Timer",
+    "ToTensorImage",
+    "Tokenizer",
+    "TrajCounter",
+    "Transform",
+    "TransformedEnv",
+    "UnaryTransform",
+    "UnityMLAgentsEnv",
+    "UnityMLAgentsWrapper",
+    "UnsqueezeTransform",
+    "VC1Transform",
+    "VIPRewardTransform",
+    "VIPTransform",
+    "VecGymEnvTransform",
+    "VecNorm",
+    "VecNormV2",
+    "VmasEnv",
+    "VmasWrapper",
+    "check_env_specs",
+    "check_marl_grouping",
+    "default_info_dict_reader",
+    "env_creator",
+    "exploration_type",
+    "gSDENoise",
+    "get_available_libraries",
+    "get_env_metadata",
+    "gym_backend",
+    "make_composite_from_td",
+    "make_tensordict",
+    "register_gym_spec_conversion",
+    "set_exploration_type",
+    "set_gym_backend",
+    "step_mdp",
+    "terminated_or_truncated",
+]