torchrl-0.11.0-cp314-cp314t-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395)
  1. benchmarks/benchmark_batched_envs.py +104 -0
  2. benchmarks/conftest.py +91 -0
  3. benchmarks/ecosystem/gym_env_throughput.py +321 -0
  4. benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
  5. benchmarks/requirements.txt +7 -0
  6. benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
  7. benchmarks/test_collectors_benchmark.py +240 -0
  8. benchmarks/test_compressed_storage_benchmark.py +145 -0
  9. benchmarks/test_envs_benchmark.py +133 -0
  10. benchmarks/test_llm.py +101 -0
  11. benchmarks/test_non_tensor_env_benchmark.py +70 -0
  12. benchmarks/test_objectives_benchmarks.py +1199 -0
  13. benchmarks/test_replaybuffer_benchmark.py +254 -0
  14. sota-check/README.md +35 -0
  15. sota-implementations/README.md +142 -0
  16. sota-implementations/a2c/README.md +39 -0
  17. sota-implementations/a2c/a2c_atari.py +291 -0
  18. sota-implementations/a2c/a2c_mujoco.py +273 -0
  19. sota-implementations/a2c/utils_atari.py +240 -0
  20. sota-implementations/a2c/utils_mujoco.py +160 -0
  21. sota-implementations/bandits/README.md +7 -0
  22. sota-implementations/bandits/dqn.py +126 -0
  23. sota-implementations/cql/cql_offline.py +198 -0
  24. sota-implementations/cql/cql_online.py +249 -0
  25. sota-implementations/cql/discrete_cql_offline.py +180 -0
  26. sota-implementations/cql/discrete_cql_online.py +227 -0
  27. sota-implementations/cql/utils.py +471 -0
  28. sota-implementations/crossq/crossq.py +271 -0
  29. sota-implementations/crossq/utils.py +320 -0
  30. sota-implementations/ddpg/ddpg.py +231 -0
  31. sota-implementations/ddpg/utils.py +325 -0
  32. sota-implementations/decision_transformer/dt.py +163 -0
  33. sota-implementations/decision_transformer/lamb.py +167 -0
  34. sota-implementations/decision_transformer/online_dt.py +178 -0
  35. sota-implementations/decision_transformer/utils.py +562 -0
  36. sota-implementations/discrete_sac/discrete_sac.py +243 -0
  37. sota-implementations/discrete_sac/utils.py +324 -0
  38. sota-implementations/dqn/README.md +30 -0
  39. sota-implementations/dqn/dqn_atari.py +272 -0
  40. sota-implementations/dqn/dqn_cartpole.py +236 -0
  41. sota-implementations/dqn/utils_atari.py +132 -0
  42. sota-implementations/dqn/utils_cartpole.py +90 -0
  43. sota-implementations/dreamer/README.md +129 -0
  44. sota-implementations/dreamer/dreamer.py +586 -0
  45. sota-implementations/dreamer/dreamer_utils.py +1107 -0
  46. sota-implementations/expert-iteration/README.md +352 -0
  47. sota-implementations/expert-iteration/ei_utils.py +770 -0
  48. sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
  49. sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
  50. sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
  51. sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
  52. sota-implementations/gail/gail.py +327 -0
  53. sota-implementations/gail/gail_utils.py +68 -0
  54. sota-implementations/gail/ppo_utils.py +157 -0
  55. sota-implementations/grpo/README.md +273 -0
  56. sota-implementations/grpo/grpo-async.py +437 -0
  57. sota-implementations/grpo/grpo-sync.py +435 -0
  58. sota-implementations/grpo/grpo_utils.py +843 -0
  59. sota-implementations/grpo/requirements_gsm8k.txt +11 -0
  60. sota-implementations/grpo/requirements_ifeval.txt +16 -0
  61. sota-implementations/impala/README.md +33 -0
  62. sota-implementations/impala/impala_multi_node_ray.py +292 -0
  63. sota-implementations/impala/impala_multi_node_submitit.py +284 -0
  64. sota-implementations/impala/impala_single_node.py +261 -0
  65. sota-implementations/impala/utils.py +184 -0
  66. sota-implementations/iql/discrete_iql.py +230 -0
  67. sota-implementations/iql/iql_offline.py +164 -0
  68. sota-implementations/iql/iql_online.py +225 -0
  69. sota-implementations/iql/utils.py +437 -0
  70. sota-implementations/multiagent/README.md +74 -0
  71. sota-implementations/multiagent/iql.py +237 -0
  72. sota-implementations/multiagent/maddpg_iddpg.py +266 -0
  73. sota-implementations/multiagent/mappo_ippo.py +267 -0
  74. sota-implementations/multiagent/qmix_vdn.py +271 -0
  75. sota-implementations/multiagent/sac.py +337 -0
  76. sota-implementations/multiagent/utils/__init__.py +4 -0
  77. sota-implementations/multiagent/utils/logging.py +151 -0
  78. sota-implementations/multiagent/utils/utils.py +43 -0
  79. sota-implementations/ppo/README.md +29 -0
  80. sota-implementations/ppo/ppo_atari.py +305 -0
  81. sota-implementations/ppo/ppo_mujoco.py +293 -0
  82. sota-implementations/ppo/utils_atari.py +238 -0
  83. sota-implementations/ppo/utils_mujoco.py +152 -0
  84. sota-implementations/ppo_trainer/train.py +21 -0
  85. sota-implementations/redq/README.md +7 -0
  86. sota-implementations/redq/redq.py +199 -0
  87. sota-implementations/redq/utils.py +1060 -0
  88. sota-implementations/sac/sac-async.py +266 -0
  89. sota-implementations/sac/sac.py +239 -0
  90. sota-implementations/sac/utils.py +381 -0
  91. sota-implementations/sac_trainer/train.py +16 -0
  92. sota-implementations/td3/td3.py +254 -0
  93. sota-implementations/td3/utils.py +319 -0
  94. sota-implementations/td3_bc/td3_bc.py +177 -0
  95. sota-implementations/td3_bc/utils.py +251 -0
  96. torchrl/.dylibs/libc++.1.0.dylib +0 -0
  97. torchrl/__init__.py +144 -0
  98. torchrl/_extension.py +74 -0
  99. torchrl/_torchrl.cpython-314t-darwin.so +0 -0
  100. torchrl/_utils.py +1431 -0
  101. torchrl/collectors/__init__.py +48 -0
  102. torchrl/collectors/_base.py +1058 -0
  103. torchrl/collectors/_constants.py +88 -0
  104. torchrl/collectors/_multi_async.py +324 -0
  105. torchrl/collectors/_multi_base.py +1805 -0
  106. torchrl/collectors/_multi_sync.py +464 -0
  107. torchrl/collectors/_runner.py +581 -0
  108. torchrl/collectors/_single.py +2009 -0
  109. torchrl/collectors/_single_async.py +259 -0
  110. torchrl/collectors/collectors.py +62 -0
  111. torchrl/collectors/distributed/__init__.py +32 -0
  112. torchrl/collectors/distributed/default_configs.py +133 -0
  113. torchrl/collectors/distributed/generic.py +1306 -0
  114. torchrl/collectors/distributed/ray.py +1092 -0
  115. torchrl/collectors/distributed/rpc.py +1006 -0
  116. torchrl/collectors/distributed/sync.py +731 -0
  117. torchrl/collectors/distributed/utils.py +160 -0
  118. torchrl/collectors/llm/__init__.py +10 -0
  119. torchrl/collectors/llm/base.py +494 -0
  120. torchrl/collectors/llm/ray_collector.py +275 -0
  121. torchrl/collectors/llm/utils.py +36 -0
  122. torchrl/collectors/llm/weight_update/__init__.py +10 -0
  123. torchrl/collectors/llm/weight_update/vllm.py +348 -0
  124. torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
  125. torchrl/collectors/utils.py +433 -0
  126. torchrl/collectors/weight_update.py +591 -0
  127. torchrl/csrc/numpy_utils.h +38 -0
  128. torchrl/csrc/pybind.cpp +27 -0
  129. torchrl/csrc/segment_tree.h +458 -0
  130. torchrl/csrc/torch_utils.h +34 -0
  131. torchrl/csrc/utils.cpp +48 -0
  132. torchrl/csrc/utils.h +31 -0
  133. torchrl/data/__init__.py +187 -0
  134. torchrl/data/datasets/__init__.py +58 -0
  135. torchrl/data/datasets/atari_dqn.py +878 -0
  136. torchrl/data/datasets/common.py +281 -0
  137. torchrl/data/datasets/d4rl.py +489 -0
  138. torchrl/data/datasets/d4rl_infos.py +187 -0
  139. torchrl/data/datasets/gen_dgrl.py +375 -0
  140. torchrl/data/datasets/minari_data.py +643 -0
  141. torchrl/data/datasets/openml.py +177 -0
  142. torchrl/data/datasets/openx.py +798 -0
  143. torchrl/data/datasets/roboset.py +363 -0
  144. torchrl/data/datasets/utils.py +11 -0
  145. torchrl/data/datasets/vd4rl.py +432 -0
  146. torchrl/data/llm/__init__.py +34 -0
  147. torchrl/data/llm/dataset.py +491 -0
  148. torchrl/data/llm/history.py +1378 -0
  149. torchrl/data/llm/prompt.py +198 -0
  150. torchrl/data/llm/reward.py +225 -0
  151. torchrl/data/llm/topk.py +186 -0
  152. torchrl/data/llm/utils.py +543 -0
  153. torchrl/data/map/__init__.py +21 -0
  154. torchrl/data/map/hash.py +185 -0
  155. torchrl/data/map/query.py +204 -0
  156. torchrl/data/map/tdstorage.py +363 -0
  157. torchrl/data/map/tree.py +1434 -0
  158. torchrl/data/map/utils.py +103 -0
  159. torchrl/data/postprocs/__init__.py +8 -0
  160. torchrl/data/postprocs/postprocs.py +391 -0
  161. torchrl/data/replay_buffers/__init__.py +99 -0
  162. torchrl/data/replay_buffers/checkpointers.py +622 -0
  163. torchrl/data/replay_buffers/ray_buffer.py +292 -0
  164. torchrl/data/replay_buffers/replay_buffers.py +2376 -0
  165. torchrl/data/replay_buffers/samplers.py +2578 -0
  166. torchrl/data/replay_buffers/scheduler.py +265 -0
  167. torchrl/data/replay_buffers/storages.py +2412 -0
  168. torchrl/data/replay_buffers/utils.py +1042 -0
  169. torchrl/data/replay_buffers/writers.py +781 -0
  170. torchrl/data/tensor_specs.py +7101 -0
  171. torchrl/data/utils.py +334 -0
  172. torchrl/envs/__init__.py +265 -0
  173. torchrl/envs/async_envs.py +1105 -0
  174. torchrl/envs/batched_envs.py +3093 -0
  175. torchrl/envs/common.py +4241 -0
  176. torchrl/envs/custom/__init__.py +11 -0
  177. torchrl/envs/custom/chess.py +617 -0
  178. torchrl/envs/custom/llm.py +214 -0
  179. torchrl/envs/custom/pendulum.py +401 -0
  180. torchrl/envs/custom/san_moves.txt +29274 -0
  181. torchrl/envs/custom/tictactoeenv.py +288 -0
  182. torchrl/envs/env_creator.py +263 -0
  183. torchrl/envs/gym_like.py +752 -0
  184. torchrl/envs/libs/__init__.py +68 -0
  185. torchrl/envs/libs/_gym_utils.py +326 -0
  186. torchrl/envs/libs/brax.py +846 -0
  187. torchrl/envs/libs/dm_control.py +544 -0
  188. torchrl/envs/libs/envpool.py +447 -0
  189. torchrl/envs/libs/gym.py +2239 -0
  190. torchrl/envs/libs/habitat.py +138 -0
  191. torchrl/envs/libs/isaac_lab.py +87 -0
  192. torchrl/envs/libs/isaacgym.py +203 -0
  193. torchrl/envs/libs/jax_utils.py +166 -0
  194. torchrl/envs/libs/jumanji.py +963 -0
  195. torchrl/envs/libs/meltingpot.py +599 -0
  196. torchrl/envs/libs/openml.py +153 -0
  197. torchrl/envs/libs/openspiel.py +652 -0
  198. torchrl/envs/libs/pettingzoo.py +1042 -0
  199. torchrl/envs/libs/procgen.py +351 -0
  200. torchrl/envs/libs/robohive.py +429 -0
  201. torchrl/envs/libs/smacv2.py +645 -0
  202. torchrl/envs/libs/unity_mlagents.py +891 -0
  203. torchrl/envs/libs/utils.py +147 -0
  204. torchrl/envs/libs/vmas.py +813 -0
  205. torchrl/envs/llm/__init__.py +63 -0
  206. torchrl/envs/llm/chat.py +730 -0
  207. torchrl/envs/llm/datasets/README.md +4 -0
  208. torchrl/envs/llm/datasets/__init__.py +17 -0
  209. torchrl/envs/llm/datasets/gsm8k.py +353 -0
  210. torchrl/envs/llm/datasets/ifeval.py +274 -0
  211. torchrl/envs/llm/envs.py +789 -0
  212. torchrl/envs/llm/libs/README.md +3 -0
  213. torchrl/envs/llm/libs/__init__.py +8 -0
  214. torchrl/envs/llm/libs/mlgym.py +869 -0
  215. torchrl/envs/llm/reward/__init__.py +10 -0
  216. torchrl/envs/llm/reward/gsm8k.py +324 -0
  217. torchrl/envs/llm/reward/ifeval/README.md +13 -0
  218. torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
  219. torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
  220. torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
  221. torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
  222. torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
  223. torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
  224. torchrl/envs/llm/transforms/__init__.py +55 -0
  225. torchrl/envs/llm/transforms/browser.py +292 -0
  226. torchrl/envs/llm/transforms/dataloading.py +859 -0
  227. torchrl/envs/llm/transforms/format.py +73 -0
  228. torchrl/envs/llm/transforms/kl.py +1544 -0
  229. torchrl/envs/llm/transforms/policy_version.py +189 -0
  230. torchrl/envs/llm/transforms/reason.py +323 -0
  231. torchrl/envs/llm/transforms/tokenizer.py +321 -0
  232. torchrl/envs/llm/transforms/tools.py +1955 -0
  233. torchrl/envs/model_based/__init__.py +9 -0
  234. torchrl/envs/model_based/common.py +180 -0
  235. torchrl/envs/model_based/dreamer.py +112 -0
  236. torchrl/envs/transforms/__init__.py +147 -0
  237. torchrl/envs/transforms/functional.py +48 -0
  238. torchrl/envs/transforms/gym_transforms.py +203 -0
  239. torchrl/envs/transforms/module.py +341 -0
  240. torchrl/envs/transforms/r3m.py +372 -0
  241. torchrl/envs/transforms/ray_service.py +663 -0
  242. torchrl/envs/transforms/rb_transforms.py +214 -0
  243. torchrl/envs/transforms/transforms.py +11835 -0
  244. torchrl/envs/transforms/utils.py +94 -0
  245. torchrl/envs/transforms/vc1.py +307 -0
  246. torchrl/envs/transforms/vecnorm.py +845 -0
  247. torchrl/envs/transforms/vip.py +407 -0
  248. torchrl/envs/utils.py +1718 -0
  249. torchrl/envs/vec_envs.py +11 -0
  250. torchrl/modules/__init__.py +206 -0
  251. torchrl/modules/distributions/__init__.py +73 -0
  252. torchrl/modules/distributions/continuous.py +830 -0
  253. torchrl/modules/distributions/discrete.py +908 -0
  254. torchrl/modules/distributions/truncated_normal.py +187 -0
  255. torchrl/modules/distributions/utils.py +233 -0
  256. torchrl/modules/llm/__init__.py +62 -0
  257. torchrl/modules/llm/backends/__init__.py +65 -0
  258. torchrl/modules/llm/backends/vllm/__init__.py +94 -0
  259. torchrl/modules/llm/backends/vllm/_models.py +46 -0
  260. torchrl/modules/llm/backends/vllm/base.py +72 -0
  261. torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
  262. torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
  263. torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
  264. torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
  265. torchrl/modules/llm/policies/__init__.py +28 -0
  266. torchrl/modules/llm/policies/common.py +1809 -0
  267. torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
  268. torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
  269. torchrl/modules/llm/utils.py +23 -0
  270. torchrl/modules/mcts/__init__.py +21 -0
  271. torchrl/modules/mcts/scores.py +579 -0
  272. torchrl/modules/models/__init__.py +86 -0
  273. torchrl/modules/models/batchrenorm.py +119 -0
  274. torchrl/modules/models/decision_transformer.py +179 -0
  275. torchrl/modules/models/exploration.py +731 -0
  276. torchrl/modules/models/llm.py +156 -0
  277. torchrl/modules/models/model_based.py +596 -0
  278. torchrl/modules/models/models.py +1712 -0
  279. torchrl/modules/models/multiagent.py +1067 -0
  280. torchrl/modules/models/recipes/impala.py +185 -0
  281. torchrl/modules/models/utils.py +162 -0
  282. torchrl/modules/planners/__init__.py +10 -0
  283. torchrl/modules/planners/cem.py +228 -0
  284. torchrl/modules/planners/common.py +73 -0
  285. torchrl/modules/planners/mppi.py +265 -0
  286. torchrl/modules/tensordict_module/__init__.py +89 -0
  287. torchrl/modules/tensordict_module/actors.py +2457 -0
  288. torchrl/modules/tensordict_module/common.py +529 -0
  289. torchrl/modules/tensordict_module/exploration.py +814 -0
  290. torchrl/modules/tensordict_module/probabilistic.py +321 -0
  291. torchrl/modules/tensordict_module/rnn.py +1639 -0
  292. torchrl/modules/tensordict_module/sequence.py +132 -0
  293. torchrl/modules/tensordict_module/world_models.py +34 -0
  294. torchrl/modules/utils/__init__.py +38 -0
  295. torchrl/modules/utils/mappings.py +9 -0
  296. torchrl/modules/utils/utils.py +89 -0
  297. torchrl/objectives/__init__.py +78 -0
  298. torchrl/objectives/a2c.py +659 -0
  299. torchrl/objectives/common.py +753 -0
  300. torchrl/objectives/cql.py +1346 -0
  301. torchrl/objectives/crossq.py +710 -0
  302. torchrl/objectives/ddpg.py +453 -0
  303. torchrl/objectives/decision_transformer.py +371 -0
  304. torchrl/objectives/deprecated.py +516 -0
  305. torchrl/objectives/dqn.py +683 -0
  306. torchrl/objectives/dreamer.py +488 -0
  307. torchrl/objectives/functional.py +48 -0
  308. torchrl/objectives/gail.py +258 -0
  309. torchrl/objectives/iql.py +996 -0
  310. torchrl/objectives/llm/__init__.py +30 -0
  311. torchrl/objectives/llm/grpo.py +846 -0
  312. torchrl/objectives/llm/sft.py +482 -0
  313. torchrl/objectives/multiagent/__init__.py +8 -0
  314. torchrl/objectives/multiagent/qmixer.py +396 -0
  315. torchrl/objectives/ppo.py +1669 -0
  316. torchrl/objectives/redq.py +683 -0
  317. torchrl/objectives/reinforce.py +530 -0
  318. torchrl/objectives/sac.py +1580 -0
  319. torchrl/objectives/td3.py +570 -0
  320. torchrl/objectives/td3_bc.py +625 -0
  321. torchrl/objectives/utils.py +782 -0
  322. torchrl/objectives/value/__init__.py +28 -0
  323. torchrl/objectives/value/advantages.py +1956 -0
  324. torchrl/objectives/value/functional.py +1459 -0
  325. torchrl/objectives/value/utils.py +360 -0
  326. torchrl/record/__init__.py +17 -0
  327. torchrl/record/loggers/__init__.py +23 -0
  328. torchrl/record/loggers/common.py +48 -0
  329. torchrl/record/loggers/csv.py +226 -0
  330. torchrl/record/loggers/mlflow.py +142 -0
  331. torchrl/record/loggers/tensorboard.py +139 -0
  332. torchrl/record/loggers/trackio.py +163 -0
  333. torchrl/record/loggers/utils.py +78 -0
  334. torchrl/record/loggers/wandb.py +214 -0
  335. torchrl/record/recorder.py +554 -0
  336. torchrl/services/__init__.py +79 -0
  337. torchrl/services/base.py +109 -0
  338. torchrl/services/ray_service.py +453 -0
  339. torchrl/testing/__init__.py +107 -0
  340. torchrl/testing/assertions.py +179 -0
  341. torchrl/testing/dist_utils.py +122 -0
  342. torchrl/testing/env_creators.py +227 -0
  343. torchrl/testing/env_helper.py +35 -0
  344. torchrl/testing/gym_helpers.py +156 -0
  345. torchrl/testing/llm_mocks.py +119 -0
  346. torchrl/testing/mocking_classes.py +2720 -0
  347. torchrl/testing/modules.py +295 -0
  348. torchrl/testing/mp_helpers.py +15 -0
  349. torchrl/testing/ray_helpers.py +293 -0
  350. torchrl/testing/utils.py +190 -0
  351. torchrl/trainers/__init__.py +42 -0
  352. torchrl/trainers/algorithms/__init__.py +11 -0
  353. torchrl/trainers/algorithms/configs/__init__.py +705 -0
  354. torchrl/trainers/algorithms/configs/collectors.py +216 -0
  355. torchrl/trainers/algorithms/configs/common.py +41 -0
  356. torchrl/trainers/algorithms/configs/data.py +308 -0
  357. torchrl/trainers/algorithms/configs/envs.py +104 -0
  358. torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
  359. torchrl/trainers/algorithms/configs/logging.py +80 -0
  360. torchrl/trainers/algorithms/configs/modules.py +570 -0
  361. torchrl/trainers/algorithms/configs/objectives.py +177 -0
  362. torchrl/trainers/algorithms/configs/trainers.py +340 -0
  363. torchrl/trainers/algorithms/configs/transforms.py +955 -0
  364. torchrl/trainers/algorithms/configs/utils.py +252 -0
  365. torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
  366. torchrl/trainers/algorithms/configs/weight_update.py +159 -0
  367. torchrl/trainers/algorithms/ppo.py +373 -0
  368. torchrl/trainers/algorithms/sac.py +308 -0
  369. torchrl/trainers/helpers/__init__.py +40 -0
  370. torchrl/trainers/helpers/collectors.py +416 -0
  371. torchrl/trainers/helpers/envs.py +573 -0
  372. torchrl/trainers/helpers/logger.py +33 -0
  373. torchrl/trainers/helpers/losses.py +132 -0
  374. torchrl/trainers/helpers/models.py +658 -0
  375. torchrl/trainers/helpers/replay_buffer.py +59 -0
  376. torchrl/trainers/helpers/trainers.py +301 -0
  377. torchrl/trainers/trainers.py +2052 -0
  378. torchrl/weight_update/__init__.py +33 -0
  379. torchrl/weight_update/_distributed.py +749 -0
  380. torchrl/weight_update/_mp.py +624 -0
  381. torchrl/weight_update/_noupdate.py +102 -0
  382. torchrl/weight_update/_ray.py +1032 -0
  383. torchrl/weight_update/_rpc.py +284 -0
  384. torchrl/weight_update/_shared.py +891 -0
  385. torchrl/weight_update/llm/__init__.py +32 -0
  386. torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
  387. torchrl/weight_update/llm/vllm_nccl.py +710 -0
  388. torchrl/weight_update/utils.py +73 -0
  389. torchrl/weight_update/weight_sync_schemes.py +1244 -0
  390. torchrl-0.11.0.dist-info/METADATA +1308 -0
  391. torchrl-0.11.0.dist-info/RECORD +395 -0
  392. torchrl-0.11.0.dist-info/WHEEL +5 -0
  393. torchrl-0.11.0.dist-info/entry_points.txt +2 -0
  394. torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
  395. torchrl-0.11.0.dist-info/top_level.txt +7 -0
torchrl/envs/custom/tictactoeenv.py
@@ -0,0 +1,288 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+ from __future__ import annotations
+
+ import torch
+ from tensordict import TensorDict, TensorDictBase
+ from torchrl.data.tensor_specs import Categorical, Composite, Unbounded
+ from torchrl.envs.common import EnvBase
+
+
+ class TicTacToeEnv(EnvBase):
+     """A Tic-Tac-Toe implementation.
+
+     Keyword Args:
+         single_player (bool, optional): whether one or two players have to be
+             accounted for. ``single_player=True`` means that ``"player1"`` is
+             playing randomly. If ``False`` (default), at each turn,
+             one of the two players has to play.
+         device (torch.device, optional): the device where to put the tensors.
+             Defaults to ``None`` (default device).
+
+     The environment is stateless. To run it across multiple batches, call
+
+         >>> env.reset(TensorDict(batch_size=desired_batch_size))
+
+     If the ``"mask"`` entry is present, ``rand_action`` takes it into account to
+     generate the next action. Any policy executed on this env should take this
+     mask into account, as well as the turn of the player (stored in the ``"turn"``
+     output entry).
+
+     Specs:
+         >>> print(env.specs)
+         Composite(
+             output_spec: Composite(
+                 full_observation_spec: Composite(
+                     board: Categorical(
+                         shape=torch.Size([3, 3]),
+                         space=DiscreteBox(n=2),
+                         dtype=torch.int32,
+                         domain=discrete),
+                     turn: Categorical(
+                         shape=torch.Size([1]),
+                         space=DiscreteBox(n=2),
+                         dtype=torch.int32,
+                         domain=discrete),
+                     mask: Categorical(
+                         shape=torch.Size([9]),
+                         space=DiscreteBox(n=2),
+                         dtype=torch.bool,
+                         domain=discrete),
+                     shape=torch.Size([])),
+                 full_reward_spec: Composite(
+                     player0: Composite(
+                         reward: UnboundedContinuous(
+                             shape=torch.Size([1]),
+                             space=ContinuousBox(
+                                 low=Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, contiguous=True),
+                                 high=Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, contiguous=True)),
+                             dtype=torch.float32,
+                             domain=continuous),
+                         shape=torch.Size([])),
+                     player1: Composite(
+                         reward: UnboundedContinuous(
+                             shape=torch.Size([1]),
+                             space=ContinuousBox(
+                                 low=Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, contiguous=True),
+                                 high=Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, contiguous=True)),
+                             dtype=torch.float32,
+                             domain=continuous),
+                         shape=torch.Size([])),
+                     shape=torch.Size([])),
+                 full_done_spec: Composite(
+                     done: Categorical(
+                         shape=torch.Size([1]),
+                         space=DiscreteBox(n=2),
+                         dtype=torch.bool,
+                         domain=discrete),
+                     terminated: Categorical(
+                         shape=torch.Size([1]),
+                         space=DiscreteBox(n=2),
+                         dtype=torch.bool,
+                         domain=discrete),
+                     truncated: Categorical(
+                         shape=torch.Size([1]),
+                         space=DiscreteBox(n=2),
+                         dtype=torch.bool,
+                         domain=discrete),
+                     shape=torch.Size([])),
+                 shape=torch.Size([])),
+             input_spec: Composite(
+                 full_state_spec: Composite(
+                     board: Categorical(
+                         shape=torch.Size([3, 3]),
+                         space=DiscreteBox(n=2),
+                         dtype=torch.int32,
+                         domain=discrete),
+                     turn: Categorical(
+                         shape=torch.Size([1]),
+                         space=DiscreteBox(n=2),
+                         dtype=torch.int32,
+                         domain=discrete),
+                     mask: Categorical(
+                         shape=torch.Size([9]),
+                         space=DiscreteBox(n=2),
+                         dtype=torch.bool,
+                         domain=discrete), shape=torch.Size([])),
+                 full_action_spec: Composite(
+                     action: Categorical(
+                         shape=torch.Size([1]),
+                         space=DiscreteBox(n=9),
+                         dtype=torch.int64,
+                         domain=discrete),
+                     shape=torch.Size([])),
+                 shape=torch.Size([])),
+             shape=torch.Size([]))
+
+     To run a dummy rollout, execute the following command:
+
+     Examples:
+         >>> env = TicTacToeEnv()
+         >>> env.rollout(10)
+         TensorDict(
+             fields={
+                 action: Tensor(shape=torch.Size([9, 1]), device=cpu, dtype=torch.int64, is_shared=False),
+                 board: Tensor(shape=torch.Size([9, 3, 3]), device=cpu, dtype=torch.int32, is_shared=False),
+                 done: Tensor(shape=torch.Size([9, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                 mask: Tensor(shape=torch.Size([9, 9]), device=cpu, dtype=torch.bool, is_shared=False),
+                 next: TensorDict(
+                     fields={
+                         board: Tensor(shape=torch.Size([9, 3, 3]), device=cpu, dtype=torch.int32, is_shared=False),
+                         done: Tensor(shape=torch.Size([9, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                         mask: Tensor(shape=torch.Size([9, 9]), device=cpu, dtype=torch.bool, is_shared=False),
+                         player0: TensorDict(
+                             fields={
+                                 reward: Tensor(shape=torch.Size([9, 1]), device=cpu, dtype=torch.float32, is_shared=False)},
+                             batch_size=torch.Size([9]),
+                             device=None,
+                             is_shared=False),
+                         player1: TensorDict(
+                             fields={
+                                 reward: Tensor(shape=torch.Size([9, 1]), device=cpu, dtype=torch.float32, is_shared=False)},
+                             batch_size=torch.Size([9]),
+                             device=None,
+                             is_shared=False),
+                         terminated: Tensor(shape=torch.Size([9, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                         truncated: Tensor(shape=torch.Size([9, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                         turn: Tensor(shape=torch.Size([9, 1]), device=cpu, dtype=torch.int32, is_shared=False)},
+                     batch_size=torch.Size([9]),
+                     device=None,
+                     is_shared=False),
+                 terminated: Tensor(shape=torch.Size([9, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                 truncated: Tensor(shape=torch.Size([9, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                 turn: Tensor(shape=torch.Size([9, 1]), device=cpu, dtype=torch.int32, is_shared=False)},
+             batch_size=torch.Size([9]),
+             device=None,
+             is_shared=False)
+
+     """
+
+     # batch_locked is set to False since various batch sizes can be provided to the env
+     batch_locked: bool = False
+
+     def __init__(self, *, single_player: bool = False, device=None):
+         super().__init__(device=device)
+         self.single_player = single_player
+         self.action_spec: Unbounded = Categorical(
+             n=9,
+             shape=(),
+             device=device,
+         )
+
+         self.full_observation_spec: Composite = Composite(
+             board=Unbounded(shape=(3, 3), dtype=torch.int, device=device),
+             turn=Categorical(
+                 2,
+                 shape=(1,),
+                 dtype=torch.int,
+                 device=device,
+             ),
+             mask=Categorical(
+                 2,
+                 shape=(9,),
+                 dtype=torch.bool,
+                 device=device,
+             ),
+             device=device,
+         )
+         self.state_spec: Composite = self.observation_spec.clone()
+
+         self.reward_spec: Unbounded = Composite(
+             {
+                 ("player0", "reward"): Unbounded(shape=(1,), device=device),
+                 ("player1", "reward"): Unbounded(shape=(1,), device=device),
+             },
+             device=device,
+         )
+
+         self.full_done_spec: Categorical = Composite(
+             done=Categorical(2, shape=(1,), dtype=torch.bool, device=device),
+             device=device,
+         )
+         self.full_done_spec["terminated"] = self.full_done_spec["done"].clone()
+         self.full_done_spec["truncated"] = self.full_done_spec["done"].clone()
+
+     def _reset(self, reset_td: TensorDict) -> TensorDict:
+         shape = reset_td.shape if reset_td is not None else ()
+         state = self.state_spec.zero(shape)
+         state["board"] -= 1
+         state["mask"].fill_(True)
+         return state.update(self.full_done_spec.zero(shape))
+
+     def _step(self, state: TensorDict) -> TensorDict:
+         board = state["board"].clone()
+         turn = state["turn"].clone()
+         action = state["action"]
+         board.flatten(-2, -1).scatter_(index=action.unsqueeze(-1), dim=-1, value=1)
+         wins = self.win(board, action)
+
+         mask = board.flatten(-2, -1) == -1
+         done = wins | ~mask.any(-1, keepdim=True)
+         terminated = done.clone()
+
+         reward_0 = wins & (turn == 0)
+         reward_1 = wins & (turn == 1)
+
+         state = TensorDict(
+             {
+                 "done": done,
+                 "terminated": terminated,
+                 ("player0", "reward"): reward_0.float(),
+                 ("player1", "reward"): reward_1.float(),
+                 "board": torch.where(board == -1, board, 1 - board),
+                 "turn": 1 - turn,
+                 "mask": mask,
+             },
+             batch_size=state.batch_size,
+         )
+         if self.single_player:
+             select = (~done & (turn == 0)).squeeze(-1)
+             if select.all():
+                 state_select = state
+             elif select.any():
+                 state_select = state[select]
+             else:
+                 return state
+             state_select = self._step(self.rand_action(state_select))
+             if select.all():
+                 return state_select
+             return torch.where(done, state, state_select)
+         return state
+
+     def _set_seed(self, seed: int | None) -> None:
+         ...
+
+     @staticmethod
+     def win(board: torch.Tensor, action: torch.Tensor):
+         row = action // 3  # type: ignore
+         col = action % 3  # type: ignore
+         if board[..., row, :].sum() == 3:
+             return True
+         if board[..., col].sum() == 3:
+             return True
+         if board.diagonal(0, -2, -1).sum() == 3:
+             return True
+         if board.flip(-1).diagonal(0, -2, -1).sum() == 3:
+             return True
+         return False
+
+     @staticmethod
+     def full(board: torch.Tensor) -> bool:
+         return torch.sym_int(board.abs().sum()) == 9
+
+     @staticmethod
+     def get_action_mask():
+         pass
+
+     def rand_action(self, tensordict: TensorDictBase | None = None):
+         mask = tensordict.get("mask")
+         action_spec = self.action_spec
+         if tensordict.ndim:
+             action_spec = action_spec.expand(tensordict.shape)
+         else:
+             action_spec = action_spec.clone()
+         action_spec.update_mask(mask)
+         tensordict.set(self.action_key, action_spec.rand())
+         return tensordict
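
For orientation, here is a minimal usage sketch of the TicTacToeEnv file added above. It is not part of the diff: the import path is taken from the file list, and it relies on EnvBase.rollout falling back to the mask-aware rand_action defined at the end of the class when no policy is passed.

    from torchrl.envs.custom.tictactoeenv import TicTacToeEnv

    env = TicTacToeEnv(single_player=True)  # "player1" moves are sampled by the env itself
    td = env.reset()                        # empty board (all -1) and a fully open action mask
    rollout = env.rollout(10)               # random rollout; rand_action honours the "mask" entry
    print(rollout["next", "player0", "reward"].sum())  # total reward collected by player0
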
torchrl/envs/env_creator.py
@@ -0,0 +1,263 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from __future__ import annotations
+
+ from collections import OrderedDict
+ from collections.abc import Callable
+ from multiprocessing.sharedctypes import Synchronized
+ from multiprocessing.synchronize import Lock, RLock
+
+ import torch
+ from tensordict import TensorDictBase
+ from torchrl._utils import logger as torchrl_logger
+ from torchrl.data.utils import CloudpickleWrapper
+ from torchrl.envs.common import EnvBase, EnvMetaData
+
+
+ class EnvCreator:
+     """Environment creator class.
+
+     EnvCreator is a generic environment creator class that can substitute
+     lambda functions when creating environments in multiprocessing contexts.
+     If the environment created on a subprocess must share information with the
+     main process (e.g. for the VecNorm transform), EnvCreator will pass the
+     pointers to the tensordicts in shared memory to each process such that
+     all of them are synchronised.
+
+     Args:
+         create_env_fn (callable): a callable that returns an EnvBase
+             instance.
+         create_env_kwargs (dict, optional): the kwargs of the env creator.
+         share_memory (bool, optional): if False, the resulting tensordict
+             from the environment won't be placed in shared memory.
+         **kwargs: additional keyword arguments to be passed to the environment
+             during construction.
+
+     Examples:
+         >>> # We create the same environment on 2 processes using VecNorm
+         >>> # and check that the discounted count of observations matches on
+         >>> # both workers, even if one has not executed any step
+         >>> import time
+         >>> from torchrl.envs.libs.gym import GymEnv
+         >>> from torchrl.envs.transforms import VecNorm, TransformedEnv
+         >>> from torchrl.envs import EnvCreator
+         >>> from torch import multiprocessing as mp
+         >>> env_fn = lambda: TransformedEnv(GymEnv("Pendulum-v1"), VecNorm())
+         >>> env_creator = EnvCreator(env_fn)
+         >>>
+         >>> def test_env1(env_creator):
+         ...     env = env_creator()
+         ...     tensordict = env.reset()
+         ...     for _ in range(10):
+         ...         env.rand_step(tensordict)
+         ...         if tensordict.get(("next", "done")):
+         ...             tensordict = env.reset(tensordict)
+         ...     print("env 1: ", env.transform._td.get(("next", "observation_count")))
+         >>>
+         >>> def test_env2(env_creator):
+         ...     env = env_creator()
+         ...     time.sleep(5)
+         ...     print("env 2: ", env.transform._td.get(("next", "observation_count")))
+         >>>
+         >>> if __name__ == "__main__":
+         ...     ps = []
+         ...     p1 = mp.Process(target=test_env1, args=(env_creator,))
+         ...     p1.start()
+         ...     ps.append(p1)
+         ...     p2 = mp.Process(target=test_env2, args=(env_creator,))
+         ...     p2.start()
+         ...     ps.append(p1)
+         ...     for p in ps:
+         ...         p.join()
+         env 1: tensor([11.9934])
+         env 2: tensor([11.9934])
+     """
+
+     def __init__(
+         self,
+         create_env_fn: Callable[..., EnvBase],
+         create_env_kwargs: dict | None = None,
+         share_memory: bool = True,
+         **kwargs,
+     ) -> None:
+         if not isinstance(create_env_fn, (EnvCreator, CloudpickleWrapper)):
+             self.create_env_fn = CloudpickleWrapper(create_env_fn)
+         else:
+             self.create_env_fn = create_env_fn
+
+         self.create_env_kwargs = kwargs
+         if isinstance(create_env_kwargs, dict):
+             self.create_env_kwargs.update(create_env_kwargs)
+         self.initialized = False
+         self._meta_data = None
+         self._share_memory = share_memory
+         self.init_()
+
+     def make_variant(self, **kwargs) -> EnvCreator:
+         """Creates a variant of the EnvCreator, pointing to the same underlying metadata but with different keyword arguments during construction.
+
+         This can be useful with transforms that share a state, like :class:`~torchrl.envs.TrajCounter`.
+
+         Examples:
+             >>> from torchrl.envs import GymEnv
+             >>> env_creator_pendulum = EnvCreator(GymEnv, env_name="Pendulum-v1")
+             >>> env_creator_cartpole = env_creator_pendulum.make_variant(env_name="CartPole-v1")
+
+         """
+         # Copy self
+         out = type(self).__new__(type(self))
+         out.__dict__.update(self.__dict__)
+         out.create_env_kwargs.update(kwargs)
+         return out
+
+     def share_memory(self, state_dict: OrderedDict) -> None:
+         for key, item in list(state_dict.items()):
+             if isinstance(item, (TensorDictBase,)):
+                 if not item.is_shared():
+                     item.share_memory_()
+                 else:
+                     torchrl_logger.info(
+                         f"{self.env_type}: {item} is already shared"
+                     )  # , deleting key'val)
+                     del state_dict[key]
+             elif isinstance(item, OrderedDict):
+                 self.share_memory(item)
+             elif isinstance(item, torch.Tensor):
+                 del state_dict[key]
+
+     @property
+     def meta_data(self) -> EnvMetaData:
+         if self._meta_data is None:
+             raise RuntimeError(
+                 "meta_data is None in EnvCreator. " "Make sure init_() has been called."
+             )
+         return self._meta_data
+
+     @meta_data.setter
+     def meta_data(self, value: EnvMetaData):
+         self._meta_data = value
+
+     @staticmethod
+     def _is_mp_value(val):
+         if isinstance(val, (Synchronized,)) and hasattr(val, "_obj"):
+             return True
+         # Also check for lock types which need to be shared across processes
+         if isinstance(val, (Lock, RLock)):
+             return True
+         return False
+
+     @classmethod
+     def _find_mp_values(cls, env_or_transform, values, prefix=()):
+         from torchrl.envs.transforms.transforms import Compose, TransformedEnv
+
+         if isinstance(env_or_transform, EnvBase) and isinstance(
+             env_or_transform, TransformedEnv
+         ):
+             cls._find_mp_values(
+                 env_or_transform.transform,
+                 values=values,
+                 prefix=prefix + ("transform",),
+             )
+             cls._find_mp_values(
+                 env_or_transform.base_env, values=values, prefix=prefix + ("base_env",)
+             )
+         elif isinstance(env_or_transform, Compose):
+             for i, t in enumerate(env_or_transform.transforms):
+                 cls._find_mp_values(t, values=values, prefix=prefix + (i,))
+         for k, v in env_or_transform.__dict__.items():
+             if cls._is_mp_value(v):
+                 values.append((prefix + (k,), v))
+         return values
+
+     def init_(self) -> EnvCreator:
+         shadow_env = self.create_env_fn(**self.create_env_kwargs)
+         tensordict = shadow_env.reset()
+         shadow_env.rand_step(tensordict)
+         self.env_type = type(shadow_env)
+         self._transform_state_dict = shadow_env.state_dict()
+         # Extract any mp.Value object from the env
+         self._mp_values = self._find_mp_values(shadow_env, values=[])
+
+         if self._share_memory:
+             self.share_memory(self._transform_state_dict)
+         self.initialized = True
+         self.meta_data = EnvMetaData.metadata_from_env(shadow_env)
+         shadow_env.close()
+         del shadow_env
+         return self
+
+     @classmethod
+     def _set_mp_value(cls, env, key, value):
+         if len(key) > 1:
+             if isinstance(key[0], int):
+                 return cls._set_mp_value(env[key[0]], key[1:], value)
+             else:
+                 return cls._set_mp_value(getattr(env, key[0]), key[1:], value)
+         else:
+             setattr(env, key[0], value)
+
+     def __call__(self, **kwargs) -> EnvBase:
+         if not self.initialized:
+             raise RuntimeError("EnvCreator must be initialized before being called.")
+         kwargs.update(self.create_env_kwargs)  # create_env_kwargs precedes
+         env = self.create_env_fn(**kwargs)
+         if self._mp_values:
+             for k, v in self._mp_values:
+                 self._set_mp_value(env, k, v)
+         env.load_state_dict(self._transform_state_dict, strict=False)
+         return env
+
+     def state_dict(self) -> OrderedDict:
+         if self._transform_state_dict is None:
+             return OrderedDict()
+         return self._transform_state_dict
+
+     def load_state_dict(self, state_dict: OrderedDict) -> None:
+         if self._transform_state_dict is not None:
+             for key, item in state_dict.items():
+                 item_to_update = self._transform_state_dict[key]
+                 item_to_update.copy_(item)
+
+     def __repr__(self) -> str:
+         substr = ", ".join(
+             [f"{key}: {type(item)}" for key, item in self.create_env_kwargs]
+         )
+         return f"EnvCreator({self.create_env_fn}({substr}))"
+
+
+ def env_creator(fun: Callable) -> EnvCreator:
+     """Helper function to call `EnvCreator`."""
+     return EnvCreator(fun)
+
+
+ def get_env_metadata(env_or_creator: EnvBase | Callable, kwargs: dict | None = None):
+     """Retrieves a EnvMetaData object from an env."""
+     if isinstance(env_or_creator, (EnvBase,)):
+         return EnvMetaData.metadata_from_env(env_or_creator)
+     elif not isinstance(env_or_creator, EnvBase) and not isinstance(
+         env_or_creator, EnvCreator
+     ):
+         # then env is a creator
+         if kwargs is None:
+             kwargs = {}
+         env = env_or_creator(**kwargs)
+         return EnvMetaData.metadata_from_env(env)
+     elif isinstance(env_or_creator, EnvCreator):
+         if not (
+             kwargs == env_or_creator.create_env_kwargs
+             or kwargs is None
+             or len(kwargs) == 0
+         ):
+             raise RuntimeError(
+                 "kwargs mismatch between EnvCreator and the kwargs provided to get_env_metadata:"
+                 f"got EnvCreator.create_env_kwargs={env_or_creator.create_env_kwargs} and "
+                 f"kwargs = {kwargs}"
+             )
+         return env_or_creator.meta_data.clone()
+     else:
+         raise NotImplementedError(
+             f"env of type {type(env_or_creator)} is not supported by get_env_metadata."
+         )
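
For context, a short sketch (not part of the diff) of how EnvCreator is typically used as a picklable substitute for an env-building lambda. ParallelEnv and GymEnv are assumed from torchrl's public API, and the example requires a Gym backend such as gymnasium to be installed.

    from torchrl.envs import EnvCreator, ParallelEnv
    from torchrl.envs.libs.gym import GymEnv

    make_env = EnvCreator(lambda: GymEnv("Pendulum-v1"))  # lambda is wrapped in a CloudpickleWrapper
    penv = ParallelEnv(2, make_env)  # each worker rebuilds the env; the cached state_dict is reused
    rollout = penv.rollout(5)
    penv.close()

Because init_() builds a shadow environment eagerly, the construction cost and metadata extraction are paid once when the EnvCreator is created rather than in every worker.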