torchrl 0.11.0__cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/benchmark_batched_envs.py +104 -0
- benchmarks/conftest.py +91 -0
- benchmarks/ecosystem/gym_env_throughput.py +321 -0
- benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
- benchmarks/requirements.txt +7 -0
- benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
- benchmarks/test_collectors_benchmark.py +240 -0
- benchmarks/test_compressed_storage_benchmark.py +145 -0
- benchmarks/test_envs_benchmark.py +133 -0
- benchmarks/test_llm.py +101 -0
- benchmarks/test_non_tensor_env_benchmark.py +70 -0
- benchmarks/test_objectives_benchmarks.py +1199 -0
- benchmarks/test_replaybuffer_benchmark.py +254 -0
- sota-check/README.md +35 -0
- sota-implementations/README.md +142 -0
- sota-implementations/a2c/README.md +39 -0
- sota-implementations/a2c/a2c_atari.py +291 -0
- sota-implementations/a2c/a2c_mujoco.py +273 -0
- sota-implementations/a2c/utils_atari.py +240 -0
- sota-implementations/a2c/utils_mujoco.py +160 -0
- sota-implementations/bandits/README.md +7 -0
- sota-implementations/bandits/dqn.py +126 -0
- sota-implementations/cql/cql_offline.py +198 -0
- sota-implementations/cql/cql_online.py +249 -0
- sota-implementations/cql/discrete_cql_offline.py +180 -0
- sota-implementations/cql/discrete_cql_online.py +227 -0
- sota-implementations/cql/utils.py +471 -0
- sota-implementations/crossq/crossq.py +271 -0
- sota-implementations/crossq/utils.py +320 -0
- sota-implementations/ddpg/ddpg.py +231 -0
- sota-implementations/ddpg/utils.py +325 -0
- sota-implementations/decision_transformer/dt.py +163 -0
- sota-implementations/decision_transformer/lamb.py +167 -0
- sota-implementations/decision_transformer/online_dt.py +178 -0
- sota-implementations/decision_transformer/utils.py +562 -0
- sota-implementations/discrete_sac/discrete_sac.py +243 -0
- sota-implementations/discrete_sac/utils.py +324 -0
- sota-implementations/dqn/README.md +30 -0
- sota-implementations/dqn/dqn_atari.py +272 -0
- sota-implementations/dqn/dqn_cartpole.py +236 -0
- sota-implementations/dqn/utils_atari.py +132 -0
- sota-implementations/dqn/utils_cartpole.py +90 -0
- sota-implementations/dreamer/README.md +129 -0
- sota-implementations/dreamer/dreamer.py +586 -0
- sota-implementations/dreamer/dreamer_utils.py +1107 -0
- sota-implementations/expert-iteration/README.md +352 -0
- sota-implementations/expert-iteration/ei_utils.py +770 -0
- sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
- sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
- sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
- sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
- sota-implementations/gail/gail.py +327 -0
- sota-implementations/gail/gail_utils.py +68 -0
- sota-implementations/gail/ppo_utils.py +157 -0
- sota-implementations/grpo/README.md +273 -0
- sota-implementations/grpo/grpo-async.py +437 -0
- sota-implementations/grpo/grpo-sync.py +435 -0
- sota-implementations/grpo/grpo_utils.py +843 -0
- sota-implementations/grpo/requirements_gsm8k.txt +11 -0
- sota-implementations/grpo/requirements_ifeval.txt +16 -0
- sota-implementations/impala/README.md +33 -0
- sota-implementations/impala/impala_multi_node_ray.py +292 -0
- sota-implementations/impala/impala_multi_node_submitit.py +284 -0
- sota-implementations/impala/impala_single_node.py +261 -0
- sota-implementations/impala/utils.py +184 -0
- sota-implementations/iql/discrete_iql.py +230 -0
- sota-implementations/iql/iql_offline.py +164 -0
- sota-implementations/iql/iql_online.py +225 -0
- sota-implementations/iql/utils.py +437 -0
- sota-implementations/multiagent/README.md +74 -0
- sota-implementations/multiagent/iql.py +237 -0
- sota-implementations/multiagent/maddpg_iddpg.py +266 -0
- sota-implementations/multiagent/mappo_ippo.py +267 -0
- sota-implementations/multiagent/qmix_vdn.py +271 -0
- sota-implementations/multiagent/sac.py +337 -0
- sota-implementations/multiagent/utils/__init__.py +4 -0
- sota-implementations/multiagent/utils/logging.py +151 -0
- sota-implementations/multiagent/utils/utils.py +43 -0
- sota-implementations/ppo/README.md +29 -0
- sota-implementations/ppo/ppo_atari.py +305 -0
- sota-implementations/ppo/ppo_mujoco.py +293 -0
- sota-implementations/ppo/utils_atari.py +238 -0
- sota-implementations/ppo/utils_mujoco.py +152 -0
- sota-implementations/ppo_trainer/train.py +21 -0
- sota-implementations/redq/README.md +7 -0
- sota-implementations/redq/redq.py +199 -0
- sota-implementations/redq/utils.py +1060 -0
- sota-implementations/sac/sac-async.py +266 -0
- sota-implementations/sac/sac.py +239 -0
- sota-implementations/sac/utils.py +381 -0
- sota-implementations/sac_trainer/train.py +16 -0
- sota-implementations/td3/td3.py +254 -0
- sota-implementations/td3/utils.py +319 -0
- sota-implementations/td3_bc/td3_bc.py +177 -0
- sota-implementations/td3_bc/utils.py +251 -0
- torchrl/__init__.py +144 -0
- torchrl/_extension.py +74 -0
- torchrl/_torchrl.cp314-win_amd64.pyd +0 -0
- torchrl/_utils.py +1431 -0
- torchrl/collectors/__init__.py +48 -0
- torchrl/collectors/_base.py +1058 -0
- torchrl/collectors/_constants.py +88 -0
- torchrl/collectors/_multi_async.py +324 -0
- torchrl/collectors/_multi_base.py +1805 -0
- torchrl/collectors/_multi_sync.py +464 -0
- torchrl/collectors/_runner.py +581 -0
- torchrl/collectors/_single.py +2009 -0
- torchrl/collectors/_single_async.py +259 -0
- torchrl/collectors/collectors.py +62 -0
- torchrl/collectors/distributed/__init__.py +32 -0
- torchrl/collectors/distributed/default_configs.py +133 -0
- torchrl/collectors/distributed/generic.py +1306 -0
- torchrl/collectors/distributed/ray.py +1092 -0
- torchrl/collectors/distributed/rpc.py +1006 -0
- torchrl/collectors/distributed/sync.py +731 -0
- torchrl/collectors/distributed/utils.py +160 -0
- torchrl/collectors/llm/__init__.py +10 -0
- torchrl/collectors/llm/base.py +494 -0
- torchrl/collectors/llm/ray_collector.py +275 -0
- torchrl/collectors/llm/utils.py +36 -0
- torchrl/collectors/llm/weight_update/__init__.py +10 -0
- torchrl/collectors/llm/weight_update/vllm.py +348 -0
- torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
- torchrl/collectors/utils.py +433 -0
- torchrl/collectors/weight_update.py +591 -0
- torchrl/csrc/numpy_utils.h +38 -0
- torchrl/csrc/pybind.cpp +27 -0
- torchrl/csrc/segment_tree.h +458 -0
- torchrl/csrc/torch_utils.h +34 -0
- torchrl/csrc/utils.cpp +48 -0
- torchrl/csrc/utils.h +31 -0
- torchrl/data/__init__.py +187 -0
- torchrl/data/datasets/__init__.py +58 -0
- torchrl/data/datasets/atari_dqn.py +878 -0
- torchrl/data/datasets/common.py +281 -0
- torchrl/data/datasets/d4rl.py +489 -0
- torchrl/data/datasets/d4rl_infos.py +187 -0
- torchrl/data/datasets/gen_dgrl.py +375 -0
- torchrl/data/datasets/minari_data.py +643 -0
- torchrl/data/datasets/openml.py +177 -0
- torchrl/data/datasets/openx.py +798 -0
- torchrl/data/datasets/roboset.py +363 -0
- torchrl/data/datasets/utils.py +11 -0
- torchrl/data/datasets/vd4rl.py +432 -0
- torchrl/data/llm/__init__.py +34 -0
- torchrl/data/llm/dataset.py +491 -0
- torchrl/data/llm/history.py +1378 -0
- torchrl/data/llm/prompt.py +198 -0
- torchrl/data/llm/reward.py +225 -0
- torchrl/data/llm/topk.py +186 -0
- torchrl/data/llm/utils.py +543 -0
- torchrl/data/map/__init__.py +21 -0
- torchrl/data/map/hash.py +185 -0
- torchrl/data/map/query.py +204 -0
- torchrl/data/map/tdstorage.py +363 -0
- torchrl/data/map/tree.py +1434 -0
- torchrl/data/map/utils.py +103 -0
- torchrl/data/postprocs/__init__.py +8 -0
- torchrl/data/postprocs/postprocs.py +391 -0
- torchrl/data/replay_buffers/__init__.py +99 -0
- torchrl/data/replay_buffers/checkpointers.py +622 -0
- torchrl/data/replay_buffers/ray_buffer.py +292 -0
- torchrl/data/replay_buffers/replay_buffers.py +2376 -0
- torchrl/data/replay_buffers/samplers.py +2578 -0
- torchrl/data/replay_buffers/scheduler.py +265 -0
- torchrl/data/replay_buffers/storages.py +2412 -0
- torchrl/data/replay_buffers/utils.py +1042 -0
- torchrl/data/replay_buffers/writers.py +781 -0
- torchrl/data/tensor_specs.py +7101 -0
- torchrl/data/utils.py +334 -0
- torchrl/envs/__init__.py +265 -0
- torchrl/envs/async_envs.py +1105 -0
- torchrl/envs/batched_envs.py +3093 -0
- torchrl/envs/common.py +4241 -0
- torchrl/envs/custom/__init__.py +11 -0
- torchrl/envs/custom/chess.py +617 -0
- torchrl/envs/custom/llm.py +214 -0
- torchrl/envs/custom/pendulum.py +401 -0
- torchrl/envs/custom/san_moves.txt +29274 -0
- torchrl/envs/custom/tictactoeenv.py +288 -0
- torchrl/envs/env_creator.py +263 -0
- torchrl/envs/gym_like.py +752 -0
- torchrl/envs/libs/__init__.py +68 -0
- torchrl/envs/libs/_gym_utils.py +326 -0
- torchrl/envs/libs/brax.py +846 -0
- torchrl/envs/libs/dm_control.py +544 -0
- torchrl/envs/libs/envpool.py +447 -0
- torchrl/envs/libs/gym.py +2239 -0
- torchrl/envs/libs/habitat.py +138 -0
- torchrl/envs/libs/isaac_lab.py +87 -0
- torchrl/envs/libs/isaacgym.py +203 -0
- torchrl/envs/libs/jax_utils.py +166 -0
- torchrl/envs/libs/jumanji.py +963 -0
- torchrl/envs/libs/meltingpot.py +599 -0
- torchrl/envs/libs/openml.py +153 -0
- torchrl/envs/libs/openspiel.py +652 -0
- torchrl/envs/libs/pettingzoo.py +1042 -0
- torchrl/envs/libs/procgen.py +351 -0
- torchrl/envs/libs/robohive.py +429 -0
- torchrl/envs/libs/smacv2.py +645 -0
- torchrl/envs/libs/unity_mlagents.py +891 -0
- torchrl/envs/libs/utils.py +147 -0
- torchrl/envs/libs/vmas.py +813 -0
- torchrl/envs/llm/__init__.py +63 -0
- torchrl/envs/llm/chat.py +730 -0
- torchrl/envs/llm/datasets/README.md +4 -0
- torchrl/envs/llm/datasets/__init__.py +17 -0
- torchrl/envs/llm/datasets/gsm8k.py +353 -0
- torchrl/envs/llm/datasets/ifeval.py +274 -0
- torchrl/envs/llm/envs.py +789 -0
- torchrl/envs/llm/libs/README.md +3 -0
- torchrl/envs/llm/libs/__init__.py +8 -0
- torchrl/envs/llm/libs/mlgym.py +869 -0
- torchrl/envs/llm/reward/__init__.py +10 -0
- torchrl/envs/llm/reward/gsm8k.py +324 -0
- torchrl/envs/llm/reward/ifeval/README.md +13 -0
- torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
- torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
- torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
- torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
- torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
- torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
- torchrl/envs/llm/transforms/__init__.py +55 -0
- torchrl/envs/llm/transforms/browser.py +292 -0
- torchrl/envs/llm/transforms/dataloading.py +859 -0
- torchrl/envs/llm/transforms/format.py +73 -0
- torchrl/envs/llm/transforms/kl.py +1544 -0
- torchrl/envs/llm/transforms/policy_version.py +189 -0
- torchrl/envs/llm/transforms/reason.py +323 -0
- torchrl/envs/llm/transforms/tokenizer.py +321 -0
- torchrl/envs/llm/transforms/tools.py +1955 -0
- torchrl/envs/model_based/__init__.py +9 -0
- torchrl/envs/model_based/common.py +180 -0
- torchrl/envs/model_based/dreamer.py +112 -0
- torchrl/envs/transforms/__init__.py +147 -0
- torchrl/envs/transforms/functional.py +48 -0
- torchrl/envs/transforms/gym_transforms.py +203 -0
- torchrl/envs/transforms/module.py +341 -0
- torchrl/envs/transforms/r3m.py +372 -0
- torchrl/envs/transforms/ray_service.py +663 -0
- torchrl/envs/transforms/rb_transforms.py +214 -0
- torchrl/envs/transforms/transforms.py +11835 -0
- torchrl/envs/transforms/utils.py +94 -0
- torchrl/envs/transforms/vc1.py +307 -0
- torchrl/envs/transforms/vecnorm.py +845 -0
- torchrl/envs/transforms/vip.py +407 -0
- torchrl/envs/utils.py +1718 -0
- torchrl/envs/vec_envs.py +11 -0
- torchrl/modules/__init__.py +206 -0
- torchrl/modules/distributions/__init__.py +73 -0
- torchrl/modules/distributions/continuous.py +830 -0
- torchrl/modules/distributions/discrete.py +908 -0
- torchrl/modules/distributions/truncated_normal.py +187 -0
- torchrl/modules/distributions/utils.py +233 -0
- torchrl/modules/llm/__init__.py +62 -0
- torchrl/modules/llm/backends/__init__.py +65 -0
- torchrl/modules/llm/backends/vllm/__init__.py +94 -0
- torchrl/modules/llm/backends/vllm/_models.py +46 -0
- torchrl/modules/llm/backends/vllm/base.py +72 -0
- torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
- torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
- torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
- torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
- torchrl/modules/llm/policies/__init__.py +28 -0
- torchrl/modules/llm/policies/common.py +1809 -0
- torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
- torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
- torchrl/modules/llm/utils.py +23 -0
- torchrl/modules/mcts/__init__.py +21 -0
- torchrl/modules/mcts/scores.py +579 -0
- torchrl/modules/models/__init__.py +86 -0
- torchrl/modules/models/batchrenorm.py +119 -0
- torchrl/modules/models/decision_transformer.py +179 -0
- torchrl/modules/models/exploration.py +731 -0
- torchrl/modules/models/llm.py +156 -0
- torchrl/modules/models/model_based.py +596 -0
- torchrl/modules/models/models.py +1712 -0
- torchrl/modules/models/multiagent.py +1067 -0
- torchrl/modules/models/recipes/impala.py +185 -0
- torchrl/modules/models/utils.py +162 -0
- torchrl/modules/planners/__init__.py +10 -0
- torchrl/modules/planners/cem.py +228 -0
- torchrl/modules/planners/common.py +73 -0
- torchrl/modules/planners/mppi.py +265 -0
- torchrl/modules/tensordict_module/__init__.py +89 -0
- torchrl/modules/tensordict_module/actors.py +2457 -0
- torchrl/modules/tensordict_module/common.py +529 -0
- torchrl/modules/tensordict_module/exploration.py +814 -0
- torchrl/modules/tensordict_module/probabilistic.py +321 -0
- torchrl/modules/tensordict_module/rnn.py +1639 -0
- torchrl/modules/tensordict_module/sequence.py +132 -0
- torchrl/modules/tensordict_module/world_models.py +34 -0
- torchrl/modules/utils/__init__.py +38 -0
- torchrl/modules/utils/mappings.py +9 -0
- torchrl/modules/utils/utils.py +89 -0
- torchrl/objectives/__init__.py +78 -0
- torchrl/objectives/a2c.py +659 -0
- torchrl/objectives/common.py +753 -0
- torchrl/objectives/cql.py +1346 -0
- torchrl/objectives/crossq.py +710 -0
- torchrl/objectives/ddpg.py +453 -0
- torchrl/objectives/decision_transformer.py +371 -0
- torchrl/objectives/deprecated.py +516 -0
- torchrl/objectives/dqn.py +683 -0
- torchrl/objectives/dreamer.py +488 -0
- torchrl/objectives/functional.py +48 -0
- torchrl/objectives/gail.py +258 -0
- torchrl/objectives/iql.py +996 -0
- torchrl/objectives/llm/__init__.py +30 -0
- torchrl/objectives/llm/grpo.py +846 -0
- torchrl/objectives/llm/sft.py +482 -0
- torchrl/objectives/multiagent/__init__.py +8 -0
- torchrl/objectives/multiagent/qmixer.py +396 -0
- torchrl/objectives/ppo.py +1669 -0
- torchrl/objectives/redq.py +683 -0
- torchrl/objectives/reinforce.py +530 -0
- torchrl/objectives/sac.py +1580 -0
- torchrl/objectives/td3.py +570 -0
- torchrl/objectives/td3_bc.py +625 -0
- torchrl/objectives/utils.py +782 -0
- torchrl/objectives/value/__init__.py +28 -0
- torchrl/objectives/value/advantages.py +1956 -0
- torchrl/objectives/value/functional.py +1459 -0
- torchrl/objectives/value/utils.py +360 -0
- torchrl/record/__init__.py +17 -0
- torchrl/record/loggers/__init__.py +23 -0
- torchrl/record/loggers/common.py +48 -0
- torchrl/record/loggers/csv.py +226 -0
- torchrl/record/loggers/mlflow.py +142 -0
- torchrl/record/loggers/tensorboard.py +139 -0
- torchrl/record/loggers/trackio.py +163 -0
- torchrl/record/loggers/utils.py +78 -0
- torchrl/record/loggers/wandb.py +214 -0
- torchrl/record/recorder.py +554 -0
- torchrl/services/__init__.py +79 -0
- torchrl/services/base.py +109 -0
- torchrl/services/ray_service.py +453 -0
- torchrl/testing/__init__.py +107 -0
- torchrl/testing/assertions.py +179 -0
- torchrl/testing/dist_utils.py +122 -0
- torchrl/testing/env_creators.py +227 -0
- torchrl/testing/env_helper.py +35 -0
- torchrl/testing/gym_helpers.py +156 -0
- torchrl/testing/llm_mocks.py +119 -0
- torchrl/testing/mocking_classes.py +2720 -0
- torchrl/testing/modules.py +295 -0
- torchrl/testing/mp_helpers.py +15 -0
- torchrl/testing/ray_helpers.py +293 -0
- torchrl/testing/utils.py +190 -0
- torchrl/trainers/__init__.py +42 -0
- torchrl/trainers/algorithms/__init__.py +11 -0
- torchrl/trainers/algorithms/configs/__init__.py +705 -0
- torchrl/trainers/algorithms/configs/collectors.py +216 -0
- torchrl/trainers/algorithms/configs/common.py +41 -0
- torchrl/trainers/algorithms/configs/data.py +308 -0
- torchrl/trainers/algorithms/configs/envs.py +104 -0
- torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
- torchrl/trainers/algorithms/configs/logging.py +80 -0
- torchrl/trainers/algorithms/configs/modules.py +570 -0
- torchrl/trainers/algorithms/configs/objectives.py +177 -0
- torchrl/trainers/algorithms/configs/trainers.py +340 -0
- torchrl/trainers/algorithms/configs/transforms.py +955 -0
- torchrl/trainers/algorithms/configs/utils.py +252 -0
- torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
- torchrl/trainers/algorithms/configs/weight_update.py +159 -0
- torchrl/trainers/algorithms/ppo.py +373 -0
- torchrl/trainers/algorithms/sac.py +308 -0
- torchrl/trainers/helpers/__init__.py +40 -0
- torchrl/trainers/helpers/collectors.py +416 -0
- torchrl/trainers/helpers/envs.py +573 -0
- torchrl/trainers/helpers/logger.py +33 -0
- torchrl/trainers/helpers/losses.py +132 -0
- torchrl/trainers/helpers/models.py +658 -0
- torchrl/trainers/helpers/replay_buffer.py +59 -0
- torchrl/trainers/helpers/trainers.py +301 -0
- torchrl/trainers/trainers.py +2052 -0
- torchrl/weight_update/__init__.py +33 -0
- torchrl/weight_update/_distributed.py +749 -0
- torchrl/weight_update/_mp.py +624 -0
- torchrl/weight_update/_noupdate.py +102 -0
- torchrl/weight_update/_ray.py +1032 -0
- torchrl/weight_update/_rpc.py +284 -0
- torchrl/weight_update/_shared.py +891 -0
- torchrl/weight_update/llm/__init__.py +32 -0
- torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
- torchrl/weight_update/llm/vllm_nccl.py +710 -0
- torchrl/weight_update/utils.py +73 -0
- torchrl/weight_update/weight_sync_schemes.py +1244 -0
- torchrl-0.11.0.dist-info/LICENSE +21 -0
- torchrl-0.11.0.dist-info/METADATA +1307 -0
- torchrl-0.11.0.dist-info/RECORD +394 -0
- torchrl-0.11.0.dist-info/WHEEL +5 -0
- torchrl-0.11.0.dist-info/entry_points.txt +2 -0
- torchrl-0.11.0.dist-info/top_level.txt +7 -0
torchrl/data/map/tree.py
ADDED
|
@@ -0,0 +1,1434 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
#
|
|
3
|
+
# This source code is licensed under the MIT license found in the
|
|
4
|
+
# LICENSE file in the root directory of this source tree.
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import weakref
|
|
8
|
+
from collections import deque
|
|
9
|
+
from collections.abc import Callable
|
|
10
|
+
from typing import Any, Literal
|
|
11
|
+
|
|
12
|
+
import torch
|
|
13
|
+
from tensordict import (
|
|
14
|
+
merge_tensordicts,
|
|
15
|
+
NestedKey,
|
|
16
|
+
TensorClass,
|
|
17
|
+
TensorDict,
|
|
18
|
+
TensorDictBase,
|
|
19
|
+
unravel_key,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from torchrl.data.map.tdstorage import TensorDictMap
|
|
23
|
+
from torchrl.data.map.utils import _plot_plotly_box, _plot_plotly_tree
|
|
24
|
+
from torchrl.data.replay_buffers.storages import ListStorage
|
|
25
|
+
from torchrl.data.tensor_specs import Composite
|
|
26
|
+
from torchrl.envs.common import EnvBase
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Tree(TensorClass["nocast"]):
|
|
30
|
+
"""Representation of a single MCTS (Monte Carlo Tree Search) Tree.
|
|
31
|
+
|
|
32
|
+
This class encapsulates the data and behavior of a tree node in an MCTS algorithm.
|
|
33
|
+
It includes attributes for storing information about the node, such as its children,
|
|
34
|
+
visit count, and rollout data. Methods are provided for traversing the tree,
|
|
35
|
+
computing statistics, and visualizing the tree structure.
|
|
36
|
+
|
|
37
|
+
It is somewhat indistinguishable from a node or a vertex - we use the term "Tree" when talking about
|
|
38
|
+
a node with children, "node" or "vertex" when talking about a place in the tree where a branching occurs.
|
|
39
|
+
A node in the tree is defined primarily by its ``hash`` value. Usually, a ``hash`` is determined by a unique
|
|
40
|
+
combination of state (or observation) and action. If one observation (found in the ``node`` attribute) has more than
|
|
41
|
+
one action associated, each branch will be stored in the ``subtree`` attribute as a stack of ``Tree`` instances.
|
|
42
|
+
|
|
43
|
+
Attributes:
|
|
44
|
+
count (int): The number of visits to this node.
|
|
45
|
+
index (torch.Tensor): Indices of the child nodes in the data map.
|
|
46
|
+
hash (torch.Tensor): A hash value for this node.
|
|
47
|
+
It may be the case that ``hash`` is ``None`` in the specific case where the root of the tree
|
|
48
|
+
has more than one action associated. In that case, each subtree branch will have a different action
|
|
49
|
+
associated and a hash corresponding to the ``(observation, action)`` pair.
|
|
50
|
+
node_id (int): A unique identifier for this node.
|
|
51
|
+
rollout (TensorDict): Rollout data following the observation encoded in this node, in a TED format.
|
|
52
|
+
If there are multiple actions taken at this node, subtrees are stored in the corresponding
|
|
53
|
+
entry. Rollouts can be reconstructed using the :meth:`rollout_from_path` method.
|
|
54
|
+
node (TensorDict): Data defining this node (e.g., observations) before the next branching.
|
|
55
|
+
Entries usually matches the ``in_keys`` in ``MCTSForest.node_map``.
|
|
56
|
+
subtree (Tree): A stack of subtrees produced when actions are taken.
|
|
57
|
+
num_children (int): The number of child nodes (read-only).
|
|
58
|
+
is_terminal (bool): whether the tree has children nodes (read-only).
|
|
59
|
+
If the tree is compact, ``is_terminal == True`` means that there are more than one child node in
|
|
60
|
+
``self.subtree``.
|
|
61
|
+
|
|
62
|
+
Methods:
|
|
63
|
+
__contains__: Whether another tree can be found in the tree.
|
|
64
|
+
vertices: Returns a dictionary containing all vertices in the tree. Keys must be paths, ids or hashes.
|
|
65
|
+
num_vertices: Returns the total number of vertices in the tree, with or without duplicates.
|
|
66
|
+
edges: Returns a list of edges in the tree.
|
|
67
|
+
valid_paths: Yields all valid paths in the tree.
|
|
68
|
+
max_length: Returns the maximum length of any path in the tree.
|
|
69
|
+
rollout_from_path: Reconstructs a rollout from a given path.
|
|
70
|
+
plot: Visualizes the tree using a specified backend and figure type.
|
|
71
|
+
get_node_by_id: returns the vertex given by its id in the tree.
|
|
72
|
+
get_node_by_hash: returns the vertex given by its hash in the forest.
|
|
73
|
+
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
count: int | torch.Tensor = None
|
|
77
|
+
wins: int | torch.Tensor = None
|
|
78
|
+
|
|
79
|
+
index: torch.Tensor | None = None
|
|
80
|
+
# The hash is None if the node has more than one action associated
|
|
81
|
+
hash: int | None = None
|
|
82
|
+
node_id: int | None = None
|
|
83
|
+
|
|
84
|
+
# rollout following the observation encoded in node, in a TorchRL (TED) format
|
|
85
|
+
rollout: TensorDict | None = None
|
|
86
|
+
|
|
87
|
+
# The data specifying the node (typically an observation or a set of observations)
|
|
88
|
+
node_data: TensorDict | None = None
|
|
89
|
+
|
|
90
|
+
# Stack of subtrees. A subtree is produced when an action is taken.
|
|
91
|
+
subtree: Tree = None
|
|
92
|
+
|
|
93
|
+
# weakrefs to the parent(s) of the node
|
|
94
|
+
_parent: weakref.ref | list[weakref.ref] | None = None
|
|
95
|
+
|
|
96
|
+
# Specs: contains information such as action or observation keys and spaces.
|
|
97
|
+
# If present, they should be structured like env specs are:
|
|
98
|
+
# Composite(input_spec=Composite(full_state_spec=..., full_action_spec=...),
|
|
99
|
+
# output_spec=Composite(full_observation_spec=..., full_reward_spec=..., full_done_spec=...))
|
|
100
|
+
# where every leaf component is optional.
|
|
101
|
+
specs: Composite | None = None
|
|
102
|
+
|
|
103
|
+
@classmethod
def make_node(
    cls,
    data: TensorDictBase,
    *,
    device: torch.device | None = None,
    batch_size: torch.Size | None = None,
    specs: Composite | None = None,
) -> Tree:
    """Creates a new node given some data.

    Args:
        data (TensorDictBase): data to store in the node. If it contains a
            ``"next"`` entry it is treated as a rollout, and a leaf subtree is
            built from the rollout's final step.

    Keyword Args:
        device (torch.device, optional): device of the node. Defaults to the
            data's device.
        batch_size (torch.Size, optional): batch size of the node.
        specs (Composite, optional): accepted for API compatibility; not used
            when building the node.
    """
    rollout = None
    subtree = None
    if "next" in data.keys():
        # ``data`` is a rollout: make sure it has at least one dimension and
        # hang a leaf node built from its last step under this one.
        rollout = data if data.ndim else data.unsqueeze(0)
        subtree = TensorDict.lazy_stack([cls.make_node(data["next"][..., -1])])
    return cls(
        count=torch.zeros(()),
        wins=torch.zeros(()),
        node_data=data.exclude("action", "next"),
        rollout=rollout,
        subtree=subtree,
        device=device if device is not None else data.device,
        batch_size=batch_size,
    )
|
|
132
|
+
|
|
133
|
+
# Specs
|
|
134
|
+
@property
def full_observation_spec(self):
    """The observation spec of the tree.

    Alias for ``Tree.specs['output_spec', 'full_observation_spec']``.
    """
    key = ("output_spec", "full_observation_spec")
    return self.specs[key]
|
|
141
|
+
|
|
142
|
+
@property
def full_reward_spec(self):
    """The reward spec of the tree.

    Alias for ``Tree.specs['output_spec', 'full_reward_spec']``.
    """
    key = ("output_spec", "full_reward_spec")
    return self.specs[key]
|
|
149
|
+
|
|
150
|
+
@property
def full_done_spec(self):
    """The done spec of the tree.

    Alias for ``Tree.specs['output_spec', 'full_done_spec']``.
    """
    key = ("output_spec", "full_done_spec")
    return self.specs[key]
|
|
157
|
+
|
|
158
|
+
@property
def full_state_spec(self):
    """The state spec of the tree.

    Alias for ``Tree.specs['input_spec', 'full_state_spec']``.
    """
    key = ("input_spec", "full_state_spec")
    return self.specs[key]
|
|
165
|
+
|
|
166
|
+
@property
def full_action_spec(self):
    """The action spec of the tree.

    Alias for ``Tree.specs['input_spec', 'full_action_spec']``.
    """
    key = ("input_spec", "full_action_spec")
    return self.specs[key]
|
|
173
|
+
|
|
174
|
+
@property
def selected_actions(self) -> torch.Tensor | TensorDictBase | None:
    """Returns a tensor containing all the selected actions branching out from this node."""
    children = self.subtree
    if children is None:
        return None
    # The first step of each child's rollout carries the action taken from here.
    first_steps = children.rollout[..., 0]
    return first_steps["action"]
|
|
180
|
+
|
|
181
|
+
@property
def prev_action(self) -> torch.Tensor | TensorDictBase | None:
    """The action undertaken just before this node's observation was generated.

    Returns:
        a tensor, tensordict or None if the node has no parent.

    .. seealso:: This will be equal to :attr:`~torchrl.data.Tree.branching_action` whenever the rollout data contains a single step.

    .. seealso:: :attr:`~torchrl.data.Tree.selected_actions` for all actions associated with a given node (or observation) in the tree.

    """
    rollout = self.rollout
    if rollout is None:
        return None
    # Last transition of the rollout that leads into this node.
    return rollout[..., -1]["action"]
|
|
196
|
+
|
|
197
|
+
@property
def branching_action(self) -> torch.Tensor | TensorDictBase | None:
    """Returns the action that branched out to this particular node.

    Returns:
        a tensor, tensordict or None if the node has no parent.

    .. seealso:: This will be equal to :attr:`~torchrl.data.Tree.prev_action` whenever the rollout data contains a single step.

    .. seealso:: :attr:`~torchrl.data.Tree.selected_actions` for all actions associated with a given node (or observation) in the tree.

    """
    rollout = self.rollout
    if rollout is None:
        return None
    # First transition of the rollout: the action that created this branch.
    return rollout[..., 0]["action"]
|
|
212
|
+
|
|
213
|
+
@property
def node_observation(self) -> torch.Tensor | TensorDictBase:
    """Returns the observation associated with this particular node.

    This is the observation (or bag of observations) that defines the node before a branching occurs.
    If the node contains a :meth:`rollout` attribute, the node observation is typically identical to the
    observation resulting from the last action undertaken, i.e., ``node.rollout[..., -1]["next", "observation"]``.

    If more than one observation key is associated with the tree specs, a :class:`~tensordict.TensorDict` instance
    is returned instead.

    For a TensorDict-shaped representation, see :attr:`~.node_observations`.

    """
    # TODO: implement specs
    obs_key = "observation"
    return self.node_data[obs_key]
|
|
229
|
+
|
|
230
|
+
@property
def node_observations(self) -> torch.Tensor | TensorDictBase:
    """Returns the observations associated with this particular node in a TensorDict format.

    This is the observation (or bag of observations) that defines the node before a branching occurs.
    If the node contains a :meth:`rollout` attribute, the node observation is typically identical to the
    observation resulting from the last action undertaken, i.e., ``node.rollout[..., -1]["next", "observation"]``.

    If more than one observation key is associated with the tree specs, a :class:`~tensordict.TensorDict` instance
    is returned instead.

    For direct access to the observation value, see :attr:`~.node_observation`.

    """
    # TODO: implement specs
    keys = ("observation",)
    return self.node_data.select(*keys)
|
|
246
|
+
|
|
247
|
+
@property
def visits(self) -> int | torch.Tensor:
    """Returns the number of visits associated with this particular node.

    Alias for the :attr:`~.count` attribute.

    """
    return self.count
|
|
255
|
+
|
|
256
|
+
@visits.setter
def visits(self, count):
    # Delegate straight to the underlying ``count`` attribute.
    self.count = count
|
|
259
|
+
|
|
260
|
+
def __setattr__(self, name: str, value: Any) -> None:
    """Intercepts attribute assignment to maintain parent back-links.

    When a ``subtree`` is assigned, each child tree receives a weak reference
    to this node's underlying tensordict so that :attr:`parent` can be
    resolved later. Weak references are used so the parent link alone does
    not keep nodes alive.
    """
    if name == "subtree" and value is not None:
        wr = weakref.ref(self._tensordict)
        if value._parent is None:
            # First parent registered for this subtree.
            value._parent = wr
        elif isinstance(value._parent, list):
            # Already re-branched: add this node to the list of parents.
            value._parent.append(wr)
        else:
            # Second parent: promote the single weakref to a list.
            value._parent = [value._parent, wr]
    # NOTE(review): re-assigning the same subtree appends a duplicate
    # weakref to ``_parent`` — confirm callers set ``subtree`` once per parent.
    return super().__setattr__(name, value)
|
|
270
|
+
|
|
271
|
+
@property
def parent(self) -> Tree | None:
    """The parent of the node.

    If the node has a parent and this object is still present in the python workspace, it will be returned by this
    property.

    For re-branching trees, this property may return a stack of trees where every index of the stack corresponds to
    a different parent.

    .. note:: the ``parent`` attribute will match in content but not in identity: the tensorclass object is reconstructed
        using the same tensors (i.e., tensors that point to the same memory locations).

    Returns:
        A ``Tree`` containing the parent data or ``None`` if the parent data is out of scope or the node is the root.
    """
    parent = self._parent
    if parent is not None:
        # Check that all parents match
        # ``_parent`` may be a single weakref or an arbitrarily nested list of
        # weakrefs (see __setattr__); flatten and deduplicate by identity.
        queue = [parent]

        def maybe_flatten_list(maybe_nested_list):
            # Yield dereferenced weakrefs one level deep; nested lists are
            # pushed back onto the queue for a later pass.
            if isinstance(maybe_nested_list, list):
                for p in maybe_nested_list:
                    if isinstance(p, list):
                        queue.append(p)
                    else:
                        yield p()
            else:
                yield maybe_nested_list()

        parent_result = None
        while len(queue):
            local_result = None
            for r in maybe_flatten_list(queue.pop()):
                if local_result is None:
                    local_result = r
                elif r is not None and r is not local_result:
                    # Distinct (by identity) parent tensordict: collect it.
                    if isinstance(local_result, list):
                        local_result.append(r)
                    else:
                        local_result = [local_result, r]
            if local_result is None:
                # Every weakref in this batch was dead (or the batch was empty).
                continue
            # replicate logic at macro level
            if parent_result is None:
                parent_result = local_result
            else:
                if isinstance(local_result, list):
                    # NOTE(review): ``parent_result`` can be a single tensordict
                    # here, in which case ``r not in parent_result`` performs
                    # membership on the tensordict rather than a list — confirm
                    # this is the intended dedup semantics.
                    local_result = [
                        r for r in local_result if r not in parent_result
                    ]
                else:
                    local_result = [local_result]
                if isinstance(parent_result, list):
                    parent_result.extend(local_result)
                else:
                    parent_result = [parent_result, *local_result]
        if isinstance(parent_result, list):
            # Multiple distinct parents: return them as a lazy stack of Trees.
            return TensorDict.lazy_stack(
                [self._from_tensordict(r) for r in parent_result]
            )
        return self._from_tensordict(parent_result)
|
|
334
|
+
|
|
335
|
+
@property
def num_children(self) -> int:
    """The number of children of this node.

    Equals the number of elements in the ``self.subtree`` stack, or ``0``
    when there is no subtree.
    """
    subtree = self.subtree
    if subtree is None:
        return 0
    return len(subtree)
|
|
342
|
+
|
|
343
|
+
@property
def is_terminal(self) -> bool | torch.Tensor:
    """Returns True if the tree has no children nodes.

    Reads the ``("next", "done")`` flag of the last step of this node's
    rollout. Nodes without a rollout (roots or floating nodes) are never
    considered terminal.
    """
    rollout = self.rollout
    if rollout is None:
        # No preceding data: either a root or a floating node -> not terminal.
        return False
    return rollout[..., -1]["next", "done"].squeeze(-1)
|
|
351
|
+
|
|
352
|
+
def fully_expanded(self, env: EnvBase) -> bool:
    """Returns True if the number of children equals the environment cardinality at this node."""
    return env.cardinality(self.node_data) == self.num_children
|
|
357
|
+
|
|
358
|
+
def get_vertex_by_id(self, id: int) -> Tree:
    """Traverses the tree breadth-first and returns the node with the given id.

    Raises:
        ValueError: if no node with the given id exists.
    """
    pending = deque((self,))
    while pending:
        node = pending.popleft()
        if node.node_id == id:
            return node
        subtree = node.subtree
        if subtree is not None:
            pending.extend(subtree.unbind(0))
    raise ValueError(f"Node with id {id} not found.")
|
|
369
|
+
|
|
370
|
+
def get_vertex_by_hash(self, hash: int) -> Tree:
    """Traverses the tree breadth-first and returns the node with the given hash.

    Raises:
        ValueError: if no node with the given hash exists.
    """
    pending = deque((self,))
    while pending:
        node = pending.popleft()
        if node.hash == hash:
            return node
        subtree = node.subtree
        if subtree is not None:
            pending.extend(subtree.unbind(0))
    raise ValueError(f"Node with hash {hash} not found.")
|
|
381
|
+
|
|
382
|
+
def __contains__(self, other: Tree) -> bool:
    """Returns True if any vertex of this tree shares ``other``'s hash."""
    target = other.hash
    return any(vertex.hash == target for vertex in self.vertices().values())
|
|
389
|
+
|
|
390
|
+
def vertices(
    self, *, key_type: Literal["id", "hash", "path"] = "hash"
) -> dict[int | tuple[int], Tree]:
    """Returns a map containing the vertices of the Tree.

    Keyword args:
        key_type (Literal["id", "hash", "path"], optional): Specifies the type of key to use for the vertices.

            - "id": Use the vertex ID as the key.
            - "hash": Use a hash of the vertex as the key.
            - "path": Use the path to the vertex as the key. This may lead to a dictionary with a longer length than
              when ``"id"`` or ``"hash"`` are used as the same node may be part of multiple trajectories.

            Defaults to ``"hash"``.

    Returns:
        Dict[int | Tuple[int], Tree]: A dictionary mapping keys to Tree vertices.

    Raises:
        ValueError: if ``key_type`` is not one of ``"id"``, ``"hash"`` or ``"path"``.
    """
    if key_type not in ("id", "hash", "path"):
        raise ValueError(
            f"key_type must be either 'hash', 'id' or 'path'. Got {key_type}."
        )
    use_path = key_type == "path"
    memo = set()
    result = {}
    q = deque()
    q.append((self, ()))
    # BFS over the tree. With "id"/"hash" keys, a node reachable through
    # several paths is recorded only once (memoized on its hash); with "path"
    # keys, every distinct path gets its own entry.
    while len(q):
        tree, cur_path = q.popleft()
        h = tree.hash
        if h in memo and not use_path:
            continue
        memo.add(h)
        if use_path:
            result[cur_path] = tree
        elif key_type == "id":
            result[tree.node_id] = tree
        else:
            # Bug fix: the "hash" key type previously keyed on ``node_id``,
            # making it indistinguishable from key_type="id".
            result[h] = tree
        for i in range(int(tree.num_children)):
            q.append((tree.subtree[i], cur_path + (i,)))
    return result
|
|
440
|
+
|
|
441
|
+
def num_vertices(self, *, count_repeat: bool = False) -> int:
    """Returns the number of unique vertices in the Tree.

    Keyword Args:
        count_repeat (bool, optional): Determines whether to count repeated
            vertices.

            - If ``False``, counts each unique vertex only once.
            - If ``True``, counts vertices multiple times if they appear in different paths.

            Defaults to ``False``.

    Returns:
        int: The number of unique vertices in the Tree.
    """
    key_type = "path" if count_repeat else "hash"
    unique_ids = {
        vertex.node_id
        for vertex in self.vertices(key_type=key_type).values()
    }
    return len(unique_ids)
|
|
465
|
+
|
|
466
|
+
def edges(self) -> list[tuple[int, int]]:
    """Retrieves a list of edges in the tree.

    Each edge is a ``(parent_id, child_id)`` tuple of node IDs. The tree is
    traversed breadth-first so every edge is visited exactly once.

    Returns:
        A list of ``(parent_node_id, child_node_id)`` tuples.
    """
    collected = []
    pending = deque(((self, self.node_id),))
    while pending:
        tree, parent_id = pending.popleft()
        for i in range(int(tree.num_children)):
            child = tree.subtree[i]
            child_id = child.node_id
            collected.append((parent_id, child_id))
            pending.append((child, child_id))
    return collected
|
|
488
|
+
|
|
489
|
+
def valid_paths(self):
    """Generates all valid paths in the tree.

    A valid path is a sequence of child indices leading from the root to a
    leaf node, represented as a tuple of integers.

    Yields:
        tuple: A valid path in the tree.
    """
    # BFS with the path-so-far carried alongside each node.
    pending = deque(((self, ()),))
    while pending:
        tree, path = pending.popleft()
        n_children = int(tree.num_children)
        if n_children == 0:
            # Leaf reached: the accumulated indices form a complete path.
            yield path
        for i in range(n_children):
            pending.append((tree.subtree[i], path + (i,)))
|
|
515
|
+
|
|
516
|
+
def max_length(self):
    """Returns the maximum length of all valid paths in the tree.

    The length of a path is the number of nodes in it. An empty tree
    yields ``0``.

    Returns:
        int: The maximum length of all valid paths in the tree.
    """
    # ``default=0`` covers the no-paths case in a single expression.
    return max((len(path) for path in self.valid_paths()), default=0)
|
|
532
|
+
|
|
533
|
+
def rollout_from_path(self, path: tuple[int]) -> TensorDictBase | None:
    """Retrieves the rollout data along a given path in the tree.

    The rollout segments encountered along the path are concatenated along
    the last dimension (``dim=-1``). Returns ``None`` when no rollout data
    exists anywhere along the path.

    Args:
        path: A tuple of child indices describing the path in the tree.

    Returns:
        The concatenated rollout data along the path, or ``None`` if no data is found.
    """
    segments = []
    node = self
    seg = node.rollout
    if seg is not None:
        segments.append(seg)
    for child_idx in path:
        node = node.subtree[child_idx]
        seg = node.rollout
        if seg is not None:
            segments.append(seg)
    if not segments:
        return None
    return torch.cat(segments, dim=-1)
|
|
558
|
+
|
|
559
|
+
@staticmethod
def _label(info: list[str], tree: Tree, root=False):
    """Builds a comma-separated label string for a node from the requested ``info`` keys.

    ``"hash"`` reads the node hash; any other key averages that entry of the
    node's rollout (or renders ``None`` for the root, which has no rollout).
    """
    parts = []
    for key in info:
        if key == "hash":
            node_hash = tree.hash
            if node_hash is not None:
                node_hash = node_hash.item()
            parts.append(f"hash={node_hash}")
        elif root:
            # The root has no rollout to read values from.
            parts.append(f"{key}=None")
        else:
            parts.append(f"{key}={tree.rollout[key].mean().item()}")
    return ", ".join(parts)
|
|
575
|
+
|
|
576
|
+
def plot(
    self: Tree,
    backend: str = "plotly",
    figure: str = "tree",
    info: list[str] = None,
    make_labels: Callable[[Any, ...], Any] | None = None,
):
    """Plots a visualization of the tree using the specified backend and figure type.

    Args:
        backend: The plotting backend to use. Currently only supports 'plotly'.
        figure: The type of figure to plot. Can be either 'tree' or 'box'.
        info: A list of additional information to include in the plot (not currently used).
        make_labels: An optional function to generate custom labels for the plot.

    Raises:
        NotImplementedError: If an unsupported backend or figure type is specified.
    """
    if backend == "plotly":
        if figure == "box":
            _plot_plotly_box(self)
            return
        if figure == "tree":
            _plot_plotly_tree(self, make_labels=make_labels)
            return
    # Any other backend/figure combination is unsupported.
    raise NotImplementedError(
        f"Unknown plotting backend {backend} with figure {figure}."
    )
|
|
606
|
+
|
|
607
|
+
def to_string(self, node_format_fn=lambda tree: tree.node_data.to_dict()):
    """Generates a string representation of the tree.

    Nodes are listed line-by-line in depth-first order. Each line contains
    the path to the node followed by the string produced by
    ``node_format_fn`` for that node, and is indented by one space per step
    in the path. Nodes without a rollout (e.g. the root) are not printed.

    Args:
        node_format_fn (Callable, optional): User-defined function to
            generate a string for each node of the tree. The signature must
            be ``(Tree) -> Any``, and the output must be convertible to a
            string. Defaults to rendering the node's
            :attr:`Tree.node_data` attribute as a dict.

    Examples:
        >>> from torchrl.data import MCTSForest
        >>> from tensordict import TensorDict
        >>> forest = MCTSForest()
        >>> td_root = TensorDict({"observation": 0,})
        >>> rollouts_data = [
        ...     # [(action, obs), ...]
        ...     [(3, 123), (1, 456)],
        ...     [(2, 359), (2, 3094)],
        ...     [(1, 75)],
        ... ]
        >>> for rollout_data in rollouts_data:
        ...     td = td_root.clone().unsqueeze(0)
        ...     for action, obs in rollout_data:
        ...         td = td.update(TensorDict({
        ...             "action": [action],
        ...             "next": TensorDict({"observation": [obs]}, [1]),
        ...         }, [1]))
        ...         forest.extend(td)
        ...         td = td["next"].clone()
        ...
        >>> tree = forest.get_tree(td_root)
        >>> print(tree.to_string())
        (0,) {'observation': tensor(456)}
        (1,) {'observation': tensor(3094)}
        (2,) {'observation': tensor(75)}
    """
    stack = [
        # (node, path)
        (self, ()),
    ]
    lines = []
    while stack:
        node, path = stack.pop()
        if node.subtree is not None:
            # Push children in reverse so they pop in index order (DFS).
            for child_idx, child in reversed(list(enumerate(node.subtree))):
                stack.append((child, path + (child_idx,)))
        if node.rollout is not None:
            indent = " " * (len(path) - 1)
            lines.append(f"{indent}{path} {node_format_fn(node)}")
    return "\n".join(lines)
|
|
679
|
+
|
|
680
|
+
|
|
681
|
+
class MCTSForest:
|
|
682
|
+
"""A collection of MCTS trees.
|
|
683
|
+
|
|
684
|
+
.. warning:: This class is currently under active development. Expect frequent API changes.
|
|
685
|
+
|
|
686
|
+
The class is aimed at storing rollouts in a storage, and produce trees based on a given root
|
|
687
|
+
in that dataset.
|
|
688
|
+
|
|
689
|
+
Keyword Args:
|
|
690
|
+
data_map (TensorDictMap, optional): the storage to use to store the data
|
|
691
|
+
(observation, reward, states etc). If not provided, it is lazily
|
|
692
|
+
initialized using :meth:`~torchrl.data.map.tdstorage.TensorDictMap.from_tensordict_pair`
|
|
693
|
+
using the list of :attr:`observation_keys` and :attr:`action_keys` as ``in_keys``.
|
|
694
|
+
node_map (TensorDictMap, optional): a map from the observation space to the index space.
|
|
695
|
+
Internally, the node map is used to gather all possible branches coming out of
|
|
696
|
+
a given node. For example, if an observation has two associated actions and outcomes
|
|
697
|
+
in the data map, then the :attr:`node_map` will return a data structure containing the
|
|
698
|
+
two indices in the :attr:`data_map` that correspond to these two outcomes.
|
|
699
|
+
If not provided, it is lazily initialized using
|
|
700
|
+
:meth:`~torchrl.data.map.tdstorage.TensorDictMap.from_tensordict_pair` using the list of
|
|
701
|
+
:attr:`observation_keys` as ``in_keys`` and the :class:`~torchrl.data.QueryModule` as
|
|
702
|
+
``out_keys``.
|
|
703
|
+
max_size (int, optional): the size of the maps.
|
|
704
|
+
If not provided, defaults to ``data_map.max_size`` if this can be found, then
|
|
705
|
+
``node_map.max_size``. If none of these are provided, defaults to `1000`.
|
|
706
|
+
done_keys (list of NestedKey, optional): the done keys of the environment. If not provided,
|
|
707
|
+
defaults to ``("done", "terminated", "truncated")``.
|
|
708
|
+
The :meth:`get_keys_from_env` can be used to automatically determine the keys.
|
|
709
|
+
action_keys (list of NestedKey, optional): the action keys of the environment. If not provided,
|
|
710
|
+
defaults to ``("action",)``.
|
|
711
|
+
The :meth:`get_keys_from_env` can be used to automatically determine the keys.
|
|
712
|
+
reward_keys (list of NestedKey, optional): the reward keys of the environment. If not provided,
|
|
713
|
+
defaults to ``("reward",)``.
|
|
714
|
+
The :meth:`get_keys_from_env` can be used to automatically determine the keys.
|
|
715
|
+
observation_keys (list of NestedKey, optional): the observation keys of the environment. If not provided,
|
|
716
|
+
defaults to ``("observation",)``.
|
|
717
|
+
The :meth:`get_keys_from_env` can be used to automatically determine the keys.
|
|
718
|
+
excluded_keys (list of NestedKey, optional): a list of keys to exclude from the data storage.
|
|
719
|
+
consolidated (bool, optional): if ``True``, the data_map storage will be consolidated on disk.
|
|
720
|
+
Defaults to ``False``.
|
|
721
|
+
|
|
722
|
+
Examples:
|
|
723
|
+
>>> from torchrl.envs import GymEnv
|
|
724
|
+
>>> import torch
|
|
725
|
+
>>> from tensordict import TensorDict, LazyStackedTensorDict
|
|
726
|
+
>>> from torchrl.data import TensorDictMap, ListStorage
|
|
727
|
+
>>> from torchrl.data.map.tree import MCTSForest
|
|
728
|
+
>>>
|
|
729
|
+
>>> from torchrl.envs import PendulumEnv, CatTensors, UnsqueezeTransform, StepCounter
|
|
730
|
+
>>> # Create the MCTS Forest
|
|
731
|
+
>>> forest = MCTSForest()
|
|
732
|
+
>>> # Create an environment. We're using a stateless env to be able to query it at any given state (like an oracle)
|
|
733
|
+
>>> env = PendulumEnv()
|
|
734
|
+
>>> obs_keys = list(env.observation_spec.keys(True, True))
|
|
735
|
+
>>> state_keys = set(env.full_state_spec.keys(True, True)) - set(obs_keys)
|
|
736
|
+
>>> # Appending transforms to get an "observation" key that concatenates the observations together
|
|
737
|
+
>>> env = env.append_transform(
|
|
738
|
+
... UnsqueezeTransform(
|
|
739
|
+
... in_keys=obs_keys,
|
|
740
|
+
... out_keys=[("unsqueeze", key) for key in obs_keys],
|
|
741
|
+
... dim=-1
|
|
742
|
+
... )
|
|
743
|
+
... )
|
|
744
|
+
>>> env = env.append_transform(
|
|
745
|
+
... CatTensors([("unsqueeze", key) for key in obs_keys], "observation")
|
|
746
|
+
... )
|
|
747
|
+
>>> env = env.append_transform(StepCounter())
|
|
748
|
+
>>> env.set_seed(0)
|
|
749
|
+
>>> # Get a reset state, then make a rollout out of it
|
|
750
|
+
>>> reset_state = env.reset()
|
|
751
|
+
>>> rollout0 = env.rollout(6, auto_reset=False, tensordict=reset_state.clone())
|
|
752
|
+
>>> # Append the rollout to the forest. We're removing the state entries for clarity
|
|
753
|
+
>>> rollout0 = rollout0.copy()
|
|
754
|
+
>>> rollout0.exclude(*state_keys, inplace=True).get("next").exclude(*state_keys, inplace=True)
|
|
755
|
+
>>> forest.extend(rollout0)
|
|
756
|
+
>>> # The forest should have 6 elements (the length of the rollout)
|
|
757
|
+
>>> assert len(forest) == 6
|
|
758
|
+
>>> # Let's make another rollout from the same reset state
|
|
759
|
+
>>> rollout1 = env.rollout(6, auto_reset=False, tensordict=reset_state.clone())
|
|
760
|
+
>>> rollout1.exclude(*state_keys, inplace=True).get("next").exclude(*state_keys, inplace=True)
|
|
761
|
+
>>> forest.extend(rollout1)
|
|
762
|
+
>>> assert len(forest) == 12
|
|
763
|
+
>>> # Let's make another final rollout from an intermediate step in the second rollout
|
|
764
|
+
>>> rollout1b = env.rollout(6, auto_reset=False, tensordict=rollout1[3].exclude("next"))
|
|
765
|
+
>>> rollout1b.exclude(*state_keys, inplace=True)
|
|
766
|
+
>>> rollout1b.get("next").exclude(*state_keys, inplace=True)
|
|
767
|
+
>>> forest.extend(rollout1b)
|
|
768
|
+
>>> assert len(forest) == 18
|
|
769
|
+
>>> # Since we have 2 rollouts starting at the same state, our tree should have two
|
|
770
|
+
>>> # branches if we produce it from the reset entry. Take the state, and call `get_tree`:
|
|
771
|
+
>>> r = rollout0[0]
|
|
772
|
+
>>> # Let's get the compact tree that follows the initial reset. A compact tree is
|
|
773
|
+
>>> # a tree where nodes that have a single child are collapsed.
|
|
774
|
+
>>> tree = forest.get_tree(r)
|
|
775
|
+
>>> print(tree.max_length())
|
|
776
|
+
2
|
|
777
|
+
>>> print(list(tree.valid_paths()))
|
|
778
|
+
[(0,), (1, 0), (1, 1)]
|
|
779
|
+
>>> from tensordict import assert_close
|
|
780
|
+
>>> # We can manually rebuild the tree
|
|
781
|
+
>>> assert_close(
|
|
782
|
+
... rollout1,
|
|
783
|
+
... torch.cat([tree.subtree[1].rollout, tree.subtree[1].subtree[0].rollout]),
|
|
784
|
+
... intersection=True,
|
|
785
|
+
... )
|
|
786
|
+
True
|
|
787
|
+
>>> # Or we can rebuild it using the dedicated method
|
|
788
|
+
>>> assert_close(
|
|
789
|
+
... rollout1,
|
|
790
|
+
... tree.rollout_from_path((1, 0)),
|
|
791
|
+
... intersection=True,
|
|
792
|
+
... )
|
|
793
|
+
True
|
|
794
|
+
>>> tree.plot()
|
|
795
|
+
>>> tree = forest.get_tree(r, compact=False)
|
|
796
|
+
>>> print(tree.max_length())
|
|
797
|
+
9
|
|
798
|
+
>>> print(list(tree.valid_paths()))
|
|
799
|
+
[(0, 0, 0, 0, 0, 0), (1, 0, 0, 0, 0, 0), (1, 0, 0, 1, 0, 0, 0, 0, 0)]
|
|
800
|
+
>>> assert_close(
|
|
801
|
+
... rollout1,
|
|
802
|
+
... tree.rollout_from_path((1, 0, 0, 0, 0, 0)),
|
|
803
|
+
... intersection=True,
|
|
804
|
+
... )
|
|
805
|
+
True
|
|
806
|
+
"""
|
|
807
|
+
|
|
808
|
+
def __init__(
    self,
    *,
    data_map: TensorDictMap | None = None,
    node_map: TensorDictMap | None = None,
    max_size: int | None = None,
    done_keys: list[NestedKey] | None = None,
    reward_keys: list[NestedKey] = None,
    observation_keys: list[NestedKey] = None,
    action_keys: list[NestedKey] = None,
    excluded_keys: list[NestedKey] = None,
    consolidated: bool | None = None,
):
    """Initializes the forest. See the class docstring for argument details.

    The maps may be ``None``: they are then built lazily on first use
    (see ``_make_data_map`` / ``_make_node_map``). ``max_size`` is resolved
    from the maps when not given, and conflicting sizes raise a ``ValueError``.
    """
    self.data_map = data_map

    self.node_map = node_map

    if max_size is None:
        # Infer max_size from the provided maps, checking for conflicts.
        if data_map is not None:
            max_size = data_map.max_size
            if max_size != getattr(node_map, "max_size", max_size):
                raise ValueError(
                    f"Conflicting max_size: got data_map.max_size={data_map.max_size} and node_map.max_size={node_map.max_size}."
                )
        elif node_map is not None:
            max_size = node_map.max_size
        else:
            # No maps given: leave unresolved until the maps are created.
            max_size = None
    elif data_map is not None and max_size != getattr(
        data_map, "max_size", max_size
    ):
        raise ValueError(
            f"Conflicting max_size: got data_map.max_size={data_map.max_size} and max_size={max_size}."
        )
    elif node_map is not None and max_size != getattr(
        node_map, "max_size", max_size
    ):
        raise ValueError(
            f"Conflicting max_size: got node_map.max_size={node_map.max_size} and max_size={max_size}."
        )
    self.max_size = max_size

    # Key assignments go through the property setters, which normalize
    # the values to lists of NestedKey (None triggers lazy defaults).
    self.done_keys = done_keys
    self.action_keys = action_keys
    self.reward_keys = reward_keys
    self.observation_keys = observation_keys
    self.excluded_keys = excluded_keys
    self.consolidated = consolidated
|
|
857
|
+
|
|
858
|
+
@property
def done_keys(self) -> list[NestedKey]:
    """Done Keys.

    The keys signalling the end of an episode in the environment's output.
    Lazily initialized to ``["done", "terminated", "truncated"]`` on first
    access.

    Returns:
        A list of strings representing the done keys.
    """
    if getattr(self, "_done_keys", None) is None:
        # Lazy default, assigned once on first read.
        self._done_keys = ["done", "terminated", "truncated"]
    return self._done_keys

@done_keys.setter
def done_keys(self, value):
    self._done_keys = _make_list_of_nestedkeys(value, "done_keys")
|
|
878
|
+
|
|
879
|
+
@property
def reward_keys(self) -> list[NestedKey]:
    """Reward Keys.

    The keys used to retrieve rewards from the environment's output.
    Lazily initialized to ``["reward"]`` on first access.

    Returns:
        A list of strings or tuples representing the reward keys.
    """
    if getattr(self, "_reward_keys", None) is None:
        # Lazy default, assigned once on first read.
        self._reward_keys = ["reward"]
    return self._reward_keys

@reward_keys.setter
def reward_keys(self, value):
    self._reward_keys = _make_list_of_nestedkeys(value, "reward_keys")
|
|
898
|
+
|
|
899
|
+
@property
def action_keys(self) -> list[NestedKey]:
    """Action Keys.

    The keys used to retrieve actions from the environment's input.
    Lazily initialized to ``["action"]`` on first access.

    Returns:
        A list of strings or tuples representing the action keys.
    """
    if getattr(self, "_action_keys", None) is None:
        # Lazy default, assigned once on first read.
        self._action_keys = ["action"]
    return self._action_keys

@action_keys.setter
def action_keys(self, value):
    self._action_keys = _make_list_of_nestedkeys(value, "action_keys")
|
|
918
|
+
|
|
919
|
+
@property
def observation_keys(self) -> list[NestedKey]:
    """Observation Keys.

    The keys used to retrieve observations from the environment's output.
    Lazily initialized to ``["observation"]`` on first access.

    Returns:
        A list of strings or tuples representing the observation keys.
    """
    if getattr(self, "_observation_keys", None) is None:
        # Lazy default, assigned once on first read.
        self._observation_keys = ["observation"]
    return self._observation_keys

@observation_keys.setter
def observation_keys(self, value):
    self._observation_keys = _make_list_of_nestedkeys(value, "observation_keys")
|
|
937
|
+
|
|
938
|
+
@property
def excluded_keys(self) -> list[NestedKey] | None:
    """Keys excluded from the data storage, or ``None`` if nothing is excluded."""
    return self._excluded_keys

@excluded_keys.setter
def excluded_keys(self, value):
    # Normalization is delegated to _make_list_of_nestedkeys.
    # NOTE(review): presumably it maps None -> None; confirm against the helper.
    self._excluded_keys = _make_list_of_nestedkeys(value, "excluded_keys")
|
|
945
|
+
|
|
946
|
+
def get_keys_from_env(self, env: EnvBase):
    """Writes missing done, action and reward keys to the Forest given an environment.

    Existing keys are not overwritten.
    """
    # (private cache attribute, public property name) pairs; assigning through
    # the public name routes the value through the property setter.
    key_specs = (
        ("_reward_keys", "reward_keys"),
        ("_done_keys", "done_keys"),
        ("_action_keys", "action_keys"),
        ("_observation_keys", "observation_keys"),
    )
    for private_name, public_name in key_specs:
        if getattr(self, private_name, None) is None:
            setattr(self, public_name, getattr(env, public_name))
|
|
959
|
+
|
|
960
|
+
@classmethod
def _write_fn_stack(cls, new, old=None):
    """Write function for the node map: accumulates data-map indices per node.

    Used to track every index associated with an observation. Each new index
    is stacked along the last dim and deduplicated, so the storage keeps only
    previously unknown values; a ``"count"`` entry tracks how many times the
    node has been written.
    """
    # This function updates the old values by adding the new ones
    # if and only if the new ones are not there.
    # If the old value is not provided, we assume there are none and the
    # `new` is just prepared.
    # This involves unsqueezing the last dim (since we'll be stacking tensors
    # and calling unique).
    # The update involves calling cat along the last dim + unique
    # which will keep only the new values that were unknown to
    # the storage.
    # We use this method to track all the indices that are associated with
    # an observation. Every time a new index is obtained, it is stacked alongside
    # the others.
    if old is None:
        # we unsqueeze the values to stack them along dim -1
        result = new.apply(lambda x: x.unsqueeze(-1), filter_empty=False)
        result.set(
            "count", torch.ones(result.shape, dtype=torch.int, device=result.device)
        )
    else:

        def cat(name, x, y):
            # Per-entry merge: keep the old count as-is (it is bumped below),
            # append the new value along dim -1 and drop duplicates.
            if name == "count":
                return x
            if y.ndim < x.ndim:
                y = y.unsqueeze(-1)
            result = torch.cat([x, y], -1)
            # Breaks on mps
            if result.device.type == "mps":
                # torch.unique(dim=...) is unsupported on mps: round-trip via CPU.
                result = result.cpu()
                result = result.unique(dim=-1, sorted=False)
                result = result.to("mps")
            else:
                result = result.unique(dim=-1, sorted=False)
            return result

        result = old.named_apply(cat, new, default=None)
        # Bump the visit counter in place.
        result.set_("count", old.get("count") + 1)
    return result
|
|
1000
|
+
|
|
1001
|
+
def _make_data_map(self, source, dest):
    """Lazily builds ``self.data_map`` from an example ``(source, dest)`` tensordict pair.

    The map is keyed on the observation and action keys; ``self.max_size`` is
    resolved from the created map when it was not set.
    """
    try:
        kwargs = {}
        if self.max_size is not None:
            kwargs["max_size"] = self.max_size
        self.data_map = TensorDictMap.from_tensordict_pair(
            source,
            dest,
            in_keys=[*self.observation_keys, *self.action_keys],
            consolidated=self.consolidated,
            **kwargs,
        )
        if self.max_size is None:
            # Adopt the size chosen by the map's own default.
            self.max_size = self.data_map.max_size
    except KeyError as err:
        # Re-raise with a hint: a missing key here usually means the forest
        # was constructed with the wrong observation/action keys.
        raise KeyError(
            "A KeyError occurred during data map creation. This could be due to the wrong setting of a key in the MCTSForest constructor. Scroll up for more info."
        ) from err
|
|
1019
|
+
|
|
1020
|
+
def _make_node_map(self, source, dest):
|
|
1021
|
+
kwargs = {}
|
|
1022
|
+
if self.max_size is not None:
|
|
1023
|
+
kwargs["max_size"] = self.max_size
|
|
1024
|
+
self.node_map = TensorDictMap.from_tensordict_pair(
|
|
1025
|
+
source,
|
|
1026
|
+
dest,
|
|
1027
|
+
in_keys=[*self.observation_keys],
|
|
1028
|
+
out_keys=[
|
|
1029
|
+
*self.data_map.query_module.out_keys, # hash and index
|
|
1030
|
+
# *self.action_keys,
|
|
1031
|
+
# *[("next", rk) for rk in self.reward_keys],
|
|
1032
|
+
"count",
|
|
1033
|
+
],
|
|
1034
|
+
storage_constructor=ListStorage,
|
|
1035
|
+
collate_fn=TensorDict.lazy_stack,
|
|
1036
|
+
write_fn=self._write_fn_stack,
|
|
1037
|
+
**kwargs,
|
|
1038
|
+
)
|
|
1039
|
+
if self.max_size is None:
|
|
1040
|
+
self.max_size = self.data_map.max_size
|
|
1041
|
+
|
|
1042
|
+
def extend(self, rollout, *, return_node: bool = False):
|
|
1043
|
+
"""Add a rollout to the forest.
|
|
1044
|
+
|
|
1045
|
+
Nodes are only added to a tree at points where rollouts diverge from
|
|
1046
|
+
each other and at the endpoints of rollouts.
|
|
1047
|
+
|
|
1048
|
+
If there is no existing tree that matches the first steps of the
|
|
1049
|
+
rollout, a new tree is added. Only one node is created, for the final
|
|
1050
|
+
step.
|
|
1051
|
+
|
|
1052
|
+
If there is an existing tree that matches, the rollout is added to that
|
|
1053
|
+
tree. If the rollout diverges from all other rollouts in the tree at
|
|
1054
|
+
some step, a new node is created before the step where the rollouts
|
|
1055
|
+
diverge, and a leaf node is created for the final step of the rollout.
|
|
1056
|
+
If all of the rollout's steps match with a previously added rollout,
|
|
1057
|
+
nothing changes. If the rollout matches up to a leaf node of a tree but
|
|
1058
|
+
continues beyond it, that node is extended to the end of the rollout,
|
|
1059
|
+
and no new nodes are created.
|
|
1060
|
+
|
|
1061
|
+
Args:
|
|
1062
|
+
rollout (TensorDict): The rollout to add to the forest.
|
|
1063
|
+
return_node (bool, optional): If ``True``, the method returns the
|
|
1064
|
+
added node. Default is ``False``.
|
|
1065
|
+
|
|
1066
|
+
Returns:
|
|
1067
|
+
Tree: The node that was added to the forest. This is only
|
|
1068
|
+
returned if ``return_node`` is True.
|
|
1069
|
+
|
|
1070
|
+
Examples:
|
|
1071
|
+
>>> from torchrl.data import MCTSForest
|
|
1072
|
+
>>> from tensordict import TensorDict
|
|
1073
|
+
>>> import torch
|
|
1074
|
+
>>> forest = MCTSForest()
|
|
1075
|
+
>>> r0 = TensorDict({
|
|
1076
|
+
... 'action': torch.tensor([1, 2, 3, 4, 5]),
|
|
1077
|
+
... 'next': {'observation': torch.tensor([123, 392, 989, 809, 847])},
|
|
1078
|
+
... 'observation': torch.tensor([ 0, 123, 392, 989, 809])
|
|
1079
|
+
... }, [5])
|
|
1080
|
+
>>> r1 = TensorDict({
|
|
1081
|
+
... 'action': torch.tensor([1, 2, 6, 7]),
|
|
1082
|
+
... 'next': {'observation': torch.tensor([123, 392, 235, 38])},
|
|
1083
|
+
... 'observation': torch.tensor([ 0, 123, 392, 235])
|
|
1084
|
+
... }, [4])
|
|
1085
|
+
>>> td_root = r0[0].exclude("next")
|
|
1086
|
+
>>> forest.extend(r0)
|
|
1087
|
+
>>> forest.extend(r1)
|
|
1088
|
+
>>> tree = forest.get_tree(td_root)
|
|
1089
|
+
>>> print(tree)
|
|
1090
|
+
Tree(
|
|
1091
|
+
count=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.int32, is_shared=False),
|
|
1092
|
+
index=Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.int64, is_shared=False),
|
|
1093
|
+
node_data=TensorDict(
|
|
1094
|
+
fields={
|
|
1095
|
+
observation: Tensor(shape=torch.Size([]), device=cpu, dtype=torch.int64, is_shared=False)},
|
|
1096
|
+
batch_size=torch.Size([]),
|
|
1097
|
+
device=cpu,
|
|
1098
|
+
is_shared=False),
|
|
1099
|
+
node_id=NonTensorData(data=0, batch_size=torch.Size([]), device=None),
|
|
1100
|
+
rollout=TensorDict(
|
|
1101
|
+
fields={
|
|
1102
|
+
action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.int64, is_shared=False),
|
|
1103
|
+
next: TensorDict(
|
|
1104
|
+
fields={
|
|
1105
|
+
observation: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.int64, is_shared=False)},
|
|
1106
|
+
batch_size=torch.Size([2]),
|
|
1107
|
+
device=cpu,
|
|
1108
|
+
is_shared=False),
|
|
1109
|
+
observation: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.int64, is_shared=False)},
|
|
1110
|
+
batch_size=torch.Size([2]),
|
|
1111
|
+
device=cpu,
|
|
1112
|
+
is_shared=False),
|
|
1113
|
+
subtree=Tree(
|
|
1114
|
+
_parent=NonTensorStack(
|
|
1115
|
+
[<weakref at 0x716eeb78fbf0; to 'TensorDict' at 0x...,
|
|
1116
|
+
batch_size=torch.Size([2]),
|
|
1117
|
+
device=None),
|
|
1118
|
+
count=Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.int32, is_shared=False),
|
|
1119
|
+
hash=NonTensorStack(
|
|
1120
|
+
[4341220243998689835, 6745467818783115365],
|
|
1121
|
+
batch_size=torch.Size([2]),
|
|
1122
|
+
device=None),
|
|
1123
|
+
node_data=LazyStackedTensorDict(
|
|
1124
|
+
fields={
|
|
1125
|
+
observation: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.int64, is_shared=False)},
|
|
1126
|
+
exclusive_fields={
|
|
1127
|
+
},
|
|
1128
|
+
batch_size=torch.Size([2]),
|
|
1129
|
+
device=cpu,
|
|
1130
|
+
is_shared=False,
|
|
1131
|
+
stack_dim=0),
|
|
1132
|
+
node_id=NonTensorStack(
|
|
1133
|
+
[1, 2],
|
|
1134
|
+
batch_size=torch.Size([2]),
|
|
1135
|
+
device=None),
|
|
1136
|
+
rollout=LazyStackedTensorDict(
|
|
1137
|
+
fields={
|
|
1138
|
+
action: Tensor(shape=torch.Size([2, -1]), device=cpu, dtype=torch.int64, is_shared=False),
|
|
1139
|
+
next: LazyStackedTensorDict(
|
|
1140
|
+
fields={
|
|
1141
|
+
observation: Tensor(shape=torch.Size([2, -1]), device=cpu, dtype=torch.int64, is_shared=False)},
|
|
1142
|
+
exclusive_fields={
|
|
1143
|
+
},
|
|
1144
|
+
batch_size=torch.Size([2, -1]),
|
|
1145
|
+
device=cpu,
|
|
1146
|
+
is_shared=False,
|
|
1147
|
+
stack_dim=0),
|
|
1148
|
+
observation: Tensor(shape=torch.Size([2, -1]), device=cpu, dtype=torch.int64, is_shared=False)},
|
|
1149
|
+
exclusive_fields={
|
|
1150
|
+
},
|
|
1151
|
+
batch_size=torch.Size([2, -1]),
|
|
1152
|
+
device=cpu,
|
|
1153
|
+
is_shared=False,
|
|
1154
|
+
stack_dim=0),
|
|
1155
|
+
wins=Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
|
|
1156
|
+
index=None,
|
|
1157
|
+
subtree=None,
|
|
1158
|
+
specs=None,
|
|
1159
|
+
batch_size=torch.Size([2]),
|
|
1160
|
+
device=None,
|
|
1161
|
+
is_shared=False),
|
|
1162
|
+
wins=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
|
|
1163
|
+
hash=None,
|
|
1164
|
+
_parent=None,
|
|
1165
|
+
specs=None,
|
|
1166
|
+
batch_size=torch.Size([]),
|
|
1167
|
+
device=None,
|
|
1168
|
+
is_shared=False)
|
|
1169
|
+
"""
|
|
1170
|
+
source, dest = (
|
|
1171
|
+
rollout.exclude("next").copy(),
|
|
1172
|
+
rollout.select("next", *self.action_keys).copy(),
|
|
1173
|
+
)
|
|
1174
|
+
if self.excluded_keys is not None:
|
|
1175
|
+
dest = dest.exclude(*self.excluded_keys, inplace=True)
|
|
1176
|
+
dest.get("next").exclude(*self.excluded_keys, inplace=True)
|
|
1177
|
+
|
|
1178
|
+
if self.data_map is None:
|
|
1179
|
+
self._make_data_map(source, dest)
|
|
1180
|
+
|
|
1181
|
+
# We need to set the action somewhere to keep track of what action lead to what child
|
|
1182
|
+
# # Set the action in the 'next'
|
|
1183
|
+
# dest[1:] = source[:-1].exclude(*self.done_keys)
|
|
1184
|
+
|
|
1185
|
+
# Add ('observation', 'action') -> ('next, observation')
|
|
1186
|
+
self.data_map[source] = dest
|
|
1187
|
+
value = source
|
|
1188
|
+
if self.node_map is None:
|
|
1189
|
+
self._make_node_map(source, dest)
|
|
1190
|
+
# map ('observation',) -> ('indices',)
|
|
1191
|
+
self.node_map[source] = TensorDict.lazy_stack(value.unbind(0))
|
|
1192
|
+
if return_node:
|
|
1193
|
+
return self.get_tree(rollout)
|
|
1194
|
+
|
|
1195
|
+
def add(self, step, *, return_node: bool = False):
|
|
1196
|
+
source, dest = (
|
|
1197
|
+
step.exclude("next").copy(),
|
|
1198
|
+
step.select("next", *self.action_keys).copy(),
|
|
1199
|
+
)
|
|
1200
|
+
|
|
1201
|
+
if self.data_map is None:
|
|
1202
|
+
self._make_data_map(source, dest)
|
|
1203
|
+
|
|
1204
|
+
# We need to set the action somewhere to keep track of what action lead to what child
|
|
1205
|
+
# # Set the action in the 'next'
|
|
1206
|
+
# dest[1:] = source[:-1].exclude(*self.done_keys)
|
|
1207
|
+
|
|
1208
|
+
# Add ('observation', 'action') -> ('next, observation')
|
|
1209
|
+
self.data_map[source] = dest
|
|
1210
|
+
value = source
|
|
1211
|
+
if self.node_map is None:
|
|
1212
|
+
self._make_node_map(source, dest)
|
|
1213
|
+
# map ('observation',) -> ('indices',)
|
|
1214
|
+
self.node_map[source] = value
|
|
1215
|
+
if return_node:
|
|
1216
|
+
return self.get_tree(step)
|
|
1217
|
+
|
|
1218
|
+
def get_child(self, root: TensorDictBase) -> TensorDictBase:
|
|
1219
|
+
return self.data_map[root]
|
|
1220
|
+
|
|
1221
|
+
def _make_local_tree(
|
|
1222
|
+
self,
|
|
1223
|
+
root: TensorDictBase,
|
|
1224
|
+
index: torch.Tensor | None = None,
|
|
1225
|
+
compact: bool = True,
|
|
1226
|
+
) -> tuple[Tree, torch.Tensor | None, torch.Tensor | None]:
|
|
1227
|
+
root = root.select(*self.node_map.in_keys)
|
|
1228
|
+
node_meta = None
|
|
1229
|
+
if root in self.node_map:
|
|
1230
|
+
node_meta = self.node_map[root]
|
|
1231
|
+
if index is None:
|
|
1232
|
+
node_meta = self.node_map[root]
|
|
1233
|
+
index = node_meta["_index"]
|
|
1234
|
+
elif index is not None:
|
|
1235
|
+
pass
|
|
1236
|
+
else:
|
|
1237
|
+
return None
|
|
1238
|
+
steps = []
|
|
1239
|
+
while index.numel() <= 1:
|
|
1240
|
+
index = index.squeeze()
|
|
1241
|
+
d = self.data_map.storage[index]
|
|
1242
|
+
|
|
1243
|
+
# Rebuild rollout step
|
|
1244
|
+
steps.append(merge_tensordicts(d, root, callback_exist=lambda *x: None))
|
|
1245
|
+
d = d["next"]
|
|
1246
|
+
if d in self.node_map:
|
|
1247
|
+
root = d.select(*self.node_map.in_keys)
|
|
1248
|
+
node_meta = self.node_map[root]
|
|
1249
|
+
index = node_meta["_index"]
|
|
1250
|
+
if not compact:
|
|
1251
|
+
break
|
|
1252
|
+
else:
|
|
1253
|
+
# If the root is provided and not gathered from the storage, it could be that its
|
|
1254
|
+
# device doesn't match the data_map storage device.
|
|
1255
|
+
root = steps[-1]["next"].select(*self.node_map.in_keys)
|
|
1256
|
+
device = getattr(self.data_map.storage, "device", None)
|
|
1257
|
+
if root.device != device:
|
|
1258
|
+
if device is not None:
|
|
1259
|
+
root = root.to(self.data_map.storage.device)
|
|
1260
|
+
else:
|
|
1261
|
+
root.clear_device_()
|
|
1262
|
+
index = None
|
|
1263
|
+
break
|
|
1264
|
+
rollout = None
|
|
1265
|
+
if steps:
|
|
1266
|
+
rollout = torch.stack(steps, -1)
|
|
1267
|
+
# Will be populated later
|
|
1268
|
+
hash = node_meta["_hash"]
|
|
1269
|
+
return (
|
|
1270
|
+
Tree(
|
|
1271
|
+
rollout=rollout,
|
|
1272
|
+
count=torch.zeros((), dtype=torch.int32),
|
|
1273
|
+
wins=torch.zeros(()),
|
|
1274
|
+
node_data=root,
|
|
1275
|
+
index=index,
|
|
1276
|
+
hash=None,
|
|
1277
|
+
# We do this to avoid raising an exception as rollout and subtree must be provided together
|
|
1278
|
+
subtree=None,
|
|
1279
|
+
),
|
|
1280
|
+
index,
|
|
1281
|
+
hash,
|
|
1282
|
+
)
|
|
1283
|
+
|
|
1284
|
+
# The recursive implementation is slower and less compatible with compile
|
|
1285
|
+
# def _make_tree(self, root: TensorDictBase, index: torch.Tensor|None=None)->Tree:
|
|
1286
|
+
# tree, indices = self._make_local_tree(root, index=index)
|
|
1287
|
+
# subtrees = []
|
|
1288
|
+
# if indices is not None:
|
|
1289
|
+
# for i in indices:
|
|
1290
|
+
# subtree = self._make_tree(tree.node, index=i)
|
|
1291
|
+
# subtrees.append(subtree)
|
|
1292
|
+
# subtrees = TensorDict.lazy_stack(subtrees)
|
|
1293
|
+
# tree.subtree = subtrees
|
|
1294
|
+
# return tree
|
|
1295
|
+
    def _make_tree_iter(
        self, root, index=None, max_depth: int | None = None, compact: bool = True
    ):
        """Iteratively build the tree rooted at ``root``, breadth-first.

        An explicit queue replaces recursion. Nodes already built are
        memoized by hash so shared subtrees are reused; when the same hash is
        reached through a second path, only the extra rollout is stacked onto
        the already-built node.

        Args:
            root: node data for the tree's root.
            index: optional child indices to start from.
            max_depth (int, optional): stop expanding children below this
                depth; ``None`` means unbounded.
            compact (bool, optional): merge linear chains of steps into
                single nodes. Defaults to ``True``.
        """
        q = deque()
        memo = {}
        # Root node gets id 0; subsequent ids are assigned in discovery order.
        tree, indices, hash = self._make_local_tree(root, index=index, compact=compact)
        tree.node_id = 0

        result = tree
        depth = 0
        counter = 1
        if indices is not None:
            q.append((tree, indices, hash, depth))

        while len(q):
            tree, indices, hash, depth = q.popleft()
            # Expand children only while under the depth budget.
            extend = max_depth is None or depth < max_depth
            subtrees = []
            for i, h in zip(indices, hash):
                # TODO: remove the .item()
                h = h.item()
                subtree, subtree_indices, subtree_hash = memo.get(h, (None,) * 3)
                if subtree is None:
                    # First visit of this node: build it and memoize by hash.
                    subtree, subtree_indices, subtree_hash = self._make_local_tree(
                        tree.node_data,
                        index=i,
                        compact=compact,
                    )
                    subtree.node_id = counter
                    counter += 1
                    subtree.hash = h
                    memo[h] = (subtree, subtree_indices, subtree_hash)
                else:
                    # We just need to save the two (or more) rollouts
                    subtree_bis, _, _ = self._make_local_tree(
                        tree.node_data,
                        index=i,
                        compact=compact,
                    )
                    if subtree.rollout.ndim == subtree_bis.rollout.ndim:
                        subtree.rollout = TensorDict.stack(
                            [subtree.rollout, subtree_bis.rollout]
                        )
                    else:
                        # rollout is already a stack of rollouts: unpack it
                        # before appending the newly built one.
                        subtree.rollout = TensorDict.stack(
                            [*subtree.rollout, subtree_bis.rollout]
                        )

                subtrees.append(subtree)
                if extend and subtree_indices is not None:
                    q.append((subtree, subtree_indices, subtree_hash, depth + 1))
            subtrees = TensorDict.lazy_stack(subtrees)
            tree.subtree = subtrees

        return result
|
|
1350
|
+
|
|
1351
|
+
def get_tree(
|
|
1352
|
+
self,
|
|
1353
|
+
root,
|
|
1354
|
+
*,
|
|
1355
|
+
max_depth: int | None = None,
|
|
1356
|
+
compact: bool = True,
|
|
1357
|
+
) -> Tree:
|
|
1358
|
+
return self._make_tree_iter(root=root, max_depth=max_depth, compact=compact)
|
|
1359
|
+
|
|
1360
|
+
@classmethod
|
|
1361
|
+
def valid_paths(cls, tree: Tree):
|
|
1362
|
+
yield from tree.valid_paths()
|
|
1363
|
+
|
|
1364
|
+
def __len__(self):
|
|
1365
|
+
return len(self.data_map)
|
|
1366
|
+
|
|
1367
|
+
def to_string(self, td_root, node_format_fn=lambda tree: tree.node_data.to_dict()):
|
|
1368
|
+
"""Generates a string representation of a tree in the forest.
|
|
1369
|
+
|
|
1370
|
+
This function can pull out information from each of the nodes in a tree,
|
|
1371
|
+
so it can be useful for debugging. The nodes are listed line-by-line.
|
|
1372
|
+
Each line contains the path to the node, followed by the string
|
|
1373
|
+
representation of that node generated with ``node_format_fn``. Each
|
|
1374
|
+
line is indented according to number of steps in the path required to
|
|
1375
|
+
get to the corresponding node.
|
|
1376
|
+
|
|
1377
|
+
Args:
|
|
1378
|
+
td_root (TensorDict): Root of the tree.
|
|
1379
|
+
|
|
1380
|
+
node_format_fn (Callable, optional): User-defined function to
|
|
1381
|
+
generate a string for each node of the tree. The signature must
|
|
1382
|
+
be ``(Tree) -> Any``, and the output must be convertible to a
|
|
1383
|
+
string. If this argument is not given, the generated string is
|
|
1384
|
+
the node's :attr:`Tree.node_data` attribute converted to a dict.
|
|
1385
|
+
|
|
1386
|
+
Examples:
|
|
1387
|
+
>>> from torchrl.data import MCTSForest
|
|
1388
|
+
>>> from tensordict import TensorDict
|
|
1389
|
+
>>> forest = MCTSForest()
|
|
1390
|
+
>>> td_root = TensorDict({"observation": 0,})
|
|
1391
|
+
>>> rollouts_data = [
|
|
1392
|
+
... # [(action, obs), ...]
|
|
1393
|
+
... [(3, 123), (1, 456)],
|
|
1394
|
+
... [(2, 359), (2, 3094)],
|
|
1395
|
+
... [(3, 123), (9, 392), (6, 989), (20, 809), (21, 847)],
|
|
1396
|
+
... [(1, 75)],
|
|
1397
|
+
... [(3, 123), (0, 948)],
|
|
1398
|
+
... [(2, 359), (2, 3094), (10, 68)],
|
|
1399
|
+
... [(2, 359), (2, 3094), (11, 9045)],
|
|
1400
|
+
... ]
|
|
1401
|
+
>>> for rollout_data in rollouts_data:
|
|
1402
|
+
... td = td_root.clone().unsqueeze(0)
|
|
1403
|
+
... for action, obs in rollout_data:
|
|
1404
|
+
... td = td.update(TensorDict({
|
|
1405
|
+
... "action": [action],
|
|
1406
|
+
... "next": TensorDict({"observation": [obs]}, [1]),
|
|
1407
|
+
... }, [1]))
|
|
1408
|
+
... forest.extend(td)
|
|
1409
|
+
... td = td["next"].clone()
|
|
1410
|
+
...
|
|
1411
|
+
>>> print(forest.to_string(td_root))
|
|
1412
|
+
(0,) {'observation': tensor(123)}
|
|
1413
|
+
(0, 0) {'observation': tensor(456)}
|
|
1414
|
+
(0, 1) {'observation': tensor(847)}
|
|
1415
|
+
(0, 2) {'observation': tensor(948)}
|
|
1416
|
+
(1,) {'observation': tensor(3094)}
|
|
1417
|
+
(1, 0) {'observation': tensor(68)}
|
|
1418
|
+
(1, 1) {'observation': tensor(9045)}
|
|
1419
|
+
(2,) {'observation': tensor(75)}
|
|
1420
|
+
"""
|
|
1421
|
+
tree = self.get_tree(td_root)
|
|
1422
|
+
return tree.to_string(node_format_fn)
|
|
1423
|
+
|
|
1424
|
+
|
|
1425
|
+
def _make_list_of_nestedkeys(obj: Any, attr: str) -> list[NestedKey]:
    """Normalize ``obj`` into a list of nested keys.

    ``None`` passes through unchanged, a single key (string or tuple) is
    wrapped in a one-element list, and each entry of a list is unravelled.

    Args:
        obj: ``None``, a single nested key, or a list of nested keys.
        attr: attribute name used in the error message.

    Raises:
        ValueError: if ``obj`` is neither ``None``, a key, nor a list.
    """
    if obj is None:
        return obj
    if isinstance(obj, (str, tuple)):
        return [obj]
    if isinstance(obj, list):
        return [unravel_key(key) for key in obj]
    raise ValueError(
        f"{attr} must be a list of NestedKeys or a NestedKey, got {obj}."
    )