torchrl-0.11.0-cp314-cp314-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (394)
  1. benchmarks/benchmark_batched_envs.py +104 -0
  2. benchmarks/conftest.py +91 -0
  3. benchmarks/ecosystem/gym_env_throughput.py +321 -0
  4. benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
  5. benchmarks/requirements.txt +7 -0
  6. benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
  7. benchmarks/test_collectors_benchmark.py +240 -0
  8. benchmarks/test_compressed_storage_benchmark.py +145 -0
  9. benchmarks/test_envs_benchmark.py +133 -0
  10. benchmarks/test_llm.py +101 -0
  11. benchmarks/test_non_tensor_env_benchmark.py +70 -0
  12. benchmarks/test_objectives_benchmarks.py +1199 -0
  13. benchmarks/test_replaybuffer_benchmark.py +254 -0
  14. sota-check/README.md +35 -0
  15. sota-implementations/README.md +142 -0
  16. sota-implementations/a2c/README.md +39 -0
  17. sota-implementations/a2c/a2c_atari.py +291 -0
  18. sota-implementations/a2c/a2c_mujoco.py +273 -0
  19. sota-implementations/a2c/utils_atari.py +240 -0
  20. sota-implementations/a2c/utils_mujoco.py +160 -0
  21. sota-implementations/bandits/README.md +7 -0
  22. sota-implementations/bandits/dqn.py +126 -0
  23. sota-implementations/cql/cql_offline.py +198 -0
  24. sota-implementations/cql/cql_online.py +249 -0
  25. sota-implementations/cql/discrete_cql_offline.py +180 -0
  26. sota-implementations/cql/discrete_cql_online.py +227 -0
  27. sota-implementations/cql/utils.py +471 -0
  28. sota-implementations/crossq/crossq.py +271 -0
  29. sota-implementations/crossq/utils.py +320 -0
  30. sota-implementations/ddpg/ddpg.py +231 -0
  31. sota-implementations/ddpg/utils.py +325 -0
  32. sota-implementations/decision_transformer/dt.py +163 -0
  33. sota-implementations/decision_transformer/lamb.py +167 -0
  34. sota-implementations/decision_transformer/online_dt.py +178 -0
  35. sota-implementations/decision_transformer/utils.py +562 -0
  36. sota-implementations/discrete_sac/discrete_sac.py +243 -0
  37. sota-implementations/discrete_sac/utils.py +324 -0
  38. sota-implementations/dqn/README.md +30 -0
  39. sota-implementations/dqn/dqn_atari.py +272 -0
  40. sota-implementations/dqn/dqn_cartpole.py +236 -0
  41. sota-implementations/dqn/utils_atari.py +132 -0
  42. sota-implementations/dqn/utils_cartpole.py +90 -0
  43. sota-implementations/dreamer/README.md +129 -0
  44. sota-implementations/dreamer/dreamer.py +586 -0
  45. sota-implementations/dreamer/dreamer_utils.py +1107 -0
  46. sota-implementations/expert-iteration/README.md +352 -0
  47. sota-implementations/expert-iteration/ei_utils.py +770 -0
  48. sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
  49. sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
  50. sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
  51. sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
  52. sota-implementations/gail/gail.py +327 -0
  53. sota-implementations/gail/gail_utils.py +68 -0
  54. sota-implementations/gail/ppo_utils.py +157 -0
  55. sota-implementations/grpo/README.md +273 -0
  56. sota-implementations/grpo/grpo-async.py +437 -0
  57. sota-implementations/grpo/grpo-sync.py +435 -0
  58. sota-implementations/grpo/grpo_utils.py +843 -0
  59. sota-implementations/grpo/requirements_gsm8k.txt +11 -0
  60. sota-implementations/grpo/requirements_ifeval.txt +16 -0
  61. sota-implementations/impala/README.md +33 -0
  62. sota-implementations/impala/impala_multi_node_ray.py +292 -0
  63. sota-implementations/impala/impala_multi_node_submitit.py +284 -0
  64. sota-implementations/impala/impala_single_node.py +261 -0
  65. sota-implementations/impala/utils.py +184 -0
  66. sota-implementations/iql/discrete_iql.py +230 -0
  67. sota-implementations/iql/iql_offline.py +164 -0
  68. sota-implementations/iql/iql_online.py +225 -0
  69. sota-implementations/iql/utils.py +437 -0
  70. sota-implementations/multiagent/README.md +74 -0
  71. sota-implementations/multiagent/iql.py +237 -0
  72. sota-implementations/multiagent/maddpg_iddpg.py +266 -0
  73. sota-implementations/multiagent/mappo_ippo.py +267 -0
  74. sota-implementations/multiagent/qmix_vdn.py +271 -0
  75. sota-implementations/multiagent/sac.py +337 -0
  76. sota-implementations/multiagent/utils/__init__.py +4 -0
  77. sota-implementations/multiagent/utils/logging.py +151 -0
  78. sota-implementations/multiagent/utils/utils.py +43 -0
  79. sota-implementations/ppo/README.md +29 -0
  80. sota-implementations/ppo/ppo_atari.py +305 -0
  81. sota-implementations/ppo/ppo_mujoco.py +293 -0
  82. sota-implementations/ppo/utils_atari.py +238 -0
  83. sota-implementations/ppo/utils_mujoco.py +152 -0
  84. sota-implementations/ppo_trainer/train.py +21 -0
  85. sota-implementations/redq/README.md +7 -0
  86. sota-implementations/redq/redq.py +199 -0
  87. sota-implementations/redq/utils.py +1060 -0
  88. sota-implementations/sac/sac-async.py +266 -0
  89. sota-implementations/sac/sac.py +239 -0
  90. sota-implementations/sac/utils.py +381 -0
  91. sota-implementations/sac_trainer/train.py +16 -0
  92. sota-implementations/td3/td3.py +254 -0
  93. sota-implementations/td3/utils.py +319 -0
  94. sota-implementations/td3_bc/td3_bc.py +177 -0
  95. sota-implementations/td3_bc/utils.py +251 -0
  96. torchrl/__init__.py +144 -0
  97. torchrl/_extension.py +74 -0
  98. torchrl/_torchrl.cpython-314-aarch64-linux-gnu.so +0 -0
  99. torchrl/_utils.py +1431 -0
  100. torchrl/collectors/__init__.py +48 -0
  101. torchrl/collectors/_base.py +1058 -0
  102. torchrl/collectors/_constants.py +88 -0
  103. torchrl/collectors/_multi_async.py +324 -0
  104. torchrl/collectors/_multi_base.py +1805 -0
  105. torchrl/collectors/_multi_sync.py +464 -0
  106. torchrl/collectors/_runner.py +581 -0
  107. torchrl/collectors/_single.py +2009 -0
  108. torchrl/collectors/_single_async.py +259 -0
  109. torchrl/collectors/collectors.py +62 -0
  110. torchrl/collectors/distributed/__init__.py +32 -0
  111. torchrl/collectors/distributed/default_configs.py +133 -0
  112. torchrl/collectors/distributed/generic.py +1306 -0
  113. torchrl/collectors/distributed/ray.py +1092 -0
  114. torchrl/collectors/distributed/rpc.py +1006 -0
  115. torchrl/collectors/distributed/sync.py +731 -0
  116. torchrl/collectors/distributed/utils.py +160 -0
  117. torchrl/collectors/llm/__init__.py +10 -0
  118. torchrl/collectors/llm/base.py +494 -0
  119. torchrl/collectors/llm/ray_collector.py +275 -0
  120. torchrl/collectors/llm/utils.py +36 -0
  121. torchrl/collectors/llm/weight_update/__init__.py +10 -0
  122. torchrl/collectors/llm/weight_update/vllm.py +348 -0
  123. torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
  124. torchrl/collectors/utils.py +433 -0
  125. torchrl/collectors/weight_update.py +591 -0
  126. torchrl/csrc/numpy_utils.h +38 -0
  127. torchrl/csrc/pybind.cpp +27 -0
  128. torchrl/csrc/segment_tree.h +458 -0
  129. torchrl/csrc/torch_utils.h +34 -0
  130. torchrl/csrc/utils.cpp +48 -0
  131. torchrl/csrc/utils.h +31 -0
  132. torchrl/data/__init__.py +187 -0
  133. torchrl/data/datasets/__init__.py +58 -0
  134. torchrl/data/datasets/atari_dqn.py +878 -0
  135. torchrl/data/datasets/common.py +281 -0
  136. torchrl/data/datasets/d4rl.py +489 -0
  137. torchrl/data/datasets/d4rl_infos.py +187 -0
  138. torchrl/data/datasets/gen_dgrl.py +375 -0
  139. torchrl/data/datasets/minari_data.py +643 -0
  140. torchrl/data/datasets/openml.py +177 -0
  141. torchrl/data/datasets/openx.py +798 -0
  142. torchrl/data/datasets/roboset.py +363 -0
  143. torchrl/data/datasets/utils.py +11 -0
  144. torchrl/data/datasets/vd4rl.py +432 -0
  145. torchrl/data/llm/__init__.py +34 -0
  146. torchrl/data/llm/dataset.py +491 -0
  147. torchrl/data/llm/history.py +1378 -0
  148. torchrl/data/llm/prompt.py +198 -0
  149. torchrl/data/llm/reward.py +225 -0
  150. torchrl/data/llm/topk.py +186 -0
  151. torchrl/data/llm/utils.py +543 -0
  152. torchrl/data/map/__init__.py +21 -0
  153. torchrl/data/map/hash.py +185 -0
  154. torchrl/data/map/query.py +204 -0
  155. torchrl/data/map/tdstorage.py +363 -0
  156. torchrl/data/map/tree.py +1434 -0
  157. torchrl/data/map/utils.py +103 -0
  158. torchrl/data/postprocs/__init__.py +8 -0
  159. torchrl/data/postprocs/postprocs.py +391 -0
  160. torchrl/data/replay_buffers/__init__.py +99 -0
  161. torchrl/data/replay_buffers/checkpointers.py +622 -0
  162. torchrl/data/replay_buffers/ray_buffer.py +292 -0
  163. torchrl/data/replay_buffers/replay_buffers.py +2376 -0
  164. torchrl/data/replay_buffers/samplers.py +2578 -0
  165. torchrl/data/replay_buffers/scheduler.py +265 -0
  166. torchrl/data/replay_buffers/storages.py +2412 -0
  167. torchrl/data/replay_buffers/utils.py +1042 -0
  168. torchrl/data/replay_buffers/writers.py +781 -0
  169. torchrl/data/tensor_specs.py +7101 -0
  170. torchrl/data/utils.py +334 -0
  171. torchrl/envs/__init__.py +265 -0
  172. torchrl/envs/async_envs.py +1105 -0
  173. torchrl/envs/batched_envs.py +3093 -0
  174. torchrl/envs/common.py +4241 -0
  175. torchrl/envs/custom/__init__.py +11 -0
  176. torchrl/envs/custom/chess.py +617 -0
  177. torchrl/envs/custom/llm.py +214 -0
  178. torchrl/envs/custom/pendulum.py +401 -0
  179. torchrl/envs/custom/san_moves.txt +29274 -0
  180. torchrl/envs/custom/tictactoeenv.py +288 -0
  181. torchrl/envs/env_creator.py +263 -0
  182. torchrl/envs/gym_like.py +752 -0
  183. torchrl/envs/libs/__init__.py +68 -0
  184. torchrl/envs/libs/_gym_utils.py +326 -0
  185. torchrl/envs/libs/brax.py +846 -0
  186. torchrl/envs/libs/dm_control.py +544 -0
  187. torchrl/envs/libs/envpool.py +447 -0
  188. torchrl/envs/libs/gym.py +2239 -0
  189. torchrl/envs/libs/habitat.py +138 -0
  190. torchrl/envs/libs/isaac_lab.py +87 -0
  191. torchrl/envs/libs/isaacgym.py +203 -0
  192. torchrl/envs/libs/jax_utils.py +166 -0
  193. torchrl/envs/libs/jumanji.py +963 -0
  194. torchrl/envs/libs/meltingpot.py +599 -0
  195. torchrl/envs/libs/openml.py +153 -0
  196. torchrl/envs/libs/openspiel.py +652 -0
  197. torchrl/envs/libs/pettingzoo.py +1042 -0
  198. torchrl/envs/libs/procgen.py +351 -0
  199. torchrl/envs/libs/robohive.py +429 -0
  200. torchrl/envs/libs/smacv2.py +645 -0
  201. torchrl/envs/libs/unity_mlagents.py +891 -0
  202. torchrl/envs/libs/utils.py +147 -0
  203. torchrl/envs/libs/vmas.py +813 -0
  204. torchrl/envs/llm/__init__.py +63 -0
  205. torchrl/envs/llm/chat.py +730 -0
  206. torchrl/envs/llm/datasets/README.md +4 -0
  207. torchrl/envs/llm/datasets/__init__.py +17 -0
  208. torchrl/envs/llm/datasets/gsm8k.py +353 -0
  209. torchrl/envs/llm/datasets/ifeval.py +274 -0
  210. torchrl/envs/llm/envs.py +789 -0
  211. torchrl/envs/llm/libs/README.md +3 -0
  212. torchrl/envs/llm/libs/__init__.py +8 -0
  213. torchrl/envs/llm/libs/mlgym.py +869 -0
  214. torchrl/envs/llm/reward/__init__.py +10 -0
  215. torchrl/envs/llm/reward/gsm8k.py +324 -0
  216. torchrl/envs/llm/reward/ifeval/README.md +13 -0
  217. torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
  218. torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
  219. torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
  220. torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
  221. torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
  222. torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
  223. torchrl/envs/llm/transforms/__init__.py +55 -0
  224. torchrl/envs/llm/transforms/browser.py +292 -0
  225. torchrl/envs/llm/transforms/dataloading.py +859 -0
  226. torchrl/envs/llm/transforms/format.py +73 -0
  227. torchrl/envs/llm/transforms/kl.py +1544 -0
  228. torchrl/envs/llm/transforms/policy_version.py +189 -0
  229. torchrl/envs/llm/transforms/reason.py +323 -0
  230. torchrl/envs/llm/transforms/tokenizer.py +321 -0
  231. torchrl/envs/llm/transforms/tools.py +1955 -0
  232. torchrl/envs/model_based/__init__.py +9 -0
  233. torchrl/envs/model_based/common.py +180 -0
  234. torchrl/envs/model_based/dreamer.py +112 -0
  235. torchrl/envs/transforms/__init__.py +147 -0
  236. torchrl/envs/transforms/functional.py +48 -0
  237. torchrl/envs/transforms/gym_transforms.py +203 -0
  238. torchrl/envs/transforms/module.py +341 -0
  239. torchrl/envs/transforms/r3m.py +372 -0
  240. torchrl/envs/transforms/ray_service.py +663 -0
  241. torchrl/envs/transforms/rb_transforms.py +214 -0
  242. torchrl/envs/transforms/transforms.py +11835 -0
  243. torchrl/envs/transforms/utils.py +94 -0
  244. torchrl/envs/transforms/vc1.py +307 -0
  245. torchrl/envs/transforms/vecnorm.py +845 -0
  246. torchrl/envs/transforms/vip.py +407 -0
  247. torchrl/envs/utils.py +1718 -0
  248. torchrl/envs/vec_envs.py +11 -0
  249. torchrl/modules/__init__.py +206 -0
  250. torchrl/modules/distributions/__init__.py +73 -0
  251. torchrl/modules/distributions/continuous.py +830 -0
  252. torchrl/modules/distributions/discrete.py +908 -0
  253. torchrl/modules/distributions/truncated_normal.py +187 -0
  254. torchrl/modules/distributions/utils.py +233 -0
  255. torchrl/modules/llm/__init__.py +62 -0
  256. torchrl/modules/llm/backends/__init__.py +65 -0
  257. torchrl/modules/llm/backends/vllm/__init__.py +94 -0
  258. torchrl/modules/llm/backends/vllm/_models.py +46 -0
  259. torchrl/modules/llm/backends/vllm/base.py +72 -0
  260. torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
  261. torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
  262. torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
  263. torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
  264. torchrl/modules/llm/policies/__init__.py +28 -0
  265. torchrl/modules/llm/policies/common.py +1809 -0
  266. torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
  267. torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
  268. torchrl/modules/llm/utils.py +23 -0
  269. torchrl/modules/mcts/__init__.py +21 -0
  270. torchrl/modules/mcts/scores.py +579 -0
  271. torchrl/modules/models/__init__.py +86 -0
  272. torchrl/modules/models/batchrenorm.py +119 -0
  273. torchrl/modules/models/decision_transformer.py +179 -0
  274. torchrl/modules/models/exploration.py +731 -0
  275. torchrl/modules/models/llm.py +156 -0
  276. torchrl/modules/models/model_based.py +596 -0
  277. torchrl/modules/models/models.py +1712 -0
  278. torchrl/modules/models/multiagent.py +1067 -0
  279. torchrl/modules/models/recipes/impala.py +185 -0
  280. torchrl/modules/models/utils.py +162 -0
  281. torchrl/modules/planners/__init__.py +10 -0
  282. torchrl/modules/planners/cem.py +228 -0
  283. torchrl/modules/planners/common.py +73 -0
  284. torchrl/modules/planners/mppi.py +265 -0
  285. torchrl/modules/tensordict_module/__init__.py +89 -0
  286. torchrl/modules/tensordict_module/actors.py +2457 -0
  287. torchrl/modules/tensordict_module/common.py +529 -0
  288. torchrl/modules/tensordict_module/exploration.py +814 -0
  289. torchrl/modules/tensordict_module/probabilistic.py +321 -0
  290. torchrl/modules/tensordict_module/rnn.py +1639 -0
  291. torchrl/modules/tensordict_module/sequence.py +132 -0
  292. torchrl/modules/tensordict_module/world_models.py +34 -0
  293. torchrl/modules/utils/__init__.py +38 -0
  294. torchrl/modules/utils/mappings.py +9 -0
  295. torchrl/modules/utils/utils.py +89 -0
  296. torchrl/objectives/__init__.py +78 -0
  297. torchrl/objectives/a2c.py +659 -0
  298. torchrl/objectives/common.py +753 -0
  299. torchrl/objectives/cql.py +1346 -0
  300. torchrl/objectives/crossq.py +710 -0
  301. torchrl/objectives/ddpg.py +453 -0
  302. torchrl/objectives/decision_transformer.py +371 -0
  303. torchrl/objectives/deprecated.py +516 -0
  304. torchrl/objectives/dqn.py +683 -0
  305. torchrl/objectives/dreamer.py +488 -0
  306. torchrl/objectives/functional.py +48 -0
  307. torchrl/objectives/gail.py +258 -0
  308. torchrl/objectives/iql.py +996 -0
  309. torchrl/objectives/llm/__init__.py +30 -0
  310. torchrl/objectives/llm/grpo.py +846 -0
  311. torchrl/objectives/llm/sft.py +482 -0
  312. torchrl/objectives/multiagent/__init__.py +8 -0
  313. torchrl/objectives/multiagent/qmixer.py +396 -0
  314. torchrl/objectives/ppo.py +1669 -0
  315. torchrl/objectives/redq.py +683 -0
  316. torchrl/objectives/reinforce.py +530 -0
  317. torchrl/objectives/sac.py +1580 -0
  318. torchrl/objectives/td3.py +570 -0
  319. torchrl/objectives/td3_bc.py +625 -0
  320. torchrl/objectives/utils.py +782 -0
  321. torchrl/objectives/value/__init__.py +28 -0
  322. torchrl/objectives/value/advantages.py +1956 -0
  323. torchrl/objectives/value/functional.py +1459 -0
  324. torchrl/objectives/value/utils.py +360 -0
  325. torchrl/record/__init__.py +17 -0
  326. torchrl/record/loggers/__init__.py +23 -0
  327. torchrl/record/loggers/common.py +48 -0
  328. torchrl/record/loggers/csv.py +226 -0
  329. torchrl/record/loggers/mlflow.py +142 -0
  330. torchrl/record/loggers/tensorboard.py +139 -0
  331. torchrl/record/loggers/trackio.py +163 -0
  332. torchrl/record/loggers/utils.py +78 -0
  333. torchrl/record/loggers/wandb.py +214 -0
  334. torchrl/record/recorder.py +554 -0
  335. torchrl/services/__init__.py +79 -0
  336. torchrl/services/base.py +109 -0
  337. torchrl/services/ray_service.py +453 -0
  338. torchrl/testing/__init__.py +107 -0
  339. torchrl/testing/assertions.py +179 -0
  340. torchrl/testing/dist_utils.py +122 -0
  341. torchrl/testing/env_creators.py +227 -0
  342. torchrl/testing/env_helper.py +35 -0
  343. torchrl/testing/gym_helpers.py +156 -0
  344. torchrl/testing/llm_mocks.py +119 -0
  345. torchrl/testing/mocking_classes.py +2720 -0
  346. torchrl/testing/modules.py +295 -0
  347. torchrl/testing/mp_helpers.py +15 -0
  348. torchrl/testing/ray_helpers.py +293 -0
  349. torchrl/testing/utils.py +190 -0
  350. torchrl/trainers/__init__.py +42 -0
  351. torchrl/trainers/algorithms/__init__.py +11 -0
  352. torchrl/trainers/algorithms/configs/__init__.py +705 -0
  353. torchrl/trainers/algorithms/configs/collectors.py +216 -0
  354. torchrl/trainers/algorithms/configs/common.py +41 -0
  355. torchrl/trainers/algorithms/configs/data.py +308 -0
  356. torchrl/trainers/algorithms/configs/envs.py +104 -0
  357. torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
  358. torchrl/trainers/algorithms/configs/logging.py +80 -0
  359. torchrl/trainers/algorithms/configs/modules.py +570 -0
  360. torchrl/trainers/algorithms/configs/objectives.py +177 -0
  361. torchrl/trainers/algorithms/configs/trainers.py +340 -0
  362. torchrl/trainers/algorithms/configs/transforms.py +955 -0
  363. torchrl/trainers/algorithms/configs/utils.py +252 -0
  364. torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
  365. torchrl/trainers/algorithms/configs/weight_update.py +159 -0
  366. torchrl/trainers/algorithms/ppo.py +373 -0
  367. torchrl/trainers/algorithms/sac.py +308 -0
  368. torchrl/trainers/helpers/__init__.py +40 -0
  369. torchrl/trainers/helpers/collectors.py +416 -0
  370. torchrl/trainers/helpers/envs.py +573 -0
  371. torchrl/trainers/helpers/logger.py +33 -0
  372. torchrl/trainers/helpers/losses.py +132 -0
  373. torchrl/trainers/helpers/models.py +658 -0
  374. torchrl/trainers/helpers/replay_buffer.py +59 -0
  375. torchrl/trainers/helpers/trainers.py +301 -0
  376. torchrl/trainers/trainers.py +2052 -0
  377. torchrl/weight_update/__init__.py +33 -0
  378. torchrl/weight_update/_distributed.py +749 -0
  379. torchrl/weight_update/_mp.py +624 -0
  380. torchrl/weight_update/_noupdate.py +102 -0
  381. torchrl/weight_update/_ray.py +1032 -0
  382. torchrl/weight_update/_rpc.py +284 -0
  383. torchrl/weight_update/_shared.py +891 -0
  384. torchrl/weight_update/llm/__init__.py +32 -0
  385. torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
  386. torchrl/weight_update/llm/vllm_nccl.py +710 -0
  387. torchrl/weight_update/utils.py +73 -0
  388. torchrl/weight_update/weight_sync_schemes.py +1244 -0
  389. torchrl-0.11.0.dist-info/METADATA +1308 -0
  390. torchrl-0.11.0.dist-info/RECORD +394 -0
  391. torchrl-0.11.0.dist-info/WHEEL +5 -0
  392. torchrl-0.11.0.dist-info/entry_points.txt +2 -0
  393. torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
  394. torchrl-0.11.0.dist-info/top_level.txt +7 -0
torchrl/data/map/utils.py
@@ -0,0 +1,103 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+ from __future__ import annotations
6
+
7
+ from collections.abc import Callable
8
+
9
+ from tensordict import NestedKey
10
+
11
+
12
+ def _plot_plotly_tree(
13
+ tree: Tree, make_labels: Callable[[Tree], str] | None = None # noqa: F821
14
+ ):
15
+ import plotly.graph_objects as go
16
+ from igraph import Graph
17
+
18
+ if make_labels is None:
19
+
20
+ def make_labels(tree, path, *args, **kwargs):
21
+ return str((tree.node_id, tree.hash))
22
+
23
+ nr_vertices = tree.num_vertices()
24
+ vertices = tree.vertices(key_type="path")
25
+
26
+ v_label = [make_labels(subtree, path) for path, subtree in vertices.items()]
27
+ G = Graph(nr_vertices, tree.edges())
28
+
29
+ layout = G.layout_sugiyama(range(nr_vertices))
30
+
31
+ position = {k: layout[k] for k in range(nr_vertices)}
32
+ # Y = [layout[k][1] for k in range(nr_vertices)]
33
+ # M = max(Y)
34
+
35
+ # es = EdgeSeq(G) # sequence of edges
36
+ E = [e.tuple for e in G.es] # list of edges
37
+
38
+ L = len(position)
39
+ Xn = [position[k][0] for k in range(L)]
40
+ # Yn = [2 * M - position[k][1] for k in range(L)]
41
+ Yn = [position[k][1] for k in range(L)]
42
+ Xe = []
43
+ Ye = []
44
+ for edge in E:
45
+ Xe += [position[edge[0]][0], position[edge[1]][0], None]
46
+ # Ye += [2 * M - position[edge[0]][1], 2 * M - position[edge[1]][1], None]
47
+ Ye += [position[edge[0]][1], position[edge[1]][1], None]
48
+
49
+ labels = v_label
50
+ fig = go.Figure()
51
+ fig.add_trace(
52
+ go.Scatter(
53
+ x=Xe,
54
+ y=Ye,
55
+ mode="lines",
56
+ line={"color": "rgb(210,210,210)", "width": 5},
57
+ hoverinfo="none",
58
+ )
59
+ )
60
+ fig.add_trace(
61
+ go.Scatter(
62
+ x=Xn,
63
+ y=Yn,
64
+ mode="markers+text",
65
+ name="bla",
66
+ marker={
67
+ "symbol": "circle-dot",
68
+ "size": 40,
69
+ "color": "#6175c1", # '#DB4551',
70
+ "line": {"color": "rgb(50,50,50)", "width": 1},
71
+ },
72
+ text=labels,
73
+ hoverinfo="text",
74
+ textposition="middle right",
75
+ opacity=0.8,
76
+ )
77
+ )
78
+ fig.show()
79
+
80
+
81
+ def _plot_plotly_box(tree: Tree, info: list[NestedKey] = None): # noqa: F821
82
+ import plotly.graph_objects as go
83
+
84
+ if info is None:
85
+ info = ["hash", ("next", "reward")]
86
+
87
+ parents = [""]
88
+ labels = [tree._label(info, tree, root=True)]
89
+
90
+ _tree = tree
91
+
92
+ def extend(tree: Tree, parent): # noqa: F821
93
+ children = tree.subtree
94
+ if children is None:
95
+ return
96
+ for child in children:
97
+ labels.append(tree._label(info, child))
98
+ parents.append(parent)
99
+ extend(child, labels[-1])
100
+
101
+ extend(_tree, labels[-1])
102
+ fig = go.Figure(go.Treemap(labels=labels, parents=parents))
103
+ fig.show()
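The helper above draws the tree by pairing python-igraph's Sugiyama layout with two plotly scatter traces, one for the edge segments and one for the labelled nodes. The toy sketch below reproduces that rendering pattern on a hand-built five-node tree; it does not touch any torchrl object and only assumes that plotly and python-igraph are installed.

# Illustrative reproduction of the edge/node rendering used in _plot_plotly_tree.
import plotly.graph_objects as go
from igraph import Graph

edges = [(0, 1), (0, 2), (1, 3), (1, 4)]  # parent -> child pairs
n_vertices = 5
G = Graph(n_vertices, edges)
layout = G.layout_sugiyama()  # layered layout suited to trees/DAGs
pos = {k: layout[k] for k in range(n_vertices)}

# Edge coordinates are interleaved with None so plotly draws disjoint segments.
Xe, Ye = [], []
for a, b in edges:
    Xe += [pos[a][0], pos[b][0], None]
    Ye += [pos[a][1], pos[b][1], None]

fig = go.Figure()
fig.add_trace(go.Scatter(x=Xe, y=Ye, mode="lines", hoverinfo="none"))
fig.add_trace(
    go.Scatter(
        x=[pos[k][0] for k in range(n_vertices)],
        y=[pos[k][1] for k in range(n_vertices)],
        mode="markers+text",
        text=[f"node {k}" for k in range(n_vertices)],
        textposition="middle right",
        hoverinfo="text",
    )
)
fig.show()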
torchrl/data/postprocs/__init__.py
@@ -0,0 +1,8 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ from .postprocs import DensifyReward, MultiStep
7
+
8
+ __all__ = ["MultiStep", "DensifyReward"]
torchrl/data/postprocs/postprocs.py
@@ -0,0 +1,391 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ from __future__ import annotations
7
+
8
+ import torch
9
+ from tensordict import NestedKey, TensorDictBase, unravel_key
10
+ from tensordict.nn import TensorDictModuleBase
11
+ from tensordict.utils import expand_right
12
+ from torch import nn
13
+
14
+
15
+ def _get_reward(
16
+ gamma: float,
17
+ reward: torch.Tensor,
18
+ done: torch.Tensor,
19
+ max_steps: int,
20
+ ):
21
+ """Sums the rewards up to max_steps in the future with a gamma decay.
22
+
23
+ Supports multiple consecutive trajectories.
24
+
25
+ Assumes that the time dimension is the *last* dim of reward and done.
26
+ """
27
+ filt = torch.tensor(
28
+ [gamma**i for i in range(max_steps + 1)],
29
+ device=reward.device,
30
+ dtype=reward.dtype,
31
+ ).view(1, 1, -1)
32
+ # make one done mask per trajectory
33
+ done_cumsum = done.cumsum(-1)
34
+ done_cumsum = torch.cat(
35
+ [torch.zeros_like(done_cumsum[..., :1]), done_cumsum[..., :-1]], -1
36
+ )
37
+ num_traj = done_cumsum.max().item() + 1
38
+ done_cumsum = done_cumsum.expand(num_traj, *done.shape)
39
+ traj_ids = done_cumsum == torch.arange(
40
+ num_traj, device=done.device, dtype=done_cumsum.dtype
41
+ ).view(num_traj, *[1 for _ in range(done_cumsum.ndim - 1)])
42
+ # an expanded reward tensor where each index along dim 0 is a different trajectory
43
+ # Note: rewards could have a different shape than done (e.g. multi-agent with a single
44
+ # done per group).
45
+ # we assume that reward has the same leading dimension as done.
46
+ if reward.shape != traj_ids.shape[1:]:
47
+ # We'll expand the ids on the right first
48
+ traj_ids_expand = expand_right(traj_ids, (num_traj, *reward.shape))
49
+ reward_traj = traj_ids_expand * reward
50
+ # we must make sure that the last dimension of the reward is the time
51
+ reward_traj = reward_traj.transpose(-1, traj_ids.ndim - 1)
52
+ else:
53
+ # simpler use case: reward shape and traj_ids match
54
+ reward_traj = traj_ids * reward
55
+
56
+ reward_traj = torch.nn.functional.pad(reward_traj, [0, max_steps], value=0.0)
57
+ shape = reward_traj.shape[:-1]
58
+ if len(shape) > 1:
59
+ reward_traj = reward_traj.flatten(0, reward_traj.ndim - 2)
60
+ reward_traj = reward_traj.unsqueeze(-2)
61
+ summed_rewards = torch.conv1d(reward_traj, filt)
62
+ summed_rewards = summed_rewards.squeeze(-2)
63
+ if len(shape) > 1:
64
+ summed_rewards = summed_rewards.unflatten(0, shape)
65
+ # let's check that our summed rewards have the right size
66
+ if reward.shape != traj_ids.shape[1:]:
67
+ summed_rewards = summed_rewards.transpose(-1, traj_ids.ndim - 1)
68
+ summed_rewards = (summed_rewards * traj_ids_expand).sum(0)
69
+ else:
70
+ summed_rewards = (summed_rewards * traj_ids).sum(0)
71
+
72
+ # time_to_obs is the tensor of the time delta to the next obs
73
+ # 0 = take the next obs (ie do nothing)
74
+ # 1 = take the obs after the next
75
+ time_to_obs = (
76
+ traj_ids.flip(-1).cumsum(-1).clamp_max(max_steps + 1).flip(-1) * traj_ids
77
+ )
78
+ time_to_obs = time_to_obs.sum(0)
79
+ time_to_obs = time_to_obs - 1
80
+ return summed_rewards, time_to_obs
81
+
82
+
83
+ class MultiStep(nn.Module):
84
+ """Multistep reward transform.
85
+
86
+ Presented in
87
+
88
+ | Sutton, R. S. 1988. Learning to predict by the methods of temporal differences. Machine learning 3(1):9–44.
89
+
90
+ This module maps the "next" observation to the t + n "next" observation.
91
+ It is an identity transform whenever :attr:`n_steps` is 1.
92
+
93
+ Args:
94
+ gamma (:obj:`float`): Discount factor for return computation
95
+ n_steps (integer): maximum look-ahead steps.
96
+
97
+ .. note:: This class is meant to be used within a ``DataCollector``.
98
+ It will only treat the data passed to it at the end of a collection,
99
+ and ignore data preceding that collection or coming in the next batch.
100
+ As such, results on the last steps of the batch may likely be biased
101
+ by the early truncation of the trajectory.
102
+ To mitigate this effect, please use :class:`~torchrl.envs.transforms.MultiStepTransform`
103
+ within the replay buffer instead.
104
+
105
+ Examples:
106
+ >>> from torchrl.modules import RandomPolicy
+ >>> from torchrl.collectors import Collector
107
+ >>> from torchrl.data.postprocs import MultiStep
108
+ >>> from torchrl.envs import GymEnv, TransformedEnv, StepCounter
109
+ >>> env = TransformedEnv(GymEnv("CartPole-v1"), StepCounter())
110
+ >>> env.set_seed(0)
111
+ >>> collector = Collector(env, policy=RandomPolicy(env.action_spec),
112
+ ... frames_per_batch=10, total_frames=2000, postproc=MultiStep(n_steps=4, gamma=0.99))
113
+ >>> for data in collector:
114
+ ... break
115
+ >>> print(data["step_count"])
116
+ tensor([[0],
117
+ [1],
118
+ [2],
119
+ [3],
120
+ [4],
121
+ [5],
122
+ [6],
123
+ [7],
124
+ [8],
125
+ [9]])
126
+ >>> # the next step count is shifted by 4 steps in the future
127
+ >>> print(data["next", "step_count"])
128
+ tensor([[ 5],
129
+ [ 6],
130
+ [ 7],
131
+ [ 8],
132
+ [ 9],
133
+ [10],
134
+ [10],
135
+ [10],
136
+ [10],
137
+ [10]])
138
+
139
+ """
140
+
141
+ def __init__(
142
+ self,
143
+ gamma: float,
144
+ n_steps: int,
145
+ ):
146
+ super().__init__()
147
+ if n_steps <= 0:
148
+ raise ValueError("n_steps must be a non-negative integer.")
149
+ if not (gamma > 0 and gamma <= 1):
150
+ raise ValueError(f"got out-of-bounds gamma decay: gamma={gamma}")
151
+
152
+ self.gamma = gamma
153
+ self.n_steps = n_steps
154
+ self.register_buffer(
155
+ "gammas",
156
+ torch.tensor(
157
+ [gamma**i for i in range(n_steps + 1)],
158
+ dtype=torch.float,
159
+ ).reshape(1, 1, -1),
160
+ )
161
+ self.done_key = "done"
162
+ self.done_keys = ("done", "terminated", "truncated")
163
+ self.reward_keys = ("reward",)
164
+ self.mask_key = ("collector", "mask")
165
+
166
+ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
167
+ """Re-writes a tensordict following the multi-step transform.
168
+
169
+ Args:
170
+ tensordict: :class:`tensordict.TensorDictBase` instance with
171
+ ``[*Batch x Time-steps]`` shape.
172
+ The TensorDict must contain ``("next", "reward")`` and
173
+ ``("next", "done")`` keys.
174
+ All keys that are contained within the "next" nested tensordict
175
+ will be shifted by (at most) :attr:`~.n_steps` frames.
176
+ The TensorDict will also be updated with new key-value pairs:
177
+
178
+ - gamma: indicating the discount to be used for the next
179
+ reward;
180
+ - nonterminal: boolean value indicating whether a step is
181
+ non-terminal (not done or not last of trajectory);
182
+ - original_reward: previous reward collected in the
183
+ environment (i.e. before multi-step);
184
+ - The "reward" values will be replaced by the newly computed
185
+ rewards.
186
+
187
+ The ``"done"`` key can have either the shape of the tensordict
188
+ OR the shape of the tensordict followed by a singleton
189
+ dimension OR the shape of the tensordict followed by other
190
+ dimensions. In the latter case, the tensordict *must* be
191
+ compatible with a reshape that follows the done shape (ie. the
192
+ leading dimensions of every tensor it contains must match the
193
+ shape of the ``"done"`` entry).
194
+ The ``"reward"`` tensor can have either the shape of the
195
+ tensordict (or done state) or this shape followed by a singleton
196
+ dimension.
197
+
198
+ Returns:
199
+ in-place transformation of the input tensordict.
200
+
201
+ """
202
+ return _multi_step_func(
203
+ tensordict,
204
+ done_key=self.done_key,
205
+ done_keys=self.done_keys,
206
+ reward_keys=self.reward_keys,
207
+ mask_key=self.mask_key,
208
+ n_steps=self.n_steps,
209
+ gamma=self.gamma,
210
+ )
211
+
212
+
213
+ def _multi_step_func(
214
+ tensordict,
215
+ *,
216
+ done_key,
217
+ done_keys,
218
+ reward_keys,
219
+ mask_key,
220
+ n_steps,
221
+ gamma,
222
+ ):
223
+ # in accordance with common understanding of what n_steps should be
224
+ n_steps = n_steps - 1
225
+ tensordict = tensordict.clone(False)
226
+ done = tensordict.get(("next", done_key))
227
+
228
+ # we'll be using the done states to index the tensordict.
229
+ # if the shapes don't match we're in trouble.
230
+ ndim = tensordict.ndim
231
+ if done.shape != tensordict.shape:
232
+ if done.shape[-1] == 1 and done.shape[:-1] == tensordict.shape:
233
+ done = done.squeeze(-1)
234
+ else:
235
+ try:
236
+ # let's try to reshape the tensordict
237
+ tensordict.batch_size = done.shape
238
+ tensordict = tensordict.transpose(ndim - 1, tensordict.ndim - 1)
239
+ done = tensordict.get(("next", done_key))
240
+ except Exception as err:
241
+ raise RuntimeError(
242
+ "tensordict shape must be compatible with the done's shape "
243
+ "(trailing singleton dimension excluded)."
244
+ ) from err
245
+
246
+ if mask_key is not None:
247
+ mask = tensordict.get(mask_key, None)
248
+ else:
249
+ mask = None
250
+
251
+ *batch, T = tensordict.batch_size
252
+
253
+ summed_rewards = []
254
+ for reward_key in reward_keys:
255
+ reward = tensordict.get(("next", reward_key))
256
+
257
+ # sum rewards
258
+ summed_reward, time_to_obs = _get_reward(gamma, reward, done, n_steps)
259
+ summed_rewards.append(summed_reward)
260
+
261
+ idx_to_gather = torch.arange(
262
+ T, device=time_to_obs.device, dtype=time_to_obs.dtype
263
+ ).expand(*batch, T)
264
+ idx_to_gather = idx_to_gather + time_to_obs
265
+
266
+ # idx_to_gather looks like tensor([[ 2, 3, 4, 5, 5, 5, 8, 9, 10, 10, 10]])
267
+ # with a done state tensor([[ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1]])
268
+ # meaning that the first obs will be replaced by the third, the second by the fourth etc.
269
+ # The fifth remains the fifth as it is terminal
270
+ tensordict_gather = (
271
+ tensordict.get("next")
272
+ .exclude(*reward_keys, *done_keys)
273
+ .gather(-1, idx_to_gather)
274
+ )
275
+
276
+ tensordict.set("steps_to_next_obs", time_to_obs + 1)
277
+ for reward_key, summed_reward in zip(reward_keys, summed_rewards):
278
+ tensordict.rename_key_(("next", reward_key), ("next", "original_reward"))
279
+ tensordict.set(("next", reward_key), summed_reward)
280
+
281
+ tensordict.get("next").update(tensordict_gather)
282
+ tensordict.set("gamma", gamma ** (time_to_obs + 1))
283
+ nonterminal = time_to_obs != 0
284
+ if mask is not None:
285
+ mask = mask.view(*batch, T)
286
+ nonterminal[~mask] = False
287
+ tensordict.set("nonterminal", nonterminal)
288
+ if tensordict.ndim != ndim:
289
+ tensordict = tensordict.apply(
290
+ lambda x: x.transpose(ndim - 1, tensordict.ndim - 1),
291
+ batch_size=done.transpose(ndim - 1, tensordict.ndim - 1).shape,
292
+ )
293
+ tensordict.batch_size = tensordict.batch_size[:ndim]
294
+ return tensordict
295
+
296
+
297
+ class DensifyReward(TensorDictModuleBase):
298
+ """A util to reassign the reward at done state to the rest of the trajectory.
299
+
300
+ This transform is to be used with sparse rewards to assign a reward to each step of a trajectory when only the
301
+ reward at `done` is non-null.
302
+
303
+ .. note:: The class calls the :func:`~torchrl.objectives.value.functional.reward2go` function, which will
304
+ also sum intermediate rewards. Make sure you understand what the `reward2go` function returns before using
305
+ this module.
306
+
307
+ Args:
308
+ reward_key (NestedKey, optional): The key in the input TensorDict where the reward is stored.
309
+ Defaults to `"reward"`.
310
+ done_key (NestedKey, optional): The key in the input TensorDict where the done flag is stored.
311
+ Defaults to `"done"`.
312
+ reward_key_out (NestedKey | None, optional): The key in the output TensorDict where the reassigned reward
313
+ will be stored. If None, it defaults to the value of `reward_key`.
314
+ Defaults to `None`.
315
+ time_dim (int, optional): The dimension in the input TensorDict where the time is unrolled.
316
+ Defaults to `2`.
317
+ discount (float, optional): The discount factor to use for computing the discounted cumulative sum of rewards.
318
+ Defaults to `1.0` (no discounting).
319
+
320
+ Returns:
321
+ TensorDict: The input TensorDict with the reassigned reward stored under the key specified by `reward_key_out`.
322
+
323
+ Examples:
324
+ >>> import torch
325
+ >>> from tensordict import TensorDict
326
+ >>>
327
+ >>> from torchrl.data import DensifyReward
328
+ >>>
329
+ >>> # Create a sample TensorDict
330
+ >>> tensordict = TensorDict({
331
+ ... "next": {
332
+ ... "reward": torch.zeros(10, 1),
333
+ ... "done": torch.zeros(10, 1, dtype=torch.bool)
334
+ ... }
335
+ ... }, batch_size=[10])
336
+ >>> # Set some done flags and rewards
337
+ >>> tensordict["next", "done"][[3, 7]] = True
338
+ >>> tensordict["next", "reward"][3] = 3
339
+ >>> tensordict["next", "reward"][7] = 7
340
+ >>> # Create an instance of DensifyReward
341
+ >>> last_reward_to_traj = DensifyReward()
342
+ >>> # Apply the transform
343
+ >>> new_tensordict = last_reward_to_traj(tensordict)
344
+ >>> # Print the reassigned rewards
345
+ >>> print(new_tensordict["next", "reward"])
346
+ tensor([[3.],
347
+ [3.],
348
+ [3.],
349
+ [3.],
350
+ [7.],
351
+ [7.],
352
+ [7.],
353
+ [7.],
354
+ [0.],
355
+ [0.]])
356
+
357
+ """
358
+
359
+ def __init__(
360
+ self,
361
+ *,
362
+ reward_key: NestedKey = "reward",
363
+ done_key: NestedKey = "done",
364
+ reward_key_out: NestedKey | None = None,
365
+ time_dim: int = 2,
366
+ discount: float = 1.0,
367
+ ):
368
+ from torchrl.objectives.value.functional import reward2go
369
+
370
+ super().__init__()
371
+ self.in_keys = [unravel_key(reward_key), unravel_key(done_key)]
372
+ if reward_key_out is None:
373
+ reward_key_out = reward_key
374
+ self.out_keys = [unravel_key(reward_key_out)]
375
+ self.time_dim = time_dim
376
+ self.discount = discount
377
+ self.reward2go = reward2go
378
+
379
+ def forward(self, tensordict):
380
+ # Get done
381
+ done = tensordict.get(("next", self.in_keys[1]))
382
+ # Get reward
383
+ reward = tensordict.get(("next", self.in_keys[0]))
384
+ if reward.shape != done.shape:
385
+ raise RuntimeError(
386
+ f"reward and done state are expected to have the same shape. Got reard.shape={reward.shape} "
387
+ f"and done.shape={done.shape}."
388
+ )
389
+ reward = self.reward2go(reward, done, time_dim=-2, gamma=self.discount)
390
+ tensordict.set(("next", self.out_keys[0]), reward)
391
+ return tensordict
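The heart of _get_reward above is the 1-D convolution of the reward sequence with the filter [1, gamma, gamma**2, ...], which produces the discounted sum of up to max_steps + 1 future rewards at every time step; the traj_ids bookkeeping merely restricts that sum to a single trajectory. A minimal standalone check of the convolution trick, with toy numbers that are not taken from this package, could look as follows.

# Verify the conv1d n-step return trick from _get_reward on a toy trajectory.
import torch

gamma, max_steps, T = 0.9, 2, 4
reward = torch.tensor([[1.0, 0.0, 2.0, 1.0]])  # shape (1, T)
filt = torch.tensor([gamma**i for i in range(max_steps + 1)]).view(1, 1, -1)

padded = torch.nn.functional.pad(reward, [0, max_steps]).unsqueeze(-2)  # (1, 1, T + max_steps)
nstep = torch.conv1d(padded, filt).squeeze(-2)  # (1, T)

# Reference: explicit discounted sum over the same look-ahead window.
ref = torch.stack(
    [
        sum(gamma**i * reward[0, t + i] for i in range(max_steps + 1) if t + i < T)
        for t in range(T)
    ]
)
assert torch.allclose(nstep[0], ref)
print(nstep)  # discounted sums of up to max_steps + 1 future rewards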
torchrl/data/replay_buffers/__init__.py
@@ -0,0 +1,99 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ from .checkpointers import (
7
+ CompressedListStorageCheckpointer,
8
+ FlatStorageCheckpointer,
9
+ H5StorageCheckpointer,
10
+ ListStorageCheckpointer,
11
+ NestedStorageCheckpointer,
12
+ StorageCheckpointerBase,
13
+ StorageEnsembleCheckpointer,
14
+ TensorStorageCheckpointer,
15
+ )
16
+ from .ray_buffer import RayReplayBuffer
17
+ from .replay_buffers import (
18
+ PrioritizedReplayBuffer,
19
+ RemoteTensorDictReplayBuffer,
20
+ ReplayBuffer,
21
+ ReplayBufferEnsemble,
22
+ TensorDictPrioritizedReplayBuffer,
23
+ TensorDictReplayBuffer,
24
+ )
25
+ from .samplers import (
26
+ PrioritizedSampler,
27
+ PrioritizedSliceSampler,
28
+ RandomSampler,
29
+ Sampler,
30
+ SamplerEnsemble,
31
+ SamplerWithoutReplacement,
32
+ SliceSampler,
33
+ SliceSamplerWithoutReplacement,
34
+ )
35
+ from .storages import (
36
+ CompressedListStorage,
37
+ LazyMemmapStorage,
38
+ LazyStackStorage,
39
+ LazyTensorStorage,
40
+ ListStorage,
41
+ Storage,
42
+ StorageEnsemble,
43
+ TensorStorage,
44
+ )
45
+ from .utils import Flat2TED, H5Combine, H5Split, Nested2TED, TED2Flat, TED2Nested
46
+ from .writers import (
47
+ ImmutableDatasetWriter,
48
+ RoundRobinWriter,
49
+ TensorDictMaxValueWriter,
50
+ TensorDictRoundRobinWriter,
51
+ Writer,
52
+ WriterEnsemble,
53
+ )
54
+
55
+ __all__ = [
56
+ "CompressedListStorage",
57
+ "CompressedListStorageCheckpointer",
58
+ "FlatStorageCheckpointer",
59
+ "H5StorageCheckpointer",
60
+ "ListStorageCheckpointer",
61
+ "NestedStorageCheckpointer",
62
+ "StorageCheckpointerBase",
63
+ "StorageEnsembleCheckpointer",
64
+ "TensorStorageCheckpointer",
65
+ "RayReplayBuffer",
66
+ "PrioritizedReplayBuffer",
67
+ "RemoteTensorDictReplayBuffer",
68
+ "ReplayBuffer",
69
+ "ReplayBufferEnsemble",
70
+ "TensorDictPrioritizedReplayBuffer",
71
+ "TensorDictReplayBuffer",
72
+ "PrioritizedSampler",
73
+ "PrioritizedSliceSampler",
74
+ "RandomSampler",
75
+ "Sampler",
76
+ "SamplerEnsemble",
77
+ "SamplerWithoutReplacement",
78
+ "SliceSampler",
79
+ "SliceSamplerWithoutReplacement",
80
+ "LazyMemmapStorage",
81
+ "LazyStackStorage",
82
+ "LazyTensorStorage",
83
+ "ListStorage",
84
+ "Storage",
85
+ "StorageEnsemble",
86
+ "TensorStorage",
87
+ "Flat2TED",
88
+ "H5Combine",
89
+ "H5Split",
90
+ "Nested2TED",
91
+ "TED2Flat",
92
+ "TED2Nested",
93
+ "ImmutableDatasetWriter",
94
+ "RoundRobinWriter",
95
+ "TensorDictMaxValueWriter",
96
+ "TensorDictRoundRobinWriter",
97
+ "Writer",
98
+ "WriterEnsemble",
99
+ ]
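The classes re-exported here are designed to compose: a storage holds the data, a sampler picks indices, a writer decides where new items land, and the buffer ties them together. The snippet below is a sketch of the usual pairing of a TensorDict-aware buffer with a lazily allocated storage and uniform sampling; it reflects typical torchrl usage rather than anything specific to this diff.

# Sketch: replay buffer assembled from the classes re-exported above.
import torch
from tensordict import TensorDict
from torchrl.data.replay_buffers import (
    LazyTensorStorage,
    RandomSampler,
    TensorDictReplayBuffer,
)

rb = TensorDictReplayBuffer(
    storage=LazyTensorStorage(max_size=1_000),  # tensors allocated on first extend
    sampler=RandomSampler(),
    batch_size=32,
)
data = TensorDict(
    {"observation": torch.randn(100, 4), "reward": torch.randn(100, 1)},
    batch_size=[100],
)
rb.extend(data)  # writes 100 transitions into the storage
sample = rb.sample()  # TensorDict with batch_size [32]
print(sample["observation"].shape)  # torch.Size([32, 4])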