PyPI - torchrl - Versions diffs - 0.11.0__cp314-cp314t-macosx_11_0_arm64.whl - Mend

torchrl 0.11.0__cp314-cp314t-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (395) hide show

benchmarks/benchmark_batched_envs.py +104 -0
benchmarks/conftest.py +91 -0
benchmarks/ecosystem/gym_env_throughput.py +321 -0
benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
benchmarks/requirements.txt +7 -0
benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
benchmarks/test_collectors_benchmark.py +240 -0
benchmarks/test_compressed_storage_benchmark.py +145 -0
benchmarks/test_envs_benchmark.py +133 -0
benchmarks/test_llm.py +101 -0
benchmarks/test_non_tensor_env_benchmark.py +70 -0
benchmarks/test_objectives_benchmarks.py +1199 -0
benchmarks/test_replaybuffer_benchmark.py +254 -0
sota-check/README.md +35 -0
sota-implementations/README.md +142 -0
sota-implementations/a2c/README.md +39 -0
sota-implementations/a2c/a2c_atari.py +291 -0
sota-implementations/a2c/a2c_mujoco.py +273 -0
sota-implementations/a2c/utils_atari.py +240 -0
sota-implementations/a2c/utils_mujoco.py +160 -0
sota-implementations/bandits/README.md +7 -0
sota-implementations/bandits/dqn.py +126 -0
sota-implementations/cql/cql_offline.py +198 -0
sota-implementations/cql/cql_online.py +249 -0
sota-implementations/cql/discrete_cql_offline.py +180 -0
sota-implementations/cql/discrete_cql_online.py +227 -0
sota-implementations/cql/utils.py +471 -0
sota-implementations/crossq/crossq.py +271 -0
sota-implementations/crossq/utils.py +320 -0
sota-implementations/ddpg/ddpg.py +231 -0
sota-implementations/ddpg/utils.py +325 -0
sota-implementations/decision_transformer/dt.py +163 -0
sota-implementations/decision_transformer/lamb.py +167 -0
sota-implementations/decision_transformer/online_dt.py +178 -0
sota-implementations/decision_transformer/utils.py +562 -0
sota-implementations/discrete_sac/discrete_sac.py +243 -0
sota-implementations/discrete_sac/utils.py +324 -0
sota-implementations/dqn/README.md +30 -0
sota-implementations/dqn/dqn_atari.py +272 -0
sota-implementations/dqn/dqn_cartpole.py +236 -0
sota-implementations/dqn/utils_atari.py +132 -0
sota-implementations/dqn/utils_cartpole.py +90 -0
sota-implementations/dreamer/README.md +129 -0
sota-implementations/dreamer/dreamer.py +586 -0
sota-implementations/dreamer/dreamer_utils.py +1107 -0
sota-implementations/expert-iteration/README.md +352 -0
sota-implementations/expert-iteration/ei_utils.py +770 -0
sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
sota-implementations/gail/gail.py +327 -0
sota-implementations/gail/gail_utils.py +68 -0
sota-implementations/gail/ppo_utils.py +157 -0
sota-implementations/grpo/README.md +273 -0
sota-implementations/grpo/grpo-async.py +437 -0
sota-implementations/grpo/grpo-sync.py +435 -0
sota-implementations/grpo/grpo_utils.py +843 -0
sota-implementations/grpo/requirements_gsm8k.txt +11 -0
sota-implementations/grpo/requirements_ifeval.txt +16 -0
sota-implementations/impala/README.md +33 -0
sota-implementations/impala/impala_multi_node_ray.py +292 -0
sota-implementations/impala/impala_multi_node_submitit.py +284 -0
sota-implementations/impala/impala_single_node.py +261 -0
sota-implementations/impala/utils.py +184 -0
sota-implementations/iql/discrete_iql.py +230 -0
sota-implementations/iql/iql_offline.py +164 -0
sota-implementations/iql/iql_online.py +225 -0
sota-implementations/iql/utils.py +437 -0
sota-implementations/multiagent/README.md +74 -0
sota-implementations/multiagent/iql.py +237 -0
sota-implementations/multiagent/maddpg_iddpg.py +266 -0
sota-implementations/multiagent/mappo_ippo.py +267 -0
sota-implementations/multiagent/qmix_vdn.py +271 -0
sota-implementations/multiagent/sac.py +337 -0
sota-implementations/multiagent/utils/__init__.py +4 -0
sota-implementations/multiagent/utils/logging.py +151 -0
sota-implementations/multiagent/utils/utils.py +43 -0
sota-implementations/ppo/README.md +29 -0
sota-implementations/ppo/ppo_atari.py +305 -0
sota-implementations/ppo/ppo_mujoco.py +293 -0
sota-implementations/ppo/utils_atari.py +238 -0
sota-implementations/ppo/utils_mujoco.py +152 -0
sota-implementations/ppo_trainer/train.py +21 -0
sota-implementations/redq/README.md +7 -0
sota-implementations/redq/redq.py +199 -0
sota-implementations/redq/utils.py +1060 -0
sota-implementations/sac/sac-async.py +266 -0
sota-implementations/sac/sac.py +239 -0
sota-implementations/sac/utils.py +381 -0
sota-implementations/sac_trainer/train.py +16 -0
sota-implementations/td3/td3.py +254 -0
sota-implementations/td3/utils.py +319 -0
sota-implementations/td3_bc/td3_bc.py +177 -0
sota-implementations/td3_bc/utils.py +251 -0
torchrl/.dylibs/libc++.1.0.dylib +0 -0
torchrl/__init__.py +144 -0
torchrl/_extension.py +74 -0
torchrl/_torchrl.cpython-314t-darwin.so +0 -0
torchrl/_utils.py +1431 -0
torchrl/collectors/__init__.py +48 -0
torchrl/collectors/_base.py +1058 -0
torchrl/collectors/_constants.py +88 -0
torchrl/collectors/_multi_async.py +324 -0
torchrl/collectors/_multi_base.py +1805 -0
torchrl/collectors/_multi_sync.py +464 -0
torchrl/collectors/_runner.py +581 -0
torchrl/collectors/_single.py +2009 -0
torchrl/collectors/_single_async.py +259 -0
torchrl/collectors/collectors.py +62 -0
torchrl/collectors/distributed/__init__.py +32 -0
torchrl/collectors/distributed/default_configs.py +133 -0
torchrl/collectors/distributed/generic.py +1306 -0
torchrl/collectors/distributed/ray.py +1092 -0
torchrl/collectors/distributed/rpc.py +1006 -0
torchrl/collectors/distributed/sync.py +731 -0
torchrl/collectors/distributed/utils.py +160 -0
torchrl/collectors/llm/__init__.py +10 -0
torchrl/collectors/llm/base.py +494 -0
torchrl/collectors/llm/ray_collector.py +275 -0
torchrl/collectors/llm/utils.py +36 -0
torchrl/collectors/llm/weight_update/__init__.py +10 -0
torchrl/collectors/llm/weight_update/vllm.py +348 -0
torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
torchrl/collectors/utils.py +433 -0
torchrl/collectors/weight_update.py +591 -0
torchrl/csrc/numpy_utils.h +38 -0
torchrl/csrc/pybind.cpp +27 -0
torchrl/csrc/segment_tree.h +458 -0
torchrl/csrc/torch_utils.h +34 -0
torchrl/csrc/utils.cpp +48 -0
torchrl/csrc/utils.h +31 -0
torchrl/data/__init__.py +187 -0
torchrl/data/datasets/__init__.py +58 -0
torchrl/data/datasets/atari_dqn.py +878 -0
torchrl/data/datasets/common.py +281 -0
torchrl/data/datasets/d4rl.py +489 -0
torchrl/data/datasets/d4rl_infos.py +187 -0
torchrl/data/datasets/gen_dgrl.py +375 -0
torchrl/data/datasets/minari_data.py +643 -0
torchrl/data/datasets/openml.py +177 -0
torchrl/data/datasets/openx.py +798 -0
torchrl/data/datasets/roboset.py +363 -0
torchrl/data/datasets/utils.py +11 -0
torchrl/data/datasets/vd4rl.py +432 -0
torchrl/data/llm/__init__.py +34 -0
torchrl/data/llm/dataset.py +491 -0
torchrl/data/llm/history.py +1378 -0
torchrl/data/llm/prompt.py +198 -0
torchrl/data/llm/reward.py +225 -0
torchrl/data/llm/topk.py +186 -0
torchrl/data/llm/utils.py +543 -0
torchrl/data/map/__init__.py +21 -0
torchrl/data/map/hash.py +185 -0
torchrl/data/map/query.py +204 -0
torchrl/data/map/tdstorage.py +363 -0
torchrl/data/map/tree.py +1434 -0
torchrl/data/map/utils.py +103 -0
torchrl/data/postprocs/__init__.py +8 -0
torchrl/data/postprocs/postprocs.py +391 -0
torchrl/data/replay_buffers/__init__.py +99 -0
torchrl/data/replay_buffers/checkpointers.py +622 -0
torchrl/data/replay_buffers/ray_buffer.py +292 -0
torchrl/data/replay_buffers/replay_buffers.py +2376 -0
torchrl/data/replay_buffers/samplers.py +2578 -0
torchrl/data/replay_buffers/scheduler.py +265 -0
torchrl/data/replay_buffers/storages.py +2412 -0
torchrl/data/replay_buffers/utils.py +1042 -0
torchrl/data/replay_buffers/writers.py +781 -0
torchrl/data/tensor_specs.py +7101 -0
torchrl/data/utils.py +334 -0
torchrl/envs/__init__.py +265 -0
torchrl/envs/async_envs.py +1105 -0
torchrl/envs/batched_envs.py +3093 -0
torchrl/envs/common.py +4241 -0
torchrl/envs/custom/__init__.py +11 -0
torchrl/envs/custom/chess.py +617 -0
torchrl/envs/custom/llm.py +214 -0
torchrl/envs/custom/pendulum.py +401 -0
torchrl/envs/custom/san_moves.txt +29274 -0
torchrl/envs/custom/tictactoeenv.py +288 -0
torchrl/envs/env_creator.py +263 -0
torchrl/envs/gym_like.py +752 -0
torchrl/envs/libs/__init__.py +68 -0
torchrl/envs/libs/_gym_utils.py +326 -0
torchrl/envs/libs/brax.py +846 -0
torchrl/envs/libs/dm_control.py +544 -0
torchrl/envs/libs/envpool.py +447 -0
torchrl/envs/libs/gym.py +2239 -0
torchrl/envs/libs/habitat.py +138 -0
torchrl/envs/libs/isaac_lab.py +87 -0
torchrl/envs/libs/isaacgym.py +203 -0
torchrl/envs/libs/jax_utils.py +166 -0
torchrl/envs/libs/jumanji.py +963 -0
torchrl/envs/libs/meltingpot.py +599 -0
torchrl/envs/libs/openml.py +153 -0
torchrl/envs/libs/openspiel.py +652 -0
torchrl/envs/libs/pettingzoo.py +1042 -0
torchrl/envs/libs/procgen.py +351 -0
torchrl/envs/libs/robohive.py +429 -0
torchrl/envs/libs/smacv2.py +645 -0
torchrl/envs/libs/unity_mlagents.py +891 -0
torchrl/envs/libs/utils.py +147 -0
torchrl/envs/libs/vmas.py +813 -0
torchrl/envs/llm/__init__.py +63 -0
torchrl/envs/llm/chat.py +730 -0
torchrl/envs/llm/datasets/README.md +4 -0
torchrl/envs/llm/datasets/__init__.py +17 -0
torchrl/envs/llm/datasets/gsm8k.py +353 -0
torchrl/envs/llm/datasets/ifeval.py +274 -0
torchrl/envs/llm/envs.py +789 -0
torchrl/envs/llm/libs/README.md +3 -0
torchrl/envs/llm/libs/__init__.py +8 -0
torchrl/envs/llm/libs/mlgym.py +869 -0
torchrl/envs/llm/reward/__init__.py +10 -0
torchrl/envs/llm/reward/gsm8k.py +324 -0
torchrl/envs/llm/reward/ifeval/README.md +13 -0
torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
torchrl/envs/llm/transforms/__init__.py +55 -0
torchrl/envs/llm/transforms/browser.py +292 -0
torchrl/envs/llm/transforms/dataloading.py +859 -0
torchrl/envs/llm/transforms/format.py +73 -0
torchrl/envs/llm/transforms/kl.py +1544 -0
torchrl/envs/llm/transforms/policy_version.py +189 -0
torchrl/envs/llm/transforms/reason.py +323 -0
torchrl/envs/llm/transforms/tokenizer.py +321 -0
torchrl/envs/llm/transforms/tools.py +1955 -0
torchrl/envs/model_based/__init__.py +9 -0
torchrl/envs/model_based/common.py +180 -0
torchrl/envs/model_based/dreamer.py +112 -0
torchrl/envs/transforms/__init__.py +147 -0
torchrl/envs/transforms/functional.py +48 -0
torchrl/envs/transforms/gym_transforms.py +203 -0
torchrl/envs/transforms/module.py +341 -0
torchrl/envs/transforms/r3m.py +372 -0
torchrl/envs/transforms/ray_service.py +663 -0
torchrl/envs/transforms/rb_transforms.py +214 -0
torchrl/envs/transforms/transforms.py +11835 -0
torchrl/envs/transforms/utils.py +94 -0
torchrl/envs/transforms/vc1.py +307 -0
torchrl/envs/transforms/vecnorm.py +845 -0
torchrl/envs/transforms/vip.py +407 -0
torchrl/envs/utils.py +1718 -0
torchrl/envs/vec_envs.py +11 -0
torchrl/modules/__init__.py +206 -0
torchrl/modules/distributions/__init__.py +73 -0
torchrl/modules/distributions/continuous.py +830 -0
torchrl/modules/distributions/discrete.py +908 -0
torchrl/modules/distributions/truncated_normal.py +187 -0
torchrl/modules/distributions/utils.py +233 -0
torchrl/modules/llm/__init__.py +62 -0
torchrl/modules/llm/backends/__init__.py +65 -0
torchrl/modules/llm/backends/vllm/__init__.py +94 -0
torchrl/modules/llm/backends/vllm/_models.py +46 -0
torchrl/modules/llm/backends/vllm/base.py +72 -0
torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
torchrl/modules/llm/policies/__init__.py +28 -0
torchrl/modules/llm/policies/common.py +1809 -0
torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
torchrl/modules/llm/utils.py +23 -0
torchrl/modules/mcts/__init__.py +21 -0
torchrl/modules/mcts/scores.py +579 -0
torchrl/modules/models/__init__.py +86 -0
torchrl/modules/models/batchrenorm.py +119 -0
torchrl/modules/models/decision_transformer.py +179 -0
torchrl/modules/models/exploration.py +731 -0
torchrl/modules/models/llm.py +156 -0
torchrl/modules/models/model_based.py +596 -0
torchrl/modules/models/models.py +1712 -0
torchrl/modules/models/multiagent.py +1067 -0
torchrl/modules/models/recipes/impala.py +185 -0
torchrl/modules/models/utils.py +162 -0
torchrl/modules/planners/__init__.py +10 -0
torchrl/modules/planners/cem.py +228 -0
torchrl/modules/planners/common.py +73 -0
torchrl/modules/planners/mppi.py +265 -0
torchrl/modules/tensordict_module/__init__.py +89 -0
torchrl/modules/tensordict_module/actors.py +2457 -0
torchrl/modules/tensordict_module/common.py +529 -0
torchrl/modules/tensordict_module/exploration.py +814 -0
torchrl/modules/tensordict_module/probabilistic.py +321 -0
torchrl/modules/tensordict_module/rnn.py +1639 -0
torchrl/modules/tensordict_module/sequence.py +132 -0
torchrl/modules/tensordict_module/world_models.py +34 -0
torchrl/modules/utils/__init__.py +38 -0
torchrl/modules/utils/mappings.py +9 -0
torchrl/modules/utils/utils.py +89 -0
torchrl/objectives/__init__.py +78 -0
torchrl/objectives/a2c.py +659 -0
torchrl/objectives/common.py +753 -0
torchrl/objectives/cql.py +1346 -0
torchrl/objectives/crossq.py +710 -0
torchrl/objectives/ddpg.py +453 -0
torchrl/objectives/decision_transformer.py +371 -0
torchrl/objectives/deprecated.py +516 -0
torchrl/objectives/dqn.py +683 -0
torchrl/objectives/dreamer.py +488 -0
torchrl/objectives/functional.py +48 -0
torchrl/objectives/gail.py +258 -0
torchrl/objectives/iql.py +996 -0
torchrl/objectives/llm/__init__.py +30 -0
torchrl/objectives/llm/grpo.py +846 -0
torchrl/objectives/llm/sft.py +482 -0
torchrl/objectives/multiagent/__init__.py +8 -0
torchrl/objectives/multiagent/qmixer.py +396 -0
torchrl/objectives/ppo.py +1669 -0
torchrl/objectives/redq.py +683 -0
torchrl/objectives/reinforce.py +530 -0
torchrl/objectives/sac.py +1580 -0
torchrl/objectives/td3.py +570 -0
torchrl/objectives/td3_bc.py +625 -0
torchrl/objectives/utils.py +782 -0
torchrl/objectives/value/__init__.py +28 -0
torchrl/objectives/value/advantages.py +1956 -0
torchrl/objectives/value/functional.py +1459 -0
torchrl/objectives/value/utils.py +360 -0
torchrl/record/__init__.py +17 -0
torchrl/record/loggers/__init__.py +23 -0
torchrl/record/loggers/common.py +48 -0
torchrl/record/loggers/csv.py +226 -0
torchrl/record/loggers/mlflow.py +142 -0
torchrl/record/loggers/tensorboard.py +139 -0
torchrl/record/loggers/trackio.py +163 -0
torchrl/record/loggers/utils.py +78 -0
torchrl/record/loggers/wandb.py +214 -0
torchrl/record/recorder.py +554 -0
torchrl/services/__init__.py +79 -0
torchrl/services/base.py +109 -0
torchrl/services/ray_service.py +453 -0
torchrl/testing/__init__.py +107 -0
torchrl/testing/assertions.py +179 -0
torchrl/testing/dist_utils.py +122 -0
torchrl/testing/env_creators.py +227 -0
torchrl/testing/env_helper.py +35 -0
torchrl/testing/gym_helpers.py +156 -0
torchrl/testing/llm_mocks.py +119 -0
torchrl/testing/mocking_classes.py +2720 -0
torchrl/testing/modules.py +295 -0
torchrl/testing/mp_helpers.py +15 -0
torchrl/testing/ray_helpers.py +293 -0
torchrl/testing/utils.py +190 -0
torchrl/trainers/__init__.py +42 -0
torchrl/trainers/algorithms/__init__.py +11 -0
torchrl/trainers/algorithms/configs/__init__.py +705 -0
torchrl/trainers/algorithms/configs/collectors.py +216 -0
torchrl/trainers/algorithms/configs/common.py +41 -0
torchrl/trainers/algorithms/configs/data.py +308 -0
torchrl/trainers/algorithms/configs/envs.py +104 -0
torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
torchrl/trainers/algorithms/configs/logging.py +80 -0
torchrl/trainers/algorithms/configs/modules.py +570 -0
torchrl/trainers/algorithms/configs/objectives.py +177 -0
torchrl/trainers/algorithms/configs/trainers.py +340 -0
torchrl/trainers/algorithms/configs/transforms.py +955 -0
torchrl/trainers/algorithms/configs/utils.py +252 -0
torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
torchrl/trainers/algorithms/configs/weight_update.py +159 -0
torchrl/trainers/algorithms/ppo.py +373 -0
torchrl/trainers/algorithms/sac.py +308 -0
torchrl/trainers/helpers/__init__.py +40 -0
torchrl/trainers/helpers/collectors.py +416 -0
torchrl/trainers/helpers/envs.py +573 -0
torchrl/trainers/helpers/logger.py +33 -0
torchrl/trainers/helpers/losses.py +132 -0
torchrl/trainers/helpers/models.py +658 -0
torchrl/trainers/helpers/replay_buffer.py +59 -0
torchrl/trainers/helpers/trainers.py +301 -0
torchrl/trainers/trainers.py +2052 -0
torchrl/weight_update/__init__.py +33 -0
torchrl/weight_update/_distributed.py +749 -0
torchrl/weight_update/_mp.py +624 -0
torchrl/weight_update/_noupdate.py +102 -0
torchrl/weight_update/_ray.py +1032 -0
torchrl/weight_update/_rpc.py +284 -0
torchrl/weight_update/_shared.py +891 -0
torchrl/weight_update/llm/__init__.py +32 -0
torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
torchrl/weight_update/llm/vllm_nccl.py +710 -0
torchrl/weight_update/utils.py +73 -0
torchrl/weight_update/weight_sync_schemes.py +1244 -0
torchrl-0.11.0.dist-info/METADATA +1308 -0
torchrl-0.11.0.dist-info/RECORD +395 -0
torchrl-0.11.0.dist-info/WHEEL +5 -0
torchrl-0.11.0.dist-info/entry_points.txt +2 -0
torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
torchrl-0.11.0.dist-info/top_level.txt +7 -0

benchmarks/test_replaybuffer_benchmark.py ADDED Viewed

@@ -0,0 +1,254 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+import argparse
+import functools
+import pytest
+import torch
+from tensordict import TensorDict
+from torchrl.data import (
+    LazyMemmapStorage,
+    LazyTensorStorage,
+    ListStorage,
+    ReplayBuffer,
+    TensorDictPrioritizedReplayBuffer,
+    TensorDictReplayBuffer,
+)
+from torchrl.data.replay_buffers import (
+    RandomSampler,
+    SamplerWithoutReplacement,
+    SliceSampler,
+)
+# Factory for prioritized buffers with the priority hyperparameters pinned
+# (alpha=1, beta=0.9), so it can be called like the plain buffer classes.
+_TensorDictPrioritizedReplayBuffer = functools.partial(
+    TensorDictPrioritizedReplayBuffer,
+    alpha=1,
+    beta=0.9,
+)
+# A partial object carries no ``__name__`` of its own; copy the class name onto
+# it so the partial is still reported under the wrapped class's name.
+_TensorDictPrioritizedReplayBuffer.__name__ = TensorDictPrioritizedReplayBuffer.__name__
+class create_rb:
+    """Callable setup factory that builds a replay buffer plus its payload.
+    Calling an instance returns an ``(args, kwargs)`` pair: ``((rb,), {})`` when
+    ``populated`` is true (the buffer has already been extended with the data),
+    otherwise ``((rb, data), {})`` with the buffer left empty.
+    """
+    def __init__(self, rb, storage, sampler, populated, size=1_000_000):
+        self.rb = rb
+        self.storage = storage
+        self.sampler = sampler
+        self.populated = populated
+        self.size = size
+    def __call__(self):
+        rb_kwargs = {"batch_size": 256}
+        # ``sampler`` and ``storage`` are factories (or None): instantiate them
+        # only when provided so the buffer falls back to its own defaults.
+        if self.sampler is not None:
+            rb_kwargs["sampler"] = self.sampler()
+        if self.storage is not None:
+            rb_kwargs["storage"] = self.storage(self.size)
+        buffer = self.rb(**rb_kwargs)
+        n_items = self.size
+        payload = TensorDict(
+            {
+                "a": torch.zeros(n_items, 5),
+                ("b", "c"): torch.zeros(n_items, 3, 32, 32, dtype=torch.uint8),
+            },
+            batch_size=[n_items],
+        )
+        # Slice samplers need a trajectory id per item: every run of 123
+        # consecutive items shares one id.
+        if isinstance(rb_kwargs.get("sampler"), SliceSampler):
+            payload["traj"] = torch.arange(n_items) // 123
+        if not self.populated:
+            return ((buffer, payload), {})
+        buffer.extend(payload)
+        return ((buffer,), {})
+def populate(rb, td):
+    """Benchmark target: write ``td`` into ``rb`` with a single ``extend`` call."""
+    rb.extend(td)
+def sample(rb):
+    """Benchmark target: draw one batch from ``rb`` and discard it."""
+    rb.sample()
+def iterate(rb):
+    """Benchmark target: advance the iterator ``rb`` by one item and discard it."""
+    next(rb)
+@pytest.mark.parametrize(
+    "rb,storage,sampler,size",
+    [
+        (TensorDictReplayBuffer, ListStorage, RandomSampler, 4000),
+        (TensorDictReplayBuffer, LazyMemmapStorage, RandomSampler, 10_000),
+        (TensorDictReplayBuffer, LazyTensorStorage, RandomSampler, 10_000),
+        (TensorDictReplayBuffer, ListStorage, SamplerWithoutReplacement, 4000),
+        (TensorDictReplayBuffer, LazyMemmapStorage, SamplerWithoutReplacement, 10_000),
+        (TensorDictReplayBuffer, LazyTensorStorage, SamplerWithoutReplacement, 10_000),
+        (
+            TensorDictReplayBuffer,
+            LazyMemmapStorage,
+            functools.partial(SliceSampler, num_slices=8, traj_key="traj"),
+            10_000,
+        ),
+        (
+            TensorDictReplayBuffer,
+            LazyTensorStorage,
+            functools.partial(SliceSampler, num_slices=8, traj_key="traj"),
+            10_000,
+        ),
+        (_TensorDictPrioritizedReplayBuffer, ListStorage, None, 4000),
+        (_TensorDictPrioritizedReplayBuffer, LazyMemmapStorage, None, 10_000),
+        (_TensorDictPrioritizedReplayBuffer, LazyTensorStorage, None, 10_000),
+    ],
+)
+def test_rb_sample(benchmark, rb, storage, sampler, size):
+    """Benchmark one ``sample()`` call on a buffer that is already populated."""
+    make_buffer = create_rb(
+        rb=rb,
+        storage=storage,
+        sampler=sampler,
+        populated=True,
+        size=size,
+    )
+    (buffer,), _ = make_buffer()
+    # Seed only after the buffer is built, right before the timed calls.
+    torch.manual_seed(0)
+    benchmark(sample, buffer)
+def infinite_iter(obj):
+    """Yield items from ``obj`` forever, re-iterating it each time it runs out.
+    The torch RNG is seeded with 0 once, when the generator is first advanced.
+    """
+    torch.manual_seed(0)
+    while True:
+        for item in obj:
+            yield item
+@pytest.mark.parametrize(
+    "rb,storage,sampler,size",
+    [
+        (TensorDictReplayBuffer, ListStorage, RandomSampler, 4000),
+        (TensorDictReplayBuffer, LazyMemmapStorage, RandomSampler, 10_000),
+        (TensorDictReplayBuffer, LazyTensorStorage, RandomSampler, 10_000),
+        (TensorDictReplayBuffer, ListStorage, SamplerWithoutReplacement, 4000),
+        (TensorDictReplayBuffer, LazyMemmapStorage, SamplerWithoutReplacement, 10_000),
+        (TensorDictReplayBuffer, LazyTensorStorage, SamplerWithoutReplacement, 10_000),
+        (_TensorDictPrioritizedReplayBuffer, ListStorage, None, 4000),
+        (_TensorDictPrioritizedReplayBuffer, LazyMemmapStorage, None, 10_000),
+        (_TensorDictPrioritizedReplayBuffer, LazyTensorStorage, None, 10_000),
+    ],
+)
+def test_rb_iterate(benchmark, rb, storage, sampler, size):
+    """Benchmark pulling one batch at a time by iterating a populated buffer."""
+    make_buffer = create_rb(
+        rb=rb,
+        storage=storage,
+        sampler=sampler,
+        populated=True,
+        size=size,
+    )
+    (buffer,), _ = make_buffer()
+    # Wrap the buffer in an endless generator so ``next`` never hits StopIteration
+    # no matter how many times the benchmark calls it.
+    endless_batches = infinite_iter(buffer)
+    benchmark(iterate, endless_batches)
+@pytest.mark.parametrize(
+    "rb,storage,sampler,size",
+    [
+        (TensorDictReplayBuffer, ListStorage, RandomSampler, 400),
+        (TensorDictReplayBuffer, LazyMemmapStorage, RandomSampler, 400),
+        (TensorDictReplayBuffer, LazyTensorStorage, RandomSampler, 400),
+        (TensorDictReplayBuffer, ListStorage, SamplerWithoutReplacement, 400),
+        (TensorDictReplayBuffer, LazyMemmapStorage, SamplerWithoutReplacement, 400),
+        (TensorDictReplayBuffer, LazyTensorStorage, SamplerWithoutReplacement, 400),
+        (_TensorDictPrioritizedReplayBuffer, ListStorage, None, 400),
+        (_TensorDictPrioritizedReplayBuffer, LazyMemmapStorage, None, 400),
+        (_TensorDictPrioritizedReplayBuffer, LazyTensorStorage, None, 400),
+    ],
+)
+def test_rb_populate(benchmark, rb, storage, sampler, size):
+    """Benchmark a single ``extend`` of ``size`` items into an empty buffer."""
+    # The setup callable hands ``(rb, data)`` to ``populate``; with
+    # ``populated=False`` the buffer it returns has not been extended yet.
+    fresh_buffer_setup = create_rb(
+        rb=rb,
+        storage=storage,
+        sampler=sampler,
+        populated=False,
+        size=size,
+    )
+    benchmark.pedantic(
+        populate,
+        setup=fresh_buffer_setup,
+        iterations=1,
+        rounds=50,
+    )
+class create_compiled_tensor_rb:
+    """Callable setup factory for the extend-and-sample benchmark.
+    Calling an instance builds a buffer with ``batch_size=3`` and returns
+    ``((rb, data, iters), {})``, where ``data`` is a ``(data_size, 1)`` tensor
+    drawn with ``torch.randn``. ``compilable`` is forwarded to both the storage
+    and the buffer constructors.
+    """
+    def __init__(
+        self, rb, storage, sampler, storage_size, data_size, iters, compilable=False
+    ):
+        self.rb = rb
+        self.storage = storage
+        self.sampler = sampler
+        self.storage_size = storage_size
+        self.data_size = data_size
+        self.iters = iters
+        self.compilable = compilable
+    def __call__(self):
+        rb_kwargs = {}
+        # ``sampler`` and ``storage`` are factories (or None): only build the
+        # ones that were supplied.
+        if self.sampler is not None:
+            rb_kwargs["sampler"] = self.sampler()
+        if self.storage is not None:
+            make_storage = self.storage
+            rb_kwargs["storage"] = make_storage(
+                self.storage_size, compilable=self.compilable
+            )
+        buffer = self.rb(batch_size=3, compilable=self.compilable, **rb_kwargs)
+        payload = torch.randn(self.data_size, 1)
+        return ((buffer, payload, self.iters), {})
+def extend_and_sample(rb, td, iters):
+    """Run ``iters`` rounds of ``rb.extend(td)`` followed by ``rb.sample()``."""
+    for _round in range(iters):
+        rb.extend(td)
+        rb.sample()
+def extend_and_sample_compiled(rb, td, iters):
+    """Run ``iters`` extend-then-sample rounds through a ``torch.compile`` wrapper.
+    The wrapper is created anew on every call of this function and closes over
+    ``rb``; only the data is passed in as an argument.
+    """
+    def _step(batch):
+        rb.extend(batch)
+        rb.sample()
+    compiled_step = torch.compile(_step)
+    for _round in range(iters):
+        compiled_step(td)
+@pytest.mark.parametrize(
+    "rb,storage,sampler,storage_size,data_size,iters,compiled",
+    [
+        (ReplayBuffer, LazyTensorStorage, RandomSampler, 10_000, 10_000, 100, True),
+        (ReplayBuffer, LazyTensorStorage, RandomSampler, 10_000, 10_000, 100, False),
+        (ReplayBuffer, LazyTensorStorage, RandomSampler, 100_000, 10_000, 100, True),
+        (ReplayBuffer, LazyTensorStorage, RandomSampler, 100_000, 10_000, 100, False),
+        (ReplayBuffer, LazyTensorStorage, RandomSampler, 1_000_000, 10_000, 100, True),
+        (ReplayBuffer, LazyTensorStorage, RandomSampler, 1_000_000, 10_000, 100, False),
+    ],
+)
+def test_rb_extend_sample(
+    benchmark, rb, storage, sampler, storage_size, data_size, iters, compiled
+):
+    """Benchmark repeated extend+sample rounds, eagerly or via ``torch.compile``."""
+    if compiled:
+        # Reset dynamo's code caches before the compiled variant is timed.
+        torch._dynamo.reset_code_caches()
+    target = extend_and_sample_compiled if compiled else extend_and_sample
+    buffer_setup = create_compiled_tensor_rb(
+        rb=rb,
+        storage=storage,
+        sampler=sampler,
+        storage_size=storage_size,
+        data_size=data_size,
+        iters=iters,
+        compilable=compiled,
+    )
+    benchmark.pedantic(
+        target,
+        setup=buffer_setup,
+        iterations=1,
+        warmup_rounds=10,
+        rounds=50,
+    )
+if __name__ == "__main__":
+    # Nothing is declared on the parser, so the command-line arguments end up
+    # in the "unknown" list, which is handed to pytest unchanged.
+    _, passthrough = argparse.ArgumentParser().parse_known_args()
+    pytest.main([__file__, "--capture", "no", "--exitfirst", *passthrough])

sota-check/README.md ADDED Viewed

@@ -0,0 +1,35 @@
+# SOTA Performance checks
+This folder contains a `submitit-release-check.sh` file that executes all
+the training scripts using `sbatch` with the default configuration and logs them
+to a common WandB project.
+This script is to be executed before every release to assess the performance of
+the various algorithms available in torchrl. The name of the project will include
+the specific commit of torchrl used to run the scripts (e.g. `torchrl-examples-check-<commit>`).
+## Usage
+To display the script usage, you can use the `--help` option:
+```bash
+./submitit-release-check.sh --help
+```
+## Setup
+The following setup should allow you to run the scripts:
+```bash
+export MUJOCO_GL=egl
+conda create -n rl-sota-bench python=3.10 -y
+conda activate rl-sota-bench
+conda install anaconda::libglu -y
+pip3 install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu121
+pip3 install "gymnasium[atari,mujoco]" vmas tqdm wandb pygame "moviepy<2.0.0" imageio submitit hydra-core transformers
+cd /path/to/tensordict
+python setup.py develop
+cd /path/to/torchrl
+python setup.py develop
+```

sota-implementations/README.md ADDED Viewed

@@ -0,0 +1,142 @@
+# Examples
+We provide examples to train the following algorithms:
+- [CQL](../sota-implementations/cql/)
+- [DDPG](ddpg/ddpg.py)
+- [DQN](../sota-implementations/dqn/)
+- [Decision Transformers](../sota-implementations/decision_transformer)
+- [Discrete SAC](discrete_sac/discrete_sac.py)
+- [Dreamer](../sota-implementations/dreamer)
+- [IQL](iql/)
+- [Impala](impala/)
+- [PPO](../sota-implementations/ppo/)
+- [REDQ](redq/redq.py)
+- [SAC](sac/sac.py)
+- [TD3](../sota-implementations/td3/td3.py)
+- [Various multiagent examples](multiagent/)
+To run these examples, make sure you have installed hydra:
+```
+pip install hydra-core
+```
+Scripts can be run from the directory of interest using:
+```
+python sac.py
+```
+or similar. Hyperparameters can be easily changed by providing the arguments to hydra:
+```
+python sac.py collector.frames_per_batch=63
+```
+[//]: # (# Results)
+[//]: # ()
+[//]: # (Here we can see some results for the SAC and REDQ algorithm.)
+[//]: # (We average the results over 5 different seeds and plot the standard error.)
+[//]: # (## Gym's HalfCheetah-v4)
+[//]: # ()
+[//]: # (<p align="center">)
+[//]: # (<img src="media/halfcheetah_chart.png" width="600px">)
+[//]: # (</p>)
+[//]: # (To reproduce a single run:)
+[//]: # ()
+[//]: # (```)
+[//]: # (python sac/sac.py env.name="HalfCheetah-v4" env.task="" env.library="gym")
+[//]: # (```)
+[//]: # ()
+[//]: # (``` )
+[//]: # (python redq/redq.py env.name="HalfCheetah-v4" env.library="gymnasium")
+[//]: # (```)
+[//]: # ()
+[//]: # ()
+[//]: # (## dm_control's cheetah-run)
+[//]: # ()
+[//]: # (<p align="center">)
+[//]: # (<img src="media/cheetah_chart.png" width="600px">)
+[//]: # (</p>)
+[//]: # (To reproduce a single run:)
+[//]: # ()
+[//]: # (```)
+[//]: # (python sac/sac.py env.name="cheetah" env.task="run" env.library="dm_control")
+[//]: # (```)
+[//]: # ()
+[//]: # (``` )
+[//]: # (python redq/redq.py env.name="cheetah" env.task="run" env.library="dm_control")
+[//]: # (```)
+[//]: # ()
+[//]: # ([//]: # &#40;TODO: adapt these scripts&#41;)
+[//]: # ([//]: # &#40;## Gym's Ant-v4&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;&#41;)
+[//]: # ([//]: # &#40;<p align="center">&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;<img src="media/ant_chart.png" width="600px">&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;</p>&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;To reproduce a single run:&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;&#41;)
+[//]: # ([//]: # &#40;```&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;python sac/sac.py env.name="Ant-v4" env.task="" env.library="gym"&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;```&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;&#41;)
+[//]: # ([//]: # &#40;``` &#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;python redq/redq.py env_name="Ant-v4" env_task="" env_library="gym"&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;```&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;&#41;)
+[//]: # ([//]: # &#40;## Gym's Walker2D-v4&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;&#41;)
+[//]: # ([//]: # &#40;<p align="center">&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;<img src="media/walker2d_chart.png" width="600px">&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;</p>&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;To reproduce a single run:&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;&#41;)
+[//]: # ([//]: # &#40;```&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;python sac/sac.py env_name="Walker2D-v4" env_task="" env_library="gym"&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;```&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;&#41;)
+[//]: # ([//]: # &#40;``` &#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;python redq/redq.py env_name="Walker2D-v4" env_task="" env_library="gym"&#41;)
+[//]: # ()
+[//]: # ([//]: # &#40;```&#41;)

sota-implementations/a2c/README.md ADDED Viewed

@@ -0,0 +1,39 @@
+## Reproducing Advantage Actor Critic (A2C) Algorithm Results
+This repository contains scripts that enable training agents using the Advantage Actor Critic (A2C) Algorithm on MuJoCo and Atari environments. We follow the original paper [Asynchronous Methods for Deep Reinforcement Learning](https://arxiv.org/abs/1602.01783) by Mnih et al. (2016) to implement the A2C algorithm but fix the number of steps during the collection phase.
+## Examples Structure
+Please note that, for the sake of simplicity, the examples are independent of one another. Each example contains the following files:
+1. **Main Script:** The definition of algorithm components and the training loop can be found in the main script (e.g. a2c_atari.py).
+2. **Utils File:** A utility file is provided to contain various helper functions, generally to create the environment and the models (e.g. utils_atari.py).
+3. **Configuration File:** This file includes default hyperparameters specified in the original paper. Users can modify these hyperparameters to customize their experiments (e.g. config_atari.yaml).
+## Running the Examples
+You can execute the A2C algorithm on Atari environments by running the following command:
+```bash
+python a2c_atari.py compile.compile=1 compile.cudagraphs=1
+```
+You can execute the A2C algorithm on MuJoCo environments by running the following command:
+```bash
+python a2c_mujoco.py compile.compile=1 compile.cudagraphs=1
+```
+## Runtimes
+Runtimes when executed on H100:
+| Environment | Eager     | Compile   | Compile+cudagraphs |
+|-------------|-----------|-----------|--------------------|
+| MUJOCO      | < 25 mins | < 23 mins | < 20 mins          |
+| ATARI       | < 85 mins | < 60 mins | < 45 mins          |