torchrl 0.11.0__cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395) hide show
  1. benchmarks/benchmark_batched_envs.py +104 -0
  2. benchmarks/conftest.py +91 -0
  3. benchmarks/ecosystem/gym_env_throughput.py +321 -0
  4. benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
  5. benchmarks/requirements.txt +7 -0
  6. benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
  7. benchmarks/test_collectors_benchmark.py +240 -0
  8. benchmarks/test_compressed_storage_benchmark.py +145 -0
  9. benchmarks/test_envs_benchmark.py +133 -0
  10. benchmarks/test_llm.py +101 -0
  11. benchmarks/test_non_tensor_env_benchmark.py +70 -0
  12. benchmarks/test_objectives_benchmarks.py +1199 -0
  13. benchmarks/test_replaybuffer_benchmark.py +254 -0
  14. sota-check/README.md +35 -0
  15. sota-implementations/README.md +142 -0
  16. sota-implementations/a2c/README.md +39 -0
  17. sota-implementations/a2c/a2c_atari.py +291 -0
  18. sota-implementations/a2c/a2c_mujoco.py +273 -0
  19. sota-implementations/a2c/utils_atari.py +240 -0
  20. sota-implementations/a2c/utils_mujoco.py +160 -0
  21. sota-implementations/bandits/README.md +7 -0
  22. sota-implementations/bandits/dqn.py +126 -0
  23. sota-implementations/cql/cql_offline.py +198 -0
  24. sota-implementations/cql/cql_online.py +249 -0
  25. sota-implementations/cql/discrete_cql_offline.py +180 -0
  26. sota-implementations/cql/discrete_cql_online.py +227 -0
  27. sota-implementations/cql/utils.py +471 -0
  28. sota-implementations/crossq/crossq.py +271 -0
  29. sota-implementations/crossq/utils.py +320 -0
  30. sota-implementations/ddpg/ddpg.py +231 -0
  31. sota-implementations/ddpg/utils.py +325 -0
  32. sota-implementations/decision_transformer/dt.py +163 -0
  33. sota-implementations/decision_transformer/lamb.py +167 -0
  34. sota-implementations/decision_transformer/online_dt.py +178 -0
  35. sota-implementations/decision_transformer/utils.py +562 -0
  36. sota-implementations/discrete_sac/discrete_sac.py +243 -0
  37. sota-implementations/discrete_sac/utils.py +324 -0
  38. sota-implementations/dqn/README.md +30 -0
  39. sota-implementations/dqn/dqn_atari.py +272 -0
  40. sota-implementations/dqn/dqn_cartpole.py +236 -0
  41. sota-implementations/dqn/utils_atari.py +132 -0
  42. sota-implementations/dqn/utils_cartpole.py +90 -0
  43. sota-implementations/dreamer/README.md +129 -0
  44. sota-implementations/dreamer/dreamer.py +586 -0
  45. sota-implementations/dreamer/dreamer_utils.py +1107 -0
  46. sota-implementations/expert-iteration/README.md +352 -0
  47. sota-implementations/expert-iteration/ei_utils.py +770 -0
  48. sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
  49. sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
  50. sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
  51. sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
  52. sota-implementations/gail/gail.py +327 -0
  53. sota-implementations/gail/gail_utils.py +68 -0
  54. sota-implementations/gail/ppo_utils.py +157 -0
  55. sota-implementations/grpo/README.md +273 -0
  56. sota-implementations/grpo/grpo-async.py +437 -0
  57. sota-implementations/grpo/grpo-sync.py +435 -0
  58. sota-implementations/grpo/grpo_utils.py +843 -0
  59. sota-implementations/grpo/requirements_gsm8k.txt +11 -0
  60. sota-implementations/grpo/requirements_ifeval.txt +16 -0
  61. sota-implementations/impala/README.md +33 -0
  62. sota-implementations/impala/impala_multi_node_ray.py +292 -0
  63. sota-implementations/impala/impala_multi_node_submitit.py +284 -0
  64. sota-implementations/impala/impala_single_node.py +261 -0
  65. sota-implementations/impala/utils.py +184 -0
  66. sota-implementations/iql/discrete_iql.py +230 -0
  67. sota-implementations/iql/iql_offline.py +164 -0
  68. sota-implementations/iql/iql_online.py +225 -0
  69. sota-implementations/iql/utils.py +437 -0
  70. sota-implementations/multiagent/README.md +74 -0
  71. sota-implementations/multiagent/iql.py +237 -0
  72. sota-implementations/multiagent/maddpg_iddpg.py +266 -0
  73. sota-implementations/multiagent/mappo_ippo.py +267 -0
  74. sota-implementations/multiagent/qmix_vdn.py +271 -0
  75. sota-implementations/multiagent/sac.py +337 -0
  76. sota-implementations/multiagent/utils/__init__.py +4 -0
  77. sota-implementations/multiagent/utils/logging.py +151 -0
  78. sota-implementations/multiagent/utils/utils.py +43 -0
  79. sota-implementations/ppo/README.md +29 -0
  80. sota-implementations/ppo/ppo_atari.py +305 -0
  81. sota-implementations/ppo/ppo_mujoco.py +293 -0
  82. sota-implementations/ppo/utils_atari.py +238 -0
  83. sota-implementations/ppo/utils_mujoco.py +152 -0
  84. sota-implementations/ppo_trainer/train.py +21 -0
  85. sota-implementations/redq/README.md +7 -0
  86. sota-implementations/redq/redq.py +199 -0
  87. sota-implementations/redq/utils.py +1060 -0
  88. sota-implementations/sac/sac-async.py +266 -0
  89. sota-implementations/sac/sac.py +239 -0
  90. sota-implementations/sac/utils.py +381 -0
  91. sota-implementations/sac_trainer/train.py +16 -0
  92. sota-implementations/td3/td3.py +254 -0
  93. sota-implementations/td3/utils.py +319 -0
  94. sota-implementations/td3_bc/td3_bc.py +177 -0
  95. sota-implementations/td3_bc/utils.py +251 -0
  96. torchrl/.dylibs/libc++.1.0.dylib +0 -0
  97. torchrl/__init__.py +144 -0
  98. torchrl/_extension.py +74 -0
  99. torchrl/_torchrl.cpython-314-darwin.so +0 -0
  100. torchrl/_utils.py +1431 -0
  101. torchrl/collectors/__init__.py +48 -0
  102. torchrl/collectors/_base.py +1058 -0
  103. torchrl/collectors/_constants.py +88 -0
  104. torchrl/collectors/_multi_async.py +324 -0
  105. torchrl/collectors/_multi_base.py +1805 -0
  106. torchrl/collectors/_multi_sync.py +464 -0
  107. torchrl/collectors/_runner.py +581 -0
  108. torchrl/collectors/_single.py +2009 -0
  109. torchrl/collectors/_single_async.py +259 -0
  110. torchrl/collectors/collectors.py +62 -0
  111. torchrl/collectors/distributed/__init__.py +32 -0
  112. torchrl/collectors/distributed/default_configs.py +133 -0
  113. torchrl/collectors/distributed/generic.py +1306 -0
  114. torchrl/collectors/distributed/ray.py +1092 -0
  115. torchrl/collectors/distributed/rpc.py +1006 -0
  116. torchrl/collectors/distributed/sync.py +731 -0
  117. torchrl/collectors/distributed/utils.py +160 -0
  118. torchrl/collectors/llm/__init__.py +10 -0
  119. torchrl/collectors/llm/base.py +494 -0
  120. torchrl/collectors/llm/ray_collector.py +275 -0
  121. torchrl/collectors/llm/utils.py +36 -0
  122. torchrl/collectors/llm/weight_update/__init__.py +10 -0
  123. torchrl/collectors/llm/weight_update/vllm.py +348 -0
  124. torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
  125. torchrl/collectors/utils.py +433 -0
  126. torchrl/collectors/weight_update.py +591 -0
  127. torchrl/csrc/numpy_utils.h +38 -0
  128. torchrl/csrc/pybind.cpp +27 -0
  129. torchrl/csrc/segment_tree.h +458 -0
  130. torchrl/csrc/torch_utils.h +34 -0
  131. torchrl/csrc/utils.cpp +48 -0
  132. torchrl/csrc/utils.h +31 -0
  133. torchrl/data/__init__.py +187 -0
  134. torchrl/data/datasets/__init__.py +58 -0
  135. torchrl/data/datasets/atari_dqn.py +878 -0
  136. torchrl/data/datasets/common.py +281 -0
  137. torchrl/data/datasets/d4rl.py +489 -0
  138. torchrl/data/datasets/d4rl_infos.py +187 -0
  139. torchrl/data/datasets/gen_dgrl.py +375 -0
  140. torchrl/data/datasets/minari_data.py +643 -0
  141. torchrl/data/datasets/openml.py +177 -0
  142. torchrl/data/datasets/openx.py +798 -0
  143. torchrl/data/datasets/roboset.py +363 -0
  144. torchrl/data/datasets/utils.py +11 -0
  145. torchrl/data/datasets/vd4rl.py +432 -0
  146. torchrl/data/llm/__init__.py +34 -0
  147. torchrl/data/llm/dataset.py +491 -0
  148. torchrl/data/llm/history.py +1378 -0
  149. torchrl/data/llm/prompt.py +198 -0
  150. torchrl/data/llm/reward.py +225 -0
  151. torchrl/data/llm/topk.py +186 -0
  152. torchrl/data/llm/utils.py +543 -0
  153. torchrl/data/map/__init__.py +21 -0
  154. torchrl/data/map/hash.py +185 -0
  155. torchrl/data/map/query.py +204 -0
  156. torchrl/data/map/tdstorage.py +363 -0
  157. torchrl/data/map/tree.py +1434 -0
  158. torchrl/data/map/utils.py +103 -0
  159. torchrl/data/postprocs/__init__.py +8 -0
  160. torchrl/data/postprocs/postprocs.py +391 -0
  161. torchrl/data/replay_buffers/__init__.py +99 -0
  162. torchrl/data/replay_buffers/checkpointers.py +622 -0
  163. torchrl/data/replay_buffers/ray_buffer.py +292 -0
  164. torchrl/data/replay_buffers/replay_buffers.py +2376 -0
  165. torchrl/data/replay_buffers/samplers.py +2578 -0
  166. torchrl/data/replay_buffers/scheduler.py +265 -0
  167. torchrl/data/replay_buffers/storages.py +2412 -0
  168. torchrl/data/replay_buffers/utils.py +1042 -0
  169. torchrl/data/replay_buffers/writers.py +781 -0
  170. torchrl/data/tensor_specs.py +7101 -0
  171. torchrl/data/utils.py +334 -0
  172. torchrl/envs/__init__.py +265 -0
  173. torchrl/envs/async_envs.py +1105 -0
  174. torchrl/envs/batched_envs.py +3093 -0
  175. torchrl/envs/common.py +4241 -0
  176. torchrl/envs/custom/__init__.py +11 -0
  177. torchrl/envs/custom/chess.py +617 -0
  178. torchrl/envs/custom/llm.py +214 -0
  179. torchrl/envs/custom/pendulum.py +401 -0
  180. torchrl/envs/custom/san_moves.txt +29274 -0
  181. torchrl/envs/custom/tictactoeenv.py +288 -0
  182. torchrl/envs/env_creator.py +263 -0
  183. torchrl/envs/gym_like.py +752 -0
  184. torchrl/envs/libs/__init__.py +68 -0
  185. torchrl/envs/libs/_gym_utils.py +326 -0
  186. torchrl/envs/libs/brax.py +846 -0
  187. torchrl/envs/libs/dm_control.py +544 -0
  188. torchrl/envs/libs/envpool.py +447 -0
  189. torchrl/envs/libs/gym.py +2239 -0
  190. torchrl/envs/libs/habitat.py +138 -0
  191. torchrl/envs/libs/isaac_lab.py +87 -0
  192. torchrl/envs/libs/isaacgym.py +203 -0
  193. torchrl/envs/libs/jax_utils.py +166 -0
  194. torchrl/envs/libs/jumanji.py +963 -0
  195. torchrl/envs/libs/meltingpot.py +599 -0
  196. torchrl/envs/libs/openml.py +153 -0
  197. torchrl/envs/libs/openspiel.py +652 -0
  198. torchrl/envs/libs/pettingzoo.py +1042 -0
  199. torchrl/envs/libs/procgen.py +351 -0
  200. torchrl/envs/libs/robohive.py +429 -0
  201. torchrl/envs/libs/smacv2.py +645 -0
  202. torchrl/envs/libs/unity_mlagents.py +891 -0
  203. torchrl/envs/libs/utils.py +147 -0
  204. torchrl/envs/libs/vmas.py +813 -0
  205. torchrl/envs/llm/__init__.py +63 -0
  206. torchrl/envs/llm/chat.py +730 -0
  207. torchrl/envs/llm/datasets/README.md +4 -0
  208. torchrl/envs/llm/datasets/__init__.py +17 -0
  209. torchrl/envs/llm/datasets/gsm8k.py +353 -0
  210. torchrl/envs/llm/datasets/ifeval.py +274 -0
  211. torchrl/envs/llm/envs.py +789 -0
  212. torchrl/envs/llm/libs/README.md +3 -0
  213. torchrl/envs/llm/libs/__init__.py +8 -0
  214. torchrl/envs/llm/libs/mlgym.py +869 -0
  215. torchrl/envs/llm/reward/__init__.py +10 -0
  216. torchrl/envs/llm/reward/gsm8k.py +324 -0
  217. torchrl/envs/llm/reward/ifeval/README.md +13 -0
  218. torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
  219. torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
  220. torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
  221. torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
  222. torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
  223. torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
  224. torchrl/envs/llm/transforms/__init__.py +55 -0
  225. torchrl/envs/llm/transforms/browser.py +292 -0
  226. torchrl/envs/llm/transforms/dataloading.py +859 -0
  227. torchrl/envs/llm/transforms/format.py +73 -0
  228. torchrl/envs/llm/transforms/kl.py +1544 -0
  229. torchrl/envs/llm/transforms/policy_version.py +189 -0
  230. torchrl/envs/llm/transforms/reason.py +323 -0
  231. torchrl/envs/llm/transforms/tokenizer.py +321 -0
  232. torchrl/envs/llm/transforms/tools.py +1955 -0
  233. torchrl/envs/model_based/__init__.py +9 -0
  234. torchrl/envs/model_based/common.py +180 -0
  235. torchrl/envs/model_based/dreamer.py +112 -0
  236. torchrl/envs/transforms/__init__.py +147 -0
  237. torchrl/envs/transforms/functional.py +48 -0
  238. torchrl/envs/transforms/gym_transforms.py +203 -0
  239. torchrl/envs/transforms/module.py +341 -0
  240. torchrl/envs/transforms/r3m.py +372 -0
  241. torchrl/envs/transforms/ray_service.py +663 -0
  242. torchrl/envs/transforms/rb_transforms.py +214 -0
  243. torchrl/envs/transforms/transforms.py +11835 -0
  244. torchrl/envs/transforms/utils.py +94 -0
  245. torchrl/envs/transforms/vc1.py +307 -0
  246. torchrl/envs/transforms/vecnorm.py +845 -0
  247. torchrl/envs/transforms/vip.py +407 -0
  248. torchrl/envs/utils.py +1718 -0
  249. torchrl/envs/vec_envs.py +11 -0
  250. torchrl/modules/__init__.py +206 -0
  251. torchrl/modules/distributions/__init__.py +73 -0
  252. torchrl/modules/distributions/continuous.py +830 -0
  253. torchrl/modules/distributions/discrete.py +908 -0
  254. torchrl/modules/distributions/truncated_normal.py +187 -0
  255. torchrl/modules/distributions/utils.py +233 -0
  256. torchrl/modules/llm/__init__.py +62 -0
  257. torchrl/modules/llm/backends/__init__.py +65 -0
  258. torchrl/modules/llm/backends/vllm/__init__.py +94 -0
  259. torchrl/modules/llm/backends/vllm/_models.py +46 -0
  260. torchrl/modules/llm/backends/vllm/base.py +72 -0
  261. torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
  262. torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
  263. torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
  264. torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
  265. torchrl/modules/llm/policies/__init__.py +28 -0
  266. torchrl/modules/llm/policies/common.py +1809 -0
  267. torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
  268. torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
  269. torchrl/modules/llm/utils.py +23 -0
  270. torchrl/modules/mcts/__init__.py +21 -0
  271. torchrl/modules/mcts/scores.py +579 -0
  272. torchrl/modules/models/__init__.py +86 -0
  273. torchrl/modules/models/batchrenorm.py +119 -0
  274. torchrl/modules/models/decision_transformer.py +179 -0
  275. torchrl/modules/models/exploration.py +731 -0
  276. torchrl/modules/models/llm.py +156 -0
  277. torchrl/modules/models/model_based.py +596 -0
  278. torchrl/modules/models/models.py +1712 -0
  279. torchrl/modules/models/multiagent.py +1067 -0
  280. torchrl/modules/models/recipes/impala.py +185 -0
  281. torchrl/modules/models/utils.py +162 -0
  282. torchrl/modules/planners/__init__.py +10 -0
  283. torchrl/modules/planners/cem.py +228 -0
  284. torchrl/modules/planners/common.py +73 -0
  285. torchrl/modules/planners/mppi.py +265 -0
  286. torchrl/modules/tensordict_module/__init__.py +89 -0
  287. torchrl/modules/tensordict_module/actors.py +2457 -0
  288. torchrl/modules/tensordict_module/common.py +529 -0
  289. torchrl/modules/tensordict_module/exploration.py +814 -0
  290. torchrl/modules/tensordict_module/probabilistic.py +321 -0
  291. torchrl/modules/tensordict_module/rnn.py +1639 -0
  292. torchrl/modules/tensordict_module/sequence.py +132 -0
  293. torchrl/modules/tensordict_module/world_models.py +34 -0
  294. torchrl/modules/utils/__init__.py +38 -0
  295. torchrl/modules/utils/mappings.py +9 -0
  296. torchrl/modules/utils/utils.py +89 -0
  297. torchrl/objectives/__init__.py +78 -0
  298. torchrl/objectives/a2c.py +659 -0
  299. torchrl/objectives/common.py +753 -0
  300. torchrl/objectives/cql.py +1346 -0
  301. torchrl/objectives/crossq.py +710 -0
  302. torchrl/objectives/ddpg.py +453 -0
  303. torchrl/objectives/decision_transformer.py +371 -0
  304. torchrl/objectives/deprecated.py +516 -0
  305. torchrl/objectives/dqn.py +683 -0
  306. torchrl/objectives/dreamer.py +488 -0
  307. torchrl/objectives/functional.py +48 -0
  308. torchrl/objectives/gail.py +258 -0
  309. torchrl/objectives/iql.py +996 -0
  310. torchrl/objectives/llm/__init__.py +30 -0
  311. torchrl/objectives/llm/grpo.py +846 -0
  312. torchrl/objectives/llm/sft.py +482 -0
  313. torchrl/objectives/multiagent/__init__.py +8 -0
  314. torchrl/objectives/multiagent/qmixer.py +396 -0
  315. torchrl/objectives/ppo.py +1669 -0
  316. torchrl/objectives/redq.py +683 -0
  317. torchrl/objectives/reinforce.py +530 -0
  318. torchrl/objectives/sac.py +1580 -0
  319. torchrl/objectives/td3.py +570 -0
  320. torchrl/objectives/td3_bc.py +625 -0
  321. torchrl/objectives/utils.py +782 -0
  322. torchrl/objectives/value/__init__.py +28 -0
  323. torchrl/objectives/value/advantages.py +1956 -0
  324. torchrl/objectives/value/functional.py +1459 -0
  325. torchrl/objectives/value/utils.py +360 -0
  326. torchrl/record/__init__.py +17 -0
  327. torchrl/record/loggers/__init__.py +23 -0
  328. torchrl/record/loggers/common.py +48 -0
  329. torchrl/record/loggers/csv.py +226 -0
  330. torchrl/record/loggers/mlflow.py +142 -0
  331. torchrl/record/loggers/tensorboard.py +139 -0
  332. torchrl/record/loggers/trackio.py +163 -0
  333. torchrl/record/loggers/utils.py +78 -0
  334. torchrl/record/loggers/wandb.py +214 -0
  335. torchrl/record/recorder.py +554 -0
  336. torchrl/services/__init__.py +79 -0
  337. torchrl/services/base.py +109 -0
  338. torchrl/services/ray_service.py +453 -0
  339. torchrl/testing/__init__.py +107 -0
  340. torchrl/testing/assertions.py +179 -0
  341. torchrl/testing/dist_utils.py +122 -0
  342. torchrl/testing/env_creators.py +227 -0
  343. torchrl/testing/env_helper.py +35 -0
  344. torchrl/testing/gym_helpers.py +156 -0
  345. torchrl/testing/llm_mocks.py +119 -0
  346. torchrl/testing/mocking_classes.py +2720 -0
  347. torchrl/testing/modules.py +295 -0
  348. torchrl/testing/mp_helpers.py +15 -0
  349. torchrl/testing/ray_helpers.py +293 -0
  350. torchrl/testing/utils.py +190 -0
  351. torchrl/trainers/__init__.py +42 -0
  352. torchrl/trainers/algorithms/__init__.py +11 -0
  353. torchrl/trainers/algorithms/configs/__init__.py +705 -0
  354. torchrl/trainers/algorithms/configs/collectors.py +216 -0
  355. torchrl/trainers/algorithms/configs/common.py +41 -0
  356. torchrl/trainers/algorithms/configs/data.py +308 -0
  357. torchrl/trainers/algorithms/configs/envs.py +104 -0
  358. torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
  359. torchrl/trainers/algorithms/configs/logging.py +80 -0
  360. torchrl/trainers/algorithms/configs/modules.py +570 -0
  361. torchrl/trainers/algorithms/configs/objectives.py +177 -0
  362. torchrl/trainers/algorithms/configs/trainers.py +340 -0
  363. torchrl/trainers/algorithms/configs/transforms.py +955 -0
  364. torchrl/trainers/algorithms/configs/utils.py +252 -0
  365. torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
  366. torchrl/trainers/algorithms/configs/weight_update.py +159 -0
  367. torchrl/trainers/algorithms/ppo.py +373 -0
  368. torchrl/trainers/algorithms/sac.py +308 -0
  369. torchrl/trainers/helpers/__init__.py +40 -0
  370. torchrl/trainers/helpers/collectors.py +416 -0
  371. torchrl/trainers/helpers/envs.py +573 -0
  372. torchrl/trainers/helpers/logger.py +33 -0
  373. torchrl/trainers/helpers/losses.py +132 -0
  374. torchrl/trainers/helpers/models.py +658 -0
  375. torchrl/trainers/helpers/replay_buffer.py +59 -0
  376. torchrl/trainers/helpers/trainers.py +301 -0
  377. torchrl/trainers/trainers.py +2052 -0
  378. torchrl/weight_update/__init__.py +33 -0
  379. torchrl/weight_update/_distributed.py +749 -0
  380. torchrl/weight_update/_mp.py +624 -0
  381. torchrl/weight_update/_noupdate.py +102 -0
  382. torchrl/weight_update/_ray.py +1032 -0
  383. torchrl/weight_update/_rpc.py +284 -0
  384. torchrl/weight_update/_shared.py +891 -0
  385. torchrl/weight_update/llm/__init__.py +32 -0
  386. torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
  387. torchrl/weight_update/llm/vllm_nccl.py +710 -0
  388. torchrl/weight_update/utils.py +73 -0
  389. torchrl/weight_update/weight_sync_schemes.py +1244 -0
  390. torchrl-0.11.0.dist-info/METADATA +1308 -0
  391. torchrl-0.11.0.dist-info/RECORD +395 -0
  392. torchrl-0.11.0.dist-info/WHEEL +5 -0
  393. torchrl-0.11.0.dist-info/entry_points.txt +2 -0
  394. torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
  395. torchrl-0.11.0.dist-info/top_level.txt +7 -0
@@ -0,0 +1,231 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+
7
+ import os
8
+ import pickle
9
+
10
+ import time
11
+ from pathlib import Path
12
+
13
+ import numpy as np
14
+
15
+ import ray
16
+
17
+ import vmas
18
+ from matplotlib import pyplot as plt
19
+ from ray import tune
20
+
21
+ from ray.rllib.agents.ppo import PPOTrainer
22
+ from ray.rllib.algorithms.callbacks import DefaultCallbacks
23
+ from ray.tune import register_env
24
+ from torchrl._utils import logger as torchrl_logger
25
+ from torchrl.collectors import SyncDataCollector
26
+ from torchrl.envs.libs.vmas import VmasEnv
27
+ from vmas import Wrapper
28
+
29
+
def store_pickled_evaluation(name: str, evaluation: dict):
    """Persist benchmark results as ``<name>.pkl`` next to this script.

    Args:
        name (str): base file name, without the ``.pkl`` extension.
        evaluation (dict): benchmark results to serialize.
    """
    save_folder = f"{os.path.dirname(os.path.realpath(__file__))}"
    file = f"{save_folder}/{name}.pkl"

    # Use a context manager so the handle is closed deterministically
    # (the original left the file object open until garbage collection).
    with open(file, "wb") as f:
        pickle.dump(evaluation, f)
35
+
36
+
def load_pickled_evaluation(
    name: str,
):
    """Load previously stored benchmark results.

    Args:
        name (str): base file name, without the ``.pkl`` extension.

    Returns:
        dict | None: the unpickled results, or ``None`` if no such file exists.
    """
    save_folder = f"{os.path.dirname(os.path.realpath(__file__))}"
    file = Path(f"{save_folder}/{name}.pkl")

    if file.is_file():
        # Context manager closes the handle promptly (the original relied on
        # garbage collection to close the open file object).
        with file.open("rb") as f:
            return pickle.load(f)
    return None
46
+
47
+
def run_vmas_torchrl(
    scenario_name: str, n_envs: int, n_steps: int, device: str, seed: int = 0
):
    """Collect ``n_envs * n_steps`` frames from a VMAS scenario via TorchRL.

    Builds a :class:`VmasEnv` with discrete actions and a ``SyncDataCollector``
    sized so that exactly one batch covers the full frame budget, then times
    the collection loop.

    Returns:
        float: wall-clock seconds spent collecting.
    """
    vmas_env = VmasEnv(
        scenario_name,
        device=device,
        num_envs=n_envs,
        continuous_actions=False,
        seed=seed,
    )

    data_collector = SyncDataCollector(
        vmas_env,
        policy=None,
        device=device,
        frames_per_batch=n_envs * n_steps,
        total_frames=n_envs * n_steps,
    )

    start = time.time()
    # Drain the collector; with total_frames == frames_per_batch this is a
    # single batch, but iterating keeps the timing honest either way.
    for _batch in data_collector:
        pass
    elapsed = time.time() - start

    data_collector.shutdown()
    return elapsed
75
+
76
+
def run_vmas_rllib(
    scenario_name: str, n_envs: int, n_steps: int, device: str, seed: int = 0
):
    """Run one RLlib PPO iteration on a VMAS scenario and return the sampling time.

    The sampling time is derived from RLlib's reported timers (iteration time
    minus learning time, converted from milliseconds to seconds).

    Returns:
        float: sampling time in seconds for one training iteration.
    """

    class TimerCallback(DefaultCallbacks):
        # Filled in by on_train_result; read back after tune.run completes.
        result_time = None

        def on_train_result(
            self,
            *,
            algorithm,
            result: dict,
            **kwargs,
        ) -> None:
            timers = result["timers"]
            TimerCallback.result_time = (
                timers["training_iteration_time_ms"] - timers["learn_time_ms"]
            )

    def env_creator(config: dict):
        return vmas.make_env(
            scenario=config["scenario_name"],
            num_envs=config["num_envs"],
            device=config["device"],
            continuous_actions=False,
            wrapper=Wrapper.RLLIB,
        )

    if not ray.is_initialized():
        ray.init()
    register_env(scenario_name, env_creator)

    # Share half a GPU with the (single) worker when running on CUDA.
    gpu_share = 0.5 if device == "cuda" else 0
    tune.run(
        PPOTrainer,
        stop={"training_iteration": 1},
        config={
            "seed": seed,
            "framework": "torch",
            "env": scenario_name,
            "train_batch_size": n_envs * n_steps,
            "rollout_fragment_length": n_steps,
            "sgd_minibatch_size": n_envs * n_steps,
            "num_gpus": gpu_share,
            "num_workers": 0,
            "num_gpus_per_worker": gpu_share,
            "num_envs_per_worker": n_envs,
            "batch_mode": "truncate_episodes",
            "env_config": {
                "device": device,
                "num_envs": n_envs,
                "scenario_name": scenario_name,
                "max_steps": n_steps,
            },
            "callbacks": TimerCallback,
        },
    )
    assert TimerCallback.result_time is not None
    TimerCallback.result_time /= 1_000  # convert to seconds
    return TimerCallback.result_time
138
+
139
+
def run_comparison_torchrl_rllib(
    scenario_name: str,
    device: str,
    n_steps: int = 100,
    max_n_envs: int = 3000,
    step_n_envs: int = 3,
):
    """Benchmark and plot TorchRL vs RLlib sampling throughput on a VMAS scenario.

    Results are cached to a pickle next to this script so reruns only benchmark
    frameworks that are missing from the cache; a PDF plot is saved alongside.

    Args:
        scenario_name (str): name of scenario to benchmark
        device (str): device to run comparison on ("cpu" or "cuda")
        n_steps (int): number of environment steps
        max_n_envs (int): the maximum number of parallel environments to test
        step_n_envs (int): the number of environment counts sampled between 1
            and max_n_envs (passed to ``np.linspace`` as the number of points,
            not as a step size, despite the name)
    """
    list_n_envs = np.linspace(1, max_n_envs, step_n_envs)

    figure_name = f"VMAS_{scenario_name}_{n_steps}_{device}_steps_rllib_vs_torchrl"
    figure_name_pkl = figure_name + f"_range_{1}_{max_n_envs}_num_{step_n_envs}"

    # Reuse cached results when available.
    evaluation = load_pickled_evaluation(figure_name_pkl)
    if not evaluation:
        evaluation = {}
    for framework in ["TorchRL", "RLlib"]:
        if framework not in evaluation:
            torchrl_logger.info(f"\nFramework {framework}")
            vmas_times = []
            # Both runners share the same signature, so dispatch once instead
            # of duplicating the call in two branches.
            runner = run_vmas_torchrl if framework == "TorchRL" else run_vmas_rllib
            for n_envs in list_n_envs:
                n_envs = int(n_envs)
                torchrl_logger.info(f"Running {n_envs} environments")
                # Throughput = total frames / wall-clock sampling seconds.
                vmas_times.append(
                    (n_envs * n_steps)
                    / runner(
                        scenario_name=scenario_name,
                        n_envs=n_envs,
                        n_steps=n_steps,
                        device=device,
                    )
                )
                torchrl_logger.info(f"fps {vmas_times[-1]}s")
            evaluation[framework] = vmas_times

    store_pickled_evaluation(name=figure_name_pkl, evaluation=evaluation)

    fig, ax = plt.subplots()
    for key, item in evaluation.items():
        ax.plot(
            list_n_envs,
            item,
            label=key,
        )

    plt.xlabel("Number of batched environments", fontsize=14)
    plt.ylabel("Frames per second", fontsize=14)
    ax.legend(loc="upper left")

    ax.set_title(
        f"Execution time of '{scenario_name}' for {n_steps} steps on {device}.",
        fontsize=8,
    )

    save_folder = os.path.dirname(os.path.realpath(__file__))
    plt.savefig(f"{save_folder}/{figure_name}.pdf")
216
+
217
+
if __name__ == "__main__":
    # Dependencies for this benchmark; ray is pinned to 2.1.0 because the
    # script imports the legacy ray.rllib.agents.ppo.PPOTrainer API.
    # pip install matplotlib
    # pip install "ray[rllib]"==2.1.0
    # pip install torchrl
    # pip install vmas
    # pip install numpy==1.23.5

    # NOTE(review): device="cuda" assumes a GPU is present — confirm before
    # running, or switch to "cpu".
    run_comparison_torchrl_rllib(
        scenario_name="simple_spread",
        device="cuda",
        n_steps=100,
        max_n_envs=30000,
        step_n_envs=10,
    )
@@ -0,0 +1,7 @@
1
+ pytest-benchmark
2
+ tenacity
3
+ safetensors
4
+ tqdm
5
+ pandas
6
+ numpy
7
+ matplotlib
@@ -0,0 +1,193 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ """
7
+ Sample latency benchmarking (using RPC)
8
+ ======================================
9
+ A rough benchmark of sample latency using different storage types over the network using `torch.rpc`.
10
+ Run this script with --rank=0 and --rank=1 flags set in separate processes - these ranks correspond to the trainer worker and buffer worker respectively, and both need to be initialised.
11
+ e.g. to benchmark LazyMemmapStorage, run the following commands using either two separate shells or multiprocessing.
12
+ - python3 benchmark_sample_latency_over_rpc.py --rank=0 --storage=LazyMemmapStorage
13
+ - python3 benchmark_sample_latency_over_rpc.py --rank=1 --storage=LazyMemmapStorage
14
+ This code is based on examples/distributed/distributed_replay_buffer.py.
15
+ """
16
+ import argparse
17
+ import os
18
+ import pickle
19
+ import sys
20
+ import time
21
+ import timeit
22
+ from datetime import datetime
23
+
24
+ import torch
25
+ import torch.distributed.rpc as rpc
26
+ from tensordict import TensorDict
27
+ from torchrl._utils import logger as torchrl_logger
28
+ from torchrl.data.replay_buffers import RemoteTensorDictReplayBuffer
29
+ from torchrl.data.replay_buffers.samplers import RandomSampler
30
+ from torchrl.data.replay_buffers.storages import (
31
+ LazyMemmapStorage,
32
+ LazyTensorStorage,
33
+ ListStorage,
34
+ )
35
+ from torchrl.data.replay_buffers.writers import RoundRobinWriter
36
+
# NOTE(review): RETRY_LIMIT is defined but never used below
# (_create_replay_buffer retries forever) — confirm whether a bounded retry
# was intended.
RETRY_LIMIT = 2
RETRY_DELAY_SECS = 3  # seconds to wait between connection attempts
REPLAY_BUFFER_NODE = "ReplayBuffer"  # RPC worker name for the buffer process (rank 1)
TRAINER_NODE = "Trainer"  # RPC worker name for the trainer process (rank 0)
TENSOR_SIZE = 3 * 86 * 86  # flattened per-transition observation size
BUFFER_SIZE = 1001  # number of transitions pre-loaded into the buffer
BATCH_SIZE = 256  # transitions fetched per sample() call
REPEATS = 1000  # number of timed sample() calls (the first one is discarded)

# Maps the --storage CLI choice to the corresponding storage class.
storage_options = {
    "LazyMemmapStorage": LazyMemmapStorage,
    "LazyTensorStorage": LazyTensorStorage,
    "ListStorage": ListStorage,
}

# Extra constructor kwargs per storage type.
storage_arg_options = {
    "LazyMemmapStorage": {"scratch_dir": "/tmp/", "device": torch.device("cpu")},
    "LazyTensorStorage": {},
    "ListStorage": {},
}
parser = argparse.ArgumentParser(
    description="RPC Replay Buffer Example",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

parser.add_argument(
    "--rank",
    type=int,
    default=-1,
    help="Node Rank [0 = Replay Buffer, 1 = Dummy Trainer, 2+ = Dummy Data Collector]",
)

parser.add_argument(
    "--storage",
    type=str,
    default="LazyMemmapStorage",
    help="Storage type [LazyMemmapStorage, LazyTensorStorage, ListStorage]",
)
75
+
76
+
class DummyTrainerNode:
    """Trainer-side RPC node.

    Holds a remote reference (RRef) to the replay-buffer node and times
    synchronous remote ``sample`` calls against it.
    """

    def __init__(self) -> None:
        self.id = rpc.get_worker_info().id  # this worker's RPC id
        self.replay_buffer = self._create_replay_buffer()  # RRef to ReplayBufferNode
        self._ret = None  # reusable destination for sampled batches

    def train(self, batch_size: int) -> float:
        """Sample ``batch_size`` transitions over RPC; return the latency in seconds."""
        start_time = timeit.default_timer()
        ret = rpc.rpc_sync(
            self.replay_buffer.owner(),
            ReplayBufferNode.sample,
            args=(self.replay_buffer, batch_size),
        )
        # `storage_type` is a module-level global assigned in __main__.
        if storage_type == "ListStorage":
            self._ret = ret[0]
        else:
            if self._ret is None:
                self._ret = ret
            else:
                # Reuse the first TensorDict and copy in place, so repeated
                # calls do not re-allocate the destination buffers.
                self._ret.update_(ret)
        # make sure the content is read
        self._ret["observation"] + 1
        self._ret["next_observation"] + 1
        return timeit.default_timer() - start_time

    def _create_replay_buffer(self) -> rpc.RRef:
        # NOTE(review): retries forever; RETRY_LIMIT (defined above) is never
        # applied here — confirm whether a bounded retry was intended.
        while True:
            try:
                replay_buffer_info = rpc.get_worker_info(REPLAY_BUFFER_NODE)
                # Construct the ReplayBufferNode remotely on the buffer worker.
                buffer_rref = rpc.remote(
                    replay_buffer_info, ReplayBufferNode, args=(1000000,)
                )
                torchrl_logger.info(f"Connected to replay buffer {replay_buffer_info}")
                return buffer_rref
            except Exception:
                torchrl_logger.info("Failed to connect to replay buffer")
                time.sleep(RETRY_DELAY_SECS)
114
+
115
+
class ReplayBufferNode(RemoteTensorDictReplayBuffer):
    """Buffer-side RPC node.

    A remote replay buffer backed by the storage class selected on the
    command line, pre-filled with ``BUFFER_SIZE`` random transitions so the
    trainer can sample immediately.
    """

    def __init__(self, capacity: int):
        storage_cls = storage_options[storage_type]
        super().__init__(
            storage=storage_cls(
                max_size=capacity, **storage_arg_options[storage_type]
            ),
            sampler=RandomSampler(),
            writer=RoundRobinWriter(),
            collate_fn=lambda x: x,
        )
        # Seed the buffer with random observation pairs of the benchmarked size.
        observations = torch.randn(
            BUFFER_SIZE,
            TENSOR_SIZE,
        )
        next_observations = torch.randn(
            BUFFER_SIZE,
            TENSOR_SIZE,
        )
        seed_data = TensorDict(
            {
                "observation": observations,
                "next_observation": next_observations,
            },
            batch_size=[BUFFER_SIZE],
        )
        self.extend(seed_data)
140
+
141
+
if __name__ == "__main__":
    args = parser.parse_args()
    rank = args.rank
    # Read as a module-level global by DummyTrainerNode and ReplayBufferNode.
    storage_type = args.storage

    torchrl_logger.debug(f"RANK: {rank}; Storage: {storage_type}")

    # Both processes must agree on these rendezvous settings.
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = "29500"
    os.environ["TORCH_DISTRIBUTED_DEBUG"] = "DETAIL"
    options = rpc.TensorPipeRpcBackendOptions(
        num_worker_threads=16, init_method="tcp://localhost:10002", rpc_timeout=120
    )
    if rank == 0:
        # rank 0 is the trainer
        rpc.init_rpc(
            TRAINER_NODE,
            rank=rank,
            backend=rpc.BackendType.TENSORPIPE,
            rpc_backend_options=options,
        )
        trainer = DummyTrainerNode()
        results = []
        for i in range(REPEATS):
            result = trainer.train(batch_size=BATCH_SIZE)
            if i == 0:
                # Discard the first (warm-up) measurement.
                continue
            results.append(result)
            torchrl_logger.info(f"{i}, {results[-1]}")

        # Dump the raw latencies to a timestamped pickle for offline analysis.
        with open(
            f'./benchmark_{datetime.now().strftime("%d-%m-%Y%H:%M:%S")};batch_size={BATCH_SIZE};tensor_size={TENSOR_SIZE};repeat={REPEATS};storage={storage_type}.pkl',
            "wb+",
        ) as f:
            pickle.dump(results, f)

        tensor_results = torch.tensor(results)
        torchrl_logger.info(f"Mean: {torch.mean(tensor_results)}")
        # NOTE(review): breakpoint() appears to hold the process open before
        # shutdown — confirm this is intentional and not debug residue.
        breakpoint()
    elif rank == 1:
        # rank 1 is the replay buffer
        # replay buffer waits passively for construction instructions from trainer node
        rpc.init_rpc(
            REPLAY_BUFFER_NODE,
            rank=rank,
            backend=rpc.BackendType.TENSORPIPE,
            rpc_backend_options=options,
        )
        # NOTE(review): same breakpoint() pattern as rank 0 — presumably keeps
        # the buffer worker alive while the trainer runs; verify.
        breakpoint()
    else:
        sys.exit(1)
    rpc.shutdown()
@@ -0,0 +1,240 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+ import argparse
6
+ import time
7
+
8
+ import pytest
9
+ import torch.cuda
10
+ import tqdm
11
+
12
+ from torchrl.collectors import (
13
+ MultiaSyncDataCollector,
14
+ MultiSyncDataCollector,
15
+ SyncDataCollector,
16
+ )
17
+ from torchrl.data import LazyTensorStorage, ReplayBuffer
18
+ from torchrl.data.utils import CloudpickleWrapper
19
+ from torchrl.envs import EnvCreator, GymEnv, ParallelEnv, StepCounter, TransformedEnv
20
+ from torchrl.envs.libs.dm_control import DMControlEnv
21
+ from torchrl.modules import RandomPolicy
22
+
23
+
24
def single_collector_setup():
    """Build a warmed-up single-process ``SyncDataCollector`` on cheetah-run.

    Returns an ``((iterator,), {})`` args/kwargs pair for the benchmark
    harness; the iterator has already been advanced past a warm-up phase so
    subsequent timings reflect steady-state collection.
    """
    device = "cuda:0" if torch.cuda.device_count() else "cpu"
    env = TransformedEnv(DMControlEnv("cheetah", "run", device=device), StepCounter(50))
    collector = SyncDataCollector(
        env,
        RandomPolicy(env.action_spec),
        total_frames=-1,
        frames_per_batch=100,
        device=device,
    )
    collector = iter(collector)
    # Warm-up: consume 11 batches (indices 0..10) before handing over.
    consumed = 0
    for _ in collector:
        if consumed == 10:
            break
        consumed += 1
    return ((collector,), {})
39
+
40
+
41
def sync_collector_setup():
    """Build a warmed-up ``MultiSyncDataCollector`` over two cheetah-run envs.

    Returns ``((iterator,), {})``; the iterator has already yielded several
    batches as warm-up so the benchmark measures steady-state throughput.
    """
    device = "cuda:0" if torch.cuda.device_count() else "cpu"
    make_env = EnvCreator(
        lambda: TransformedEnv(
            DMControlEnv("cheetah", "run", device=device), StepCounter(50)
        )
    )
    collector = MultiSyncDataCollector(
        [make_env, make_env],
        RandomPolicy(make_env().action_spec),
        total_frames=-1,
        frames_per_batch=100,
        device=device,
    )
    collector = iter(collector)
    # Warm-up: consume 11 batches (indices 0..10) before handing over.
    consumed = 0
    for _ in collector:
        if consumed == 10:
            break
        consumed += 1
    return ((collector,), {})
60
+
61
+
62
def async_collector_setup():
    """Build a warmed-up ``MultiaSyncDataCollector`` over two cheetah-run envs.

    Returns ``((iterator,), {})``; the iterator has already yielded several
    batches as warm-up so the benchmark measures steady-state throughput.
    """
    device = "cuda:0" if torch.cuda.device_count() else "cpu"
    make_env = EnvCreator(
        lambda: TransformedEnv(
            DMControlEnv("cheetah", "run", device=device), StepCounter(50)
        )
    )
    collector = MultiaSyncDataCollector(
        [make_env, make_env],
        RandomPolicy(make_env().action_spec),
        total_frames=-1,
        frames_per_batch=100,
        device=device,
    )
    collector = iter(collector)
    # Warm-up: consume 11 batches (indices 0..10) before handing over.
    consumed = 0
    for _ in collector:
        if consumed == 10:
            break
        consumed += 1
    return ((collector,), {})
81
+
82
+
83
def single_collector_setup_pixels():
    """Build a warmed-up single-process collector over a pixel-based env.

    Uses ALE/Pong-v5 as the pixel environment (a ``from_pixels`` DMControl
    variant exists disabled in this file's history). Returns
    ``((iterator,), {})`` with the iterator past its warm-up phase.
    """
    device = "cuda:0" if torch.cuda.device_count() else "cpu"
    # Alternative (disabled): DMControlEnv("cheetah", "run", device=device, from_pixels=True)
    env = TransformedEnv(GymEnv("ALE/Pong-v5"), StepCounter(50))
    collector = SyncDataCollector(
        env,
        RandomPolicy(env.action_spec),
        total_frames=-1,
        frames_per_batch=100,
        device=device,
    )
    collector = iter(collector)
    # Warm-up: consume 11 batches (indices 0..10) before handing over.
    consumed = 0
    for _ in collector:
        if consumed == 10:
            break
        consumed += 1
    return ((collector,), {})
101
+
102
+
103
def sync_collector_setup_pixels():
    """Build a warmed-up ``MultiSyncDataCollector`` over two pixel-based envs.

    Uses ALE/Pong-v5 (a ``from_pixels`` DMControl variant exists disabled in
    this file's history). Returns ``((iterator,), {})`` with the iterator past
    its warm-up phase.
    """
    device = "cuda:0" if torch.cuda.device_count() else "cpu"
    make_env = EnvCreator(
        lambda: TransformedEnv(
            # Alternative (disabled): DMControlEnv("cheetah", "run", device=device, from_pixels=True)
            GymEnv("ALE/Pong-v5"),
            StepCounter(50),
        )
    )
    collector = MultiSyncDataCollector(
        [make_env, make_env],
        RandomPolicy(make_env().action_spec),
        total_frames=-1,
        frames_per_batch=100,
        device=device,
    )
    collector = iter(collector)
    # Warm-up: consume 11 batches (indices 0..10) before handing over.
    consumed = 0
    for _ in collector:
        if consumed == 10:
            break
        consumed += 1
    return ((collector,), {})
124
+
125
+
126
def async_collector_setup_pixels():
    """Build a warmed-up ``MultiaSyncDataCollector`` over two pixel-based envs.

    Uses ALE/Pong-v5 (a ``from_pixels`` DMControl variant exists disabled in
    this file's history). Returns ``((iterator,), {})`` with the iterator past
    its warm-up phase.
    """
    device = "cuda:0" if torch.cuda.device_count() else "cpu"
    make_env = EnvCreator(
        lambda: TransformedEnv(
            # Alternative (disabled): DMControlEnv("cheetah", "run", device=device, from_pixels=True)
            GymEnv("ALE/Pong-v5"),
            StepCounter(50),
        )
    )
    collector = MultiaSyncDataCollector(
        [make_env, make_env],
        RandomPolicy(make_env().action_spec),
        total_frames=-1,
        frames_per_batch=100,
        device=device,
    )
    collector = iter(collector)
    # Warm-up: consume 11 batches (indices 0..10) before handing over.
    consumed = 0
    for _ in collector:
        if consumed == 10:
            break
        consumed += 1
    return ((collector,), {})
147
+
148
+
149
def execute_collector(c):
    """Advance the (already warmed-up) collector iterator by one batch.

    The setup functions consume the initial warm-up batches, so timing this
    call measures steady-state collection of a single batch.
    """
    next(c)
152
+
153
+
154
def test_single(benchmark):
    """Benchmark one batch from a warmed-up single-process collector."""
    pos_args, _ = single_collector_setup()
    benchmark(execute_collector, pos_args[0])
157
+
158
+
159
def test_sync(benchmark):
    """Benchmark one batch from a warmed-up multi-process sync collector."""
    pos_args, _ = sync_collector_setup()
    benchmark(execute_collector, pos_args[0])
162
+
163
+
164
def test_async(benchmark):
    """Benchmark one batch from a warmed-up multi-process async collector."""
    pos_args, _ = async_collector_setup()
    benchmark(execute_collector, pos_args[0])
167
+
168
+
169
@pytest.mark.skipif(not torch.cuda.device_count(), reason="no rendering without cuda")
def test_single_pixels(benchmark):
    """Benchmark one pixel-env batch from a single-process collector."""
    pos_args, _ = single_collector_setup_pixels()
    benchmark(execute_collector, pos_args[0])
173
+
174
+
175
@pytest.mark.skipif(not torch.cuda.device_count(), reason="no rendering without cuda")
def test_sync_pixels(benchmark):
    """Benchmark one pixel-env batch from a multi-process sync collector."""
    pos_args, _ = sync_collector_setup_pixels()
    benchmark(execute_collector, pos_args[0])
179
+
180
+
181
@pytest.mark.skipif(not torch.cuda.device_count(), reason="no rendering without cuda")
def test_async_pixels(benchmark):
    """Benchmark one pixel-env batch from a multi-process async collector."""
    pos_args, _ = async_collector_setup_pixels()
    benchmark(execute_collector, pos_args[0])
185
+
186
+
187
class TestRBGCollector:
    """Throughput benchmark for ``MultiaSyncDataCollector`` with a replay buffer.

    Collects ALE/Pong-v5 frames straight into a ``ReplayBuffer`` and reports a
    running frames-per-second figure via a tqdm progress bar, parameterized
    over the number of collectors and of env workers per collector.
    """

    @pytest.mark.parametrize(
        # Parameter name fixed from the misspelled "n_wokrers_per_col".
        "n_col,n_workers_per_col",
        [
            [2, 2],
            [4, 2],
            [8, 2],
            [16, 2],
            [2, 1],
            [4, 1],
            [8, 1],
            [16, 1],
        ],
    )
    def test_multiasync_rb(self, n_col, n_workers_per_col):
        make_env = EnvCreator(lambda: GymEnv("ALE/Pong-v5"))
        if n_workers_per_col > 1:
            # Batch several env workers inside each collector process.
            make_env = ParallelEnv(n_workers_per_col, make_env)
            env = make_env
            policy = RandomPolicy(env.action_spec)
        else:
            env = make_env()
            policy = RandomPolicy(env.action_spec)

        # Seed the buffer with one short rollout; the inverse transform
        # flattens incoming batches before they are written.
        storage = LazyTensorStorage(10_000)
        rb = ReplayBuffer(storage=storage)
        rb.extend(env.rollout(2, policy).reshape(-1))
        rb.append_transform(CloudpickleWrapper(lambda x: x.reshape(-1)), invert=True)

        fpb = n_workers_per_col * 100
        total_frames = n_workers_per_col * 100_000
        c = MultiaSyncDataCollector(
            [make_env] * n_col,
            policy,
            frames_per_batch=fpb,
            total_frames=total_frames,
            replay_buffer=rb,
        )
        frames = 0
        pbar = tqdm.tqdm(total=total_frames - (n_col * fpb))
        # Skip the first n_col batches (one warm-up batch per collector)
        # before starting the FPS clock.
        for i, _ in enumerate(c):
            if i == n_col:
                t0 = time.time()
            if i >= n_col:
                frames += fpb
            if i > n_col:
                fps = frames / (time.time() - t0)
                pbar.update(fpb)
                pbar.set_description(f"fps: {fps: 4.4f}")
        # Release the progress-bar resources once collection is done.
        pbar.close()
236
+
237
+
238
if __name__ == "__main__":
    # Forward any unrecognized CLI flags to pytest; run with live output and
    # stop at the first failure.
    _, unknown = argparse.ArgumentParser().parse_known_args()
    pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)