torchrl 0.11.0__cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395)
  1. benchmarks/benchmark_batched_envs.py +104 -0
  2. benchmarks/conftest.py +91 -0
  3. benchmarks/ecosystem/gym_env_throughput.py +321 -0
  4. benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
  5. benchmarks/requirements.txt +7 -0
  6. benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
  7. benchmarks/test_collectors_benchmark.py +240 -0
  8. benchmarks/test_compressed_storage_benchmark.py +145 -0
  9. benchmarks/test_envs_benchmark.py +133 -0
  10. benchmarks/test_llm.py +101 -0
  11. benchmarks/test_non_tensor_env_benchmark.py +70 -0
  12. benchmarks/test_objectives_benchmarks.py +1199 -0
  13. benchmarks/test_replaybuffer_benchmark.py +254 -0
  14. sota-check/README.md +35 -0
  15. sota-implementations/README.md +142 -0
  16. sota-implementations/a2c/README.md +39 -0
  17. sota-implementations/a2c/a2c_atari.py +291 -0
  18. sota-implementations/a2c/a2c_mujoco.py +273 -0
  19. sota-implementations/a2c/utils_atari.py +240 -0
  20. sota-implementations/a2c/utils_mujoco.py +160 -0
  21. sota-implementations/bandits/README.md +7 -0
  22. sota-implementations/bandits/dqn.py +126 -0
  23. sota-implementations/cql/cql_offline.py +198 -0
  24. sota-implementations/cql/cql_online.py +249 -0
  25. sota-implementations/cql/discrete_cql_offline.py +180 -0
  26. sota-implementations/cql/discrete_cql_online.py +227 -0
  27. sota-implementations/cql/utils.py +471 -0
  28. sota-implementations/crossq/crossq.py +271 -0
  29. sota-implementations/crossq/utils.py +320 -0
  30. sota-implementations/ddpg/ddpg.py +231 -0
  31. sota-implementations/ddpg/utils.py +325 -0
  32. sota-implementations/decision_transformer/dt.py +163 -0
  33. sota-implementations/decision_transformer/lamb.py +167 -0
  34. sota-implementations/decision_transformer/online_dt.py +178 -0
  35. sota-implementations/decision_transformer/utils.py +562 -0
  36. sota-implementations/discrete_sac/discrete_sac.py +243 -0
  37. sota-implementations/discrete_sac/utils.py +324 -0
  38. sota-implementations/dqn/README.md +30 -0
  39. sota-implementations/dqn/dqn_atari.py +272 -0
  40. sota-implementations/dqn/dqn_cartpole.py +236 -0
  41. sota-implementations/dqn/utils_atari.py +132 -0
  42. sota-implementations/dqn/utils_cartpole.py +90 -0
  43. sota-implementations/dreamer/README.md +129 -0
  44. sota-implementations/dreamer/dreamer.py +586 -0
  45. sota-implementations/dreamer/dreamer_utils.py +1107 -0
  46. sota-implementations/expert-iteration/README.md +352 -0
  47. sota-implementations/expert-iteration/ei_utils.py +770 -0
  48. sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
  49. sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
  50. sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
  51. sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
  52. sota-implementations/gail/gail.py +327 -0
  53. sota-implementations/gail/gail_utils.py +68 -0
  54. sota-implementations/gail/ppo_utils.py +157 -0
  55. sota-implementations/grpo/README.md +273 -0
  56. sota-implementations/grpo/grpo-async.py +437 -0
  57. sota-implementations/grpo/grpo-sync.py +435 -0
  58. sota-implementations/grpo/grpo_utils.py +843 -0
  59. sota-implementations/grpo/requirements_gsm8k.txt +11 -0
  60. sota-implementations/grpo/requirements_ifeval.txt +16 -0
  61. sota-implementations/impala/README.md +33 -0
  62. sota-implementations/impala/impala_multi_node_ray.py +292 -0
  63. sota-implementations/impala/impala_multi_node_submitit.py +284 -0
  64. sota-implementations/impala/impala_single_node.py +261 -0
  65. sota-implementations/impala/utils.py +184 -0
  66. sota-implementations/iql/discrete_iql.py +230 -0
  67. sota-implementations/iql/iql_offline.py +164 -0
  68. sota-implementations/iql/iql_online.py +225 -0
  69. sota-implementations/iql/utils.py +437 -0
  70. sota-implementations/multiagent/README.md +74 -0
  71. sota-implementations/multiagent/iql.py +237 -0
  72. sota-implementations/multiagent/maddpg_iddpg.py +266 -0
  73. sota-implementations/multiagent/mappo_ippo.py +267 -0
  74. sota-implementations/multiagent/qmix_vdn.py +271 -0
  75. sota-implementations/multiagent/sac.py +337 -0
  76. sota-implementations/multiagent/utils/__init__.py +4 -0
  77. sota-implementations/multiagent/utils/logging.py +151 -0
  78. sota-implementations/multiagent/utils/utils.py +43 -0
  79. sota-implementations/ppo/README.md +29 -0
  80. sota-implementations/ppo/ppo_atari.py +305 -0
  81. sota-implementations/ppo/ppo_mujoco.py +293 -0
  82. sota-implementations/ppo/utils_atari.py +238 -0
  83. sota-implementations/ppo/utils_mujoco.py +152 -0
  84. sota-implementations/ppo_trainer/train.py +21 -0
  85. sota-implementations/redq/README.md +7 -0
  86. sota-implementations/redq/redq.py +199 -0
  87. sota-implementations/redq/utils.py +1060 -0
  88. sota-implementations/sac/sac-async.py +266 -0
  89. sota-implementations/sac/sac.py +239 -0
  90. sota-implementations/sac/utils.py +381 -0
  91. sota-implementations/sac_trainer/train.py +16 -0
  92. sota-implementations/td3/td3.py +254 -0
  93. sota-implementations/td3/utils.py +319 -0
  94. sota-implementations/td3_bc/td3_bc.py +177 -0
  95. sota-implementations/td3_bc/utils.py +251 -0
  96. torchrl/.dylibs/libc++.1.0.dylib +0 -0
  97. torchrl/__init__.py +144 -0
  98. torchrl/_extension.py +74 -0
  99. torchrl/_torchrl.cpython-314-darwin.so +0 -0
  100. torchrl/_utils.py +1431 -0
  101. torchrl/collectors/__init__.py +48 -0
  102. torchrl/collectors/_base.py +1058 -0
  103. torchrl/collectors/_constants.py +88 -0
  104. torchrl/collectors/_multi_async.py +324 -0
  105. torchrl/collectors/_multi_base.py +1805 -0
  106. torchrl/collectors/_multi_sync.py +464 -0
  107. torchrl/collectors/_runner.py +581 -0
  108. torchrl/collectors/_single.py +2009 -0
  109. torchrl/collectors/_single_async.py +259 -0
  110. torchrl/collectors/collectors.py +62 -0
  111. torchrl/collectors/distributed/__init__.py +32 -0
  112. torchrl/collectors/distributed/default_configs.py +133 -0
  113. torchrl/collectors/distributed/generic.py +1306 -0
  114. torchrl/collectors/distributed/ray.py +1092 -0
  115. torchrl/collectors/distributed/rpc.py +1006 -0
  116. torchrl/collectors/distributed/sync.py +731 -0
  117. torchrl/collectors/distributed/utils.py +160 -0
  118. torchrl/collectors/llm/__init__.py +10 -0
  119. torchrl/collectors/llm/base.py +494 -0
  120. torchrl/collectors/llm/ray_collector.py +275 -0
  121. torchrl/collectors/llm/utils.py +36 -0
  122. torchrl/collectors/llm/weight_update/__init__.py +10 -0
  123. torchrl/collectors/llm/weight_update/vllm.py +348 -0
  124. torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
  125. torchrl/collectors/utils.py +433 -0
  126. torchrl/collectors/weight_update.py +591 -0
  127. torchrl/csrc/numpy_utils.h +38 -0
  128. torchrl/csrc/pybind.cpp +27 -0
  129. torchrl/csrc/segment_tree.h +458 -0
  130. torchrl/csrc/torch_utils.h +34 -0
  131. torchrl/csrc/utils.cpp +48 -0
  132. torchrl/csrc/utils.h +31 -0
  133. torchrl/data/__init__.py +187 -0
  134. torchrl/data/datasets/__init__.py +58 -0
  135. torchrl/data/datasets/atari_dqn.py +878 -0
  136. torchrl/data/datasets/common.py +281 -0
  137. torchrl/data/datasets/d4rl.py +489 -0
  138. torchrl/data/datasets/d4rl_infos.py +187 -0
  139. torchrl/data/datasets/gen_dgrl.py +375 -0
  140. torchrl/data/datasets/minari_data.py +643 -0
  141. torchrl/data/datasets/openml.py +177 -0
  142. torchrl/data/datasets/openx.py +798 -0
  143. torchrl/data/datasets/roboset.py +363 -0
  144. torchrl/data/datasets/utils.py +11 -0
  145. torchrl/data/datasets/vd4rl.py +432 -0
  146. torchrl/data/llm/__init__.py +34 -0
  147. torchrl/data/llm/dataset.py +491 -0
  148. torchrl/data/llm/history.py +1378 -0
  149. torchrl/data/llm/prompt.py +198 -0
  150. torchrl/data/llm/reward.py +225 -0
  151. torchrl/data/llm/topk.py +186 -0
  152. torchrl/data/llm/utils.py +543 -0
  153. torchrl/data/map/__init__.py +21 -0
  154. torchrl/data/map/hash.py +185 -0
  155. torchrl/data/map/query.py +204 -0
  156. torchrl/data/map/tdstorage.py +363 -0
  157. torchrl/data/map/tree.py +1434 -0
  158. torchrl/data/map/utils.py +103 -0
  159. torchrl/data/postprocs/__init__.py +8 -0
  160. torchrl/data/postprocs/postprocs.py +391 -0
  161. torchrl/data/replay_buffers/__init__.py +99 -0
  162. torchrl/data/replay_buffers/checkpointers.py +622 -0
  163. torchrl/data/replay_buffers/ray_buffer.py +292 -0
  164. torchrl/data/replay_buffers/replay_buffers.py +2376 -0
  165. torchrl/data/replay_buffers/samplers.py +2578 -0
  166. torchrl/data/replay_buffers/scheduler.py +265 -0
  167. torchrl/data/replay_buffers/storages.py +2412 -0
  168. torchrl/data/replay_buffers/utils.py +1042 -0
  169. torchrl/data/replay_buffers/writers.py +781 -0
  170. torchrl/data/tensor_specs.py +7101 -0
  171. torchrl/data/utils.py +334 -0
  172. torchrl/envs/__init__.py +265 -0
  173. torchrl/envs/async_envs.py +1105 -0
  174. torchrl/envs/batched_envs.py +3093 -0
  175. torchrl/envs/common.py +4241 -0
  176. torchrl/envs/custom/__init__.py +11 -0
  177. torchrl/envs/custom/chess.py +617 -0
  178. torchrl/envs/custom/llm.py +214 -0
  179. torchrl/envs/custom/pendulum.py +401 -0
  180. torchrl/envs/custom/san_moves.txt +29274 -0
  181. torchrl/envs/custom/tictactoeenv.py +288 -0
  182. torchrl/envs/env_creator.py +263 -0
  183. torchrl/envs/gym_like.py +752 -0
  184. torchrl/envs/libs/__init__.py +68 -0
  185. torchrl/envs/libs/_gym_utils.py +326 -0
  186. torchrl/envs/libs/brax.py +846 -0
  187. torchrl/envs/libs/dm_control.py +544 -0
  188. torchrl/envs/libs/envpool.py +447 -0
  189. torchrl/envs/libs/gym.py +2239 -0
  190. torchrl/envs/libs/habitat.py +138 -0
  191. torchrl/envs/libs/isaac_lab.py +87 -0
  192. torchrl/envs/libs/isaacgym.py +203 -0
  193. torchrl/envs/libs/jax_utils.py +166 -0
  194. torchrl/envs/libs/jumanji.py +963 -0
  195. torchrl/envs/libs/meltingpot.py +599 -0
  196. torchrl/envs/libs/openml.py +153 -0
  197. torchrl/envs/libs/openspiel.py +652 -0
  198. torchrl/envs/libs/pettingzoo.py +1042 -0
  199. torchrl/envs/libs/procgen.py +351 -0
  200. torchrl/envs/libs/robohive.py +429 -0
  201. torchrl/envs/libs/smacv2.py +645 -0
  202. torchrl/envs/libs/unity_mlagents.py +891 -0
  203. torchrl/envs/libs/utils.py +147 -0
  204. torchrl/envs/libs/vmas.py +813 -0
  205. torchrl/envs/llm/__init__.py +63 -0
  206. torchrl/envs/llm/chat.py +730 -0
  207. torchrl/envs/llm/datasets/README.md +4 -0
  208. torchrl/envs/llm/datasets/__init__.py +17 -0
  209. torchrl/envs/llm/datasets/gsm8k.py +353 -0
  210. torchrl/envs/llm/datasets/ifeval.py +274 -0
  211. torchrl/envs/llm/envs.py +789 -0
  212. torchrl/envs/llm/libs/README.md +3 -0
  213. torchrl/envs/llm/libs/__init__.py +8 -0
  214. torchrl/envs/llm/libs/mlgym.py +869 -0
  215. torchrl/envs/llm/reward/__init__.py +10 -0
  216. torchrl/envs/llm/reward/gsm8k.py +324 -0
  217. torchrl/envs/llm/reward/ifeval/README.md +13 -0
  218. torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
  219. torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
  220. torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
  221. torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
  222. torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
  223. torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
  224. torchrl/envs/llm/transforms/__init__.py +55 -0
  225. torchrl/envs/llm/transforms/browser.py +292 -0
  226. torchrl/envs/llm/transforms/dataloading.py +859 -0
  227. torchrl/envs/llm/transforms/format.py +73 -0
  228. torchrl/envs/llm/transforms/kl.py +1544 -0
  229. torchrl/envs/llm/transforms/policy_version.py +189 -0
  230. torchrl/envs/llm/transforms/reason.py +323 -0
  231. torchrl/envs/llm/transforms/tokenizer.py +321 -0
  232. torchrl/envs/llm/transforms/tools.py +1955 -0
  233. torchrl/envs/model_based/__init__.py +9 -0
  234. torchrl/envs/model_based/common.py +180 -0
  235. torchrl/envs/model_based/dreamer.py +112 -0
  236. torchrl/envs/transforms/__init__.py +147 -0
  237. torchrl/envs/transforms/functional.py +48 -0
  238. torchrl/envs/transforms/gym_transforms.py +203 -0
  239. torchrl/envs/transforms/module.py +341 -0
  240. torchrl/envs/transforms/r3m.py +372 -0
  241. torchrl/envs/transforms/ray_service.py +663 -0
  242. torchrl/envs/transforms/rb_transforms.py +214 -0
  243. torchrl/envs/transforms/transforms.py +11835 -0
  244. torchrl/envs/transforms/utils.py +94 -0
  245. torchrl/envs/transforms/vc1.py +307 -0
  246. torchrl/envs/transforms/vecnorm.py +845 -0
  247. torchrl/envs/transforms/vip.py +407 -0
  248. torchrl/envs/utils.py +1718 -0
  249. torchrl/envs/vec_envs.py +11 -0
  250. torchrl/modules/__init__.py +206 -0
  251. torchrl/modules/distributions/__init__.py +73 -0
  252. torchrl/modules/distributions/continuous.py +830 -0
  253. torchrl/modules/distributions/discrete.py +908 -0
  254. torchrl/modules/distributions/truncated_normal.py +187 -0
  255. torchrl/modules/distributions/utils.py +233 -0
  256. torchrl/modules/llm/__init__.py +62 -0
  257. torchrl/modules/llm/backends/__init__.py +65 -0
  258. torchrl/modules/llm/backends/vllm/__init__.py +94 -0
  259. torchrl/modules/llm/backends/vllm/_models.py +46 -0
  260. torchrl/modules/llm/backends/vllm/base.py +72 -0
  261. torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
  262. torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
  263. torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
  264. torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
  265. torchrl/modules/llm/policies/__init__.py +28 -0
  266. torchrl/modules/llm/policies/common.py +1809 -0
  267. torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
  268. torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
  269. torchrl/modules/llm/utils.py +23 -0
  270. torchrl/modules/mcts/__init__.py +21 -0
  271. torchrl/modules/mcts/scores.py +579 -0
  272. torchrl/modules/models/__init__.py +86 -0
  273. torchrl/modules/models/batchrenorm.py +119 -0
  274. torchrl/modules/models/decision_transformer.py +179 -0
  275. torchrl/modules/models/exploration.py +731 -0
  276. torchrl/modules/models/llm.py +156 -0
  277. torchrl/modules/models/model_based.py +596 -0
  278. torchrl/modules/models/models.py +1712 -0
  279. torchrl/modules/models/multiagent.py +1067 -0
  280. torchrl/modules/models/recipes/impala.py +185 -0
  281. torchrl/modules/models/utils.py +162 -0
  282. torchrl/modules/planners/__init__.py +10 -0
  283. torchrl/modules/planners/cem.py +228 -0
  284. torchrl/modules/planners/common.py +73 -0
  285. torchrl/modules/planners/mppi.py +265 -0
  286. torchrl/modules/tensordict_module/__init__.py +89 -0
  287. torchrl/modules/tensordict_module/actors.py +2457 -0
  288. torchrl/modules/tensordict_module/common.py +529 -0
  289. torchrl/modules/tensordict_module/exploration.py +814 -0
  290. torchrl/modules/tensordict_module/probabilistic.py +321 -0
  291. torchrl/modules/tensordict_module/rnn.py +1639 -0
  292. torchrl/modules/tensordict_module/sequence.py +132 -0
  293. torchrl/modules/tensordict_module/world_models.py +34 -0
  294. torchrl/modules/utils/__init__.py +38 -0
  295. torchrl/modules/utils/mappings.py +9 -0
  296. torchrl/modules/utils/utils.py +89 -0
  297. torchrl/objectives/__init__.py +78 -0
  298. torchrl/objectives/a2c.py +659 -0
  299. torchrl/objectives/common.py +753 -0
  300. torchrl/objectives/cql.py +1346 -0
  301. torchrl/objectives/crossq.py +710 -0
  302. torchrl/objectives/ddpg.py +453 -0
  303. torchrl/objectives/decision_transformer.py +371 -0
  304. torchrl/objectives/deprecated.py +516 -0
  305. torchrl/objectives/dqn.py +683 -0
  306. torchrl/objectives/dreamer.py +488 -0
  307. torchrl/objectives/functional.py +48 -0
  308. torchrl/objectives/gail.py +258 -0
  309. torchrl/objectives/iql.py +996 -0
  310. torchrl/objectives/llm/__init__.py +30 -0
  311. torchrl/objectives/llm/grpo.py +846 -0
  312. torchrl/objectives/llm/sft.py +482 -0
  313. torchrl/objectives/multiagent/__init__.py +8 -0
  314. torchrl/objectives/multiagent/qmixer.py +396 -0
  315. torchrl/objectives/ppo.py +1669 -0
  316. torchrl/objectives/redq.py +683 -0
  317. torchrl/objectives/reinforce.py +530 -0
  318. torchrl/objectives/sac.py +1580 -0
  319. torchrl/objectives/td3.py +570 -0
  320. torchrl/objectives/td3_bc.py +625 -0
  321. torchrl/objectives/utils.py +782 -0
  322. torchrl/objectives/value/__init__.py +28 -0
  323. torchrl/objectives/value/advantages.py +1956 -0
  324. torchrl/objectives/value/functional.py +1459 -0
  325. torchrl/objectives/value/utils.py +360 -0
  326. torchrl/record/__init__.py +17 -0
  327. torchrl/record/loggers/__init__.py +23 -0
  328. torchrl/record/loggers/common.py +48 -0
  329. torchrl/record/loggers/csv.py +226 -0
  330. torchrl/record/loggers/mlflow.py +142 -0
  331. torchrl/record/loggers/tensorboard.py +139 -0
  332. torchrl/record/loggers/trackio.py +163 -0
  333. torchrl/record/loggers/utils.py +78 -0
  334. torchrl/record/loggers/wandb.py +214 -0
  335. torchrl/record/recorder.py +554 -0
  336. torchrl/services/__init__.py +79 -0
  337. torchrl/services/base.py +109 -0
  338. torchrl/services/ray_service.py +453 -0
  339. torchrl/testing/__init__.py +107 -0
  340. torchrl/testing/assertions.py +179 -0
  341. torchrl/testing/dist_utils.py +122 -0
  342. torchrl/testing/env_creators.py +227 -0
  343. torchrl/testing/env_helper.py +35 -0
  344. torchrl/testing/gym_helpers.py +156 -0
  345. torchrl/testing/llm_mocks.py +119 -0
  346. torchrl/testing/mocking_classes.py +2720 -0
  347. torchrl/testing/modules.py +295 -0
  348. torchrl/testing/mp_helpers.py +15 -0
  349. torchrl/testing/ray_helpers.py +293 -0
  350. torchrl/testing/utils.py +190 -0
  351. torchrl/trainers/__init__.py +42 -0
  352. torchrl/trainers/algorithms/__init__.py +11 -0
  353. torchrl/trainers/algorithms/configs/__init__.py +705 -0
  354. torchrl/trainers/algorithms/configs/collectors.py +216 -0
  355. torchrl/trainers/algorithms/configs/common.py +41 -0
  356. torchrl/trainers/algorithms/configs/data.py +308 -0
  357. torchrl/trainers/algorithms/configs/envs.py +104 -0
  358. torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
  359. torchrl/trainers/algorithms/configs/logging.py +80 -0
  360. torchrl/trainers/algorithms/configs/modules.py +570 -0
  361. torchrl/trainers/algorithms/configs/objectives.py +177 -0
  362. torchrl/trainers/algorithms/configs/trainers.py +340 -0
  363. torchrl/trainers/algorithms/configs/transforms.py +955 -0
  364. torchrl/trainers/algorithms/configs/utils.py +252 -0
  365. torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
  366. torchrl/trainers/algorithms/configs/weight_update.py +159 -0
  367. torchrl/trainers/algorithms/ppo.py +373 -0
  368. torchrl/trainers/algorithms/sac.py +308 -0
  369. torchrl/trainers/helpers/__init__.py +40 -0
  370. torchrl/trainers/helpers/collectors.py +416 -0
  371. torchrl/trainers/helpers/envs.py +573 -0
  372. torchrl/trainers/helpers/logger.py +33 -0
  373. torchrl/trainers/helpers/losses.py +132 -0
  374. torchrl/trainers/helpers/models.py +658 -0
  375. torchrl/trainers/helpers/replay_buffer.py +59 -0
  376. torchrl/trainers/helpers/trainers.py +301 -0
  377. torchrl/trainers/trainers.py +2052 -0
  378. torchrl/weight_update/__init__.py +33 -0
  379. torchrl/weight_update/_distributed.py +749 -0
  380. torchrl/weight_update/_mp.py +624 -0
  381. torchrl/weight_update/_noupdate.py +102 -0
  382. torchrl/weight_update/_ray.py +1032 -0
  383. torchrl/weight_update/_rpc.py +284 -0
  384. torchrl/weight_update/_shared.py +891 -0
  385. torchrl/weight_update/llm/__init__.py +32 -0
  386. torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
  387. torchrl/weight_update/llm/vllm_nccl.py +710 -0
  388. torchrl/weight_update/utils.py +73 -0
  389. torchrl/weight_update/weight_sync_schemes.py +1244 -0
  390. torchrl-0.11.0.dist-info/METADATA +1308 -0
  391. torchrl-0.11.0.dist-info/RECORD +395 -0
  392. torchrl-0.11.0.dist-info/WHEEL +5 -0
  393. torchrl-0.11.0.dist-info/entry_points.txt +2 -0
  394. torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
  395. torchrl-0.11.0.dist-info/top_level.txt +7 -0
@@ -0,0 +1,216 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass, field
9
+ from functools import partial
10
+ from typing import Any
11
+
12
+ from omegaconf import MISSING
13
+
14
+ from torchrl.trainers.algorithms.configs.common import ConfigBase
15
+ from torchrl.trainers.algorithms.configs.envs import EnvConfig
16
+
17
+
18
@dataclass
class BaseCollectorConfig(ConfigBase):
    """Common ancestor of every data-collector configuration class."""
21
+
22
+
23
@dataclass
class CollectorConfig(BaseCollectorConfig):
    """Configures a synchronous data collector.

    ``_target_`` points at ``torchrl.collectors.Collector``; the fields below
    are forwarded to it at instantiation time. Only ``create_env_fn`` is
    mandatory — every other field keeps the collector's default.
    """

    # --- environment / policy -------------------------------------------
    create_env_fn: ConfigBase = MISSING
    policy: Any = None
    policy_factory: Any = None
    # --- frame budget ----------------------------------------------------
    frames_per_batch: int | None = None
    total_frames: int = -1
    init_random_frames: int | None = 0
    # --- device placement ------------------------------------------------
    device: str | None = None
    storing_device: str | None = None
    policy_device: str | None = None
    env_device: str | None = None
    # --- rollout behavior ------------------------------------------------
    create_env_kwargs: dict | None = None
    max_frames_per_traj: int | None = None
    reset_at_each_iter: bool = False
    postproc: Any = None
    split_trajs: bool = False
    exploration_type: str = "RANDOM"
    return_same_td: bool = False
    interruptor: Any = None
    set_truncated: bool = False
    use_buffers: bool = False
    replay_buffer: Any = None
    extend_buffer: bool = False
    # --- policy handling / compilation -----------------------------------
    trust_policy: bool = True
    compile_policy: Any = None
    cudagraph_policy: Any = None
    no_cuda_sync: bool = False
    # --- weight synchronization ------------------------------------------
    weight_updater: Any = None
    weight_sync_schemes: Any = None
    track_policy_version: bool = False
    local_init_rb: bool = False
    # --- Hydra instantiation hooks ---------------------------------------
    _target_: str = "torchrl.collectors.Collector"
    _partial_: bool = False

    def __post_init__(self):
        # The env factory must stay a factory: the collector instantiates it.
        self.create_env_fn._partial_ = True
        # Optional sub-configs are likewise deferred when provided.
        for attr_name in ("policy_factory", "weight_updater"):
            sub_cfg = getattr(self, attr_name)
            if sub_cfg is not None:
                sub_cfg._partial_ = True
66
+
67
+
68
# Legacy alias: ``SyncDataCollectorConfig`` is the historical name of
# ``CollectorConfig``, kept for backward compatibility.
SyncDataCollectorConfig = CollectorConfig
70
+
71
+
72
@dataclass
class AsyncCollectorConfig(BaseCollectorConfig):
    """Configures an asynchronous data collector.

    ``_target_`` points at ``torchrl.collectors.AsyncCollector``. Unlike
    :class:`CollectorConfig`, ``create_env_fn`` defaults to a partial
    ``EnvConfig`` instead of being mandatory.
    """

    # --- environment / policy -------------------------------------------
    create_env_fn: ConfigBase = field(
        default_factory=partial(EnvConfig, _partial_=True)
    )
    policy: Any = None
    policy_factory: Any = None
    # --- frame budget ----------------------------------------------------
    frames_per_batch: int | None = None
    init_random_frames: int | None = 0
    total_frames: int = -1
    # --- device placement ------------------------------------------------
    device: str | None = None
    storing_device: str | None = None
    policy_device: str | None = None
    env_device: str | None = None
    # --- rollout behavior ------------------------------------------------
    create_env_kwargs: dict | None = None
    max_frames_per_traj: int | None = None
    reset_at_each_iter: bool = False
    postproc: ConfigBase | None = None
    split_trajs: bool = False
    exploration_type: str = "RANDOM"
    set_truncated: bool = False
    use_buffers: bool = False
    replay_buffer: ConfigBase | None = None
    extend_buffer: bool = False
    # --- policy handling / compilation -----------------------------------
    trust_policy: bool = True
    compile_policy: Any = None
    cudagraph_policy: Any = None
    no_cuda_sync: bool = False
    # --- weight synchronization ------------------------------------------
    weight_updater: Any = None
    weight_sync_schemes: Any = None
    track_policy_version: bool = False
    local_init_rb: bool = False
    # --- Hydra instantiation hooks ---------------------------------------
    _target_: str = "torchrl.collectors.AsyncCollector"
    _partial_: bool = False

    def __post_init__(self):
        # The env factory must stay a factory: the collector instantiates it.
        self.create_env_fn._partial_ = True
        # Optional sub-configs are likewise deferred when provided.
        for attr_name in ("policy_factory", "weight_updater"):
            sub_cfg = getattr(self, attr_name)
            if sub_cfg is not None:
                sub_cfg._partial_ = True
115
+
116
+
117
# Legacy alias: ``AsyncDataCollectorConfig`` is the historical name of
# ``AsyncCollectorConfig``, kept for backward compatibility.
AsyncDataCollectorConfig = AsyncCollectorConfig
119
+
120
+
121
@dataclass
class MultiSyncCollectorConfig(BaseCollectorConfig):
    """Configures a multi-process synchronous collector.

    ``_target_`` points at ``torchrl.collectors.MultiSyncCollector``.
    ``create_env_fn`` is iterated in ``__post_init__`` and is therefore
    expected to be a collection of environment configs (presumably one per
    worker — confirm against the collector's API).
    """

    # --- environment / workers -------------------------------------------
    create_env_fn: Any = MISSING
    num_workers: int | None = None
    # --- policy -----------------------------------------------------------
    policy: Any = None
    policy_factory: Any = None
    # --- frame budget ------------------------------------------------------
    frames_per_batch: int | None = None
    init_random_frames: int | None = 0
    total_frames: int = -1
    # --- device placement --------------------------------------------------
    device: str | None = None
    storing_device: str | None = None
    policy_device: str | None = None
    env_device: str | None = None
    # --- rollout behavior --------------------------------------------------
    create_env_kwargs: dict | None = None
    max_frames_per_traj: int | None = None
    reset_at_each_iter: bool = False
    postproc: ConfigBase | None = None
    split_trajs: bool = False
    exploration_type: str = "RANDOM"
    set_truncated: bool = False
    use_buffers: bool = False
    replay_buffer: ConfigBase | None = None
    extend_buffer: bool = False
    # --- policy handling / compilation --------------------------------------
    trust_policy: bool = True
    compile_policy: Any = None
    cudagraph_policy: Any = None
    no_cuda_sync: bool = False
    # --- weight synchronization ----------------------------------------------
    weight_updater: Any = None
    weight_sync_schemes: Any = None
    track_policy_version: bool = False
    local_init_rb: bool = False
    # --- Hydra instantiation hooks --------------------------------------------
    _target_: str = "torchrl.collectors.MultiSyncCollector"
    _partial_: bool = False

    def __post_init__(self):
        # Every per-worker env config must stay a factory.
        for sub_env_cfg in self.create_env_fn:
            sub_env_cfg._partial_ = True
        # Optional sub-configs are likewise deferred when provided.
        for attr_name in ("policy_factory", "weight_updater"):
            sub_cfg = getattr(self, attr_name)
            if sub_cfg is not None:
                sub_cfg._partial_ = True
164
+
165
+
166
# Legacy alias: historical name of ``MultiSyncCollectorConfig``.
# NOTE(review): the original line was a no-op self-assignment
# (``MultiSyncCollectorConfig = MultiSyncCollectorConfig``). The sibling
# aliases bind a ``*DataCollectorConfig`` name, so bind the matching
# legacy name here.
MultiSyncDataCollectorConfig = MultiSyncCollectorConfig
168
+
169
+
170
@dataclass
class MultiAsyncCollectorConfig(BaseCollectorConfig):
    """Configures a multi-process asynchronous collector.

    ``_target_`` points at ``torchrl.collectors.MultiAsyncCollector``.
    ``create_env_fn`` is iterated in ``__post_init__`` and is therefore
    expected to be a collection of environment configs (presumably one per
    worker — confirm against the collector's API).
    """

    # --- environment / workers -------------------------------------------
    create_env_fn: Any = MISSING
    num_workers: int | None = None
    # --- policy -----------------------------------------------------------
    policy: Any = None
    policy_factory: Any = None
    # --- frame budget ------------------------------------------------------
    frames_per_batch: int | None = None
    init_random_frames: int | None = 0
    total_frames: int = -1
    # --- device placement --------------------------------------------------
    device: str | None = None
    storing_device: str | None = None
    policy_device: str | None = None
    env_device: str | None = None
    # --- rollout behavior --------------------------------------------------
    create_env_kwargs: dict | None = None
    max_frames_per_traj: int | None = None
    reset_at_each_iter: bool = False
    postproc: ConfigBase | None = None
    split_trajs: bool = False
    exploration_type: str = "RANDOM"
    set_truncated: bool = False
    use_buffers: bool = False
    replay_buffer: ConfigBase | None = None
    extend_buffer: bool = False
    # --- policy handling / compilation --------------------------------------
    trust_policy: bool = True
    compile_policy: Any = None
    cudagraph_policy: Any = None
    no_cuda_sync: bool = False
    # --- weight synchronization ----------------------------------------------
    weight_updater: Any = None
    weight_sync_schemes: Any = None
    track_policy_version: bool = False
    local_init_rb: bool = False
    # --- Hydra instantiation hooks --------------------------------------------
    _target_: str = "torchrl.collectors.MultiAsyncCollector"
    _partial_: bool = False

    def __post_init__(self):
        # Every per-worker env config must stay a factory.
        for sub_env_cfg in self.create_env_fn:
            sub_env_cfg._partial_ = True
        # Optional sub-configs are likewise deferred when provided.
        for attr_name in ("policy_factory", "weight_updater"):
            sub_cfg = getattr(self, attr_name)
            if sub_cfg is not None:
                sub_cfg._partial_ = True
213
+
214
+
215
# Legacy alias: historical name of ``MultiAsyncCollectorConfig``.
# NOTE(review): the original line was a no-op self-assignment
# (``MultiAsyncCollectorConfig = MultiAsyncCollectorConfig``). The sibling
# aliases bind a ``*DataCollectorConfig`` name, so bind the matching
# legacy name here.
MultiAsyncDataCollectorConfig = MultiAsyncCollectorConfig
@@ -0,0 +1,41 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ from __future__ import annotations
7
+
8
+ from abc import ABC, abstractmethod
9
+ from dataclasses import dataclass
10
+
11
+ from omegaconf import DictConfig
12
+
13
+
14
@dataclass
class ConfigBase(ABC):
    """Root of the configuration-class hierarchy.

    Every configuration dataclass derives from this class; the only
    contract it imposes is a ``__post_init__`` hook, which the dataclass
    machinery invokes after field initialization.
    """

    @abstractmethod
    def __post_init__(self) -> None:
        """Hook run after dataclass field initialization."""
25
+
26
+
27
@dataclass
class Config:
    """A schema-less configuration object.

    Accepts arbitrary keyword arguments and stores them in an
    ``omegaconf.DictConfig``; attribute reads and writes are proxied to
    that underlying container.
    """

    def __init__(self, **kwargs):
        # The only attribute kept on the instance itself; everything else
        # lives inside the DictConfig.
        self._config = DictConfig(kwargs)

    def __getattr__(self, name):
        # Guard against infinite recursion when ``_config`` is not set yet
        # (e.g. instances produced via ``__new__``/unpickling): without it,
        # looking up ``self._config`` re-enters ``__getattr__`` forever and
        # raises RecursionError instead of the expected AttributeError.
        if name == "_config":
            raise AttributeError(name)
        return getattr(self._config, name)

    def __setattr__(self, name, value):
        if name == "_config":
            # Store the container itself normally.
            super().__setattr__(name, value)
        else:
            # Everything else is written into the container.
            setattr(self._config, name, value)
@@ -0,0 +1,308 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass, field
9
+ from typing import Any
10
+
11
+ from omegaconf import MISSING
12
+
13
+ from torchrl.trainers.algorithms.configs.common import ConfigBase
14
+
15
+
16
+ @dataclass
17
+ class WriterConfig(ConfigBase):
18
+ """Base configuration class for replay buffer writers."""
19
+
20
+ _target_: str = "torchrl.data.replay_buffers.Writer"
21
+
22
+ def __post_init__(self) -> None:
23
+ """Post-initialization hook for writer configurations."""
24
+
25
+
26
+ @dataclass
27
+ class RoundRobinWriterConfig(WriterConfig):
28
+ """Configuration for round-robin writer that distributes data across multiple storages."""
29
+
30
+ _target_: str = "torchrl.data.replay_buffers.RoundRobinWriter"
31
+ compilable: bool = False
32
+
33
+ def __post_init__(self) -> None:
34
+ """Post-initialization hook for round-robin writer configurations."""
35
+ super().__post_init__()
36
+
37
+
38
+ @dataclass
39
+ class SamplerConfig(ConfigBase):
40
+ """Base configuration class for replay buffer samplers."""
41
+
42
+ _target_: str = "torchrl.data.replay_buffers.Sampler"
43
+
44
+ def __post_init__(self) -> None:
45
+ """Post-initialization hook for sampler configurations."""
46
+
47
+
48
+ @dataclass
49
+ class RandomSamplerConfig(SamplerConfig):
50
+ """Configuration for random sampling from replay buffer."""
51
+
52
+ _target_: str = "torchrl.data.replay_buffers.RandomSampler"
53
+
54
+ def __post_init__(self) -> None:
55
+ """Post-initialization hook for random sampler configurations."""
56
+ super().__post_init__()
57
+
58
+
59
+ @dataclass
60
+ class WriterEnsembleConfig(WriterConfig):
61
+ """Configuration for ensemble writer that combines multiple writers."""
62
+
63
+ _target_: str = "torchrl.data.replay_buffers.WriterEnsemble"
64
+ writers: list[Any] = field(default_factory=list)
65
+ p: Any = None
66
+
67
+
68
+ @dataclass
69
+ class TensorDictMaxValueWriterConfig(WriterConfig):
70
+ """Configuration for TensorDict max value writer."""
71
+
72
+ _target_: str = "torchrl.data.replay_buffers.TensorDictMaxValueWriter"
73
+ rank_key: Any = None
74
+ reduction: str = "sum"
75
+
76
+
77
+ @dataclass
78
+ class TensorDictRoundRobinWriterConfig(WriterConfig):
79
+ """Configuration for TensorDict round-robin writer."""
80
+
81
+ _target_: str = "torchrl.data.replay_buffers.TensorDictRoundRobinWriter"
82
+ compilable: bool = False
83
+
84
+
85
+ @dataclass
86
+ class ImmutableDatasetWriterConfig(WriterConfig):
87
+ """Configuration for immutable dataset writer."""
88
+
89
+ _target_: str = "torchrl.data.replay_buffers.ImmutableDatasetWriter"
90
+
91
+
92
+ @dataclass
93
+ class SamplerEnsembleConfig(SamplerConfig):
94
+ """Configuration for ensemble sampler that combines multiple samplers."""
95
+
96
+ _target_: str = "torchrl.data.replay_buffers.SamplerEnsemble"
97
+ samplers: list[Any] = field(default_factory=list)
98
+ p: Any = None
99
+
100
+
101
+ @dataclass
102
+ class PrioritizedSliceSamplerConfig(SamplerConfig):
103
+ """Configuration for prioritized slice sampling from replay buffer."""
104
+
105
+ num_slices: int | None = None
106
+ slice_len: int | None = None
107
+ end_key: Any = None
108
+ traj_key: Any = None
109
+ ends: Any = None
110
+ trajectories: Any = None
111
+ cache_values: bool = False
112
+ truncated_key: Any = ("next", "truncated")
113
+ strict_length: bool = True
114
+ compile: Any = False
115
+ span: Any = False
116
+ use_gpu: Any = False
117
+ max_capacity: int | None = None
118
+ alpha: float | None = None
119
+ beta: float | None = None
120
+ eps: float | None = None
121
+ reduction: str | None = None
122
+ _target_: str = "torchrl.data.replay_buffers.PrioritizedSliceSampler"
123
+
124
+
125
+ @dataclass
126
+ class SliceSamplerWithoutReplacementConfig(SamplerConfig):
127
+ """Configuration for slice sampling without replacement."""
128
+
129
+ _target_: str = "torchrl.data.replay_buffers.SliceSamplerWithoutReplacement"
130
+ num_slices: int | None = None
131
+ slice_len: int | None = None
132
+ end_key: Any = None
133
+ traj_key: Any = None
134
+ ends: Any = None
135
+ trajectories: Any = None
136
+ cache_values: bool = False
137
+ truncated_key: Any = ("next", "truncated")
138
+ strict_length: bool = True
139
+ compile: Any = False
140
+ span: Any = False
141
+ use_gpu: Any = False
142
+
143
+
144
+ @dataclass
145
+ class SliceSamplerConfig(SamplerConfig):
146
+ """Configuration for slice sampling from replay buffer."""
147
+
148
+ _target_: str = "torchrl.data.replay_buffers.SliceSampler"
149
+ num_slices: int | None = None
150
+ slice_len: int | None = None
151
+ end_key: Any = None
152
+ traj_key: Any = None
153
+ ends: Any = None
154
+ trajectories: Any = None
155
+ cache_values: bool = False
156
+ truncated_key: Any = ("next", "truncated")
157
+ strict_length: bool = True
158
+ compile: Any = False
159
+ span: Any = False
160
+ use_gpu: Any = False
161
+
162
+
163
+ @dataclass
164
+ class PrioritizedSamplerConfig(SamplerConfig):
165
+ """Configuration for prioritized sampling from replay buffer."""
166
+
167
+ max_capacity: int | None = None
168
+ alpha: float | None = None
169
+ beta: float | None = None
170
+ eps: float | None = None
171
+ reduction: str | None = None
172
+ _target_: str = "torchrl.data.replay_buffers.PrioritizedSampler"
173
+
174
+
175
+ @dataclass
176
+ class SamplerWithoutReplacementConfig(SamplerConfig):
177
+ """Configuration for sampling without replacement."""
178
+
179
+ _target_: str = "torchrl.data.replay_buffers.SamplerWithoutReplacement"
180
+ drop_last: bool = False
181
+ shuffle: bool = True
182
+
183
+
184
+ @dataclass
185
+ class StorageConfig(ConfigBase):
186
+ """Base configuration class for replay buffer storage."""
187
+
188
+ _partial_: bool = False
189
+ _target_: str = "torchrl.data.replay_buffers.Storage"
190
+
191
+ def __post_init__(self) -> None:
192
+ """Post-initialization hook for storage configurations."""
193
+
194
+
195
+ @dataclass
196
+ class TensorStorageConfig(StorageConfig):
197
+ """Configuration for tensor-based storage in replay buffer."""
198
+
199
+ _target_: str = "torchrl.data.replay_buffers.TensorStorage"
200
+ max_size: int | None = None
201
+ storage: Any = None
202
+ device: Any = None
203
+ ndim: int | None = None
204
+ compilable: bool = False
205
+
206
+ def __post_init__(self) -> None:
207
+ """Post-initialization hook for tensor storage configurations."""
208
+ super().__post_init__()
209
+
210
+
211
+ @dataclass
212
+ class ListStorageConfig(StorageConfig):
213
+ """Configuration for list-based storage in replay buffer."""
214
+
215
+ _target_: str = "torchrl.data.replay_buffers.ListStorage"
216
+ max_size: int | None = None
217
+ compilable: bool = False
218
+
219
+
220
+ @dataclass
221
+ class StorageEnsembleWriterConfig(StorageConfig):
222
+ """Configuration for storage ensemble writer."""
223
+
224
+ _target_: str = "torchrl.data.replay_buffers.StorageEnsembleWriter"
225
+ writers: list[Any] = MISSING
226
+ transforms: list[Any] = MISSING
227
+
228
+
229
+ @dataclass
230
+ class LazyStackStorageConfig(StorageConfig):
231
+ """Configuration for lazy stack storage."""
232
+
233
+ _target_: str = "torchrl.data.replay_buffers.LazyStackStorage"
234
+ max_size: int | None = None
235
+ compilable: bool = False
236
+ stack_dim: int = 0
237
+
238
+
239
+ @dataclass
240
+ class StorageEnsembleConfig(StorageConfig):
241
+ """Configuration for storage ensemble."""
242
+
243
+ _target_: str = "torchrl.data.replay_buffers.StorageEnsemble"
244
+ storages: list[Any] = MISSING
245
+ transforms: list[Any] = MISSING
246
+
247
+
248
+ @dataclass
249
+ class LazyMemmapStorageConfig(StorageConfig):
250
+ """Configuration for lazy memory-mapped storage."""
251
+
252
+ _target_: str = "torchrl.data.replay_buffers.LazyMemmapStorage"
253
+ max_size: int | None = None
254
+ device: Any = None
255
+ ndim: int = 1
256
+ compilable: bool = False
257
+ shared_init: bool = False
258
+
259
+
260
+ @dataclass
261
+ class LazyTensorStorageConfig(StorageConfig):
262
+ """Configuration for lazy tensor storage."""
263
+
264
+ _target_: str = "torchrl.data.replay_buffers.LazyTensorStorage"
265
+ max_size: int | None = None
266
+ device: Any = None
267
+ ndim: int = 1
268
+ compilable: bool = False
269
+ shared_init: bool = False
270
+
271
+
272
+ @dataclass
273
+ class ReplayBufferBaseConfig(ConfigBase):
274
+ """Base configuration class for replay buffers."""
275
+
276
+ _partial_: bool = False
277
+
278
+ def __post_init__(self) -> None:
279
+ """Post-initialization hook for replay buffer configurations."""
280
+
281
+
282
+ @dataclass
283
+ class TensorDictReplayBufferConfig(ReplayBufferBaseConfig):
284
+ """Configuration for TensorDict-based replay buffer."""
285
+
286
+ _target_: str = "torchrl.data.replay_buffers.TensorDictReplayBuffer"
287
+ sampler: Any = None
288
+ storage: Any = None
289
+ writer: Any = None
290
+ transform: Any = None
291
+ batch_size: int | None = None
292
+
293
+ def __post_init__(self) -> None:
294
+ """Post-initialization hook for TensorDict replay buffer configurations."""
295
+ super().__post_init__()
296
+
297
+
298
+ @dataclass
299
+ class ReplayBufferConfig(ReplayBufferBaseConfig):
300
+ """Configuration for generic replay buffer."""
301
+
302
+ _target_: str = "torchrl.data.replay_buffers.ReplayBuffer"
303
+ sampler: Any = None
304
+ storage: Any = None
305
+ writer: Any = None
306
+ transform: Any = None
307
+ batch_size: int | None = None
308
+ shared: bool = False
@@ -0,0 +1,104 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass, field
9
+ from typing import Any
10
+
11
+ from omegaconf import MISSING
12
+
13
+ from torchrl.envs.common import EnvBase
14
+ from torchrl.trainers.algorithms.configs.common import ConfigBase
15
+
16
+
17
+ @dataclass
18
+ class EnvConfig(ConfigBase):
19
+ """Base configuration class for environments."""
20
+
21
+ _partial_: bool = False
22
+
23
+ def __post_init__(self) -> None:
24
+ """Post-initialization hook for environment configurations."""
25
+ self._partial_ = False
26
+
27
+
28
+ @dataclass
29
+ class BatchedEnvConfig(EnvConfig):
30
+ """Configuration for batched environments."""
31
+
32
+ create_env_fn: Any = MISSING
33
+ num_workers: int = 1
34
+ create_env_kwargs: dict = field(default_factory=dict)
35
+ batched_env_type: str = "parallel"
36
+ device: str | None = None
37
+ # batched_env_type: Literal["parallel", "serial", "async"] = "parallel"
38
+ _target_: str = "torchrl.trainers.algorithms.configs.envs.make_batched_env"
39
+
40
+ def __post_init__(self) -> None:
41
+ """Post-initialization hook for batched environment configurations."""
42
+ super().__post_init__()
43
+ if hasattr(self.create_env_fn, "_partial_"):
44
+ self.create_env_fn._partial_ = True
45
+
46
+
47
+ @dataclass
48
+ class TransformedEnvConfig(EnvConfig):
49
+ """Configuration for transformed environments."""
50
+
51
+ base_env: Any = MISSING
52
+ transform: Any = None
53
+ cache_specs: bool = True
54
+ auto_unwrap: bool | None = None
55
+ _target_: str = "torchrl.envs.TransformedEnv"
56
+
57
+
58
+ def make_batched_env(
59
+ create_env_fn, num_workers, batched_env_type="parallel", device=None, **kwargs
60
+ ):
61
+ """Create a batched environment.
62
+
63
+ Args:
64
+ create_env_fn: Function to create individual environments or environment instance.
65
+ num_workers: Number of worker environments.
66
+ batched_env_type: Type of batched environment (parallel, serial, async).
67
+ device: Device to place the batched environment on.
68
+ **kwargs: Additional keyword arguments.
69
+
70
+ Returns:
71
+ The created batched environment instance.
72
+ """
73
+ from torchrl.envs import AsyncEnvPool, ParallelEnv, SerialEnv
74
+
75
+ if create_env_fn is None:
76
+ raise ValueError("create_env_fn must be provided")
77
+
78
+ if num_workers is None:
79
+ raise ValueError("num_workers must be provided")
80
+
81
+ # If create_env_fn is a config object, create a lambda that instantiates it each time
82
+ if isinstance(create_env_fn, EnvBase):
83
+ # Already an instance (either instantiated config or actual env), wrap in lambda
84
+ env_instance = create_env_fn
85
+
86
+ def env_fn(env_instance=env_instance):
87
+ return env_instance
88
+
89
+ else:
90
+ env_fn = create_env_fn
91
+ assert callable(env_fn), env_fn
92
+
93
+ # Add device to kwargs if provided
94
+ if device is not None:
95
+ kwargs["device"] = device
96
+
97
+ if batched_env_type == "parallel":
98
+ return ParallelEnv(num_workers, env_fn, **kwargs)
99
+ elif batched_env_type == "serial":
100
+ return SerialEnv(num_workers, env_fn, **kwargs)
101
+ elif batched_env_type == "async":
102
+ return AsyncEnvPool([env_fn] * num_workers, **kwargs)
103
+ else:
104
+ raise ValueError(f"Unknown batched_env_type: {batched_env_type}")