PyPI - torchrl - Versions diffs - 0.10.1__cp313-cp313-win_amd64.whl → 0.11.0__cp313-cp313-win_amd64.whl - Mend

torchrl 0.10.1__cp313-cp313-win_amd64.whl → 0.11.0__cp313-cp313-win_amd64.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (228)

benchmarks/ecosystem/gym_env_throughput.py +1 -1
benchmarks/storage/benchmark_sample_latency_over_rpc.py +1 -1
benchmarks/test_collectors_benchmark.py +1 -1
benchmarks/test_non_tensor_env_benchmark.py +70 -0
benchmarks/test_objectives_benchmarks.py +22 -0
sota-implementations/a2c/a2c_atari.py +6 -8
sota-implementations/a2c/a2c_mujoco.py +6 -8
sota-implementations/cql/cql_offline.py +4 -8
sota-implementations/cql/cql_online.py +4 -8
sota-implementations/cql/discrete_cql_offline.py +4 -5
sota-implementations/cql/discrete_cql_online.py +4 -8
sota-implementations/crossq/crossq.py +6 -8
sota-implementations/ddpg/ddpg.py +9 -16
sota-implementations/decision_transformer/dt.py +4 -8
sota-implementations/decision_transformer/online_dt.py +4 -8
sota-implementations/dqn/dqn_atari.py +2 -8
sota-implementations/dqn/dqn_cartpole.py +2 -8
sota-implementations/dreamer/README.md +128 -6
sota-implementations/dreamer/dreamer.py +408 -159
sota-implementations/dreamer/dreamer_utils.py +456 -76
sota-implementations/expert-iteration/ei_utils.py +24 -28
sota-implementations/expert-iteration/expert-iteration-async.py +29 -26
sota-implementations/expert-iteration/expert-iteration-sync.py +25 -28
sota-implementations/expert-iteration/requirements_gsm8k.txt +2 -2
sota-implementations/expert-iteration/requirements_ifeval.txt +2 -2
sota-implementations/gail/gail.py +6 -10
sota-implementations/grpo/grpo-async.py +26 -22
sota-implementations/grpo/grpo-sync.py +21 -20
sota-implementations/grpo/grpo_utils.py +40 -10
sota-implementations/grpo/requirements_gsm8k.txt +11 -13
sota-implementations/grpo/requirements_ifeval.txt +16 -16
sota-implementations/impala/impala_multi_node_ray.py +2 -2
sota-implementations/impala/impala_multi_node_submitit.py +2 -2
sota-implementations/impala/impala_single_node.py +2 -2
sota-implementations/iql/discrete_iql.py +4 -8
sota-implementations/iql/iql_offline.py +6 -10
sota-implementations/iql/iql_online.py +4 -8
sota-implementations/multiagent/mappo_ippo.py +1 -1
sota-implementations/multiagent/sac.py +1 -1
sota-implementations/ppo/ppo_atari.py +6 -10
sota-implementations/ppo/ppo_mujoco.py +6 -10
sota-implementations/sac/sac-async.py +11 -8
sota-implementations/sac/sac.py +6 -8
sota-implementations/sac_trainer/train.py +16 -0
sota-implementations/td3/td3.py +6 -9
sota-implementations/td3_bc/td3_bc.py +6 -8
torchrl/__init__.py +52 -29
torchrl/_extension.py +12 -2
torchrl/_torchrl.cp313-win_amd64.pyd +0 -0
torchrl/_utils.py +472 -5
torchrl/collectors/__init__.py +27 -14
torchrl/collectors/_base.py +1058 -0
torchrl/collectors/_constants.py +88 -0
torchrl/collectors/_multi_async.py +324 -0
torchrl/collectors/_multi_base.py +1805 -0
torchrl/collectors/_multi_sync.py +464 -0
torchrl/collectors/_runner.py +581 -0
torchrl/collectors/_single.py +2009 -0
torchrl/collectors/_single_async.py +259 -0
torchrl/collectors/collectors.py +54 -3877
torchrl/collectors/distributed/__init__.py +10 -3
torchrl/collectors/distributed/default_configs.py +99 -0
torchrl/collectors/distributed/generic.py +393 -159
torchrl/collectors/distributed/ray.py +367 -78
torchrl/collectors/distributed/rpc.py +198 -103
torchrl/collectors/distributed/sync.py +182 -93
torchrl/collectors/distributed/utils.py +4 -5
torchrl/collectors/llm/base.py +44 -29
torchrl/collectors/llm/ray_collector.py +1 -1
torchrl/collectors/llm/weight_update/vllm.py +25 -17
torchrl/collectors/llm/weight_update/vllm_v2.py +20 -20
torchrl/collectors/utils.py +177 -3
torchrl/collectors/weight_update.py +25 -9
torchrl/data/__init__.py +0 -24
torchrl/data/datasets/d4rl.py +14 -6
torchrl/data/datasets/minari_data.py +110 -8
torchrl/data/datasets/openx.py +1 -1
torchrl/data/postprocs/postprocs.py +2 -2
torchrl/data/replay_buffers/checkpointers.py +54 -37
torchrl/data/replay_buffers/ray_buffer.py +16 -6
torchrl/data/replay_buffers/replay_buffers.py +248 -39
torchrl/data/replay_buffers/samplers.py +46 -40
torchrl/data/replay_buffers/storages.py +488 -18
torchrl/data/replay_buffers/utils.py +4 -4
torchrl/data/replay_buffers/writers.py +22 -0
torchrl/data/tensor_specs.py +49 -170
torchrl/data/utils.py +6 -9
torchrl/envs/__init__.py +4 -4
torchrl/envs/async_envs.py +83 -8
torchrl/envs/batched_envs.py +347 -76
torchrl/envs/common.py +164 -9
torchrl/envs/custom/chess.py +5 -6
torchrl/envs/env_creator.py +7 -2
torchrl/envs/libs/__init__.py +3 -0
torchrl/envs/libs/brax.py +76 -4
torchrl/envs/libs/dm_control.py +64 -5
torchrl/envs/libs/envpool.py +40 -0
torchrl/envs/libs/gym.py +174 -29
torchrl/envs/libs/procgen.py +351 -0
torchrl/envs/libs/vmas.py +10 -40
torchrl/envs/llm/datasets/gsm8k.py +2 -2
torchrl/envs/llm/envs.py +3 -1
torchrl/envs/llm/transforms/__init__.py +25 -6
torchrl/envs/llm/transforms/browser.py +4 -4
torchrl/envs/llm/transforms/dataloading.py +3 -7
torchrl/envs/llm/transforms/kl.py +3 -2
torchrl/envs/llm/transforms/tools.py +1382 -251
torchrl/envs/model_based/common.py +8 -5
torchrl/envs/model_based/dreamer.py +28 -2
torchrl/envs/transforms/__init__.py +5 -4
torchrl/envs/transforms/module.py +341 -0
torchrl/envs/{llm/transforms → transforms}/ray_service.py +51 -32
torchrl/envs/transforms/transforms.py +259 -36
torchrl/envs/transforms/vecnorm.py +27 -0
torchrl/envs/transforms/vip.py +6 -6
torchrl/envs/utils.py +8 -32
torchrl/modules/__init__.py +20 -0
torchrl/modules/distributions/continuous.py +74 -11
torchrl/modules/distributions/discrete.py +2 -2
torchrl/modules/distributions/truncated_normal.py +6 -4
torchrl/modules/distributions/utils.py +7 -3
torchrl/modules/llm/__init__.py +29 -15
torchrl/modules/llm/backends/__init__.py +42 -17
torchrl/modules/llm/backends/vllm/__init__.py +48 -16
torchrl/modules/llm/backends/vllm/_models.py +46 -0
torchrl/modules/llm/backends/vllm/vllm_async.py +235 -152
torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
torchrl/modules/llm/backends/vllm/vllm_sync.py +26 -6
torchrl/modules/llm/backends/vllm/vllm_utils.py +23 -4
torchrl/modules/llm/policies/__init__.py +6 -1
torchrl/modules/llm/policies/common.py +447 -9
torchrl/modules/llm/policies/transformers_wrapper.py +31 -20
torchrl/modules/llm/policies/vllm_wrapper.py +116 -48
torchrl/modules/mcts/__init__.py +21 -0
torchrl/modules/mcts/scores.py +579 -0
torchrl/modules/models/decision_transformer.py +1 -1
torchrl/modules/models/exploration.py +5 -5
torchrl/modules/models/model_based.py +256 -36
torchrl/modules/models/multiagent.py +28 -12
torchrl/modules/planners/cem.py +5 -2
torchrl/modules/planners/common.py +4 -1
torchrl/modules/planners/mppi.py +5 -1
torchrl/modules/tensordict_module/__init__.py +2 -0
torchrl/modules/tensordict_module/actors.py +10 -10
torchrl/modules/tensordict_module/common.py +2 -2
torchrl/modules/tensordict_module/exploration.py +80 -11
torchrl/modules/tensordict_module/probabilistic.py +2 -2
torchrl/modules/tensordict_module/rnn.py +21 -15
torchrl/objectives/a2c.py +24 -23
torchrl/objectives/common.py +70 -12
torchrl/objectives/cql.py +25 -6
torchrl/objectives/crossq.py +19 -8
torchrl/objectives/ddpg.py +22 -3
torchrl/objectives/decision_transformer.py +5 -6
torchrl/objectives/deprecated.py +14 -2
torchrl/objectives/dqn.py +42 -6
torchrl/objectives/dreamer.py +52 -21
torchrl/objectives/iql.py +19 -8
torchrl/objectives/llm/__init__.py +22 -2
torchrl/objectives/llm/grpo.py +381 -65
torchrl/objectives/llm/sft.py +4 -4
torchrl/objectives/multiagent/qmixer.py +8 -1
torchrl/objectives/ppo.py +39 -31
torchrl/objectives/redq.py +16 -3
torchrl/objectives/reinforce.py +8 -0
torchrl/objectives/sac.py +106 -155
torchrl/objectives/td3.py +21 -4
torchrl/objectives/td3_bc.py +21 -4
torchrl/objectives/utils.py +44 -4
torchrl/objectives/value/advantages.py +16 -2
torchrl/objectives/value/functional.py +41 -12
torchrl/record/loggers/trackio.py +163 -0
torchrl/record/loggers/utils.py +9 -1
torchrl/record/loggers/wandb.py +10 -0
torchrl/record/recorder.py +5 -3
torchrl/services/__init__.py +79 -0
torchrl/services/base.py +109 -0
torchrl/services/ray_service.py +453 -0
torchrl/testing/__init__.py +107 -0
torchrl/testing/assertions.py +179 -0
torchrl/testing/dist_utils.py +122 -0
torchrl/testing/env_creators.py +227 -0
torchrl/testing/env_helper.py +35 -0
torchrl/testing/gym_helpers.py +156 -0
torchrl/testing/llm_mocks.py +119 -0
torchrl/testing/mocking_classes.py +2720 -0
torchrl/testing/modules.py +295 -0
torchrl/testing/mp_helpers.py +15 -0
torchrl/testing/ray_helpers.py +293 -0
torchrl/testing/utils.py +190 -0
torchrl/trainers/__init__.py +6 -4
torchrl/trainers/algorithms/__init__.py +2 -1
torchrl/trainers/algorithms/configs/__init__.py +138 -13
torchrl/trainers/algorithms/configs/collectors.py +44 -13
torchrl/trainers/algorithms/configs/data.py +3 -0
torchrl/trainers/algorithms/configs/modules.py +228 -4
torchrl/trainers/algorithms/configs/objectives.py +108 -6
torchrl/trainers/algorithms/configs/trainers.py +211 -7
torchrl/trainers/algorithms/configs/transforms.py +33 -2
torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
torchrl/trainers/algorithms/ppo.py +139 -26
torchrl/trainers/algorithms/sac.py +308 -0
torchrl/trainers/helpers/collectors.py +15 -15
torchrl/trainers/helpers/trainers.py +5 -5
torchrl/trainers/trainers.py +431 -100
torchrl/weight_update/__init__.py +33 -0
torchrl/weight_update/_distributed.py +749 -0
torchrl/weight_update/_mp.py +624 -0
torchrl/weight_update/_noupdate.py +102 -0
torchrl/weight_update/_ray.py +1032 -0
torchrl/weight_update/_rpc.py +284 -0
torchrl/weight_update/_shared.py +891 -0
torchrl/weight_update/llm/__init__.py +32 -0
torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
torchrl/weight_update/llm/vllm_nccl.py +710 -0
torchrl/weight_update/utils.py +73 -0
torchrl/weight_update/weight_sync_schemes.py +1244 -0
{torchrl-0.10.1.dist-info → torchrl-0.11.0.dist-info}/METADATA +23 -6
torchrl-0.11.0.dist-info/RECORD +394 -0
{torchrl-0.10.1.dist-info → torchrl-0.11.0.dist-info}/top_level.txt +2 -0
torchrl/data/rlhf.py +0 -41
torchrl/envs/transforms/llm.py +0 -329
torchrl/envs/transforms/rlhf.py +0 -27
torchrl/modules/models/rlhf.py +0 -17
torchrl-0.10.1.dist-info/RECORD +0 -352
{torchrl-0.10.1.dist-info → torchrl-0.11.0.dist-info}/LICENSE +0 -0
{torchrl-0.10.1.dist-info → torchrl-0.11.0.dist-info}/WHEEL +0 -0
{torchrl-0.10.1.dist-info → torchrl-0.11.0.dist-info}/entry_points.txt +0 -0

benchmarks/ecosystem/gym_env_throughput.py CHANGED Viewed

@@ -27,7 +27,7 @@ from torchrl.collectors import (
 )
 from torchrl.envs import EnvCreator, GymEnv, ParallelEnv
 from torchrl.envs.libs.gym import gym_backend as gym_bc, set_gym_backend
-from torchrl.envs.utils import RandomPolicy
+from torchrl.modules import RandomPolicy
 if __name__ == "__main__":
     avail_devices = ("cpu",)

benchmarks/storage/benchmark_sample_latency_over_rpc.py CHANGED Viewed

@@ -144,7 +144,7 @@ if __name__ == "__main__":
     rank = args.rank
     storage_type = args.storage
-    torchrl_logger.info(f"Rank: {rank}; Storage: {storage_type}")
+    torchrl_logger.debug(f"RANK: {rank}; Storage: {storage_type}")
     os.environ["MASTER_ADDR"] = "localhost"
     os.environ["MASTER_PORT"] = "29500"

benchmarks/test_collectors_benchmark.py CHANGED Viewed

@@ -18,7 +18,7 @@ from torchrl.data import LazyTensorStorage, ReplayBuffer
 from torchrl.data.utils import CloudpickleWrapper
 from torchrl.envs import EnvCreator, GymEnv, ParallelEnv, StepCounter, TransformedEnv
 from torchrl.envs.libs.dm_control import DMControlEnv
-from torchrl.envs.utils import RandomPolicy
+from torchrl.modules import RandomPolicy
 def single_collector_setup():

benchmarks/test_non_tensor_env_benchmark.py ADDED Viewed

@@ -0,0 +1,70 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+import gc
+import time
+import pytest
+from tensordict import set_capture_non_tensor_stack
+from torchrl.envs import ParallelEnv, SerialEnv
+from torchrl.testing.mocking_classes import EnvWithMetadata
+def _rollout(env, n_steps: int, break_when_any_done: bool) -> None:
+    env.rollout(n_steps, break_when_any_done=break_when_any_done)
+@pytest.mark.parametrize("break_when_any_done", [True, False])
+@pytest.mark.parametrize(
+    "kind,use_buffers",
+    [
+        pytest.param("single", None, id="single"),
+        pytest.param("serial", False, id="serial-no-buffers"),
+        pytest.param("serial", True, id="serial-buffers"),
+        pytest.param("parallel", False, id="parallel-no-buffers"),
+        pytest.param("parallel", True, id="parallel-buffers"),
+    ],
+)
+@pytest.mark.parametrize("n_steps", [1000])
+def test_non_tensor_env_rollout_speed(
+    benchmark,
+    break_when_any_done: bool,
+    kind: str,
+    use_buffers: bool | None,
+    n_steps: int,
+):
+    """Benchmarks a single rollout, after a warmup rollout, for non-tensor stacking envs.
+    Mirrors `test/test_envs.py::TestNonTensorEnv`'s option matrix (single/serial/parallel,
+    break_when_any_done, use_buffers).
+    """
+    with set_capture_non_tensor_stack(False):
+        if kind == "single":
+            env = EnvWithMetadata()
+        elif kind == "serial":
+            env = SerialEnv(2, EnvWithMetadata, use_buffers=use_buffers)
+        elif kind == "parallel":
+            env = ParallelEnv(2, EnvWithMetadata, use_buffers=use_buffers)
+        else:
+            raise RuntimeError(f"Unknown kind={kind}")
+        env.set_seed(0)
+        env.reset()
+        try:
+            # Warmup run (not timed)
+            _rollout(env, n_steps=n_steps, break_when_any_done=break_when_any_done)
+            # Timed run(s)
+            benchmark(
+                _rollout, env, n_steps=n_steps, break_when_any_done=break_when_any_done
+            )
+        finally:
+            env.close(raise_if_closed=False)
+            del env
+            # Give multiprocessing envs a brief chance to terminate cleanly.
+            time.sleep(0.05)
+            gc.collect()

benchmarks/test_objectives_benchmarks.py CHANGED Viewed

@@ -172,6 +172,8 @@ def _maybe_compile(fn, compile, td, fullgraph=FULLGRAPH, warmup=3):
 def test_dqn_speed(
     benchmark, backward, compile, n_obs=8, n_act=4, depth=3, ncells=128, batch=128
 ):
+    if compile == "reduce-overhead" and backward is not None:
+        pytest.skip("reduce-overhead with backward causes segfaults in CI")
     if compile:
         torch._dynamo.reset_code_caches()
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -227,6 +229,8 @@ def test_dqn_speed(
 def test_ddpg_speed(
     benchmark, backward, compile, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64
 ):
+    if compile == "reduce-overhead" and backward is not None:
+        pytest.skip("reduce-overhead with backward causes segfaults in CI")
     if compile:
         torch._dynamo.reset_code_caches()
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -303,6 +307,8 @@ def test_ddpg_speed(
 def test_sac_speed(
     benchmark, backward, compile, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64
 ):
+    if compile == "reduce-overhead" and backward is not None:
+        pytest.skip("reduce-overhead with backward causes segfaults in CI")
     if compile:
         torch._dynamo.reset_code_caches()
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -393,6 +399,8 @@ def test_sac_speed(
 def test_redq_speed(
     benchmark, backward, compile, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64
 ):
+    if compile == "reduce-overhead" and backward is not None:
+        pytest.skip("reduce-overhead with backward causes segfaults in CI")
     if compile:
         torch._dynamo.reset_code_caches()
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -484,6 +492,8 @@ def test_redq_speed(
 def test_redq_deprec_speed(
     benchmark, backward, compile, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64
 ):
+    if compile == "reduce-overhead" and backward is not None:
+        pytest.skip("reduce-overhead with backward causes segfaults in CI")
     if compile:
         torch._dynamo.reset_code_caches()
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -573,6 +583,8 @@ def test_redq_deprec_speed(
 def test_td3_speed(
     benchmark, backward, compile, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64
 ):
+    if compile == "reduce-overhead" and backward is not None:
+        pytest.skip("reduce-overhead with backward causes segfaults in CI")
     if compile:
         torch._dynamo.reset_code_caches()
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -667,6 +679,8 @@ def test_td3_speed(
 def test_cql_speed(
     benchmark, backward, compile, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64
 ):
+    if compile == "reduce-overhead" and backward is not None:
+        pytest.skip("reduce-overhead with backward causes segfaults in CI")
     if compile:
         torch._dynamo.reset_code_caches()
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -763,6 +777,8 @@ def test_a2c_speed(
     batch=128,
     T=10,
 ):
+    if compile == "reduce-overhead" and backward is not None:
+        pytest.skip("reduce-overhead with backward causes segfaults in CI")
     if compile:
         torch._dynamo.reset_code_caches()
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -866,6 +882,8 @@ def test_ppo_speed(
     batch=128,
     T=10,
 ):
+    if compile == "reduce-overhead" and backward is not None:
+        pytest.skip("reduce-overhead with backward causes segfaults in CI")
     if compile:
         torch._dynamo.reset_code_caches()
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -969,6 +987,8 @@ def test_reinforce_speed(
     batch=128,
     T=10,
 ):
+    if compile == "reduce-overhead" and backward is not None:
+        pytest.skip("reduce-overhead with backward causes segfaults in CI")
     if compile:
         torch._dynamo.reset_code_caches()
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -1072,6 +1092,8 @@ def test_iql_speed(
     batch=128,
     T=10,
 ):
+    if compile == "reduce-overhead" and backward is not None:
+        pytest.skip("reduce-overhead with backward causes segfaults in CI")
     if compile:
         torch._dynamo.reset_code_caches()
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

sota-implementations/a2c/a2c_atari.py CHANGED Viewed

@@ -22,7 +22,7 @@ def main(cfg: DictConfig):  # noqa: F821
     from tensordict import from_module
     from tensordict.nn import CudaGraphModule
-    from torchrl._utils import timeit
+    from torchrl._utils import get_available_device, timeit
     from torchrl.collectors import SyncDataCollector
     from torchrl.data import LazyTensorStorage, TensorDictReplayBuffer
     from torchrl.data.replay_buffers.samplers import SamplerWithoutReplacement
@@ -33,11 +33,9 @@ def main(cfg: DictConfig):  # noqa: F821
     from torchrl.record.loggers import generate_exp_name, get_logger
     from utils_atari import eval_model, make_parallel_env, make_ppo_models
-    device = cfg.loss.device
-    if not device:
-        device = torch.device("cpu" if not torch.cuda.is_available() else "cuda:0")
-    else:
-        device = torch.device(device)
+    device = (
+        torch.device(cfg.loss.device) if cfg.loss.device else get_available_device()
+    )
     # Correct for frame_skip
     frame_skip = 4
@@ -76,8 +74,8 @@ def main(cfg: DictConfig):  # noqa: F821
         actor_network=actor,
         critic_network=critic,
         loss_critic_type=cfg.loss.loss_critic_type,
-        entropy_coef=cfg.loss.entropy_coef,
-        critic_coef=cfg.loss.critic_coef,
+        entropy_coeff=cfg.loss.entropy_coeff,
+        critic_coeff=cfg.loss.critic_coeff,
     )
     # use end-of-life as done key

sota-implementations/a2c/a2c_mujoco.py CHANGED Viewed

@@ -23,7 +23,7 @@ def main(cfg: DictConfig):  # noqa: F821
     from tensordict import from_module
     from tensordict.nn import CudaGraphModule
-    from torchrl._utils import timeit
+    from torchrl._utils import get_available_device, timeit
     from torchrl.collectors import SyncDataCollector
     from torchrl.data import LazyTensorStorage, TensorDictReplayBuffer
     from torchrl.data.replay_buffers.samplers import SamplerWithoutReplacement
@@ -36,11 +36,9 @@ def main(cfg: DictConfig):  # noqa: F821
     # Define paper hyperparameters
-    device = cfg.loss.device
-    if not device:
-        device = torch.device("cpu" if not torch.cuda.is_available() else "cuda:0")
-    else:
-        device = torch.device(device)
+    device = (
+        torch.device(cfg.loss.device) if cfg.loss.device else get_available_device()
+    )
     num_mini_batches = cfg.collector.frames_per_batch // cfg.loss.mini_batch_size
     total_network_updates = (
@@ -77,8 +75,8 @@ def main(cfg: DictConfig):  # noqa: F821
         actor_network=actor,
         critic_network=critic,
         loss_critic_type=cfg.loss.loss_critic_type,
-        entropy_coef=cfg.loss.entropy_coef,
-        critic_coef=cfg.loss.critic_coef,
+        entropy_coeff=cfg.loss.entropy_coeff,
+        critic_coeff=cfg.loss.critic_coeff,
     )
     # Create optimizers

sota-implementations/cql/cql_offline.py CHANGED Viewed

@@ -18,7 +18,7 @@ import numpy as np
 import torch
 import tqdm
 from tensordict.nn import CudaGraphModule
-from torchrl._utils import timeit
+from torchrl._utils import get_available_device, timeit
 from torchrl.envs.utils import ExplorationType, set_exploration_type
 from torchrl.objectives import group_optimizers
 from torchrl.record.loggers import generate_exp_name, get_logger
@@ -55,13 +55,9 @@ def main(cfg: DictConfig):  # noqa: F821
     # Set seeds
     torch.manual_seed(cfg.env.seed)
     np.random.seed(cfg.env.seed)
-    device = cfg.optim.device
-    if device in ("", None):
-        if torch.cuda.is_available():
-            device = "cuda:0"
-        else:
-            device = "cpu"
-    device = torch.device(device)
+    device = (
+        torch.device(cfg.optim.device) if cfg.optim.device else get_available_device()
+    )
     # Create replay buffer
     replay_buffer = make_offline_replay_buffer(cfg.replay_buffer)

sota-implementations/cql/cql_online.py CHANGED Viewed

@@ -21,7 +21,7 @@ import torch
 import tqdm
 from tensordict import TensorDict
 from tensordict.nn import CudaGraphModule
-from torchrl._utils import timeit
+from torchrl._utils import get_available_device, timeit
 from torchrl.envs.utils import ExplorationType, set_exploration_type
 from torchrl.objectives import group_optimizers
 from torchrl.record.loggers import generate_exp_name, get_logger
@@ -60,13 +60,9 @@ def main(cfg: DictConfig):  # noqa: F821
     # Set seeds
     torch.manual_seed(cfg.env.seed)
     np.random.seed(cfg.env.seed)
-    device = cfg.optim.device
-    if device in ("", None):
-        if torch.cuda.is_available():
-            device = "cuda:0"
-        else:
-            device = "cpu"
-    device = torch.device(device)
+    device = (
+        torch.device(cfg.optim.device) if cfg.optim.device else get_available_device()
+    )
     # Create env
     train_env, eval_env = make_environment(

sota-implementations/cql/discrete_cql_offline.py CHANGED Viewed

@@ -18,7 +18,7 @@ import numpy as np
 import torch
 import tqdm
 from tensordict.nn import CudaGraphModule
-from torchrl._utils import timeit
+from torchrl._utils import get_available_device, timeit
 from torchrl.envs.utils import ExplorationType, set_exploration_type
 from torchrl.record.loggers import generate_exp_name, get_logger
 from utils import (
@@ -36,10 +36,9 @@ torch.set_float32_matmul_precision("high")
 @hydra.main(version_base="1.1", config_path="", config_name="discrete_offline_config")
 def main(cfg):  # noqa: F821
-    device = cfg.optim.device
-    if device in ("", None):
-        device = "cuda:0" if torch.cuda.is_available() else "cpu"
-    device = torch.device(device)
+    device = (
+        torch.device(cfg.optim.device) if cfg.optim.device else get_available_device()
+    )
     # Create logger
     exp_name = generate_exp_name("DiscreteCQL", cfg.logger.exp_name)

sota-implementations/cql/discrete_cql_online.py CHANGED Viewed

@@ -20,7 +20,7 @@ import torch
 import torch.cuda
 import tqdm
 from tensordict.nn import CudaGraphModule
-from torchrl._utils import timeit
+from torchrl._utils import get_available_device, timeit
 from torchrl.envs.utils import ExplorationType, set_exploration_type
 from torchrl.record.loggers import generate_exp_name, get_logger
 from utils import (
@@ -38,13 +38,9 @@ torch.set_float32_matmul_precision("high")
 @hydra.main(version_base="1.1", config_path="", config_name="discrete_online_config")
 def main(cfg: DictConfig):  # noqa: F821
-    device = cfg.optim.device
-    if device in ("", None):
-        if torch.cuda.is_available():
-            device = "cuda:0"
-        else:
-            device = "cpu"
-    device = torch.device(device)
+    device = (
+        torch.device(cfg.optim.device) if cfg.optim.device else get_available_device()
+    )
     # Create logger
     exp_name = generate_exp_name("DiscreteCQL", cfg.logger.exp_name)

sota-implementations/crossq/crossq.py CHANGED Viewed

@@ -21,7 +21,7 @@ import torch.cuda
 import tqdm
 from tensordict import TensorDict
 from tensordict.nn import CudaGraphModule
-from torchrl._utils import timeit
+from torchrl._utils import get_available_device, timeit
 from torchrl.envs.utils import ExplorationType, set_exploration_type
 from torchrl.objectives import group_optimizers
 from torchrl.record.loggers import generate_exp_name, get_logger
@@ -40,13 +40,11 @@ torch.set_float32_matmul_precision("high")
 @hydra.main(version_base="1.1", config_path=".", config_name="config")
 def main(cfg: DictConfig):  # noqa: F821
-    device = cfg.network.device
-    if device in ("", None):
-        if torch.cuda.is_available():
-            device = torch.device("cuda:0")
-        else:
-            device = torch.device("cpu")
-    device = torch.device(device)
+    device = (
+        torch.device(cfg.network.device)
+        if cfg.network.device
+        else get_available_device()
+    )
     # Create logger
     exp_name = generate_exp_name("CrossQ", cfg.logger.exp_name)

sota-implementations/ddpg/ddpg.py CHANGED Viewed

@@ -21,7 +21,7 @@ import torch.cuda
 import tqdm
 from tensordict import TensorDict
 from tensordict.nn import CudaGraphModule
-from torchrl._utils import timeit
+from torchrl._utils import get_available_device, timeit
 from torchrl.envs.utils import ExplorationType, set_exploration_type
 from torchrl.objectives import group_optimizers
 from torchrl.record.loggers import generate_exp_name, get_logger
@@ -39,21 +39,14 @@ from utils import (
 @hydra.main(version_base="1.1", config_path="", config_name="config")
 def main(cfg: DictConfig):  # noqa: F821
-    device = cfg.optim.device
-    if device in ("", None):
-        if torch.cuda.is_available():
-            device = "cuda:0"
-        else:
-            device = "cpu"
-    device = torch.device(device)
-    collector_device = cfg.collector.device
-    if collector_device in ("", None):
-        if torch.cuda.is_available():
-            collector_device = "cuda:0"
-        else:
-            collector_device = "cpu"
-    collector_device = torch.device(collector_device)
+    device = (
+        torch.device(cfg.optim.device) if cfg.optim.device else get_available_device()
+    )
+    collector_device = (
+        torch.device(cfg.collector.device)
+        if cfg.collector.device
+        else get_available_device()
+    )
     # Create logger
     exp_name = generate_exp_name("DDPG", cfg.logger.exp_name)

sota-implementations/decision_transformer/dt.py CHANGED Viewed

@@ -17,7 +17,7 @@ import torch
 import tqdm
 from tensordict import TensorDict
 from tensordict.nn import CudaGraphModule
-from torchrl._utils import logger as torchrl_logger, timeit
+from torchrl._utils import get_available_device, logger as torchrl_logger, timeit
 from torchrl.envs.libs.gym import set_gym_backend
 from torchrl.envs.utils import ExplorationType, set_exploration_type
 from torchrl.modules.tensordict_module import DecisionTransformerInferenceWrapper
@@ -38,13 +38,9 @@ from utils import (
 def main(cfg: DictConfig):  # noqa: F821
     set_gym_backend(cfg.env.backend).set()
-    model_device = cfg.optim.device
-    if model_device in ("", None):
-        if torch.cuda.is_available():
-            model_device = "cuda:0"
-        else:
-            model_device = "cpu"
-    model_device = torch.device(model_device)
+    model_device = (
+        torch.device(cfg.optim.device) if cfg.optim.device else get_available_device()
+    )
     # Set seeds
     torch.manual_seed(cfg.env.seed)

sota-implementations/decision_transformer/online_dt.py CHANGED Viewed

@@ -15,7 +15,7 @@ import numpy as np
 import torch
 import tqdm
 from tensordict.nn import CudaGraphModule
-from torchrl._utils import logger as torchrl_logger, timeit
+from torchrl._utils import get_available_device, logger as torchrl_logger, timeit
 from torchrl.envs.libs.gym import set_gym_backend
 from torchrl.envs.utils import ExplorationType, set_exploration_type
 from torchrl.modules.tensordict_module import DecisionTransformerInferenceWrapper
@@ -36,13 +36,9 @@ from utils import (
 def main(cfg: DictConfig):  # noqa: F821
     set_gym_backend(cfg.env.backend).set()
-    model_device = cfg.optim.device
-    if model_device in ("", None):
-        if torch.cuda.is_available():
-            model_device = "cuda:0"
-        else:
-            model_device = "cpu"
-    model_device = torch.device(model_device)
+    model_device = (
+        torch.device(cfg.optim.device) if cfg.optim.device else get_available_device()
+    )
     # Set seeds
     torch.manual_seed(cfg.env.seed)

sota-implementations/dqn/dqn_atari.py CHANGED Viewed

@@ -17,7 +17,7 @@ import torch.nn
 import torch.optim
 import tqdm
 from tensordict.nn import CudaGraphModule, TensorDictSequential
-from torchrl._utils import timeit
+from torchrl._utils import get_available_device, timeit
 from torchrl.collectors import SyncDataCollector
 from torchrl.data import LazyMemmapStorage, TensorDictReplayBuffer
 from torchrl.envs import ExplorationType, set_exploration_type
@@ -33,13 +33,7 @@ torch.set_float32_matmul_precision("high")
 @hydra.main(config_path="", config_name="config_atari", version_base="1.1")
 def main(cfg: DictConfig):  # noqa: F821
-    device = cfg.device
-    if device in ("", None):
-        if torch.cuda.is_available():
-            device = "cuda:0"
-        else:
-            device = "cpu"
-    device = torch.device(device)
+    device = torch.device(cfg.device) if cfg.device else get_available_device()
     # Correct for frame_skip
     frame_skip = 4

sota-implementations/dqn/dqn_cartpole.py CHANGED Viewed

@@ -12,7 +12,7 @@ import torch.nn
 import torch.optim
 import tqdm
 from tensordict.nn import CudaGraphModule, TensorDictSequential
-from torchrl._utils import timeit
+from torchrl._utils import get_available_device, timeit
 from torchrl.collectors import SyncDataCollector
 from torchrl.data import LazyTensorStorage, TensorDictReplayBuffer
 from torchrl.envs import ExplorationType, set_exploration_type
@@ -28,13 +28,7 @@ torch.set_float32_matmul_precision("high")
 @hydra.main(config_path="", config_name="config_cartpole", version_base="1.1")
 def main(cfg: DictConfig):  # noqa: F821
-    device = cfg.device
-    if device in ("", None):
-        if torch.cuda.is_available():
-            device = "cuda:0"
-        else:
-            device = "cpu"
-    device = torch.device(device)
+    device = torch.device(cfg.device) if cfg.device else get_available_device()
     # Make the components
     model = make_dqn_model(cfg.env.env_name, device=device)