torchrl-0.11.0-cp314-cp314t-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395)
  1. benchmarks/benchmark_batched_envs.py +104 -0
  2. benchmarks/conftest.py +91 -0
  3. benchmarks/ecosystem/gym_env_throughput.py +321 -0
  4. benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
  5. benchmarks/requirements.txt +7 -0
  6. benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
  7. benchmarks/test_collectors_benchmark.py +240 -0
  8. benchmarks/test_compressed_storage_benchmark.py +145 -0
  9. benchmarks/test_envs_benchmark.py +133 -0
  10. benchmarks/test_llm.py +101 -0
  11. benchmarks/test_non_tensor_env_benchmark.py +70 -0
  12. benchmarks/test_objectives_benchmarks.py +1199 -0
  13. benchmarks/test_replaybuffer_benchmark.py +254 -0
  14. sota-check/README.md +35 -0
  15. sota-implementations/README.md +142 -0
  16. sota-implementations/a2c/README.md +39 -0
  17. sota-implementations/a2c/a2c_atari.py +291 -0
  18. sota-implementations/a2c/a2c_mujoco.py +273 -0
  19. sota-implementations/a2c/utils_atari.py +240 -0
  20. sota-implementations/a2c/utils_mujoco.py +160 -0
  21. sota-implementations/bandits/README.md +7 -0
  22. sota-implementations/bandits/dqn.py +126 -0
  23. sota-implementations/cql/cql_offline.py +198 -0
  24. sota-implementations/cql/cql_online.py +249 -0
  25. sota-implementations/cql/discrete_cql_offline.py +180 -0
  26. sota-implementations/cql/discrete_cql_online.py +227 -0
  27. sota-implementations/cql/utils.py +471 -0
  28. sota-implementations/crossq/crossq.py +271 -0
  29. sota-implementations/crossq/utils.py +320 -0
  30. sota-implementations/ddpg/ddpg.py +231 -0
  31. sota-implementations/ddpg/utils.py +325 -0
  32. sota-implementations/decision_transformer/dt.py +163 -0
  33. sota-implementations/decision_transformer/lamb.py +167 -0
  34. sota-implementations/decision_transformer/online_dt.py +178 -0
  35. sota-implementations/decision_transformer/utils.py +562 -0
  36. sota-implementations/discrete_sac/discrete_sac.py +243 -0
  37. sota-implementations/discrete_sac/utils.py +324 -0
  38. sota-implementations/dqn/README.md +30 -0
  39. sota-implementations/dqn/dqn_atari.py +272 -0
  40. sota-implementations/dqn/dqn_cartpole.py +236 -0
  41. sota-implementations/dqn/utils_atari.py +132 -0
  42. sota-implementations/dqn/utils_cartpole.py +90 -0
  43. sota-implementations/dreamer/README.md +129 -0
  44. sota-implementations/dreamer/dreamer.py +586 -0
  45. sota-implementations/dreamer/dreamer_utils.py +1107 -0
  46. sota-implementations/expert-iteration/README.md +352 -0
  47. sota-implementations/expert-iteration/ei_utils.py +770 -0
  48. sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
  49. sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
  50. sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
  51. sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
  52. sota-implementations/gail/gail.py +327 -0
  53. sota-implementations/gail/gail_utils.py +68 -0
  54. sota-implementations/gail/ppo_utils.py +157 -0
  55. sota-implementations/grpo/README.md +273 -0
  56. sota-implementations/grpo/grpo-async.py +437 -0
  57. sota-implementations/grpo/grpo-sync.py +435 -0
  58. sota-implementations/grpo/grpo_utils.py +843 -0
  59. sota-implementations/grpo/requirements_gsm8k.txt +11 -0
  60. sota-implementations/grpo/requirements_ifeval.txt +16 -0
  61. sota-implementations/impala/README.md +33 -0
  62. sota-implementations/impala/impala_multi_node_ray.py +292 -0
  63. sota-implementations/impala/impala_multi_node_submitit.py +284 -0
  64. sota-implementations/impala/impala_single_node.py +261 -0
  65. sota-implementations/impala/utils.py +184 -0
  66. sota-implementations/iql/discrete_iql.py +230 -0
  67. sota-implementations/iql/iql_offline.py +164 -0
  68. sota-implementations/iql/iql_online.py +225 -0
  69. sota-implementations/iql/utils.py +437 -0
  70. sota-implementations/multiagent/README.md +74 -0
  71. sota-implementations/multiagent/iql.py +237 -0
  72. sota-implementations/multiagent/maddpg_iddpg.py +266 -0
  73. sota-implementations/multiagent/mappo_ippo.py +267 -0
  74. sota-implementations/multiagent/qmix_vdn.py +271 -0
  75. sota-implementations/multiagent/sac.py +337 -0
  76. sota-implementations/multiagent/utils/__init__.py +4 -0
  77. sota-implementations/multiagent/utils/logging.py +151 -0
  78. sota-implementations/multiagent/utils/utils.py +43 -0
  79. sota-implementations/ppo/README.md +29 -0
  80. sota-implementations/ppo/ppo_atari.py +305 -0
  81. sota-implementations/ppo/ppo_mujoco.py +293 -0
  82. sota-implementations/ppo/utils_atari.py +238 -0
  83. sota-implementations/ppo/utils_mujoco.py +152 -0
  84. sota-implementations/ppo_trainer/train.py +21 -0
  85. sota-implementations/redq/README.md +7 -0
  86. sota-implementations/redq/redq.py +199 -0
  87. sota-implementations/redq/utils.py +1060 -0
  88. sota-implementations/sac/sac-async.py +266 -0
  89. sota-implementations/sac/sac.py +239 -0
  90. sota-implementations/sac/utils.py +381 -0
  91. sota-implementations/sac_trainer/train.py +16 -0
  92. sota-implementations/td3/td3.py +254 -0
  93. sota-implementations/td3/utils.py +319 -0
  94. sota-implementations/td3_bc/td3_bc.py +177 -0
  95. sota-implementations/td3_bc/utils.py +251 -0
  96. torchrl/.dylibs/libc++.1.0.dylib +0 -0
  97. torchrl/__init__.py +144 -0
  98. torchrl/_extension.py +74 -0
  99. torchrl/_torchrl.cpython-314t-darwin.so +0 -0
  100. torchrl/_utils.py +1431 -0
  101. torchrl/collectors/__init__.py +48 -0
  102. torchrl/collectors/_base.py +1058 -0
  103. torchrl/collectors/_constants.py +88 -0
  104. torchrl/collectors/_multi_async.py +324 -0
  105. torchrl/collectors/_multi_base.py +1805 -0
  106. torchrl/collectors/_multi_sync.py +464 -0
  107. torchrl/collectors/_runner.py +581 -0
  108. torchrl/collectors/_single.py +2009 -0
  109. torchrl/collectors/_single_async.py +259 -0
  110. torchrl/collectors/collectors.py +62 -0
  111. torchrl/collectors/distributed/__init__.py +32 -0
  112. torchrl/collectors/distributed/default_configs.py +133 -0
  113. torchrl/collectors/distributed/generic.py +1306 -0
  114. torchrl/collectors/distributed/ray.py +1092 -0
  115. torchrl/collectors/distributed/rpc.py +1006 -0
  116. torchrl/collectors/distributed/sync.py +731 -0
  117. torchrl/collectors/distributed/utils.py +160 -0
  118. torchrl/collectors/llm/__init__.py +10 -0
  119. torchrl/collectors/llm/base.py +494 -0
  120. torchrl/collectors/llm/ray_collector.py +275 -0
  121. torchrl/collectors/llm/utils.py +36 -0
  122. torchrl/collectors/llm/weight_update/__init__.py +10 -0
  123. torchrl/collectors/llm/weight_update/vllm.py +348 -0
  124. torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
  125. torchrl/collectors/utils.py +433 -0
  126. torchrl/collectors/weight_update.py +591 -0
  127. torchrl/csrc/numpy_utils.h +38 -0
  128. torchrl/csrc/pybind.cpp +27 -0
  129. torchrl/csrc/segment_tree.h +458 -0
  130. torchrl/csrc/torch_utils.h +34 -0
  131. torchrl/csrc/utils.cpp +48 -0
  132. torchrl/csrc/utils.h +31 -0
  133. torchrl/data/__init__.py +187 -0
  134. torchrl/data/datasets/__init__.py +58 -0
  135. torchrl/data/datasets/atari_dqn.py +878 -0
  136. torchrl/data/datasets/common.py +281 -0
  137. torchrl/data/datasets/d4rl.py +489 -0
  138. torchrl/data/datasets/d4rl_infos.py +187 -0
  139. torchrl/data/datasets/gen_dgrl.py +375 -0
  140. torchrl/data/datasets/minari_data.py +643 -0
  141. torchrl/data/datasets/openml.py +177 -0
  142. torchrl/data/datasets/openx.py +798 -0
  143. torchrl/data/datasets/roboset.py +363 -0
  144. torchrl/data/datasets/utils.py +11 -0
  145. torchrl/data/datasets/vd4rl.py +432 -0
  146. torchrl/data/llm/__init__.py +34 -0
  147. torchrl/data/llm/dataset.py +491 -0
  148. torchrl/data/llm/history.py +1378 -0
  149. torchrl/data/llm/prompt.py +198 -0
  150. torchrl/data/llm/reward.py +225 -0
  151. torchrl/data/llm/topk.py +186 -0
  152. torchrl/data/llm/utils.py +543 -0
  153. torchrl/data/map/__init__.py +21 -0
  154. torchrl/data/map/hash.py +185 -0
  155. torchrl/data/map/query.py +204 -0
  156. torchrl/data/map/tdstorage.py +363 -0
  157. torchrl/data/map/tree.py +1434 -0
  158. torchrl/data/map/utils.py +103 -0
  159. torchrl/data/postprocs/__init__.py +8 -0
  160. torchrl/data/postprocs/postprocs.py +391 -0
  161. torchrl/data/replay_buffers/__init__.py +99 -0
  162. torchrl/data/replay_buffers/checkpointers.py +622 -0
  163. torchrl/data/replay_buffers/ray_buffer.py +292 -0
  164. torchrl/data/replay_buffers/replay_buffers.py +2376 -0
  165. torchrl/data/replay_buffers/samplers.py +2578 -0
  166. torchrl/data/replay_buffers/scheduler.py +265 -0
  167. torchrl/data/replay_buffers/storages.py +2412 -0
  168. torchrl/data/replay_buffers/utils.py +1042 -0
  169. torchrl/data/replay_buffers/writers.py +781 -0
  170. torchrl/data/tensor_specs.py +7101 -0
  171. torchrl/data/utils.py +334 -0
  172. torchrl/envs/__init__.py +265 -0
  173. torchrl/envs/async_envs.py +1105 -0
  174. torchrl/envs/batched_envs.py +3093 -0
  175. torchrl/envs/common.py +4241 -0
  176. torchrl/envs/custom/__init__.py +11 -0
  177. torchrl/envs/custom/chess.py +617 -0
  178. torchrl/envs/custom/llm.py +214 -0
  179. torchrl/envs/custom/pendulum.py +401 -0
  180. torchrl/envs/custom/san_moves.txt +29274 -0
  181. torchrl/envs/custom/tictactoeenv.py +288 -0
  182. torchrl/envs/env_creator.py +263 -0
  183. torchrl/envs/gym_like.py +752 -0
  184. torchrl/envs/libs/__init__.py +68 -0
  185. torchrl/envs/libs/_gym_utils.py +326 -0
  186. torchrl/envs/libs/brax.py +846 -0
  187. torchrl/envs/libs/dm_control.py +544 -0
  188. torchrl/envs/libs/envpool.py +447 -0
  189. torchrl/envs/libs/gym.py +2239 -0
  190. torchrl/envs/libs/habitat.py +138 -0
  191. torchrl/envs/libs/isaac_lab.py +87 -0
  192. torchrl/envs/libs/isaacgym.py +203 -0
  193. torchrl/envs/libs/jax_utils.py +166 -0
  194. torchrl/envs/libs/jumanji.py +963 -0
  195. torchrl/envs/libs/meltingpot.py +599 -0
  196. torchrl/envs/libs/openml.py +153 -0
  197. torchrl/envs/libs/openspiel.py +652 -0
  198. torchrl/envs/libs/pettingzoo.py +1042 -0
  199. torchrl/envs/libs/procgen.py +351 -0
  200. torchrl/envs/libs/robohive.py +429 -0
  201. torchrl/envs/libs/smacv2.py +645 -0
  202. torchrl/envs/libs/unity_mlagents.py +891 -0
  203. torchrl/envs/libs/utils.py +147 -0
  204. torchrl/envs/libs/vmas.py +813 -0
  205. torchrl/envs/llm/__init__.py +63 -0
  206. torchrl/envs/llm/chat.py +730 -0
  207. torchrl/envs/llm/datasets/README.md +4 -0
  208. torchrl/envs/llm/datasets/__init__.py +17 -0
  209. torchrl/envs/llm/datasets/gsm8k.py +353 -0
  210. torchrl/envs/llm/datasets/ifeval.py +274 -0
  211. torchrl/envs/llm/envs.py +789 -0
  212. torchrl/envs/llm/libs/README.md +3 -0
  213. torchrl/envs/llm/libs/__init__.py +8 -0
  214. torchrl/envs/llm/libs/mlgym.py +869 -0
  215. torchrl/envs/llm/reward/__init__.py +10 -0
  216. torchrl/envs/llm/reward/gsm8k.py +324 -0
  217. torchrl/envs/llm/reward/ifeval/README.md +13 -0
  218. torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
  219. torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
  220. torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
  221. torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
  222. torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
  223. torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
  224. torchrl/envs/llm/transforms/__init__.py +55 -0
  225. torchrl/envs/llm/transforms/browser.py +292 -0
  226. torchrl/envs/llm/transforms/dataloading.py +859 -0
  227. torchrl/envs/llm/transforms/format.py +73 -0
  228. torchrl/envs/llm/transforms/kl.py +1544 -0
  229. torchrl/envs/llm/transforms/policy_version.py +189 -0
  230. torchrl/envs/llm/transforms/reason.py +323 -0
  231. torchrl/envs/llm/transforms/tokenizer.py +321 -0
  232. torchrl/envs/llm/transforms/tools.py +1955 -0
  233. torchrl/envs/model_based/__init__.py +9 -0
  234. torchrl/envs/model_based/common.py +180 -0
  235. torchrl/envs/model_based/dreamer.py +112 -0
  236. torchrl/envs/transforms/__init__.py +147 -0
  237. torchrl/envs/transforms/functional.py +48 -0
  238. torchrl/envs/transforms/gym_transforms.py +203 -0
  239. torchrl/envs/transforms/module.py +341 -0
  240. torchrl/envs/transforms/r3m.py +372 -0
  241. torchrl/envs/transforms/ray_service.py +663 -0
  242. torchrl/envs/transforms/rb_transforms.py +214 -0
  243. torchrl/envs/transforms/transforms.py +11835 -0
  244. torchrl/envs/transforms/utils.py +94 -0
  245. torchrl/envs/transforms/vc1.py +307 -0
  246. torchrl/envs/transforms/vecnorm.py +845 -0
  247. torchrl/envs/transforms/vip.py +407 -0
  248. torchrl/envs/utils.py +1718 -0
  249. torchrl/envs/vec_envs.py +11 -0
  250. torchrl/modules/__init__.py +206 -0
  251. torchrl/modules/distributions/__init__.py +73 -0
  252. torchrl/modules/distributions/continuous.py +830 -0
  253. torchrl/modules/distributions/discrete.py +908 -0
  254. torchrl/modules/distributions/truncated_normal.py +187 -0
  255. torchrl/modules/distributions/utils.py +233 -0
  256. torchrl/modules/llm/__init__.py +62 -0
  257. torchrl/modules/llm/backends/__init__.py +65 -0
  258. torchrl/modules/llm/backends/vllm/__init__.py +94 -0
  259. torchrl/modules/llm/backends/vllm/_models.py +46 -0
  260. torchrl/modules/llm/backends/vllm/base.py +72 -0
  261. torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
  262. torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
  263. torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
  264. torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
  265. torchrl/modules/llm/policies/__init__.py +28 -0
  266. torchrl/modules/llm/policies/common.py +1809 -0
  267. torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
  268. torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
  269. torchrl/modules/llm/utils.py +23 -0
  270. torchrl/modules/mcts/__init__.py +21 -0
  271. torchrl/modules/mcts/scores.py +579 -0
  272. torchrl/modules/models/__init__.py +86 -0
  273. torchrl/modules/models/batchrenorm.py +119 -0
  274. torchrl/modules/models/decision_transformer.py +179 -0
  275. torchrl/modules/models/exploration.py +731 -0
  276. torchrl/modules/models/llm.py +156 -0
  277. torchrl/modules/models/model_based.py +596 -0
  278. torchrl/modules/models/models.py +1712 -0
  279. torchrl/modules/models/multiagent.py +1067 -0
  280. torchrl/modules/models/recipes/impala.py +185 -0
  281. torchrl/modules/models/utils.py +162 -0
  282. torchrl/modules/planners/__init__.py +10 -0
  283. torchrl/modules/planners/cem.py +228 -0
  284. torchrl/modules/planners/common.py +73 -0
  285. torchrl/modules/planners/mppi.py +265 -0
  286. torchrl/modules/tensordict_module/__init__.py +89 -0
  287. torchrl/modules/tensordict_module/actors.py +2457 -0
  288. torchrl/modules/tensordict_module/common.py +529 -0
  289. torchrl/modules/tensordict_module/exploration.py +814 -0
  290. torchrl/modules/tensordict_module/probabilistic.py +321 -0
  291. torchrl/modules/tensordict_module/rnn.py +1639 -0
  292. torchrl/modules/tensordict_module/sequence.py +132 -0
  293. torchrl/modules/tensordict_module/world_models.py +34 -0
  294. torchrl/modules/utils/__init__.py +38 -0
  295. torchrl/modules/utils/mappings.py +9 -0
  296. torchrl/modules/utils/utils.py +89 -0
  297. torchrl/objectives/__init__.py +78 -0
  298. torchrl/objectives/a2c.py +659 -0
  299. torchrl/objectives/common.py +753 -0
  300. torchrl/objectives/cql.py +1346 -0
  301. torchrl/objectives/crossq.py +710 -0
  302. torchrl/objectives/ddpg.py +453 -0
  303. torchrl/objectives/decision_transformer.py +371 -0
  304. torchrl/objectives/deprecated.py +516 -0
  305. torchrl/objectives/dqn.py +683 -0
  306. torchrl/objectives/dreamer.py +488 -0
  307. torchrl/objectives/functional.py +48 -0
  308. torchrl/objectives/gail.py +258 -0
  309. torchrl/objectives/iql.py +996 -0
  310. torchrl/objectives/llm/__init__.py +30 -0
  311. torchrl/objectives/llm/grpo.py +846 -0
  312. torchrl/objectives/llm/sft.py +482 -0
  313. torchrl/objectives/multiagent/__init__.py +8 -0
  314. torchrl/objectives/multiagent/qmixer.py +396 -0
  315. torchrl/objectives/ppo.py +1669 -0
  316. torchrl/objectives/redq.py +683 -0
  317. torchrl/objectives/reinforce.py +530 -0
  318. torchrl/objectives/sac.py +1580 -0
  319. torchrl/objectives/td3.py +570 -0
  320. torchrl/objectives/td3_bc.py +625 -0
  321. torchrl/objectives/utils.py +782 -0
  322. torchrl/objectives/value/__init__.py +28 -0
  323. torchrl/objectives/value/advantages.py +1956 -0
  324. torchrl/objectives/value/functional.py +1459 -0
  325. torchrl/objectives/value/utils.py +360 -0
  326. torchrl/record/__init__.py +17 -0
  327. torchrl/record/loggers/__init__.py +23 -0
  328. torchrl/record/loggers/common.py +48 -0
  329. torchrl/record/loggers/csv.py +226 -0
  330. torchrl/record/loggers/mlflow.py +142 -0
  331. torchrl/record/loggers/tensorboard.py +139 -0
  332. torchrl/record/loggers/trackio.py +163 -0
  333. torchrl/record/loggers/utils.py +78 -0
  334. torchrl/record/loggers/wandb.py +214 -0
  335. torchrl/record/recorder.py +554 -0
  336. torchrl/services/__init__.py +79 -0
  337. torchrl/services/base.py +109 -0
  338. torchrl/services/ray_service.py +453 -0
  339. torchrl/testing/__init__.py +107 -0
  340. torchrl/testing/assertions.py +179 -0
  341. torchrl/testing/dist_utils.py +122 -0
  342. torchrl/testing/env_creators.py +227 -0
  343. torchrl/testing/env_helper.py +35 -0
  344. torchrl/testing/gym_helpers.py +156 -0
  345. torchrl/testing/llm_mocks.py +119 -0
  346. torchrl/testing/mocking_classes.py +2720 -0
  347. torchrl/testing/modules.py +295 -0
  348. torchrl/testing/mp_helpers.py +15 -0
  349. torchrl/testing/ray_helpers.py +293 -0
  350. torchrl/testing/utils.py +190 -0
  351. torchrl/trainers/__init__.py +42 -0
  352. torchrl/trainers/algorithms/__init__.py +11 -0
  353. torchrl/trainers/algorithms/configs/__init__.py +705 -0
  354. torchrl/trainers/algorithms/configs/collectors.py +216 -0
  355. torchrl/trainers/algorithms/configs/common.py +41 -0
  356. torchrl/trainers/algorithms/configs/data.py +308 -0
  357. torchrl/trainers/algorithms/configs/envs.py +104 -0
  358. torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
  359. torchrl/trainers/algorithms/configs/logging.py +80 -0
  360. torchrl/trainers/algorithms/configs/modules.py +570 -0
  361. torchrl/trainers/algorithms/configs/objectives.py +177 -0
  362. torchrl/trainers/algorithms/configs/trainers.py +340 -0
  363. torchrl/trainers/algorithms/configs/transforms.py +955 -0
  364. torchrl/trainers/algorithms/configs/utils.py +252 -0
  365. torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
  366. torchrl/trainers/algorithms/configs/weight_update.py +159 -0
  367. torchrl/trainers/algorithms/ppo.py +373 -0
  368. torchrl/trainers/algorithms/sac.py +308 -0
  369. torchrl/trainers/helpers/__init__.py +40 -0
  370. torchrl/trainers/helpers/collectors.py +416 -0
  371. torchrl/trainers/helpers/envs.py +573 -0
  372. torchrl/trainers/helpers/logger.py +33 -0
  373. torchrl/trainers/helpers/losses.py +132 -0
  374. torchrl/trainers/helpers/models.py +658 -0
  375. torchrl/trainers/helpers/replay_buffer.py +59 -0
  376. torchrl/trainers/helpers/trainers.py +301 -0
  377. torchrl/trainers/trainers.py +2052 -0
  378. torchrl/weight_update/__init__.py +33 -0
  379. torchrl/weight_update/_distributed.py +749 -0
  380. torchrl/weight_update/_mp.py +624 -0
  381. torchrl/weight_update/_noupdate.py +102 -0
  382. torchrl/weight_update/_ray.py +1032 -0
  383. torchrl/weight_update/_rpc.py +284 -0
  384. torchrl/weight_update/_shared.py +891 -0
  385. torchrl/weight_update/llm/__init__.py +32 -0
  386. torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
  387. torchrl/weight_update/llm/vllm_nccl.py +710 -0
  388. torchrl/weight_update/utils.py +73 -0
  389. torchrl/weight_update/weight_sync_schemes.py +1244 -0
  390. torchrl-0.11.0.dist-info/METADATA +1308 -0
  391. torchrl-0.11.0.dist-info/RECORD +395 -0
  392. torchrl-0.11.0.dist-info/WHEEL +5 -0
  393. torchrl-0.11.0.dist-info/entry_points.txt +2 -0
  394. torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
  395. torchrl-0.11.0.dist-info/top_level.txt +7 -0
--- /dev/null
+++ torchrl/modules/distributions/truncated_normal.py
@@ -0,0 +1,187 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# from https://github.com/toshas/torch_truncnorm
+from __future__ import annotations
+
+import math
+from numbers import Number
+
+import torch
+from torch.distributions import constraints, Distribution
+from torch.distributions.utils import broadcast_all
+
+CONST_SQRT_2 = math.sqrt(2)
+CONST_INV_SQRT_2PI = 1 / math.sqrt(2 * math.pi)
+CONST_INV_SQRT_2 = 1 / math.sqrt(2)
+CONST_LOG_INV_SQRT_2PI = math.log(CONST_INV_SQRT_2PI)
+CONST_LOG_SQRT_2PI_E = 0.5 * math.log(2 * math.pi * math.e)
+
+
+class TruncatedStandardNormal(Distribution):
+    """Truncated Standard Normal distribution.
+
+    Source: https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
+    """
+
+    arg_constraints = {
+        "a": constraints.real,
+        "b": constraints.real,
+    }
+    has_rsample = True
+    eps = 1e-6
+
+    def __init__(self, a, b, validate_args=None, device=None):
+        self.a, self.b = broadcast_all(a, b)
+        _non_blocking = device is not None and torch.device(device).type == "cuda"
+        self.a = self.a.to(device, non_blocking=_non_blocking)
+        self.b = self.b.to(device, non_blocking=_non_blocking)
+        if isinstance(a, Number) and isinstance(b, Number):
+            batch_shape = torch.Size()
+        else:
+            batch_shape = self.a.size()
+        super().__init__(batch_shape, validate_args=validate_args)
+        if self.a.dtype != self.b.dtype:
+            raise ValueError("Truncation bounds types are different")
+        if any(
+            (self.a >= self.b)
+            .view(
+                -1,
+            )
+            .tolist()
+        ):
+            raise ValueError("Incorrect truncation range")
+        eps = self.eps
+        self._dtype_min_gt_0 = eps
+        self._dtype_max_lt_1 = 1 - eps
+        self._little_phi_a = self._little_phi(self.a)
+        self._little_phi_b = self._little_phi(self.b)
+        self._big_phi_a = self._big_phi(self.a)
+        self._big_phi_b = self._big_phi(self.b)
+        self._Z = (self._big_phi_b - self._big_phi_a).clamp(eps, 1 - eps)
+        self._log_Z = self._Z.log()
+        little_phi_coeff_a = torch.nan_to_num(self.a, nan=math.nan)
+        little_phi_coeff_b = torch.nan_to_num(self.b, nan=math.nan)
+        self._lpbb_m_lpaa_d_Z = (
+            self._little_phi_b * little_phi_coeff_b
+            - self._little_phi_a * little_phi_coeff_a
+        ) / self._Z
+        self._mean = -(self._little_phi_b - self._little_phi_a) / self._Z
+        self._variance = (
+            1
+            - self._lpbb_m_lpaa_d_Z
+            - ((self._little_phi_b - self._little_phi_a) / self._Z) ** 2
+        )
+        self._entropy = CONST_LOG_SQRT_2PI_E + self._log_Z - 0.5 * self._lpbb_m_lpaa_d_Z
+
+    @constraints.dependent_property
+    def support(self):
+        return constraints.interval(self.a, self.b)
+
+    @property
+    def mean(self):
+        return self._mean
+
+    @property
+    def deterministic_sample(self):
+        return self.mean
+
+    @property
+    def variance(self):
+        return self._variance
+
+    def entropy(self):
+        return self._entropy
+
+    @property
+    def auc(self):
+        return self._Z
+
+    @staticmethod
+    def _little_phi(x):
+        return (-(x**2) * 0.5).exp() * CONST_INV_SQRT_2PI
+
+    def _big_phi(self, x):
+        phi = 0.5 * (1 + (x * CONST_INV_SQRT_2).erf())
+        return phi.clamp(self.eps, 1 - self.eps)
+
+    @staticmethod
+    def _inv_big_phi(x):
+        return CONST_SQRT_2 * (2 * x - 1).erfinv()
+
+    def cdf(self, value):
+        if self._validate_args:
+            self._validate_sample(value)
+        return ((self._big_phi(value) - self._big_phi_a) / self._Z).clamp(0, 1)
+
+    def icdf(self, value):
+        y = self._big_phi_a + value * self._Z
+        y = y.clamp(self.eps, 1 - self.eps)
+        return self._inv_big_phi(y)
+
+    def log_prob(self, value):
+        if self._validate_args:
+            self._validate_sample(value)
+        return CONST_LOG_INV_SQRT_2PI - self._log_Z - (value**2) * 0.5
+
+    def rsample(self, sample_shape=None):
+        if sample_shape is None:
+            sample_shape = torch.Size([])
+        shape = self._extended_shape(sample_shape)
+        p = torch.empty(shape, device=self.a.device).uniform_(
+            self._dtype_min_gt_0, self._dtype_max_lt_1
+        )
+        return self.icdf(p)
+
+
+class TruncatedNormal(TruncatedStandardNormal):
+    """Truncated Normal distribution.
+
+    https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
+    """
+
+    has_rsample = True
+
+    def __init__(self, loc, scale, a, b, validate_args=None, device=None):
+        scale = scale.clamp_min(self.eps)
+        self.loc, self.scale, a, b = broadcast_all(loc, scale, a, b)
+        _non_blocking = device is not None and torch.device(device).type == "cuda"
+        a = a.to(device, non_blocking=_non_blocking)
+        b = b.to(device, non_blocking=_non_blocking)
+        self._non_std_a = a
+        self._non_std_b = b
+        a = (a - self.loc) / self.scale
+        b = (b - self.loc) / self.scale
+        super().__init__(a, b, validate_args=validate_args)
+        self._log_scale = self.scale.log()
+        self._mean = self._mean * self.scale + self.loc
+        self._variance = self._variance * self.scale**2
+        self._entropy += self._log_scale
+
+    def _to_std_rv(self, value):
+        return (value - self.loc) / self.scale
+
+    def _from_std_rv(self, value):
+        return value * self.scale + self.loc
+
+    def cdf(self, value):
+        return super().cdf(self._to_std_rv(value))
+
+    def icdf(self, value):
+        sample = self._from_std_rv(super().icdf(value))
+
+        # clamp data but keep gradients
+        sample_clip = torch.stack(
+            [sample.detach(), self._non_std_a.detach().expand_as(sample)], 0
+        ).max(0)[0]
+        sample_clip = torch.stack(
+            [sample_clip, self._non_std_b.detach().expand_as(sample)], 0
+        ).min(0)[0]
+        sample.data.copy_(sample_clip)
+        return sample
+
+    def log_prob(self, value):
+        value = self._to_std_rv(value)
+        return super().log_prob(value) - self._log_scale
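For orientation, here is a minimal usage sketch (illustrative, not part of the released files): it draws reparameterized samples from the TruncatedNormal class added above, checks that they respect the truncation bounds, and compares the closed-form mean and variance against empirical estimates.

import torch

from torchrl.modules.distributions.truncated_normal import TruncatedNormal

loc = torch.zeros(3)
scale = torch.ones(3)
low = torch.full((3,), -1.0)
high = torch.full((3,), 2.0)

dist = TruncatedNormal(loc, scale, low, high)
samples = dist.rsample(torch.Size([100_000]))  # differentiable w.r.t. loc/scale

# Samples live in [low, high], and the analytic moments match the empirical ones.
assert samples.min() >= -1.0 and samples.max() <= 2.0
print(dist.mean, samples.mean(0))      # analytic vs empirical mean
print(dist.variance, samples.var(0))   # analytic vs empirical variance
print(dist.log_prob(samples).shape)    # broadcasts over the sample dimension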
--- /dev/null
+++ torchrl/modules/distributions/utils.py
@@ -0,0 +1,233 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
+import torch
+from torch import autograd, distributions as d
+from torch.distributions import Independent, Transform, TransformedDistribution
+
+try:
+    from torch.compiler import is_dynamo_compiling
+except ImportError:
+    from torch._dynamo import is_compiling as is_dynamo_compiling
+
+
+def _cast_device(elt: torch.Tensor | float, device) -> torch.Tensor | float:
+    if isinstance(elt, torch.Tensor):
+        _non_blocking = device is not None and torch.device(device).type == "cuda"
+        return elt.to(device, non_blocking=_non_blocking)
+    return elt
+
+
+def _cast_transform_device(transform, device):
+    if transform is None:
+        return transform
+    _non_blocking = device is not None and torch.device(device).type == "cuda"
+    if isinstance(transform, d.ComposeTransform):
+        for i, t in enumerate(transform.parts):
+            transform.parts[i] = _cast_transform_device(t, device)
+    elif isinstance(transform, d.Transform):
+        for attribute in dir(transform):
+            value = getattr(transform, attribute)
+            if isinstance(value, torch.Tensor):
+                setattr(
+                    transform, attribute, value.to(device, non_blocking=_non_blocking)
+                )
+        return transform
+    else:
+        raise TypeError(
+            f"Cannot perform device casting for transform of type {type(transform)}"
+        )
+
+
+class FasterTransformedDistribution(TransformedDistribution):
+    """A faster implementation of TransformedDistribution."""
+
+    __doc__ = __doc__ + TransformedDistribution.__doc__
+
+    def __init__(self, base_distribution, transforms, validate_args=None):
+        if is_dynamo_compiling():
+            return super().__init__(
+                base_distribution, transforms, validate_args=validate_args
+            )
+        if isinstance(transforms, Transform):
+            self.transforms = [transforms]
+        elif isinstance(transforms, list):
+            raise ValueError("Make a ComposeTransform first.")
+        else:
+            raise ValueError(
+                f"transforms must be a Transform or list, but was {transforms}"
+            )
+        transform = self.transforms[0]
+        # Reshape base_distribution according to transforms.
+        base_shape = base_distribution.batch_shape + base_distribution.event_shape
+        base_event_dim = len(base_distribution.event_shape)
+        # transform = ComposeTransform(self.transforms)
+        # if len(base_shape) < transform.domain.event_dim:
+        #     raise ValueError("base_distribution needs to have shape with size at least {}, but got {}."
+        #                      .format(transform.domain.event_dim, base_shape))
+        transform_codomain_event_dim = transform.codomain.event_dim
+        transform_domain_event_dim = transform.domain.event_dim
+
+        forward_shape = transform.forward_shape(base_shape)
+        expanded_base_shape = transform.inverse_shape(forward_shape)
+        if base_shape != expanded_base_shape:
+            base_batch_shape = expanded_base_shape[
+                : len(expanded_base_shape) - base_event_dim
+            ]
+            base_distribution = base_distribution.expand(base_batch_shape)
+        reinterpreted_batch_ndims = transform_domain_event_dim - base_event_dim
+        if reinterpreted_batch_ndims > 0:
+            base_distribution = Independent(
+                base_distribution, reinterpreted_batch_ndims
+            )
+        self.base_dist = base_distribution
+
+        # Compute shapes.
+        transform_change_in_event_dim = (
+            transform_codomain_event_dim - transform_domain_event_dim
+        )
+        event_dim = max(
+            transform_codomain_event_dim,  # the transform is coupled
+            base_event_dim + transform_change_in_event_dim,  # the base dist is coupled
+        )
+        cut = len(forward_shape) - event_dim
+        batch_shape = forward_shape[:cut]
+        event_shape = forward_shape[cut:]
+        super(TransformedDistribution, self).__init__(
+            batch_shape, event_shape, validate_args=validate_args
+        )
+
+
+def _safetanh(x, eps):  # noqa: D103
+    lim = 1.0 - eps
+    y = x.tanh()
+    return y.clamp(-lim, lim)
+
+
+def _safeatanh(y, eps):  # noqa: D103
+    lim = 1.0 - eps
+    return y.clamp(-lim, lim).atanh()
+
+
+class _SafeTanh(autograd.Function):
+    generate_vmap_rule = True
+
+    @staticmethod
+    def forward(input, eps):
+        output = input.tanh()
+        lim = 1.0 - eps
+        output = output.clamp(-lim, lim)
+        # ctx.save_for_backward(output)
+        return output
+
+    @staticmethod
+    def setup_context(ctx, inputs, output):
+        # input, eps = inputs
+        # ctx.mark_non_differentiable(ind, ind_inv)
+        # # Tensors must be saved via ctx.save_for_backward. Please do not
+        # # assign them directly onto the ctx object.
+        ctx.save_for_backward(output)
+
+    @staticmethod
+    def backward(ctx, *grad):
+        grad = grad[0]
+        (output,) = ctx.saved_tensors
+        return (grad * (1 - output.pow(2)), None)
+
+
+class _SafeTanhNoEps(autograd.Function):
+    generate_vmap_rule = True
+
+    @staticmethod
+    def forward(input):
+        output = input.tanh()
+        eps = torch.finfo(input.dtype).resolution
+        lim = 1.0 - eps
+        output = output.clamp(-lim, lim)
+        return output
+
+    @staticmethod
+    def setup_context(ctx, inputs, output):
+        ctx.save_for_backward(output)
+
+    @staticmethod
+    def backward(ctx, *grad):
+        grad = grad[0]
+        (output,) = ctx.saved_tensors
+        return (grad * (1 - output.pow(2)),)
+
+
+class _SafeaTanh(autograd.Function):
+    generate_vmap_rule = True
+
+    @staticmethod
+    def forward(tanh_val, eps):
+        if eps is None:
+            eps = torch.finfo(tanh_val.dtype).resolution
+        lim = 1.0 - eps
+        output = tanh_val.clamp(-lim, lim)
+        # ctx.save_for_backward(output)
+        output = output.atanh()
+        return output
+
+    @staticmethod
+    def setup_context(ctx, inputs, output):
+        tanh_val, eps = inputs
+
+        # ctx.mark_non_differentiable(ind, ind_inv)
+        # # Tensors must be saved via ctx.save_for_backward. Please do not
+        # # assign them directly onto the ctx object.
+        ctx.save_for_backward(tanh_val)
+        ctx.eps = eps
+
+    @staticmethod
+    def backward(ctx, *grad):
+        grad = grad[0]
+        (tanh_val,) = ctx.saved_tensors
+        eps = ctx.eps
+        lim = 1.0 - eps
+        output = tanh_val.clamp(-lim, lim)
+        return (grad / (1 - output.pow(2)), None)
+
+
+class _SafeaTanhNoEps(autograd.Function):
+    generate_vmap_rule = True
+
+    @staticmethod
+    def forward(tanh_val):
+        eps = torch.finfo(tanh_val.dtype).resolution
+        lim = 1.0 - eps
+        output = tanh_val.clamp(-lim, lim)
+        # ctx.save_for_backward(output)
+        output = output.atanh()
+        return output
+
+    @staticmethod
+    def setup_context(ctx, inputs, output):
+        tanh_val = inputs[0]
+        eps = torch.finfo(tanh_val.dtype).resolution
+
+        # ctx.mark_non_differentiable(ind, ind_inv)
+        # # Tensors must be saved via ctx.save_for_backward. Please do not
+        # # assign them directly onto the ctx object.
+        ctx.save_for_backward(tanh_val)
+        ctx.eps = eps
+
+    @staticmethod
+    def backward(ctx, *grad):
+        grad = grad[0]
+        (tanh_val,) = ctx.saved_tensors
+        eps = ctx.eps
+        lim = 1.0 - eps
+        output = tanh_val.clamp(-lim, lim)
+        return (grad / (1 - output.pow(2)),)
+
+
+safetanh = _SafeTanh.apply
+safeatanh = _SafeaTanh.apply
+
+safetanh_noeps = _SafeTanhNoEps.apply
+safeatanh_noeps = _SafeaTanhNoEps.apply
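The safe tanh/atanh pair above exists because float32 tanh saturates to exactly 1.0 for moderately large inputs, so a naive atanh(tanh(x)) round trip overflows to inf and kills gradients in tanh-squashed policies. A small sketch (illustrative, not part of the released files) using the module's own safetanh/safeatanh exports:

import torch

from torchrl.modules.distributions.utils import safeatanh, safetanh

x = torch.tensor([20.0], requires_grad=True)

# Naive round trip: tanh(20) rounds to exactly 1.0 in float32, atanh(1.0) = inf.
print(torch.atanh(torch.tanh(x)))  # tensor([inf], ...)

# Clamped round trip: values stay strictly inside (-1, 1), so both the output
# and the gradient remain finite.
eps = torch.finfo(x.dtype).resolution
y = safetanh(x, eps)        # clamped to +/- (1 - eps)
x_back = safeatanh(y, eps)  # finite inverse
x_back.sum().backward()
print(x_back, x.grad)       # finite value, finite gradient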
--- /dev/null
+++ torchrl/modules/llm/__init__.py
@@ -0,0 +1,62 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""LLM utilities for TorchRL.
+
+Note:
+    This package contains optional integrations (e.g. vLLM) that may rely on native
+    extensions. To keep `import torchrl` / `import torchrl.envs` lightweight and
+    robust, we **avoid importing optional backends at module import time** and
+    instead only import those backends on demand.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from .policies.common import ChatHistory, LLMWrapperBase, LogProbs, Masks, Text, Tokens
+from .policies.transformers_wrapper import (
+    RemoteTransformersWrapper,
+    TransformersWrapper,
+)
+from .policies.vllm_wrapper import vLLMWrapper
+
+__all__ = [
+    # Data structures
+    "ChatHistory",
+    "LogProbs",
+    "Masks",
+    "Text",
+    "Tokens",
+    # Wrapper base class
+    "LLMWrapperBase",
+    # Local wrappers
+    "TransformersWrapper",
+    "vLLMWrapper",
+    # Remote wrappers
+    "RemoteTransformersWrapper",
+    # Async vLLM (recommended)
+    "AsyncVLLM",
+    "make_async_vllm_engine",
+    "stateless_init_process_group_async",
+    # Sync vLLM utilities
+    "make_vllm_worker",
+    "stateless_init_process_group",
+]
+
+
+def __getattr__(name: str) -> Any:  # noqa: ANN401
+    # Keep backends optional and on-demand to avoid importing vLLM native extensions
+    # as a side-effect of importing torchrl.
+    if name in {
+        "AsyncVLLM",
+        "make_async_vllm_engine",
+        "make_vllm_worker",
+        "stateless_init_process_group",
+        "stateless_init_process_group_async",
+    }:
+        from . import backends  # local import is intentional / required
+
+        return getattr(backends, name)
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
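The module-level __getattr__ above is the PEP 562 lazy-attribute hook: importing torchrl.modules.llm stays cheap, and the optional backends subpackage is only imported when one of the vLLM names is first touched. A sketch of the observable behavior (assuming vLLM is installed; the exact import side effects of the wrapper modules may vary):

import sys

import torchrl.modules.llm as llm_mod

# Importing the package should not, by itself, pull in the heavy backend.
print("torchrl.modules.llm.backends" in sys.modules)  # expected: False

engine_cls = llm_mod.AsyncVLLM  # __getattr__ imports the backend on demand

print("torchrl.modules.llm.backends" in sys.modules)  # expected: True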
--- /dev/null
+++ torchrl/modules/llm/backends/__init__.py
@@ -0,0 +1,65 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""LLM backends.
+
+These backends can be optional and may rely on native extensions. We avoid
+importing them at module import time and lazily load on attribute access.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+__all__ = [
+    # Base classes
+    "RLvLLMEngine",
+    # Sync vLLM
+    "make_vllm_worker",
+    "RayLLMWorker",
+    "LocalLLMWrapper",
+    # Async vLLM
+    "_AsyncvLLMWorker",
+    "_AsyncLLMEngine",
+    "AsyncVLLM",
+    "make_async_vllm_engine",
+    # Utilities
+    "stateless_init_process_group",
+    "stateless_init_process_group_async",
+]
+
+_LAZY_ATTRS: dict[str, tuple[str, str]] = {
+    # Base classes and interfaces
+    "RLvLLMEngine": ("torchrl.modules.llm.backends.vllm", "RLvLLMEngine"),
+    # Sync vLLM
+    "make_vllm_worker": ("torchrl.modules.llm.backends.vllm", "make_vllm_worker"),
+    "RayLLMWorker": ("torchrl.modules.llm.backends.vllm", "RayLLMWorker"),
+    "LocalLLMWrapper": ("torchrl.modules.llm.backends.vllm", "LocalLLMWrapper"),
+    # Async vLLM
+    "_AsyncvLLMWorker": ("torchrl.modules.llm.backends.vllm", "_AsyncvLLMWorker"),
+    "_AsyncLLMEngine": ("torchrl.modules.llm.backends.vllm", "_AsyncLLMEngine"),
+    "AsyncVLLM": ("torchrl.modules.llm.backends.vllm", "AsyncVLLM"),
+    "make_async_vllm_engine": (
+        "torchrl.modules.llm.backends.vllm",
+        "make_async_vllm_engine",
+    ),
+    # Utilities
+    "stateless_init_process_group": (
+        "torchrl.modules.llm.backends.vllm",
+        "stateless_init_process_group",
+    ),
+    "stateless_init_process_group_async": (
+        "torchrl.modules.llm.backends.vllm",
+        "stateless_init_process_group_async",
+    ),
+}
+
+
+def __getattr__(name: str) -> Any:  # noqa: ANN401
+    target = _LAZY_ATTRS.get(name)
+    if target is None:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    module_name, attr_name = target
+    module = __import__(module_name, fromlist=[attr_name])
+    return getattr(module, attr_name)
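The _LAZY_ATTRS table plus module-level __getattr__ is a reusable recipe for deferring imports. A self-contained sketch of the same pattern, spelled with importlib.import_module, which is the more common equivalent of the `__import__(module, fromlist=[attr])` idiom used above (both return the leaf submodule):

from importlib import import_module
from typing import Any

# Map public name -> (module path, attribute name); nothing is imported yet.
_LAZY_ATTRS: dict[str, tuple[str, str]] = {
    "AsyncVLLM": ("torchrl.modules.llm.backends.vllm", "AsyncVLLM"),
}


def __getattr__(name: str) -> Any:
    # Called only when normal attribute lookup on the module fails (PEP 562).
    try:
        module_name, attr_name = _LAZY_ATTRS[name]
    except KeyError:
        raise AttributeError(
            f"module {__name__!r} has no attribute {name!r}"
        ) from None
    return getattr(import_module(module_name), attr_name)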
--- /dev/null
+++ torchrl/modules/llm/backends/vllm/__init__.py
@@ -0,0 +1,94 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""vLLM backends for TorchRL.
+
+This module provides comprehensive vLLM integration including:
+- Base classes and interfaces
+- Synchronous vLLM workers
+- Asynchronous vLLM services
+- Shared utilities
+
+Examples:
+    >>> # Create an async vLLM service (recommended)
+    >>> from torchrl.modules.llm.backends.vllm import AsyncVLLM
+    >>> service = AsyncVLLM.from_pretrained("Qwen/Qwen2.5-3B")
+
+    >>> # Create a sync Ray worker
+    >>> from torchrl.modules.llm.backends.vllm import make_vllm_worker
+    >>> worker = make_vllm_worker("Qwen/Qwen2.5-3B", make_ray_worker=True)
+
+    >>> # All engines implement the same interface
+    >>> from torchrl.modules.llm.backends.vllm import RLvLLMEngine
+    >>> updater = vLLMUpdaterV2(any_engine)  # Works with any RLvLLMEngine
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+__all__ = [
+    # Base classes and interfaces
+    "RLvLLMEngine",
+    # Synchronous vLLM
+    "make_vllm_worker",
+    "RayLLMWorker",
+    "LocalLLMWrapper",
+    # Asynchronous vLLM
+    "AsyncVLLM",
+    "make_async_vllm_engine",
+    "_AsyncLLMEngine",
+    "_AsyncvLLMWorker",
+    # Utilities
+    "stateless_init_process_group",
+    "stateless_init_process_group_async",
+]
+
+_LAZY_ATTRS: dict[str, tuple[str, str]] = {
+    # Base
+    "RLvLLMEngine": ("torchrl.modules.llm.backends.vllm.base", "RLvLLMEngine"),
+    # Sync
+    "make_vllm_worker": (
+        "torchrl.modules.llm.backends.vllm.vllm_sync",
+        "make_vllm_worker",
+    ),
+    "RayLLMWorker": ("torchrl.modules.llm.backends.vllm.vllm_sync", "RayLLMWorker"),
+    "LocalLLMWrapper": (
+        "torchrl.modules.llm.backends.vllm.vllm_sync",
+        "LocalLLMWrapper",
+    ),
+    # Async
+    "_AsyncLLMEngine": (
+        "torchrl.modules.llm.backends.vllm.vllm_async",
+        "_AsyncLLMEngine",
+    ),
+    "_AsyncvLLMWorker": (
+        "torchrl.modules.llm.backends.vllm.vllm_async",
+        "_AsyncvLLMWorker",
+    ),
+    "AsyncVLLM": ("torchrl.modules.llm.backends.vllm.vllm_async", "AsyncVLLM"),
+    "make_async_vllm_engine": (
+        "torchrl.modules.llm.backends.vllm.vllm_async",
+        "make_async_vllm_engine",
+    ),
+    # Utils
+    "stateless_init_process_group": (
+        "torchrl.modules.llm.backends.vllm.vllm_utils",
+        "stateless_init_process_group",
+    ),
+    "stateless_init_process_group_async": (
+        "torchrl.modules.llm.backends.vllm.vllm_utils",
+        "stateless_init_process_group_async",
+    ),
+}
+
+
+def __getattr__(name: str) -> Any:  # noqa: ANN401
+    target = _LAZY_ATTRS.get(name)
+    if target is None:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    module_name, attr_name = target
+    module = __import__(module_name, fromlist=[attr_name])
+    return getattr(module, attr_name)
--- /dev/null
+++ torchrl/modules/llm/backends/vllm/_models.py
@@ -0,0 +1,46 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Override the last layers of your models here."""
+
+from __future__ import annotations
+
+import os
+
+import torch
+
+try:
+    from vllm.config import VllmConfig
+    from vllm.model_executor.models.qwen3 import Qwen3ForCausalLM
+except ImportError:
+
+    class VllmConfig:
+        """Placeholder for VllmConfig class when vLLM is not installed."""
+
+    class Qwen3ForCausalLM:
+        """Placeholder for Qwen3ForCausalLM class when vLLM is not installed."""
+
+
+def is_fp32_output_enabled() -> bool:
+    """Check if FP32 output is enabled."""
+    return os.getenv("VLLM_ENABLE_FP32_OUTPUT", "0") == "1"
+
+
+class Qwen3ForCausalLMFP32(Qwen3ForCausalLM):
+    """Qwen3ForCausalLM with FP32 output."""
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__(vllm_config=vllm_config, prefix=prefix)
+        if is_fp32_output_enabled():
+            self.lm_head.float()
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        if is_fp32_output_enabled():
+            hidden_states = hidden_states.float()
+        logits = self.logits_processor(self.lm_head, hidden_states)
+        return logits
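Qwen3ForCausalLMFP32 only takes effect once vLLM is told to use it for the Qwen3 architecture; torchrl ships a vllm_plugin.py for that purpose (see the file list above). A hedged sketch of how such an override is typically registered through vLLM's ModelRegistry; the architecture name and string path below are assumptions for illustration, not taken from the plugin:

import os

from vllm import ModelRegistry

# Gate the FP32 lm_head / logits path; checked both at model construction
# and inside compute_logits above.
os.environ["VLLM_ENABLE_FP32_OUTPUT"] = "1"

# Register the override under the architecture name vLLM resolves from the
# model config. The "module:Class" string form defers the actual import.
ModelRegistry.register_model(
    "Qwen3ForCausalLM",  # assumed architecture key
    "torchrl.modules.llm.backends.vllm._models:Qwen3ForCausalLMFP32",
)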