torchrl 0.11.0__cp314-cp314t-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/benchmark_batched_envs.py +104 -0
- benchmarks/conftest.py +91 -0
- benchmarks/ecosystem/gym_env_throughput.py +321 -0
- benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
- benchmarks/requirements.txt +7 -0
- benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
- benchmarks/test_collectors_benchmark.py +240 -0
- benchmarks/test_compressed_storage_benchmark.py +145 -0
- benchmarks/test_envs_benchmark.py +133 -0
- benchmarks/test_llm.py +101 -0
- benchmarks/test_non_tensor_env_benchmark.py +70 -0
- benchmarks/test_objectives_benchmarks.py +1199 -0
- benchmarks/test_replaybuffer_benchmark.py +254 -0
- sota-check/README.md +35 -0
- sota-implementations/README.md +142 -0
- sota-implementations/a2c/README.md +39 -0
- sota-implementations/a2c/a2c_atari.py +291 -0
- sota-implementations/a2c/a2c_mujoco.py +273 -0
- sota-implementations/a2c/utils_atari.py +240 -0
- sota-implementations/a2c/utils_mujoco.py +160 -0
- sota-implementations/bandits/README.md +7 -0
- sota-implementations/bandits/dqn.py +126 -0
- sota-implementations/cql/cql_offline.py +198 -0
- sota-implementations/cql/cql_online.py +249 -0
- sota-implementations/cql/discrete_cql_offline.py +180 -0
- sota-implementations/cql/discrete_cql_online.py +227 -0
- sota-implementations/cql/utils.py +471 -0
- sota-implementations/crossq/crossq.py +271 -0
- sota-implementations/crossq/utils.py +320 -0
- sota-implementations/ddpg/ddpg.py +231 -0
- sota-implementations/ddpg/utils.py +325 -0
- sota-implementations/decision_transformer/dt.py +163 -0
- sota-implementations/decision_transformer/lamb.py +167 -0
- sota-implementations/decision_transformer/online_dt.py +178 -0
- sota-implementations/decision_transformer/utils.py +562 -0
- sota-implementations/discrete_sac/discrete_sac.py +243 -0
- sota-implementations/discrete_sac/utils.py +324 -0
- sota-implementations/dqn/README.md +30 -0
- sota-implementations/dqn/dqn_atari.py +272 -0
- sota-implementations/dqn/dqn_cartpole.py +236 -0
- sota-implementations/dqn/utils_atari.py +132 -0
- sota-implementations/dqn/utils_cartpole.py +90 -0
- sota-implementations/dreamer/README.md +129 -0
- sota-implementations/dreamer/dreamer.py +586 -0
- sota-implementations/dreamer/dreamer_utils.py +1107 -0
- sota-implementations/expert-iteration/README.md +352 -0
- sota-implementations/expert-iteration/ei_utils.py +770 -0
- sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
- sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
- sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
- sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
- sota-implementations/gail/gail.py +327 -0
- sota-implementations/gail/gail_utils.py +68 -0
- sota-implementations/gail/ppo_utils.py +157 -0
- sota-implementations/grpo/README.md +273 -0
- sota-implementations/grpo/grpo-async.py +437 -0
- sota-implementations/grpo/grpo-sync.py +435 -0
- sota-implementations/grpo/grpo_utils.py +843 -0
- sota-implementations/grpo/requirements_gsm8k.txt +11 -0
- sota-implementations/grpo/requirements_ifeval.txt +16 -0
- sota-implementations/impala/README.md +33 -0
- sota-implementations/impala/impala_multi_node_ray.py +292 -0
- sota-implementations/impala/impala_multi_node_submitit.py +284 -0
- sota-implementations/impala/impala_single_node.py +261 -0
- sota-implementations/impala/utils.py +184 -0
- sota-implementations/iql/discrete_iql.py +230 -0
- sota-implementations/iql/iql_offline.py +164 -0
- sota-implementations/iql/iql_online.py +225 -0
- sota-implementations/iql/utils.py +437 -0
- sota-implementations/multiagent/README.md +74 -0
- sota-implementations/multiagent/iql.py +237 -0
- sota-implementations/multiagent/maddpg_iddpg.py +266 -0
- sota-implementations/multiagent/mappo_ippo.py +267 -0
- sota-implementations/multiagent/qmix_vdn.py +271 -0
- sota-implementations/multiagent/sac.py +337 -0
- sota-implementations/multiagent/utils/__init__.py +4 -0
- sota-implementations/multiagent/utils/logging.py +151 -0
- sota-implementations/multiagent/utils/utils.py +43 -0
- sota-implementations/ppo/README.md +29 -0
- sota-implementations/ppo/ppo_atari.py +305 -0
- sota-implementations/ppo/ppo_mujoco.py +293 -0
- sota-implementations/ppo/utils_atari.py +238 -0
- sota-implementations/ppo/utils_mujoco.py +152 -0
- sota-implementations/ppo_trainer/train.py +21 -0
- sota-implementations/redq/README.md +7 -0
- sota-implementations/redq/redq.py +199 -0
- sota-implementations/redq/utils.py +1060 -0
- sota-implementations/sac/sac-async.py +266 -0
- sota-implementations/sac/sac.py +239 -0
- sota-implementations/sac/utils.py +381 -0
- sota-implementations/sac_trainer/train.py +16 -0
- sota-implementations/td3/td3.py +254 -0
- sota-implementations/td3/utils.py +319 -0
- sota-implementations/td3_bc/td3_bc.py +177 -0
- sota-implementations/td3_bc/utils.py +251 -0
- torchrl/.dylibs/libc++.1.0.dylib +0 -0
- torchrl/__init__.py +144 -0
- torchrl/_extension.py +74 -0
- torchrl/_torchrl.cpython-314t-darwin.so +0 -0
- torchrl/_utils.py +1431 -0
- torchrl/collectors/__init__.py +48 -0
- torchrl/collectors/_base.py +1058 -0
- torchrl/collectors/_constants.py +88 -0
- torchrl/collectors/_multi_async.py +324 -0
- torchrl/collectors/_multi_base.py +1805 -0
- torchrl/collectors/_multi_sync.py +464 -0
- torchrl/collectors/_runner.py +581 -0
- torchrl/collectors/_single.py +2009 -0
- torchrl/collectors/_single_async.py +259 -0
- torchrl/collectors/collectors.py +62 -0
- torchrl/collectors/distributed/__init__.py +32 -0
- torchrl/collectors/distributed/default_configs.py +133 -0
- torchrl/collectors/distributed/generic.py +1306 -0
- torchrl/collectors/distributed/ray.py +1092 -0
- torchrl/collectors/distributed/rpc.py +1006 -0
- torchrl/collectors/distributed/sync.py +731 -0
- torchrl/collectors/distributed/utils.py +160 -0
- torchrl/collectors/llm/__init__.py +10 -0
- torchrl/collectors/llm/base.py +494 -0
- torchrl/collectors/llm/ray_collector.py +275 -0
- torchrl/collectors/llm/utils.py +36 -0
- torchrl/collectors/llm/weight_update/__init__.py +10 -0
- torchrl/collectors/llm/weight_update/vllm.py +348 -0
- torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
- torchrl/collectors/utils.py +433 -0
- torchrl/collectors/weight_update.py +591 -0
- torchrl/csrc/numpy_utils.h +38 -0
- torchrl/csrc/pybind.cpp +27 -0
- torchrl/csrc/segment_tree.h +458 -0
- torchrl/csrc/torch_utils.h +34 -0
- torchrl/csrc/utils.cpp +48 -0
- torchrl/csrc/utils.h +31 -0
- torchrl/data/__init__.py +187 -0
- torchrl/data/datasets/__init__.py +58 -0
- torchrl/data/datasets/atari_dqn.py +878 -0
- torchrl/data/datasets/common.py +281 -0
- torchrl/data/datasets/d4rl.py +489 -0
- torchrl/data/datasets/d4rl_infos.py +187 -0
- torchrl/data/datasets/gen_dgrl.py +375 -0
- torchrl/data/datasets/minari_data.py +643 -0
- torchrl/data/datasets/openml.py +177 -0
- torchrl/data/datasets/openx.py +798 -0
- torchrl/data/datasets/roboset.py +363 -0
- torchrl/data/datasets/utils.py +11 -0
- torchrl/data/datasets/vd4rl.py +432 -0
- torchrl/data/llm/__init__.py +34 -0
- torchrl/data/llm/dataset.py +491 -0
- torchrl/data/llm/history.py +1378 -0
- torchrl/data/llm/prompt.py +198 -0
- torchrl/data/llm/reward.py +225 -0
- torchrl/data/llm/topk.py +186 -0
- torchrl/data/llm/utils.py +543 -0
- torchrl/data/map/__init__.py +21 -0
- torchrl/data/map/hash.py +185 -0
- torchrl/data/map/query.py +204 -0
- torchrl/data/map/tdstorage.py +363 -0
- torchrl/data/map/tree.py +1434 -0
- torchrl/data/map/utils.py +103 -0
- torchrl/data/postprocs/__init__.py +8 -0
- torchrl/data/postprocs/postprocs.py +391 -0
- torchrl/data/replay_buffers/__init__.py +99 -0
- torchrl/data/replay_buffers/checkpointers.py +622 -0
- torchrl/data/replay_buffers/ray_buffer.py +292 -0
- torchrl/data/replay_buffers/replay_buffers.py +2376 -0
- torchrl/data/replay_buffers/samplers.py +2578 -0
- torchrl/data/replay_buffers/scheduler.py +265 -0
- torchrl/data/replay_buffers/storages.py +2412 -0
- torchrl/data/replay_buffers/utils.py +1042 -0
- torchrl/data/replay_buffers/writers.py +781 -0
- torchrl/data/tensor_specs.py +7101 -0
- torchrl/data/utils.py +334 -0
- torchrl/envs/__init__.py +265 -0
- torchrl/envs/async_envs.py +1105 -0
- torchrl/envs/batched_envs.py +3093 -0
- torchrl/envs/common.py +4241 -0
- torchrl/envs/custom/__init__.py +11 -0
- torchrl/envs/custom/chess.py +617 -0
- torchrl/envs/custom/llm.py +214 -0
- torchrl/envs/custom/pendulum.py +401 -0
- torchrl/envs/custom/san_moves.txt +29274 -0
- torchrl/envs/custom/tictactoeenv.py +288 -0
- torchrl/envs/env_creator.py +263 -0
- torchrl/envs/gym_like.py +752 -0
- torchrl/envs/libs/__init__.py +68 -0
- torchrl/envs/libs/_gym_utils.py +326 -0
- torchrl/envs/libs/brax.py +846 -0
- torchrl/envs/libs/dm_control.py +544 -0
- torchrl/envs/libs/envpool.py +447 -0
- torchrl/envs/libs/gym.py +2239 -0
- torchrl/envs/libs/habitat.py +138 -0
- torchrl/envs/libs/isaac_lab.py +87 -0
- torchrl/envs/libs/isaacgym.py +203 -0
- torchrl/envs/libs/jax_utils.py +166 -0
- torchrl/envs/libs/jumanji.py +963 -0
- torchrl/envs/libs/meltingpot.py +599 -0
- torchrl/envs/libs/openml.py +153 -0
- torchrl/envs/libs/openspiel.py +652 -0
- torchrl/envs/libs/pettingzoo.py +1042 -0
- torchrl/envs/libs/procgen.py +351 -0
- torchrl/envs/libs/robohive.py +429 -0
- torchrl/envs/libs/smacv2.py +645 -0
- torchrl/envs/libs/unity_mlagents.py +891 -0
- torchrl/envs/libs/utils.py +147 -0
- torchrl/envs/libs/vmas.py +813 -0
- torchrl/envs/llm/__init__.py +63 -0
- torchrl/envs/llm/chat.py +730 -0
- torchrl/envs/llm/datasets/README.md +4 -0
- torchrl/envs/llm/datasets/__init__.py +17 -0
- torchrl/envs/llm/datasets/gsm8k.py +353 -0
- torchrl/envs/llm/datasets/ifeval.py +274 -0
- torchrl/envs/llm/envs.py +789 -0
- torchrl/envs/llm/libs/README.md +3 -0
- torchrl/envs/llm/libs/__init__.py +8 -0
- torchrl/envs/llm/libs/mlgym.py +869 -0
- torchrl/envs/llm/reward/__init__.py +10 -0
- torchrl/envs/llm/reward/gsm8k.py +324 -0
- torchrl/envs/llm/reward/ifeval/README.md +13 -0
- torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
- torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
- torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
- torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
- torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
- torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
- torchrl/envs/llm/transforms/__init__.py +55 -0
- torchrl/envs/llm/transforms/browser.py +292 -0
- torchrl/envs/llm/transforms/dataloading.py +859 -0
- torchrl/envs/llm/transforms/format.py +73 -0
- torchrl/envs/llm/transforms/kl.py +1544 -0
- torchrl/envs/llm/transforms/policy_version.py +189 -0
- torchrl/envs/llm/transforms/reason.py +323 -0
- torchrl/envs/llm/transforms/tokenizer.py +321 -0
- torchrl/envs/llm/transforms/tools.py +1955 -0
- torchrl/envs/model_based/__init__.py +9 -0
- torchrl/envs/model_based/common.py +180 -0
- torchrl/envs/model_based/dreamer.py +112 -0
- torchrl/envs/transforms/__init__.py +147 -0
- torchrl/envs/transforms/functional.py +48 -0
- torchrl/envs/transforms/gym_transforms.py +203 -0
- torchrl/envs/transforms/module.py +341 -0
- torchrl/envs/transforms/r3m.py +372 -0
- torchrl/envs/transforms/ray_service.py +663 -0
- torchrl/envs/transforms/rb_transforms.py +214 -0
- torchrl/envs/transforms/transforms.py +11835 -0
- torchrl/envs/transforms/utils.py +94 -0
- torchrl/envs/transforms/vc1.py +307 -0
- torchrl/envs/transforms/vecnorm.py +845 -0
- torchrl/envs/transforms/vip.py +407 -0
- torchrl/envs/utils.py +1718 -0
- torchrl/envs/vec_envs.py +11 -0
- torchrl/modules/__init__.py +206 -0
- torchrl/modules/distributions/__init__.py +73 -0
- torchrl/modules/distributions/continuous.py +830 -0
- torchrl/modules/distributions/discrete.py +908 -0
- torchrl/modules/distributions/truncated_normal.py +187 -0
- torchrl/modules/distributions/utils.py +233 -0
- torchrl/modules/llm/__init__.py +62 -0
- torchrl/modules/llm/backends/__init__.py +65 -0
- torchrl/modules/llm/backends/vllm/__init__.py +94 -0
- torchrl/modules/llm/backends/vllm/_models.py +46 -0
- torchrl/modules/llm/backends/vllm/base.py +72 -0
- torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
- torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
- torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
- torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
- torchrl/modules/llm/policies/__init__.py +28 -0
- torchrl/modules/llm/policies/common.py +1809 -0
- torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
- torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
- torchrl/modules/llm/utils.py +23 -0
- torchrl/modules/mcts/__init__.py +21 -0
- torchrl/modules/mcts/scores.py +579 -0
- torchrl/modules/models/__init__.py +86 -0
- torchrl/modules/models/batchrenorm.py +119 -0
- torchrl/modules/models/decision_transformer.py +179 -0
- torchrl/modules/models/exploration.py +731 -0
- torchrl/modules/models/llm.py +156 -0
- torchrl/modules/models/model_based.py +596 -0
- torchrl/modules/models/models.py +1712 -0
- torchrl/modules/models/multiagent.py +1067 -0
- torchrl/modules/models/recipes/impala.py +185 -0
- torchrl/modules/models/utils.py +162 -0
- torchrl/modules/planners/__init__.py +10 -0
- torchrl/modules/planners/cem.py +228 -0
- torchrl/modules/planners/common.py +73 -0
- torchrl/modules/planners/mppi.py +265 -0
- torchrl/modules/tensordict_module/__init__.py +89 -0
- torchrl/modules/tensordict_module/actors.py +2457 -0
- torchrl/modules/tensordict_module/common.py +529 -0
- torchrl/modules/tensordict_module/exploration.py +814 -0
- torchrl/modules/tensordict_module/probabilistic.py +321 -0
- torchrl/modules/tensordict_module/rnn.py +1639 -0
- torchrl/modules/tensordict_module/sequence.py +132 -0
- torchrl/modules/tensordict_module/world_models.py +34 -0
- torchrl/modules/utils/__init__.py +38 -0
- torchrl/modules/utils/mappings.py +9 -0
- torchrl/modules/utils/utils.py +89 -0
- torchrl/objectives/__init__.py +78 -0
- torchrl/objectives/a2c.py +659 -0
- torchrl/objectives/common.py +753 -0
- torchrl/objectives/cql.py +1346 -0
- torchrl/objectives/crossq.py +710 -0
- torchrl/objectives/ddpg.py +453 -0
- torchrl/objectives/decision_transformer.py +371 -0
- torchrl/objectives/deprecated.py +516 -0
- torchrl/objectives/dqn.py +683 -0
- torchrl/objectives/dreamer.py +488 -0
- torchrl/objectives/functional.py +48 -0
- torchrl/objectives/gail.py +258 -0
- torchrl/objectives/iql.py +996 -0
- torchrl/objectives/llm/__init__.py +30 -0
- torchrl/objectives/llm/grpo.py +846 -0
- torchrl/objectives/llm/sft.py +482 -0
- torchrl/objectives/multiagent/__init__.py +8 -0
- torchrl/objectives/multiagent/qmixer.py +396 -0
- torchrl/objectives/ppo.py +1669 -0
- torchrl/objectives/redq.py +683 -0
- torchrl/objectives/reinforce.py +530 -0
- torchrl/objectives/sac.py +1580 -0
- torchrl/objectives/td3.py +570 -0
- torchrl/objectives/td3_bc.py +625 -0
- torchrl/objectives/utils.py +782 -0
- torchrl/objectives/value/__init__.py +28 -0
- torchrl/objectives/value/advantages.py +1956 -0
- torchrl/objectives/value/functional.py +1459 -0
- torchrl/objectives/value/utils.py +360 -0
- torchrl/record/__init__.py +17 -0
- torchrl/record/loggers/__init__.py +23 -0
- torchrl/record/loggers/common.py +48 -0
- torchrl/record/loggers/csv.py +226 -0
- torchrl/record/loggers/mlflow.py +142 -0
- torchrl/record/loggers/tensorboard.py +139 -0
- torchrl/record/loggers/trackio.py +163 -0
- torchrl/record/loggers/utils.py +78 -0
- torchrl/record/loggers/wandb.py +214 -0
- torchrl/record/recorder.py +554 -0
- torchrl/services/__init__.py +79 -0
- torchrl/services/base.py +109 -0
- torchrl/services/ray_service.py +453 -0
- torchrl/testing/__init__.py +107 -0
- torchrl/testing/assertions.py +179 -0
- torchrl/testing/dist_utils.py +122 -0
- torchrl/testing/env_creators.py +227 -0
- torchrl/testing/env_helper.py +35 -0
- torchrl/testing/gym_helpers.py +156 -0
- torchrl/testing/llm_mocks.py +119 -0
- torchrl/testing/mocking_classes.py +2720 -0
- torchrl/testing/modules.py +295 -0
- torchrl/testing/mp_helpers.py +15 -0
- torchrl/testing/ray_helpers.py +293 -0
- torchrl/testing/utils.py +190 -0
- torchrl/trainers/__init__.py +42 -0
- torchrl/trainers/algorithms/__init__.py +11 -0
- torchrl/trainers/algorithms/configs/__init__.py +705 -0
- torchrl/trainers/algorithms/configs/collectors.py +216 -0
- torchrl/trainers/algorithms/configs/common.py +41 -0
- torchrl/trainers/algorithms/configs/data.py +308 -0
- torchrl/trainers/algorithms/configs/envs.py +104 -0
- torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
- torchrl/trainers/algorithms/configs/logging.py +80 -0
- torchrl/trainers/algorithms/configs/modules.py +570 -0
- torchrl/trainers/algorithms/configs/objectives.py +177 -0
- torchrl/trainers/algorithms/configs/trainers.py +340 -0
- torchrl/trainers/algorithms/configs/transforms.py +955 -0
- torchrl/trainers/algorithms/configs/utils.py +252 -0
- torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
- torchrl/trainers/algorithms/configs/weight_update.py +159 -0
- torchrl/trainers/algorithms/ppo.py +373 -0
- torchrl/trainers/algorithms/sac.py +308 -0
- torchrl/trainers/helpers/__init__.py +40 -0
- torchrl/trainers/helpers/collectors.py +416 -0
- torchrl/trainers/helpers/envs.py +573 -0
- torchrl/trainers/helpers/logger.py +33 -0
- torchrl/trainers/helpers/losses.py +132 -0
- torchrl/trainers/helpers/models.py +658 -0
- torchrl/trainers/helpers/replay_buffer.py +59 -0
- torchrl/trainers/helpers/trainers.py +301 -0
- torchrl/trainers/trainers.py +2052 -0
- torchrl/weight_update/__init__.py +33 -0
- torchrl/weight_update/_distributed.py +749 -0
- torchrl/weight_update/_mp.py +624 -0
- torchrl/weight_update/_noupdate.py +102 -0
- torchrl/weight_update/_ray.py +1032 -0
- torchrl/weight_update/_rpc.py +284 -0
- torchrl/weight_update/_shared.py +891 -0
- torchrl/weight_update/llm/__init__.py +32 -0
- torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
- torchrl/weight_update/llm/vllm_nccl.py +710 -0
- torchrl/weight_update/utils.py +73 -0
- torchrl/weight_update/weight_sync_schemes.py +1244 -0
- torchrl-0.11.0.dist-info/METADATA +1308 -0
- torchrl-0.11.0.dist-info/RECORD +395 -0
- torchrl-0.11.0.dist-info/WHEEL +5 -0
- torchrl-0.11.0.dist-info/entry_points.txt +2 -0
- torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
- torchrl-0.11.0.dist-info/top_level.txt +7 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
#
|
|
3
|
+
# This source code is licensed under the MIT license found in the
|
|
4
|
+
# LICENSE file in the root directory of this source tree.
|
|
5
|
+
|
|
6
|
+
from .vllm_double_buffer import (
|
|
7
|
+
VLLMDoubleBufferSyncScheme,
|
|
8
|
+
VLLMDoubleBufferTransport,
|
|
9
|
+
VLLMDoubleBufferWeightReceiver,
|
|
10
|
+
VLLMDoubleBufferWeightSender,
|
|
11
|
+
)
|
|
12
|
+
from .vllm_nccl import (
|
|
13
|
+
get_model_metadata,
|
|
14
|
+
VLLMCollectiveTransport,
|
|
15
|
+
VLLMWeightReceiver,
|
|
16
|
+
VLLMWeightSender,
|
|
17
|
+
VLLMWeightSyncScheme,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Public API of torchrl.weight_update.llm: names re-exported from the
# NCCL-based (vllm_nccl) and double-buffer (vllm_double_buffer) submodules.
__all__ = [
    # vLLM NCCL-based weight sync
    "VLLMWeightSyncScheme",
    "VLLMWeightSender",
    "VLLMWeightReceiver",
    "VLLMCollectiveTransport",
    "get_model_metadata",
    # vLLM double-buffer weight sync
    "VLLMDoubleBufferSyncScheme",
    "VLLMDoubleBufferWeightSender",
    "VLLMDoubleBufferWeightReceiver",
    "VLLMDoubleBufferTransport",
]
|
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
#
|
|
3
|
+
# This source code is licensed under the MIT license found in the
|
|
4
|
+
# LICENSE file in the root directory of this source tree.
|
|
5
|
+
|
|
6
|
+
"""vLLM weight synchronization using double-buffered shared memory.
|
|
7
|
+
|
|
8
|
+
This module provides weight synchronization for vLLM engines using a double-buffer
|
|
9
|
+
approach with memory-mapped TensorDict storage.
|
|
10
|
+
|
|
11
|
+
**Architecture Overview**
|
|
12
|
+
|
|
13
|
+
The double-buffer synchronization uses a simpler architecture compared to NCCL:
|
|
14
|
+
|
|
15
|
+
1. **Sender (Trainer)**
|
|
16
|
+
- Extracts weights from the training model
|
|
17
|
+
- Writes weights to shared directory using TensorDict.memmap
|
|
18
|
+
- No coordination needed - receiver pulls when ready
|
|
19
|
+
|
|
20
|
+
2. **Receiver (vLLM Worker)**
|
|
21
|
+
- Uses RPC to tell all vLLM workers to load from shared directory
|
|
22
|
+
- Each worker reads weights and calls model.load_weights()
|
|
23
|
+
- Can trigger at any time (pull-based)
|
|
24
|
+
|
|
25
|
+
**Key Differences from NCCL**
|
|
26
|
+
|
|
27
|
+
- **Async vs Sync**: Double-buffer is asynchronous (no coordination required)
|
|
28
|
+
- **Push vs Pull**: Sender writes, receiver pulls when ready via RPC
|
|
29
|
+
- **Simplicity**: No NCCL collectives, uses file I/O
|
|
30
|
+
- **Storage**: Uses shared filesystem instead of GPU-GPU transfer
|
|
31
|
+
|
|
32
|
+
**RPC Pattern**
|
|
33
|
+
|
|
34
|
+
Like the NCCL implementation, this uses RPC to coordinate workers:
|
|
35
|
+
- RPC tells workers: "load weights from this directory"
|
|
36
|
+
- Workers read from shared storage independently
|
|
37
|
+
- Each worker calls `model_runner.model.load_weights()`
|
|
38
|
+
|
|
39
|
+
**Usage Example**
|
|
40
|
+
|
|
41
|
+
.. code-block:: python
|
|
42
|
+
|
|
43
|
+
# Create scheme with shared directory
|
|
44
|
+
scheme = VLLMDoubleBufferSyncScheme(
|
|
45
|
+
remote_addr="/shared/weights",
|
|
46
|
+
num_threads=4
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# Sender side (trainer)
|
|
50
|
+
sender = scheme.create_sender()
|
|
51
|
+
sender.register_model(policy_model)
|
|
52
|
+
sender.update_weights() # Writes to /shared/weights
|
|
53
|
+
|
|
54
|
+
# Receiver side (vLLM worker - AsyncVLLM)
|
|
55
|
+
receiver = scheme.create_receiver(vllm_engine)
|
|
56
|
+
receiver.poll_and_apply() # RPC to workers -> load from /shared/weights
|
|
57
|
+
|
|
58
|
+
**Node-to-Node Transfer**
|
|
59
|
+
|
|
60
|
+
For distributed setups, you can use different addresses:
|
|
61
|
+
- Sender writes to local path
|
|
62
|
+
- Use NFS, rsync, or other file sync mechanisms
|
|
63
|
+
- Receiver reads from its local mount point
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
from __future__ import annotations
|
|
67
|
+
|
|
68
|
+
from typing import Any, Literal
|
|
69
|
+
|
|
70
|
+
from tensordict import TensorDict, TensorDictBase
|
|
71
|
+
from torchrl._utils import logger
|
|
72
|
+
from torchrl.weight_update.weight_sync_schemes import WeightStrategy, WeightSyncScheme
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class VLLMDoubleBufferTransport:
    """Transport for vLLM using double-buffered memory-mapped storage.

    Weights are persisted to a shared directory with ``TensorDict.memmap`` and
    read back with ``TensorDict.load_memmap``.

    Args:
        remote_addr: Directory path where sender writes weights.
        local_addr: Directory path where receiver reads weights.
            If None, uses same path as remote_addr (for local testing).
        num_threads: Number of threads for memmap operations.
    """

    def __init__(
        self, remote_addr: str, local_addr: str | None = None, num_threads: int = 1
    ):
        self.remote_addr = remote_addr
        # Fall back to the write location when no separate read location is given.
        self.local_addr = remote_addr if local_addr is None else local_addr
        self.num_threads = num_threads

    def send_weights(self, model_id: str, weights: Any) -> None:
        """Write *weights* to the shared directory.

        Args:
            model_id: Identifier for the model (used for logging).
            weights: TensorDict or dict of weights to write.
        """
        if isinstance(weights, dict):
            # Plain dicts are wrapped so they can be memory-mapped.
            weights = TensorDict(weights, batch_size=[])
        elif isinstance(weights, TensorDictBase) and weights.batch_size == ():
            # Copy scalar-batch tensordicts so memmap does not touch the caller's
            # object (memmap operates on the instance it is called on).
            weights = weights.clone()

        logger.info(f"Writing weights for model '{model_id}' to {self.remote_addr}")
        weights.memmap(self.remote_addr, num_threads=self.num_threads)
        logger.info(f"Weights written successfully to {self.remote_addr}")

    def receive_weights(
        self,
        timeout: float | None = None,
        *,
        weights: Any = None,
        model: Any = None,
        strategy: Any = None,
    ) -> Any | None:
        """Read the weights back from the shared directory.

        Args:
            timeout: Ignored (file-based transport is instant).
            weights: Ignored.
            model: Ignored.
            strategy: Ignored.

        Returns:
            TensorDict with flattened keys containing the weights.
        """
        # No waiting is involved: the read either succeeds immediately or raises.
        logger.info(f"Reading weights from {self.local_addr}")
        loaded = TensorDict.load_memmap(self.local_addr).flatten_keys(".")
        logger.info(f"Weights read successfully from {self.local_addr}")
        return loaded

    def check_connection(self) -> bool:
        """Report readiness.

        For file-based transport, always returns True.
        """
        return True
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class VLLMDoubleBufferSyncScheme(WeightSyncScheme):
    """Weight synchronization scheme for vLLM backed by memory-mapped storage.

    Weights are written by the trainer to a shared directory and pulled by the
    vLLM inference workers, so no NCCL process-group coordination is required.

    Args:
        remote_addr: Directory path where sender writes weights.
        local_addr: Directory path where receiver reads weights.
            If None, uses same path as remote_addr (for local testing).
        num_threads: Number of threads for memmap operations. Defaults to 1.
        strategy: Weight extraction strategy ("tensordict" or "state_dict").

    Example:
        >>> # Local testing (same machine)
        >>> scheme = VLLMDoubleBufferSyncScheme(
        ...     remote_addr="/tmp/weights",
        ...     strategy="tensordict"
        ... )
        >>>
        >>> # Distributed setup (different machines)
        >>> # On trainer node:
        >>> scheme = VLLMDoubleBufferSyncScheme(
        ...     remote_addr="/mnt/shared/weights",  # NFS mount
        ...     num_threads=4
        ... )
        >>>
        >>> # On vLLM worker node:
        >>> scheme = VLLMDoubleBufferSyncScheme(
        ...     remote_addr="/mnt/shared/weights",  # Same NFS mount
        ...     num_threads=4
        ... )
    """

    def __init__(
        self,
        remote_addr: str,
        local_addr: str | None = None,
        num_threads: int = 1,
        strategy: Literal["tensordict", "state_dict"] = "tensordict",
    ):
        self.remote_addr = remote_addr
        # Default the read location to the write location for single-node use.
        if local_addr is None:
            local_addr = remote_addr
        self.local_addr = local_addr
        self.num_threads = num_threads
        self.strategy_name = strategy

    def create_transport(self, **kwargs) -> VLLMDoubleBufferTransport:
        """Build the file-based transport configured by this scheme.

        Args:
            **kwargs: Not used for file-based transport (kept for API compatibility).

        Returns:
            A VLLMDoubleBufferTransport instance.
        """
        return VLLMDoubleBufferTransport(
            remote_addr=self.remote_addr,
            local_addr=self.local_addr,
            num_threads=self.num_threads,
        )

    def create_sender(self) -> VLLMDoubleBufferWeightSender:
        """Create a weight sender for the trainer process."""
        return VLLMDoubleBufferWeightSender(self)

    def create_receiver(self, vllm_engine) -> VLLMDoubleBufferWeightReceiver:
        """Create a weight receiver for a vLLM worker process.

        Args:
            vllm_engine: The vLLM engine instance (must have .llm_engine.model_executor attribute).
        """
        return VLLMDoubleBufferWeightReceiver(self, vllm_engine)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class VLLMDoubleBufferWeightSender:
    """Publishes trainer weights to shared storage for vLLM workers.

    Extracts weights from a registered training model and writes them to the
    scheme's shared directory via the double-buffer transport.

    Example:
        >>> sender = scheme.create_sender()
        >>> sender.register_model(policy_model)
        >>>
        >>> # During training loop
        >>> sender.update_weights()  # Writes current weights to shared storage
    """

    def __init__(self, scheme: VLLMDoubleBufferSyncScheme):
        self._scheme = scheme
        self._strategy = WeightStrategy(extract_as=scheme.strategy_name)
        self._model_ref = None
        self._transport = None

    def register_model(self, model: Any) -> None:
        """Register the model to extract weights from.

        Args:
            model: The model to extract weights from (e.g., TransformersWrapper).
        """
        import weakref

        # Hold the model weakly so the sender never keeps it alive on its own.
        self._model_ref = weakref.ref(model)

        # The transport is only needed once a model is registered.
        self._transport = self._scheme.create_transport()
        logger.info(
            f"Registered model for double-buffer weight sync to {self._scheme.remote_addr}"
        )

    def update_weights(self, weights: Any | None = None) -> None:
        """Extract and write weights to shared storage.

        Args:
            weights: Optional weights to send. If None, extracts from registered model.

        Raises:
            RuntimeError: If no model was registered, or the registered model
                has been garbage-collected.
        """
        if self._transport is None:
            raise RuntimeError("Transport not initialized. Call register_model first.")

        if weights is None:
            # Pull the current weights straight off the registered model.
            model = self._model_ref()
            if model is None:
                raise RuntimeError("Model reference is dead")
            weights = self._strategy.extract_weights(model)
        elif hasattr(weights, "state_dict"):
            # A module was passed directly; extract its weights first.
            weights = self._strategy.extract_weights(weights)

        self._transport.send_weights("vllm_model", weights)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
class VLLMDoubleBufferWeightReceiver:
    """Receives weights in a vLLM worker using double-buffered storage.

    This receiver reads weights from a shared directory and loads them into
    the vLLM engine using the engine's load_weights interface.

    Example:
        >>> receiver = scheme.create_receiver(vllm_engine)
        >>>
        >>> # Poll for new weights
        >>> if receiver.poll_and_apply():
        ...     print("Weights updated!")
    """

    def __init__(self, scheme: VLLMDoubleBufferSyncScheme, vllm_engine):
        self._scheme = scheme
        self._strategy = WeightStrategy(extract_as=scheme.strategy_name)
        self._vllm_engine = vllm_engine
        # The receiver owns its transport: it reads from scheme.local_addr.
        self._transport = scheme.create_transport()
        logger.info(
            f"Initialized double-buffer receiver reading from {self._scheme.local_addr}"
        )

    def apply_weights(self, weights: TensorDict, inplace: bool = True) -> None:
        """Apply weights to vLLM engine using RPC.

        This method uses RPC to tell all vLLM workers to load weights from
        the shared storage directory. Similar to how AsyncVLLM._update_weights_with_nccl_broadcast_simple
        uses collective_rpc to coordinate workers.

        Args:
            weights: TensorDict with flattened keys containing weights.
            inplace: Whether to apply weights in place. Default is `True`.

        Raises:
            ValueError: If ``inplace`` is False (not supported by this scheme).
        """
        if not inplace:
            raise ValueError("Cannot apply weights out of place for vLLM double-buffer")
        logger.info("Applying weights to vLLM engine via RPC")

        # Convert TensorDict to list of (name, tensor) tuples, the format
        # expected by vLLM's model.load_weights().
        weights_list = list(weights.items())

        # Check if this is an AsyncVLLM instance (uses RPC to coordinate workers)
        if hasattr(self._vllm_engine, "collective_rpc"):
            # AsyncVLLM path: use RPC to tell all workers to load weights
            logger.info(
                f"Using RPC to load {len(weights_list)} weights across all replicas"
            )

            # Call collective_rpc to tell workers to load from shared storage
            # The method 'load_weights_from_storage' will be called on each worker
            futures = self._vllm_engine.collective_rpc(
                method="load_weights_from_storage",
                args=(str(self._scheme.local_addr), self._transport.num_threads),
            )

            # Wait for all workers to complete before reporting success.
            import ray

            ray.get(futures)
            logger.info("Weights loaded successfully via RPC")
        else:
            # Direct path for local LLM (non-AsyncVLLM): reach into the driver
            # worker's model runner and load the weights in-process.
            engine = (
                self._vllm_engine.llm_engine
                if hasattr(self._vllm_engine, "llm_engine")
                else self._vllm_engine
            )
            worker = engine.model_executor.driver_worker
            model = worker.model_runner.model
            model.load_weights(weights_list)
            logger.info("Weights loaded successfully")

    def poll_and_apply(self, timeout: float = 180.0) -> bool:
        """Poll for and apply weights from shared storage.

        Args:
            timeout: Not used for file-based transport (kept for API compatibility).

        Returns:
            True if weights were successfully read and applied, False otherwise.
        """
        # Pull-based polling: if the sender has not written anything yet, the
        # memmap directory does not exist and the read raises FileNotFoundError.
        # Report False (per the documented contract) instead of propagating, so
        # callers can keep polling until weights appear.
        try:
            weights = self._transport.receive_weights(timeout=timeout)
        except FileNotFoundError:
            logger.info("No weights available yet; will retry on next poll")
            return False
        self.apply_weights(weights)
        return True
|