torchrl-0.11.0-cp314-cp314t-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395)
  1. benchmarks/benchmark_batched_envs.py +104 -0
  2. benchmarks/conftest.py +91 -0
  3. benchmarks/ecosystem/gym_env_throughput.py +321 -0
  4. benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
  5. benchmarks/requirements.txt +7 -0
  6. benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
  7. benchmarks/test_collectors_benchmark.py +240 -0
  8. benchmarks/test_compressed_storage_benchmark.py +145 -0
  9. benchmarks/test_envs_benchmark.py +133 -0
  10. benchmarks/test_llm.py +101 -0
  11. benchmarks/test_non_tensor_env_benchmark.py +70 -0
  12. benchmarks/test_objectives_benchmarks.py +1199 -0
  13. benchmarks/test_replaybuffer_benchmark.py +254 -0
  14. sota-check/README.md +35 -0
  15. sota-implementations/README.md +142 -0
  16. sota-implementations/a2c/README.md +39 -0
  17. sota-implementations/a2c/a2c_atari.py +291 -0
  18. sota-implementations/a2c/a2c_mujoco.py +273 -0
  19. sota-implementations/a2c/utils_atari.py +240 -0
  20. sota-implementations/a2c/utils_mujoco.py +160 -0
  21. sota-implementations/bandits/README.md +7 -0
  22. sota-implementations/bandits/dqn.py +126 -0
  23. sota-implementations/cql/cql_offline.py +198 -0
  24. sota-implementations/cql/cql_online.py +249 -0
  25. sota-implementations/cql/discrete_cql_offline.py +180 -0
  26. sota-implementations/cql/discrete_cql_online.py +227 -0
  27. sota-implementations/cql/utils.py +471 -0
  28. sota-implementations/crossq/crossq.py +271 -0
  29. sota-implementations/crossq/utils.py +320 -0
  30. sota-implementations/ddpg/ddpg.py +231 -0
  31. sota-implementations/ddpg/utils.py +325 -0
  32. sota-implementations/decision_transformer/dt.py +163 -0
  33. sota-implementations/decision_transformer/lamb.py +167 -0
  34. sota-implementations/decision_transformer/online_dt.py +178 -0
  35. sota-implementations/decision_transformer/utils.py +562 -0
  36. sota-implementations/discrete_sac/discrete_sac.py +243 -0
  37. sota-implementations/discrete_sac/utils.py +324 -0
  38. sota-implementations/dqn/README.md +30 -0
  39. sota-implementations/dqn/dqn_atari.py +272 -0
  40. sota-implementations/dqn/dqn_cartpole.py +236 -0
  41. sota-implementations/dqn/utils_atari.py +132 -0
  42. sota-implementations/dqn/utils_cartpole.py +90 -0
  43. sota-implementations/dreamer/README.md +129 -0
  44. sota-implementations/dreamer/dreamer.py +586 -0
  45. sota-implementations/dreamer/dreamer_utils.py +1107 -0
  46. sota-implementations/expert-iteration/README.md +352 -0
  47. sota-implementations/expert-iteration/ei_utils.py +770 -0
  48. sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
  49. sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
  50. sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
  51. sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
  52. sota-implementations/gail/gail.py +327 -0
  53. sota-implementations/gail/gail_utils.py +68 -0
  54. sota-implementations/gail/ppo_utils.py +157 -0
  55. sota-implementations/grpo/README.md +273 -0
  56. sota-implementations/grpo/grpo-async.py +437 -0
  57. sota-implementations/grpo/grpo-sync.py +435 -0
  58. sota-implementations/grpo/grpo_utils.py +843 -0
  59. sota-implementations/grpo/requirements_gsm8k.txt +11 -0
  60. sota-implementations/grpo/requirements_ifeval.txt +16 -0
  61. sota-implementations/impala/README.md +33 -0
  62. sota-implementations/impala/impala_multi_node_ray.py +292 -0
  63. sota-implementations/impala/impala_multi_node_submitit.py +284 -0
  64. sota-implementations/impala/impala_single_node.py +261 -0
  65. sota-implementations/impala/utils.py +184 -0
  66. sota-implementations/iql/discrete_iql.py +230 -0
  67. sota-implementations/iql/iql_offline.py +164 -0
  68. sota-implementations/iql/iql_online.py +225 -0
  69. sota-implementations/iql/utils.py +437 -0
  70. sota-implementations/multiagent/README.md +74 -0
  71. sota-implementations/multiagent/iql.py +237 -0
  72. sota-implementations/multiagent/maddpg_iddpg.py +266 -0
  73. sota-implementations/multiagent/mappo_ippo.py +267 -0
  74. sota-implementations/multiagent/qmix_vdn.py +271 -0
  75. sota-implementations/multiagent/sac.py +337 -0
  76. sota-implementations/multiagent/utils/__init__.py +4 -0
  77. sota-implementations/multiagent/utils/logging.py +151 -0
  78. sota-implementations/multiagent/utils/utils.py +43 -0
  79. sota-implementations/ppo/README.md +29 -0
  80. sota-implementations/ppo/ppo_atari.py +305 -0
  81. sota-implementations/ppo/ppo_mujoco.py +293 -0
  82. sota-implementations/ppo/utils_atari.py +238 -0
  83. sota-implementations/ppo/utils_mujoco.py +152 -0
  84. sota-implementations/ppo_trainer/train.py +21 -0
  85. sota-implementations/redq/README.md +7 -0
  86. sota-implementations/redq/redq.py +199 -0
  87. sota-implementations/redq/utils.py +1060 -0
  88. sota-implementations/sac/sac-async.py +266 -0
  89. sota-implementations/sac/sac.py +239 -0
  90. sota-implementations/sac/utils.py +381 -0
  91. sota-implementations/sac_trainer/train.py +16 -0
  92. sota-implementations/td3/td3.py +254 -0
  93. sota-implementations/td3/utils.py +319 -0
  94. sota-implementations/td3_bc/td3_bc.py +177 -0
  95. sota-implementations/td3_bc/utils.py +251 -0
  96. torchrl/.dylibs/libc++.1.0.dylib +0 -0
  97. torchrl/__init__.py +144 -0
  98. torchrl/_extension.py +74 -0
  99. torchrl/_torchrl.cpython-314t-darwin.so +0 -0
  100. torchrl/_utils.py +1431 -0
  101. torchrl/collectors/__init__.py +48 -0
  102. torchrl/collectors/_base.py +1058 -0
  103. torchrl/collectors/_constants.py +88 -0
  104. torchrl/collectors/_multi_async.py +324 -0
  105. torchrl/collectors/_multi_base.py +1805 -0
  106. torchrl/collectors/_multi_sync.py +464 -0
  107. torchrl/collectors/_runner.py +581 -0
  108. torchrl/collectors/_single.py +2009 -0
  109. torchrl/collectors/_single_async.py +259 -0
  110. torchrl/collectors/collectors.py +62 -0
  111. torchrl/collectors/distributed/__init__.py +32 -0
  112. torchrl/collectors/distributed/default_configs.py +133 -0
  113. torchrl/collectors/distributed/generic.py +1306 -0
  114. torchrl/collectors/distributed/ray.py +1092 -0
  115. torchrl/collectors/distributed/rpc.py +1006 -0
  116. torchrl/collectors/distributed/sync.py +731 -0
  117. torchrl/collectors/distributed/utils.py +160 -0
  118. torchrl/collectors/llm/__init__.py +10 -0
  119. torchrl/collectors/llm/base.py +494 -0
  120. torchrl/collectors/llm/ray_collector.py +275 -0
  121. torchrl/collectors/llm/utils.py +36 -0
  122. torchrl/collectors/llm/weight_update/__init__.py +10 -0
  123. torchrl/collectors/llm/weight_update/vllm.py +348 -0
  124. torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
  125. torchrl/collectors/utils.py +433 -0
  126. torchrl/collectors/weight_update.py +591 -0
  127. torchrl/csrc/numpy_utils.h +38 -0
  128. torchrl/csrc/pybind.cpp +27 -0
  129. torchrl/csrc/segment_tree.h +458 -0
  130. torchrl/csrc/torch_utils.h +34 -0
  131. torchrl/csrc/utils.cpp +48 -0
  132. torchrl/csrc/utils.h +31 -0
  133. torchrl/data/__init__.py +187 -0
  134. torchrl/data/datasets/__init__.py +58 -0
  135. torchrl/data/datasets/atari_dqn.py +878 -0
  136. torchrl/data/datasets/common.py +281 -0
  137. torchrl/data/datasets/d4rl.py +489 -0
  138. torchrl/data/datasets/d4rl_infos.py +187 -0
  139. torchrl/data/datasets/gen_dgrl.py +375 -0
  140. torchrl/data/datasets/minari_data.py +643 -0
  141. torchrl/data/datasets/openml.py +177 -0
  142. torchrl/data/datasets/openx.py +798 -0
  143. torchrl/data/datasets/roboset.py +363 -0
  144. torchrl/data/datasets/utils.py +11 -0
  145. torchrl/data/datasets/vd4rl.py +432 -0
  146. torchrl/data/llm/__init__.py +34 -0
  147. torchrl/data/llm/dataset.py +491 -0
  148. torchrl/data/llm/history.py +1378 -0
  149. torchrl/data/llm/prompt.py +198 -0
  150. torchrl/data/llm/reward.py +225 -0
  151. torchrl/data/llm/topk.py +186 -0
  152. torchrl/data/llm/utils.py +543 -0
  153. torchrl/data/map/__init__.py +21 -0
  154. torchrl/data/map/hash.py +185 -0
  155. torchrl/data/map/query.py +204 -0
  156. torchrl/data/map/tdstorage.py +363 -0
  157. torchrl/data/map/tree.py +1434 -0
  158. torchrl/data/map/utils.py +103 -0
  159. torchrl/data/postprocs/__init__.py +8 -0
  160. torchrl/data/postprocs/postprocs.py +391 -0
  161. torchrl/data/replay_buffers/__init__.py +99 -0
  162. torchrl/data/replay_buffers/checkpointers.py +622 -0
  163. torchrl/data/replay_buffers/ray_buffer.py +292 -0
  164. torchrl/data/replay_buffers/replay_buffers.py +2376 -0
  165. torchrl/data/replay_buffers/samplers.py +2578 -0
  166. torchrl/data/replay_buffers/scheduler.py +265 -0
  167. torchrl/data/replay_buffers/storages.py +2412 -0
  168. torchrl/data/replay_buffers/utils.py +1042 -0
  169. torchrl/data/replay_buffers/writers.py +781 -0
  170. torchrl/data/tensor_specs.py +7101 -0
  171. torchrl/data/utils.py +334 -0
  172. torchrl/envs/__init__.py +265 -0
  173. torchrl/envs/async_envs.py +1105 -0
  174. torchrl/envs/batched_envs.py +3093 -0
  175. torchrl/envs/common.py +4241 -0
  176. torchrl/envs/custom/__init__.py +11 -0
  177. torchrl/envs/custom/chess.py +617 -0
  178. torchrl/envs/custom/llm.py +214 -0
  179. torchrl/envs/custom/pendulum.py +401 -0
  180. torchrl/envs/custom/san_moves.txt +29274 -0
  181. torchrl/envs/custom/tictactoeenv.py +288 -0
  182. torchrl/envs/env_creator.py +263 -0
  183. torchrl/envs/gym_like.py +752 -0
  184. torchrl/envs/libs/__init__.py +68 -0
  185. torchrl/envs/libs/_gym_utils.py +326 -0
  186. torchrl/envs/libs/brax.py +846 -0
  187. torchrl/envs/libs/dm_control.py +544 -0
  188. torchrl/envs/libs/envpool.py +447 -0
  189. torchrl/envs/libs/gym.py +2239 -0
  190. torchrl/envs/libs/habitat.py +138 -0
  191. torchrl/envs/libs/isaac_lab.py +87 -0
  192. torchrl/envs/libs/isaacgym.py +203 -0
  193. torchrl/envs/libs/jax_utils.py +166 -0
  194. torchrl/envs/libs/jumanji.py +963 -0
  195. torchrl/envs/libs/meltingpot.py +599 -0
  196. torchrl/envs/libs/openml.py +153 -0
  197. torchrl/envs/libs/openspiel.py +652 -0
  198. torchrl/envs/libs/pettingzoo.py +1042 -0
  199. torchrl/envs/libs/procgen.py +351 -0
  200. torchrl/envs/libs/robohive.py +429 -0
  201. torchrl/envs/libs/smacv2.py +645 -0
  202. torchrl/envs/libs/unity_mlagents.py +891 -0
  203. torchrl/envs/libs/utils.py +147 -0
  204. torchrl/envs/libs/vmas.py +813 -0
  205. torchrl/envs/llm/__init__.py +63 -0
  206. torchrl/envs/llm/chat.py +730 -0
  207. torchrl/envs/llm/datasets/README.md +4 -0
  208. torchrl/envs/llm/datasets/__init__.py +17 -0
  209. torchrl/envs/llm/datasets/gsm8k.py +353 -0
  210. torchrl/envs/llm/datasets/ifeval.py +274 -0
  211. torchrl/envs/llm/envs.py +789 -0
  212. torchrl/envs/llm/libs/README.md +3 -0
  213. torchrl/envs/llm/libs/__init__.py +8 -0
  214. torchrl/envs/llm/libs/mlgym.py +869 -0
  215. torchrl/envs/llm/reward/__init__.py +10 -0
  216. torchrl/envs/llm/reward/gsm8k.py +324 -0
  217. torchrl/envs/llm/reward/ifeval/README.md +13 -0
  218. torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
  219. torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
  220. torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
  221. torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
  222. torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
  223. torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
  224. torchrl/envs/llm/transforms/__init__.py +55 -0
  225. torchrl/envs/llm/transforms/browser.py +292 -0
  226. torchrl/envs/llm/transforms/dataloading.py +859 -0
  227. torchrl/envs/llm/transforms/format.py +73 -0
  228. torchrl/envs/llm/transforms/kl.py +1544 -0
  229. torchrl/envs/llm/transforms/policy_version.py +189 -0
  230. torchrl/envs/llm/transforms/reason.py +323 -0
  231. torchrl/envs/llm/transforms/tokenizer.py +321 -0
  232. torchrl/envs/llm/transforms/tools.py +1955 -0
  233. torchrl/envs/model_based/__init__.py +9 -0
  234. torchrl/envs/model_based/common.py +180 -0
  235. torchrl/envs/model_based/dreamer.py +112 -0
  236. torchrl/envs/transforms/__init__.py +147 -0
  237. torchrl/envs/transforms/functional.py +48 -0
  238. torchrl/envs/transforms/gym_transforms.py +203 -0
  239. torchrl/envs/transforms/module.py +341 -0
  240. torchrl/envs/transforms/r3m.py +372 -0
  241. torchrl/envs/transforms/ray_service.py +663 -0
  242. torchrl/envs/transforms/rb_transforms.py +214 -0
  243. torchrl/envs/transforms/transforms.py +11835 -0
  244. torchrl/envs/transforms/utils.py +94 -0
  245. torchrl/envs/transforms/vc1.py +307 -0
  246. torchrl/envs/transforms/vecnorm.py +845 -0
  247. torchrl/envs/transforms/vip.py +407 -0
  248. torchrl/envs/utils.py +1718 -0
  249. torchrl/envs/vec_envs.py +11 -0
  250. torchrl/modules/__init__.py +206 -0
  251. torchrl/modules/distributions/__init__.py +73 -0
  252. torchrl/modules/distributions/continuous.py +830 -0
  253. torchrl/modules/distributions/discrete.py +908 -0
  254. torchrl/modules/distributions/truncated_normal.py +187 -0
  255. torchrl/modules/distributions/utils.py +233 -0
  256. torchrl/modules/llm/__init__.py +62 -0
  257. torchrl/modules/llm/backends/__init__.py +65 -0
  258. torchrl/modules/llm/backends/vllm/__init__.py +94 -0
  259. torchrl/modules/llm/backends/vllm/_models.py +46 -0
  260. torchrl/modules/llm/backends/vllm/base.py +72 -0
  261. torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
  262. torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
  263. torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
  264. torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
  265. torchrl/modules/llm/policies/__init__.py +28 -0
  266. torchrl/modules/llm/policies/common.py +1809 -0
  267. torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
  268. torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
  269. torchrl/modules/llm/utils.py +23 -0
  270. torchrl/modules/mcts/__init__.py +21 -0
  271. torchrl/modules/mcts/scores.py +579 -0
  272. torchrl/modules/models/__init__.py +86 -0
  273. torchrl/modules/models/batchrenorm.py +119 -0
  274. torchrl/modules/models/decision_transformer.py +179 -0
  275. torchrl/modules/models/exploration.py +731 -0
  276. torchrl/modules/models/llm.py +156 -0
  277. torchrl/modules/models/model_based.py +596 -0
  278. torchrl/modules/models/models.py +1712 -0
  279. torchrl/modules/models/multiagent.py +1067 -0
  280. torchrl/modules/models/recipes/impala.py +185 -0
  281. torchrl/modules/models/utils.py +162 -0
  282. torchrl/modules/planners/__init__.py +10 -0
  283. torchrl/modules/planners/cem.py +228 -0
  284. torchrl/modules/planners/common.py +73 -0
  285. torchrl/modules/planners/mppi.py +265 -0
  286. torchrl/modules/tensordict_module/__init__.py +89 -0
  287. torchrl/modules/tensordict_module/actors.py +2457 -0
  288. torchrl/modules/tensordict_module/common.py +529 -0
  289. torchrl/modules/tensordict_module/exploration.py +814 -0
  290. torchrl/modules/tensordict_module/probabilistic.py +321 -0
  291. torchrl/modules/tensordict_module/rnn.py +1639 -0
  292. torchrl/modules/tensordict_module/sequence.py +132 -0
  293. torchrl/modules/tensordict_module/world_models.py +34 -0
  294. torchrl/modules/utils/__init__.py +38 -0
  295. torchrl/modules/utils/mappings.py +9 -0
  296. torchrl/modules/utils/utils.py +89 -0
  297. torchrl/objectives/__init__.py +78 -0
  298. torchrl/objectives/a2c.py +659 -0
  299. torchrl/objectives/common.py +753 -0
  300. torchrl/objectives/cql.py +1346 -0
  301. torchrl/objectives/crossq.py +710 -0
  302. torchrl/objectives/ddpg.py +453 -0
  303. torchrl/objectives/decision_transformer.py +371 -0
  304. torchrl/objectives/deprecated.py +516 -0
  305. torchrl/objectives/dqn.py +683 -0
  306. torchrl/objectives/dreamer.py +488 -0
  307. torchrl/objectives/functional.py +48 -0
  308. torchrl/objectives/gail.py +258 -0
  309. torchrl/objectives/iql.py +996 -0
  310. torchrl/objectives/llm/__init__.py +30 -0
  311. torchrl/objectives/llm/grpo.py +846 -0
  312. torchrl/objectives/llm/sft.py +482 -0
  313. torchrl/objectives/multiagent/__init__.py +8 -0
  314. torchrl/objectives/multiagent/qmixer.py +396 -0
  315. torchrl/objectives/ppo.py +1669 -0
  316. torchrl/objectives/redq.py +683 -0
  317. torchrl/objectives/reinforce.py +530 -0
  318. torchrl/objectives/sac.py +1580 -0
  319. torchrl/objectives/td3.py +570 -0
  320. torchrl/objectives/td3_bc.py +625 -0
  321. torchrl/objectives/utils.py +782 -0
  322. torchrl/objectives/value/__init__.py +28 -0
  323. torchrl/objectives/value/advantages.py +1956 -0
  324. torchrl/objectives/value/functional.py +1459 -0
  325. torchrl/objectives/value/utils.py +360 -0
  326. torchrl/record/__init__.py +17 -0
  327. torchrl/record/loggers/__init__.py +23 -0
  328. torchrl/record/loggers/common.py +48 -0
  329. torchrl/record/loggers/csv.py +226 -0
  330. torchrl/record/loggers/mlflow.py +142 -0
  331. torchrl/record/loggers/tensorboard.py +139 -0
  332. torchrl/record/loggers/trackio.py +163 -0
  333. torchrl/record/loggers/utils.py +78 -0
  334. torchrl/record/loggers/wandb.py +214 -0
  335. torchrl/record/recorder.py +554 -0
  336. torchrl/services/__init__.py +79 -0
  337. torchrl/services/base.py +109 -0
  338. torchrl/services/ray_service.py +453 -0
  339. torchrl/testing/__init__.py +107 -0
  340. torchrl/testing/assertions.py +179 -0
  341. torchrl/testing/dist_utils.py +122 -0
  342. torchrl/testing/env_creators.py +227 -0
  343. torchrl/testing/env_helper.py +35 -0
  344. torchrl/testing/gym_helpers.py +156 -0
  345. torchrl/testing/llm_mocks.py +119 -0
  346. torchrl/testing/mocking_classes.py +2720 -0
  347. torchrl/testing/modules.py +295 -0
  348. torchrl/testing/mp_helpers.py +15 -0
  349. torchrl/testing/ray_helpers.py +293 -0
  350. torchrl/testing/utils.py +190 -0
  351. torchrl/trainers/__init__.py +42 -0
  352. torchrl/trainers/algorithms/__init__.py +11 -0
  353. torchrl/trainers/algorithms/configs/__init__.py +705 -0
  354. torchrl/trainers/algorithms/configs/collectors.py +216 -0
  355. torchrl/trainers/algorithms/configs/common.py +41 -0
  356. torchrl/trainers/algorithms/configs/data.py +308 -0
  357. torchrl/trainers/algorithms/configs/envs.py +104 -0
  358. torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
  359. torchrl/trainers/algorithms/configs/logging.py +80 -0
  360. torchrl/trainers/algorithms/configs/modules.py +570 -0
  361. torchrl/trainers/algorithms/configs/objectives.py +177 -0
  362. torchrl/trainers/algorithms/configs/trainers.py +340 -0
  363. torchrl/trainers/algorithms/configs/transforms.py +955 -0
  364. torchrl/trainers/algorithms/configs/utils.py +252 -0
  365. torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
  366. torchrl/trainers/algorithms/configs/weight_update.py +159 -0
  367. torchrl/trainers/algorithms/ppo.py +373 -0
  368. torchrl/trainers/algorithms/sac.py +308 -0
  369. torchrl/trainers/helpers/__init__.py +40 -0
  370. torchrl/trainers/helpers/collectors.py +416 -0
  371. torchrl/trainers/helpers/envs.py +573 -0
  372. torchrl/trainers/helpers/logger.py +33 -0
  373. torchrl/trainers/helpers/losses.py +132 -0
  374. torchrl/trainers/helpers/models.py +658 -0
  375. torchrl/trainers/helpers/replay_buffer.py +59 -0
  376. torchrl/trainers/helpers/trainers.py +301 -0
  377. torchrl/trainers/trainers.py +2052 -0
  378. torchrl/weight_update/__init__.py +33 -0
  379. torchrl/weight_update/_distributed.py +749 -0
  380. torchrl/weight_update/_mp.py +624 -0
  381. torchrl/weight_update/_noupdate.py +102 -0
  382. torchrl/weight_update/_ray.py +1032 -0
  383. torchrl/weight_update/_rpc.py +284 -0
  384. torchrl/weight_update/_shared.py +891 -0
  385. torchrl/weight_update/llm/__init__.py +32 -0
  386. torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
  387. torchrl/weight_update/llm/vllm_nccl.py +710 -0
  388. torchrl/weight_update/utils.py +73 -0
  389. torchrl/weight_update/weight_sync_schemes.py +1244 -0
  390. torchrl-0.11.0.dist-info/METADATA +1308 -0
  391. torchrl-0.11.0.dist-info/RECORD +395 -0
  392. torchrl-0.11.0.dist-info/WHEEL +5 -0
  393. torchrl-0.11.0.dist-info/entry_points.txt +2 -0
  394. torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
  395. torchrl-0.11.0.dist-info/top_level.txt +7 -0
torchrl/weight_update/llm/vllm_nccl.py
@@ -0,0 +1,710 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ """vLLM weight synchronization for the v2 API.
+
+ This module provides weight synchronization for vLLM engines using a two-layer
+ architecture:
+
+ **Architecture Overview**
+
+ The weight synchronization uses two separate layers:
+
+ 1. **RPC Layer** (Coordination)
+    - Signals workers when a collective operation will begin
+    - Can be implemented with different backends (Ray, torch.distributed.rpc, etc.)
+    - Tells vLLM workers: "prepare to receive weights via collective"
+    - Currently supports Ray as the RPC backend
+
+ 2. **Collective Layer** (Data Transfer)
+    - Performs the actual weight broadcast using NCCL
+    - High-bandwidth GPU-to-GPU communication
+    - All ranks participate simultaneously in the collective
+
+ **Why Two Layers?**
+
+ Separating RPC and collectives provides:
+ - **Flexibility**: Swap RPC backends (Ray, RPC, gRPC) without changing collectives
+ - **Clarity**: Coordination logic separate from data transfer
+ - **Performance**: Use optimal transport for each (RPC for signals, NCCL for data)
+
+ **Flow Example (Ray Backend)**
+
+ .. code-block::
+
+     Trainer (rank 0)                    vLLM Workers (ranks 1+)
+     ================                    =======================
+
+     # 1. RPC: Signal collective start
+     trainer.update_weights() ---------> [Ray RPC] --------> receiver.init_all_workers_group()
+                                         "I'm ready for collective"
+
+     # 2. Collective: Broadcast weights
+     NCCL broadcast -------------------- [GPU-GPU] ---------> NCCL receive
+     (high bandwidth)                                         (parallel)
+
+     # 3. RPC: Confirmation (optional)
+     "broadcast done" <----------------- [Ray RPC] --------- "weights applied"
+
+ **Extending to Other Backends**
+
+ To add a new RPC backend (e.g., torch.distributed.rpc):
+
+ 1. Implement an RPC coordinator in the sender/receiver
+ 2. Replace Ray remote calls with your RPC mechanism
+ 3. Keep the collective layer unchanged (it's backend-agnostic)
+
+ .. rubric:: Example
+
+ .. code-block:: python
+
+     class TorchRPCVLLMReceiver(VLLMWeightReceiver):
+         def init_all_workers_group(self, metadata):
+             # Use torch.distributed.rpc instead of Ray
+             torch.distributed.rpc.rpc_sync(
+                 "trainer",
+                 lambda: "ready",
+             )
+             super().init_all_workers_group(metadata)  # Collective init
+
+ **Current Implementation (Ray Backend)**
+
+ .. code-block:: python
+
+     # Trainer actor (provides RPC endpoint)
+     trainer = RayWorkerTransformer.as_remote().options(
+         name="Trainer"  # Named for discovery
+     ).remote(scheme_config)
+
+     # Receiver actor (uses RPC to coordinate)
+     receiver = RayWorkerVLLM.as_remote().remote(
+         scheme_config, trainer_actor_name="Trainer"
+     )
+
+     # RPC Layer: Both actors call init() via Ray remote calls
+     # This coordinates the collective handshake
+     ray.get([trainer.init.remote(), receiver.init.remote()])
+
+     # RPC Layer: Trigger update via Ray remote call
+     # Collective Layer: NCCL broadcast happens automatically
+     ray.get(trainer.update_weights.remote(modify_weights=True))
+
+ In this setup:
+ - **Ray provides RPC**: Named actors, ``remote()`` calls, ``ray.get()``
+ - **NCCL provides collectives**: GPU-GPU weight broadcast
+ - **Loose coupling**: Can replace Ray with any RPC mechanism
+ """
+
+ from __future__ import annotations
+
+ import time
+
+ from typing import Any, Literal
+
+ import torch
+ import torch.distributed
+ from tensordict import TensorDictBase
+
+ from torchrl._utils import logger as torchrl_logger
+ from torchrl.modules.llm.backends import stateless_init_process_group
+ from torchrl.weight_update.weight_sync_schemes import WeightStrategy, WeightSyncScheme
+
+ # ============================================================================
+ # vLLM Transport using Collective Communication
+ # ============================================================================
+
+
+ class VLLMCollectiveTransport:
+     """Transport for vLLM using collective communication (NCCL).
+
+     **COLLECTIVE LAYER ONLY** - This class handles the data transfer layer.
+     RPC coordination is handled separately by the caller (sender/receiver).
+
+     This transport uses PyTorch distributed collectives to broadcast weights
+     from a trainer (rank 0) to vLLM workers (ranks 1+).
+
+     **Separation of Concerns:**
+     - This class: NCCL collective operations (GPU-GPU data transfer)
+     - Caller (sender/receiver): RPC coordination (when to start collective)
+
+     Args:
+         master_address: Address of the master node for distributed init.
+         master_port: Port of the master node for distributed init.
+         rank: Rank of this process (0 for trainer, 1+ for vLLM workers).
+         world_size: Total number of processes (1 + num_replicas * gpus_per_replica).
+         device: Device to use for communication (typically cuda:0).
+         vllm_engine: Optional vLLM engine reference (for receiver side).
+
+     Note:
+         The RPC layer (e.g., Ray remote calls) must ensure all ranks call
+         init_all_workers_group() simultaneously before any collective operations.
+     """
+
+     def __init__(
+         self,
+         master_address: str,
+         master_port: int,
+         rank: int | None,
+         world_size: int,
+         device: torch.device | str | int | None = None,
+         vllm_engine: Any | None = None,
+     ):
+         self.master_address = master_address
+         self.master_port = master_port
+         self.rank = rank
+         self.world_size = world_size
+         self.vllm_engine = vllm_engine
+         self._comm_group = None
+         self._model_metadata = None
+
+         # Ray sets CUDA_VISIBLE_DEVICES, so each actor sees only device 0
+         # PyNcclCommunicator expects an integer device index
+         if device is None:
+             self.device = 0  # Default to device 0 (Ray convention)
+         elif isinstance(device, str):
+             # Extract device index from "cuda:X"
+             self.device = int(device.split(":")[-1]) if ":" in device else 0
+         elif isinstance(device, torch.device):
+             # Extract index from torch.device
+             self.device = device.index if device.index is not None else 0
+         else:
+             self.device = device
+
+     def init_all_workers_group(
+         self, model_metadata: dict[str, tuple[torch.dtype, torch.Size]]
+     ):
+         """Initialize the collective communication group.
+
+         Args:
+             model_metadata: Dict mapping param names to (dtype, shape) tuples.
+         """
+         self._model_metadata = model_metadata
+
+         if self.rank == 0:
+             # Trainer side - initialize process group
+             torchrl_logger.debug(
+                 f"Initializing trainer collective group: rank={self.rank}, world_size={self.world_size}, device={self.device}"
+             )
+             # Ray sets CUDA_VISIBLE_DEVICES, so we always use device 0
+             # Set CUDA device before initializing NCCL to avoid segfaults
+             torch.cuda.set_device(self.device)
+             torchrl_logger.debug(f"Set CUDA device to {self.device}")
+
+             self._comm_group = stateless_init_process_group(
+                 self.master_address,
+                 self.master_port,
+                 self.rank,
+                 self.world_size,
+                 device=self.device,
+             )
+             torchrl_logger.debug("Trainer collective group initialized successfully")
+         else:
+             # vLLM worker side - initialize through engine
+             if self.vllm_engine is None:
+                 raise ValueError("vllm_engine must be provided for worker ranks")
+
+             torchrl_logger.debug(
+                 "Initializing vLLM worker collective group through engine"
+             )
+             # Call vLLM engine's init method - it returns futures for all workers
+             # Workers will start NCCL init in background threads and return immediately
+             refs = self.vllm_engine.init_weight_update_group(
+                 master_address=self.master_address,
+                 master_port=self.master_port,
+             )
+
+             # Wait for RPCs to complete - ensures workers have dispatched their NCCL init threads
+             import ray
+
+             ray.get(refs)
+             torchrl_logger.debug(
+                 f"All {len(refs)} vLLM workers have dispatched NCCL init RPCs"
+             )
+
+             # Small delay to ensure worker background threads have entered the NCCL collective
+             # This prevents a race where the trainer starts NCCL before workers are ready
+
+             time.sleep(0.2)
+
+             self._comm_group = True  # Mark as initialized
+             torchrl_logger.debug(
+                 "vLLM workers should now be blocked in NCCL collective, ready for trainer"
+             )
+
+     def send_weights(self, model_id: str, weights: Any) -> None:
+         """Broadcast weights to all workers using NCCL.
+
+         This method follows AsyncVLLM's periodic-mono pattern:
+         For each weight: RPC → NCCL broadcast → Wait for RPC completion
+
+         This should only be called from rank 0 (trainer).
+
+         Args:
+             model_id: ID of the model (used for logging).
+             weights: TensorDict or dict of weights to broadcast.
+         """
+         # This code is a duplicate from AsyncVLLM
+         # We are waiting for vLLM server to accept tokens endpoints, at which point we will be
+         # able to remove all dependencies on Ray for vllm distributed features.
+         # This will allow a more natural integration with the sender/receiver API.
+
+         import ray
+
+         if self.rank != 0:
+             raise RuntimeError("send_weights should only be called from rank 0")
+
+         if self._comm_group is None:
+             raise RuntimeError(
+                 "Communication group not initialized. Call init_all_workers_group first."
+             )
+
+         if self._model_metadata is None:
+             raise RuntimeError("Model metadata not set")
+
+         if self.vllm_engine is None:
+             raise RuntimeError(
+                 "vllm_engine must be provided to sender for RPC coordination"
+             )
+
+         # Set CUDA device for this operation
+         torch.cuda.set_device(self.device)
+
+         # Convert to dict if needed
+         if isinstance(weights, TensorDictBase):
+             weights_dict = weights.to_dict()
+         else:
+             weights_dict = weights
+
+         torchrl_logger.debug(
+             f"Broadcasting {len(weights_dict)} weights for model '{model_id}'"
+         )
+
+         # Broadcast each weight using periodic-mono pattern (like AsyncVLLM)
+         for name, (dtype, shape) in self._model_metadata.items():
+             if name not in weights_dict:
+                 raise ValueError(
+                     f"Weight '{name}' not found in weights. Weights keys: {list(weights_dict.keys())[:10]}..."
+                 )
+
+             tensor = weights_dict[name].to(f"cuda:{self.device}")
+             dtype_name = str(dtype).split(".")[-1]  # "torch.float16" -> "float16"
+
+             # Step 1: Send RPC to workers for this weight
+             futures = self.vllm_engine.collective_rpc(
+                 "update_weight", args=(name, dtype_name, tuple(shape))
+             )
+
+             # Step 2: Immediately broadcast this weight
+             self._comm_group.broadcast(
+                 tensor,
+                 src=0,
+                 stream=torch.cuda.current_stream(),
+             )
+
+             # Step 3: Wait for workers to complete this weight
+             ray.get(futures)
+             del tensor
+
+         torch.cuda.synchronize()
+         torchrl_logger.debug(f"Broadcast complete for model '{model_id}'")
+
+     def receive_weights(
+         self,
+         timeout: float | None = None,
+         *,
+         weights: Any = None,
+         model: Any = None,
+         strategy: Any = None,
+     ) -> Any | None:
+         """Receive weights from broadcaster.
+
+         This should only be called from worker ranks (rank > 0).
+         This method is called by vLLM engine internally through collective operations.
+
+         Args:
+             timeout: Ignored (vLLM handles synchronization internally).
+             weights: Ignored.
+             model: Ignored.
+             strategy: Ignored.
+
+         Returns:
+             None - vLLM handles weight application internally via collectives.
+         """
+         # vLLM handles this through its own collective operations
+         # The weights are received and applied by the engine during broadcast
+         return None
+
+     def check_connection(self) -> bool:
+         """Check if the communication group is initialized."""
+         return self._comm_group is not None
+
+
+ # ============================================================================
+ # vLLM Weight Synchronization Components
+ # ============================================================================
+
+
+ class VLLMWeightSyncScheme(WeightSyncScheme):
+     """Weight synchronization scheme for vLLM engines.
+
+     This scheme uses collective communication (NCCL) to broadcast weights from
+     a trainer to vLLM inference workers with parallelism support.
+
+     Args:
+         master_address: Address of the master node. Defaults to "localhost".
+         master_port: Port of the master node. If None, will auto-assign.
+         gpus_per_replica: Number of GPUs per replica (tp_size × dp_size × pp_size).
+         num_replicas: Number of vLLM engine replicas. Defaults to 1.
+         strategy: Weight extraction strategy ("tensordict" or "state_dict").
+         device: Device index to use for communication. Defaults to 0.
+             Note: When using Ray, each actor sees only its assigned GPU as device 0
+             due to CUDA_VISIBLE_DEVICES isolation. You should typically use 0.
+
+     .. warning::
+         Collective communication requires ALL ranks to participate simultaneously.
+         Both the sender (trainer, rank 0) and all receivers (vLLM workers, ranks 1+)
+         must call ``init_all_workers_group()`` at approximately the same time for the collective
+         handshake to succeed. Do NOT wait for one init to complete before starting
+         the other - start both and wait for both together.
+
+     Note:
+         The world_size for NCCL will be: 1 (trainer) + num_replicas × gpus_per_replica (vLLM workers)
+
+     Example:
+         >>> # Single replica with 2 GPUs (e.g., tp_size=2)
+         >>> scheme = VLLMWeightSyncScheme(
+         ...     master_port=12345,
+         ...     gpus_per_replica=2,
+         ...     num_replicas=1,
+         ...     strategy="tensordict"
+         ... )  # world_size = 1 + 1*2 = 3
+         >>>
+         >>> # Multiple replicas with 1 GPU each
+         >>> scheme = VLLMWeightSyncScheme(
+         ...     master_port=12345,
+         ...     gpus_per_replica=1,
+         ...     num_replicas=2,
+         ...     strategy="tensordict"
+         ... )  # world_size = 1 + 2*1 = 3
+         >>>
+         >>> # Multiple replicas with tp_size=2, dp_size=1, pp_size=1
+         >>> scheme = VLLMWeightSyncScheme(
+         ...     master_port=12345,
+         ...     gpus_per_replica=2,  # 2*1*1
+         ...     num_replicas=3,
+         ...     strategy="tensordict"
+         ... )  # world_size = 1 + 3*2 = 7
+         >>>
+         >>> # In trainer process (rank 0)
+         >>> sender = VLLMWeightSender(scheme)
+         >>> sender.register_model(policy)
+         >>>
+         >>> # In vLLM worker process (rank 1+)
+         >>> receiver = VLLMWeightReceiver(scheme, vllm_engine)
+         >>>
+         >>> # IMPORTANT: Both must init simultaneously for collective handshake
+         >>> # With Ray:
+         >>> init_sender = sender_actor.init_all_workers_group.remote(metadata)
+         >>> init_receiver = receiver_actor.init_all_workers_group.remote(metadata)
+         >>> ray.get([init_sender, init_receiver])  # Wait for both together
+         >>>
+         >>> # After init, updates work normally
+         >>> sender.update_weights()
+         >>> # Weights are received automatically via collectives
+     """
+
+     def __init__(
+         self,
+         master_address: str | None = None,
+         master_port: int | None = None,
+         gpus_per_replica: int = 1,
+         num_replicas: int = 1,
+         strategy: Literal["tensordict", "state_dict"] = "tensordict",
+         device: torch.device | str | int = 0,
+     ):
+         self.master_address = (
+             master_address if master_address is not None else "localhost"
+         )
+         self.master_port = master_port
+         self.gpus_per_replica = gpus_per_replica
+         self.num_replicas = num_replicas
+         self.strategy_name = strategy
+         # Ray sets CUDA_VISIBLE_DEVICES for each actor, so device 0 is typical
+         self.device = device
+
+         # Auto-assign port if not provided
+         if self.master_port is None:
+             try:
+                 from vllm.utils import get_open_port
+
+                 self.master_port = get_open_port()
+             except ImportError:
+                 # Fallback if vLLM not available
+                 import socket
+
+                 with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                     s.bind(("", 0))
+                     self.master_port = s.getsockname()[1]
+
+     def create_transport(self, **kwargs) -> VLLMCollectiveTransport:
+         """Create transport for collective communication.
+
+         For vLLM, this creates a transport but requires additional setup via init_all_workers_group().
+         This method is required by the base class but transport creation for vLLM
+         is more complex and typically handled by sender/receiver initialization.
+
+         Args:
+             **kwargs: Not used for vLLM (kept for API compatibility).
+
+         Returns:
+             A VLLMCollectiveTransport instance (needs init_all_workers_group() to be called).
+         """
+         # Return a transport with default rank 0 (trainer)
+         # Actual initialization happens in sender/receiver
+         world_size = 1 + self.num_replicas * self.gpus_per_replica
+         return VLLMCollectiveTransport(
+             master_address=self.master_address,
+             master_port=self.master_port,
+             rank=0,
+             world_size=world_size,
+             device=self.device,
+         )
+
+     def create_sender(self) -> VLLMWeightSender:
+         """Create a weight sender for the trainer process."""
+         return VLLMWeightSender(self)
+
+     def create_receiver(self, vllm_engine) -> VLLMWeightReceiver:
+         """Create a weight receiver for a vLLM worker process.
+
+         Args:
+             vllm_engine: The vLLM engine instance (must implement RLvLLMEngine interface).
+         """
+         return VLLMWeightReceiver(self, vllm_engine)
+
+
+ class VLLMWeightSender:
+     """Sends weights to vLLM workers using collective communication.
+
+     **RPC + Collective Implementation**
+
+     This class implements both layers:
+
+     1. **RPC Layer**: Currently uses Ray remote calls (implicit in test setup)
+        - Can be extended to other RPC backends (torch.distributed.rpc, gRPC)
+        - In the test, Ray actors provide the RPC mechanism
+
+     2. **Collective Layer**: Uses VLLMCollectiveTransport for NCCL broadcast
+        - Broadcasts weights from trainer (rank 0) to workers (ranks 1+)
+        - High-bandwidth GPU-to-GPU transfer
+
+     **Extending RPC Backends**
+
+     To use a different RPC backend, subclass and override coordination:
+
+     .. code-block:: python
+
+         class TorchRPCVLLMSender(VLLMWeightSender):
+             def update_weights(self, weights=None):
+                 # Custom RPC: Signal workers to prepare
+                 for worker in self.workers:
+                     torch.distributed.rpc.rpc_async(worker, "prepare_receive")
+
+                 # Then do collective (unchanged)
+                 super().update_weights(weights)
+     """
+
+     def __init__(self, scheme: VLLMWeightSyncScheme):
+         self._scheme = scheme
+         self._strategy = WeightStrategy(extract_as=scheme.strategy_name)
+         self._model_ref = None
+         self._transport = None
+         self._model_metadata = None
+
+     def register_model(self, model: Any) -> None:
+         """Register the model to extract weights from."""
+         import weakref
+
+         self._model_ref = weakref.ref(model)
+
+     def init_all_workers_group(
+         self,
+         model_metadata: dict[str, tuple[torch.dtype, torch.Size]],
+         vllm_engine: Any | None = None,
+     ):
+         """Initialize the collective communication group.
+
+         Args:
+             model_metadata: Dict mapping param names to (dtype, shape) tuples.
+             vllm_engine: Optional vLLM engine for RPC coordination. Required for NCCL broadcasts.
+         """
+         self._model_metadata = model_metadata
+         self._vllm_engine = vllm_engine
+
+         # Create transport for trainer (rank 0)
+         world_size = 1 + self._scheme.num_replicas * self._scheme.gpus_per_replica
+         self._transport = VLLMCollectiveTransport(
+             master_address=self._scheme.master_address,
+             master_port=self._scheme.master_port,
+             rank=0,  # Trainer is always rank 0
+             world_size=world_size,
+             device=self._scheme.device,
+             vllm_engine=vllm_engine,
+         )
+         torchrl_logger.debug(
+             f"Initializing transport from sender with world_size={world_size}"
+         )
+         self._transport.init_all_workers_group(model_metadata)
+
+     def update_weights(self, weights: Any | None = None) -> None:
+         """Extract and broadcast weights to vLLM workers.
+
+         Args:
+             weights: Optional weights to send. If None, extracts from registered model.
+         """
+         if self._transport is None:
+             raise RuntimeError(
+                 "Transport not initialized. Call init_all_workers_group first."
+             )
+
+         # Extract weights if not provided
+         if weights is None:
+             model = self._model_ref()
+             if model is None:
+                 raise RuntimeError("Model reference is dead")
+             weights = self._strategy.extract_weights(model)
+         else:
+             # Ensure weights are in the right format
+             if hasattr(weights, "state_dict"):
+                 # It's a module, extract
+                 weights = self._strategy.extract_weights(weights)
+
+         # Send via transport
+         self._transport.send_weights("vllm_model", weights)
+
+
+ class VLLMWeightReceiver:
+     """Receives weights in a vLLM worker using collective communication.
+
+     **RPC + Collective Implementation**
+
+     This class implements both layers:
+
+     1. **RPC Layer**: Currently uses Ray for coordination
+        - `init()` in test uses Ray `ray.get_actor()` to find trainer
+        - Fetches metadata via Ray remote call
+        - Signals readiness to participate in collective
+
+     2. **Collective Layer**: Participates in NCCL broadcast
+        - Receives weights via collective operations
+        - vLLM engine applies weights internally during broadcast
+
+     **Extending RPC Backends**
+
+     To use a different RPC backend:
+
+     .. code-block:: python
+
+         class TorchRPCVLLMReceiver(VLLMWeightReceiver):
+             def init(self):
+                 # Custom RPC: Get metadata from trainer
+                 metadata = torch.distributed.rpc.rpc_sync(
+                     "trainer",
+                     lambda: get_metadata()
+                 )
+
+                 # Then init collective (unchanged)
+                 self.receiver.init_all_workers_group(metadata)
+
+     Note:
+         The RPC and collective layers are loosely coupled. The RPC layer
+         ensures all ranks are ready before the collective starts, but the
+         actual data transfer is independent of the RPC mechanism.
+     """
+
+     def __init__(self, scheme: VLLMWeightSyncScheme, vllm_engine):
+         self._scheme = scheme
+         self._strategy = WeightStrategy(extract_as=scheme.strategy_name)
+         self._vllm_engine = vllm_engine
+         self._transport = None
+
+     def init_all_workers_group(
+         self, model_metadata: dict[str, tuple[torch.dtype, torch.Size]]
+     ):
+         """Initialize the collective communication group.
+
+         Args:
+             model_metadata: Dict mapping param names to (dtype, shape) tuples.
+         """
+         # For vLLM receiver, we use rank=1 as a placeholder
+         # The engine handles actual rank assignment internally for all workers
+         world_size = 1 + self._scheme.num_replicas * self._scheme.gpus_per_replica
+         self._transport = VLLMCollectiveTransport(
+             master_address=self._scheme.master_address,
+             master_port=self._scheme.master_port,
+             rank=None,  # Placeholder - engine assigns actual ranks
+             world_size=world_size,
+             device=self._scheme.device,
+             vllm_engine=self._vllm_engine,
+         )
+         torchrl_logger.debug(
+             f"Initializing transport from receiver with world_size={world_size}."
+         )
+         self._transport.init_all_workers_group(model_metadata)
+
+     def apply_weights(self, weights: Any, inplace: bool = True) -> None:
+         """Apply weights to vLLM engine.
+
+         Args:
+             weights: The weights to apply.
+             inplace: Whether to apply weights in place. Default is `True`.
+
+         Note: For vLLM, weights are applied automatically during the collective
+         broadcast operation. This method is a no-op but kept for API consistency.
+         """
+         # vLLM handles weight application through its collective operations
+         # The weights are already applied by the time broadcast completes
+
+     def poll_and_apply(self, timeout: float = 0.1) -> bool:
+         """Poll for and apply weights.
+
+         Returns:
+             False - vLLM uses push-based updates via collectives, not polling.
+         """
+         # vLLM uses collective broadcasts (push), not polling
+         # This is handled by the engine's collective operations
+         return False
+
+
+ # ============================================================================
+ # Helper Functions
+ # ============================================================================
+
+
+ def get_model_metadata(model) -> dict[str, tuple[torch.dtype, torch.Size]]:
+     """Extract model metadata from a model.
+
+     Args:
+         model: A model with state_dict() or a model wrapper.
+
+     Returns:
+         Dict mapping parameter names to (dtype, shape) tuples.
+
+     Note:
+         This function must extract keys in the same format as WeightStrategy.extract_weights()
+         to ensure consistency between metadata and actual weight keys during broadcasting.
+     """
+     # Extract state_dict directly from the model
+     # This ensures keys match what extract_weights() will produce
+     if hasattr(model, "state_dict"):
+         if hasattr(model, "merge_and_unload"):
+             # LoRA model
+             sd = model.merge_and_unload().state_dict()
+         else:
+             sd = model.state_dict()
+     else:
+         raise TypeError(f"Cannot extract state_dict from {type(model)}")
+
+     return {k: (v.dtype, v.shape) for k, v in sd.items()}
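
A minimal usage sketch of the API defined in this file, based only on the docstrings above. It assumes this diff corresponds to torchrl/weight_update/llm/vllm_nccl.py from the file list, and it uses placeholder objects (policy, vllm_engine, and the port number) that are not part of the packaged code:

    # Sketch only: "policy" is the training nn.Module, "vllm_engine" an
    # RLvLLMEngine-compatible engine; both are placeholders.
    from torchrl.weight_update.llm.vllm_nccl import (
        VLLMWeightSyncScheme,
        get_model_metadata,
    )

    scheme = VLLMWeightSyncScheme(
        master_port=12345,       # or None to auto-assign a free port
        gpus_per_replica=1,      # tp_size * dp_size * pp_size
        num_replicas=1,          # NCCL world_size = 1 + 1 * 1 = 2
        strategy="tensordict",
    )

    # Trainer process (rank 0): the sender extracts weights from the policy.
    sender = scheme.create_sender()
    sender.register_model(policy)

    # vLLM-side process (ranks 1+): the receiver coordinates through the engine.
    receiver = scheme.create_receiver(vllm_engine)

    # Shared (dtype, shape) metadata drives the per-weight broadcast loop.
    metadata = get_model_metadata(policy)

    # Both sides must enter the handshake at roughly the same time, e.g. as
    # two Ray remote calls gathered with a single ray.get(); do not wait for
    # one init to finish before starting the other.
    sender.init_all_workers_group(metadata, vllm_engine=vllm_engine)
    receiver.init_all_workers_group(metadata)

    # Each later update is one RPC plus one NCCL broadcast per parameter.
    sender.update_weights()

Because send_weights() iterates over the stored metadata and looks each key up in the extracted weights, get_model_metadata() reads its keys straight from state_dict(); this keeps the metadata keys and the broadcast weight keys in the same format.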