torchrl-0.11.0-cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (394)
  1. benchmarks/benchmark_batched_envs.py +104 -0
  2. benchmarks/conftest.py +91 -0
  3. benchmarks/ecosystem/gym_env_throughput.py +321 -0
  4. benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
  5. benchmarks/requirements.txt +7 -0
  6. benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
  7. benchmarks/test_collectors_benchmark.py +240 -0
  8. benchmarks/test_compressed_storage_benchmark.py +145 -0
  9. benchmarks/test_envs_benchmark.py +133 -0
  10. benchmarks/test_llm.py +101 -0
  11. benchmarks/test_non_tensor_env_benchmark.py +70 -0
  12. benchmarks/test_objectives_benchmarks.py +1199 -0
  13. benchmarks/test_replaybuffer_benchmark.py +254 -0
  14. sota-check/README.md +35 -0
  15. sota-implementations/README.md +142 -0
  16. sota-implementations/a2c/README.md +39 -0
  17. sota-implementations/a2c/a2c_atari.py +291 -0
  18. sota-implementations/a2c/a2c_mujoco.py +273 -0
  19. sota-implementations/a2c/utils_atari.py +240 -0
  20. sota-implementations/a2c/utils_mujoco.py +160 -0
  21. sota-implementations/bandits/README.md +7 -0
  22. sota-implementations/bandits/dqn.py +126 -0
  23. sota-implementations/cql/cql_offline.py +198 -0
  24. sota-implementations/cql/cql_online.py +249 -0
  25. sota-implementations/cql/discrete_cql_offline.py +180 -0
  26. sota-implementations/cql/discrete_cql_online.py +227 -0
  27. sota-implementations/cql/utils.py +471 -0
  28. sota-implementations/crossq/crossq.py +271 -0
  29. sota-implementations/crossq/utils.py +320 -0
  30. sota-implementations/ddpg/ddpg.py +231 -0
  31. sota-implementations/ddpg/utils.py +325 -0
  32. sota-implementations/decision_transformer/dt.py +163 -0
  33. sota-implementations/decision_transformer/lamb.py +167 -0
  34. sota-implementations/decision_transformer/online_dt.py +178 -0
  35. sota-implementations/decision_transformer/utils.py +562 -0
  36. sota-implementations/discrete_sac/discrete_sac.py +243 -0
  37. sota-implementations/discrete_sac/utils.py +324 -0
  38. sota-implementations/dqn/README.md +30 -0
  39. sota-implementations/dqn/dqn_atari.py +272 -0
  40. sota-implementations/dqn/dqn_cartpole.py +236 -0
  41. sota-implementations/dqn/utils_atari.py +132 -0
  42. sota-implementations/dqn/utils_cartpole.py +90 -0
  43. sota-implementations/dreamer/README.md +129 -0
  44. sota-implementations/dreamer/dreamer.py +586 -0
  45. sota-implementations/dreamer/dreamer_utils.py +1107 -0
  46. sota-implementations/expert-iteration/README.md +352 -0
  47. sota-implementations/expert-iteration/ei_utils.py +770 -0
  48. sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
  49. sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
  50. sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
  51. sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
  52. sota-implementations/gail/gail.py +327 -0
  53. sota-implementations/gail/gail_utils.py +68 -0
  54. sota-implementations/gail/ppo_utils.py +157 -0
  55. sota-implementations/grpo/README.md +273 -0
  56. sota-implementations/grpo/grpo-async.py +437 -0
  57. sota-implementations/grpo/grpo-sync.py +435 -0
  58. sota-implementations/grpo/grpo_utils.py +843 -0
  59. sota-implementations/grpo/requirements_gsm8k.txt +11 -0
  60. sota-implementations/grpo/requirements_ifeval.txt +16 -0
  61. sota-implementations/impala/README.md +33 -0
  62. sota-implementations/impala/impala_multi_node_ray.py +292 -0
  63. sota-implementations/impala/impala_multi_node_submitit.py +284 -0
  64. sota-implementations/impala/impala_single_node.py +261 -0
  65. sota-implementations/impala/utils.py +184 -0
  66. sota-implementations/iql/discrete_iql.py +230 -0
  67. sota-implementations/iql/iql_offline.py +164 -0
  68. sota-implementations/iql/iql_online.py +225 -0
  69. sota-implementations/iql/utils.py +437 -0
  70. sota-implementations/multiagent/README.md +74 -0
  71. sota-implementations/multiagent/iql.py +237 -0
  72. sota-implementations/multiagent/maddpg_iddpg.py +266 -0
  73. sota-implementations/multiagent/mappo_ippo.py +267 -0
  74. sota-implementations/multiagent/qmix_vdn.py +271 -0
  75. sota-implementations/multiagent/sac.py +337 -0
  76. sota-implementations/multiagent/utils/__init__.py +4 -0
  77. sota-implementations/multiagent/utils/logging.py +151 -0
  78. sota-implementations/multiagent/utils/utils.py +43 -0
  79. sota-implementations/ppo/README.md +29 -0
  80. sota-implementations/ppo/ppo_atari.py +305 -0
  81. sota-implementations/ppo/ppo_mujoco.py +293 -0
  82. sota-implementations/ppo/utils_atari.py +238 -0
  83. sota-implementations/ppo/utils_mujoco.py +152 -0
  84. sota-implementations/ppo_trainer/train.py +21 -0
  85. sota-implementations/redq/README.md +7 -0
  86. sota-implementations/redq/redq.py +199 -0
  87. sota-implementations/redq/utils.py +1060 -0
  88. sota-implementations/sac/sac-async.py +266 -0
  89. sota-implementations/sac/sac.py +239 -0
  90. sota-implementations/sac/utils.py +381 -0
  91. sota-implementations/sac_trainer/train.py +16 -0
  92. sota-implementations/td3/td3.py +254 -0
  93. sota-implementations/td3/utils.py +319 -0
  94. sota-implementations/td3_bc/td3_bc.py +177 -0
  95. sota-implementations/td3_bc/utils.py +251 -0
  96. torchrl/__init__.py +144 -0
  97. torchrl/_extension.py +74 -0
  98. torchrl/_torchrl.cp314-win_amd64.pyd +0 -0
  99. torchrl/_utils.py +1431 -0
  100. torchrl/collectors/__init__.py +48 -0
  101. torchrl/collectors/_base.py +1058 -0
  102. torchrl/collectors/_constants.py +88 -0
  103. torchrl/collectors/_multi_async.py +324 -0
  104. torchrl/collectors/_multi_base.py +1805 -0
  105. torchrl/collectors/_multi_sync.py +464 -0
  106. torchrl/collectors/_runner.py +581 -0
  107. torchrl/collectors/_single.py +2009 -0
  108. torchrl/collectors/_single_async.py +259 -0
  109. torchrl/collectors/collectors.py +62 -0
  110. torchrl/collectors/distributed/__init__.py +32 -0
  111. torchrl/collectors/distributed/default_configs.py +133 -0
  112. torchrl/collectors/distributed/generic.py +1306 -0
  113. torchrl/collectors/distributed/ray.py +1092 -0
  114. torchrl/collectors/distributed/rpc.py +1006 -0
  115. torchrl/collectors/distributed/sync.py +731 -0
  116. torchrl/collectors/distributed/utils.py +160 -0
  117. torchrl/collectors/llm/__init__.py +10 -0
  118. torchrl/collectors/llm/base.py +494 -0
  119. torchrl/collectors/llm/ray_collector.py +275 -0
  120. torchrl/collectors/llm/utils.py +36 -0
  121. torchrl/collectors/llm/weight_update/__init__.py +10 -0
  122. torchrl/collectors/llm/weight_update/vllm.py +348 -0
  123. torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
  124. torchrl/collectors/utils.py +433 -0
  125. torchrl/collectors/weight_update.py +591 -0
  126. torchrl/csrc/numpy_utils.h +38 -0
  127. torchrl/csrc/pybind.cpp +27 -0
  128. torchrl/csrc/segment_tree.h +458 -0
  129. torchrl/csrc/torch_utils.h +34 -0
  130. torchrl/csrc/utils.cpp +48 -0
  131. torchrl/csrc/utils.h +31 -0
  132. torchrl/data/__init__.py +187 -0
  133. torchrl/data/datasets/__init__.py +58 -0
  134. torchrl/data/datasets/atari_dqn.py +878 -0
  135. torchrl/data/datasets/common.py +281 -0
  136. torchrl/data/datasets/d4rl.py +489 -0
  137. torchrl/data/datasets/d4rl_infos.py +187 -0
  138. torchrl/data/datasets/gen_dgrl.py +375 -0
  139. torchrl/data/datasets/minari_data.py +643 -0
  140. torchrl/data/datasets/openml.py +177 -0
  141. torchrl/data/datasets/openx.py +798 -0
  142. torchrl/data/datasets/roboset.py +363 -0
  143. torchrl/data/datasets/utils.py +11 -0
  144. torchrl/data/datasets/vd4rl.py +432 -0
  145. torchrl/data/llm/__init__.py +34 -0
  146. torchrl/data/llm/dataset.py +491 -0
  147. torchrl/data/llm/history.py +1378 -0
  148. torchrl/data/llm/prompt.py +198 -0
  149. torchrl/data/llm/reward.py +225 -0
  150. torchrl/data/llm/topk.py +186 -0
  151. torchrl/data/llm/utils.py +543 -0
  152. torchrl/data/map/__init__.py +21 -0
  153. torchrl/data/map/hash.py +185 -0
  154. torchrl/data/map/query.py +204 -0
  155. torchrl/data/map/tdstorage.py +363 -0
  156. torchrl/data/map/tree.py +1434 -0
  157. torchrl/data/map/utils.py +103 -0
  158. torchrl/data/postprocs/__init__.py +8 -0
  159. torchrl/data/postprocs/postprocs.py +391 -0
  160. torchrl/data/replay_buffers/__init__.py +99 -0
  161. torchrl/data/replay_buffers/checkpointers.py +622 -0
  162. torchrl/data/replay_buffers/ray_buffer.py +292 -0
  163. torchrl/data/replay_buffers/replay_buffers.py +2376 -0
  164. torchrl/data/replay_buffers/samplers.py +2578 -0
  165. torchrl/data/replay_buffers/scheduler.py +265 -0
  166. torchrl/data/replay_buffers/storages.py +2412 -0
  167. torchrl/data/replay_buffers/utils.py +1042 -0
  168. torchrl/data/replay_buffers/writers.py +781 -0
  169. torchrl/data/tensor_specs.py +7101 -0
  170. torchrl/data/utils.py +334 -0
  171. torchrl/envs/__init__.py +265 -0
  172. torchrl/envs/async_envs.py +1105 -0
  173. torchrl/envs/batched_envs.py +3093 -0
  174. torchrl/envs/common.py +4241 -0
  175. torchrl/envs/custom/__init__.py +11 -0
  176. torchrl/envs/custom/chess.py +617 -0
  177. torchrl/envs/custom/llm.py +214 -0
  178. torchrl/envs/custom/pendulum.py +401 -0
  179. torchrl/envs/custom/san_moves.txt +29274 -0
  180. torchrl/envs/custom/tictactoeenv.py +288 -0
  181. torchrl/envs/env_creator.py +263 -0
  182. torchrl/envs/gym_like.py +752 -0
  183. torchrl/envs/libs/__init__.py +68 -0
  184. torchrl/envs/libs/_gym_utils.py +326 -0
  185. torchrl/envs/libs/brax.py +846 -0
  186. torchrl/envs/libs/dm_control.py +544 -0
  187. torchrl/envs/libs/envpool.py +447 -0
  188. torchrl/envs/libs/gym.py +2239 -0
  189. torchrl/envs/libs/habitat.py +138 -0
  190. torchrl/envs/libs/isaac_lab.py +87 -0
  191. torchrl/envs/libs/isaacgym.py +203 -0
  192. torchrl/envs/libs/jax_utils.py +166 -0
  193. torchrl/envs/libs/jumanji.py +963 -0
  194. torchrl/envs/libs/meltingpot.py +599 -0
  195. torchrl/envs/libs/openml.py +153 -0
  196. torchrl/envs/libs/openspiel.py +652 -0
  197. torchrl/envs/libs/pettingzoo.py +1042 -0
  198. torchrl/envs/libs/procgen.py +351 -0
  199. torchrl/envs/libs/robohive.py +429 -0
  200. torchrl/envs/libs/smacv2.py +645 -0
  201. torchrl/envs/libs/unity_mlagents.py +891 -0
  202. torchrl/envs/libs/utils.py +147 -0
  203. torchrl/envs/libs/vmas.py +813 -0
  204. torchrl/envs/llm/__init__.py +63 -0
  205. torchrl/envs/llm/chat.py +730 -0
  206. torchrl/envs/llm/datasets/README.md +4 -0
  207. torchrl/envs/llm/datasets/__init__.py +17 -0
  208. torchrl/envs/llm/datasets/gsm8k.py +353 -0
  209. torchrl/envs/llm/datasets/ifeval.py +274 -0
  210. torchrl/envs/llm/envs.py +789 -0
  211. torchrl/envs/llm/libs/README.md +3 -0
  212. torchrl/envs/llm/libs/__init__.py +8 -0
  213. torchrl/envs/llm/libs/mlgym.py +869 -0
  214. torchrl/envs/llm/reward/__init__.py +10 -0
  215. torchrl/envs/llm/reward/gsm8k.py +324 -0
  216. torchrl/envs/llm/reward/ifeval/README.md +13 -0
  217. torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
  218. torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
  219. torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
  220. torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
  221. torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
  222. torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
  223. torchrl/envs/llm/transforms/__init__.py +55 -0
  224. torchrl/envs/llm/transforms/browser.py +292 -0
  225. torchrl/envs/llm/transforms/dataloading.py +859 -0
  226. torchrl/envs/llm/transforms/format.py +73 -0
  227. torchrl/envs/llm/transforms/kl.py +1544 -0
  228. torchrl/envs/llm/transforms/policy_version.py +189 -0
  229. torchrl/envs/llm/transforms/reason.py +323 -0
  230. torchrl/envs/llm/transforms/tokenizer.py +321 -0
  231. torchrl/envs/llm/transforms/tools.py +1955 -0
  232. torchrl/envs/model_based/__init__.py +9 -0
  233. torchrl/envs/model_based/common.py +180 -0
  234. torchrl/envs/model_based/dreamer.py +112 -0
  235. torchrl/envs/transforms/__init__.py +147 -0
  236. torchrl/envs/transforms/functional.py +48 -0
  237. torchrl/envs/transforms/gym_transforms.py +203 -0
  238. torchrl/envs/transforms/module.py +341 -0
  239. torchrl/envs/transforms/r3m.py +372 -0
  240. torchrl/envs/transforms/ray_service.py +663 -0
  241. torchrl/envs/transforms/rb_transforms.py +214 -0
  242. torchrl/envs/transforms/transforms.py +11835 -0
  243. torchrl/envs/transforms/utils.py +94 -0
  244. torchrl/envs/transforms/vc1.py +307 -0
  245. torchrl/envs/transforms/vecnorm.py +845 -0
  246. torchrl/envs/transforms/vip.py +407 -0
  247. torchrl/envs/utils.py +1718 -0
  248. torchrl/envs/vec_envs.py +11 -0
  249. torchrl/modules/__init__.py +206 -0
  250. torchrl/modules/distributions/__init__.py +73 -0
  251. torchrl/modules/distributions/continuous.py +830 -0
  252. torchrl/modules/distributions/discrete.py +908 -0
  253. torchrl/modules/distributions/truncated_normal.py +187 -0
  254. torchrl/modules/distributions/utils.py +233 -0
  255. torchrl/modules/llm/__init__.py +62 -0
  256. torchrl/modules/llm/backends/__init__.py +65 -0
  257. torchrl/modules/llm/backends/vllm/__init__.py +94 -0
  258. torchrl/modules/llm/backends/vllm/_models.py +46 -0
  259. torchrl/modules/llm/backends/vllm/base.py +72 -0
  260. torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
  261. torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
  262. torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
  263. torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
  264. torchrl/modules/llm/policies/__init__.py +28 -0
  265. torchrl/modules/llm/policies/common.py +1809 -0
  266. torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
  267. torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
  268. torchrl/modules/llm/utils.py +23 -0
  269. torchrl/modules/mcts/__init__.py +21 -0
  270. torchrl/modules/mcts/scores.py +579 -0
  271. torchrl/modules/models/__init__.py +86 -0
  272. torchrl/modules/models/batchrenorm.py +119 -0
  273. torchrl/modules/models/decision_transformer.py +179 -0
  274. torchrl/modules/models/exploration.py +731 -0
  275. torchrl/modules/models/llm.py +156 -0
  276. torchrl/modules/models/model_based.py +596 -0
  277. torchrl/modules/models/models.py +1712 -0
  278. torchrl/modules/models/multiagent.py +1067 -0
  279. torchrl/modules/models/recipes/impala.py +185 -0
  280. torchrl/modules/models/utils.py +162 -0
  281. torchrl/modules/planners/__init__.py +10 -0
  282. torchrl/modules/planners/cem.py +228 -0
  283. torchrl/modules/planners/common.py +73 -0
  284. torchrl/modules/planners/mppi.py +265 -0
  285. torchrl/modules/tensordict_module/__init__.py +89 -0
  286. torchrl/modules/tensordict_module/actors.py +2457 -0
  287. torchrl/modules/tensordict_module/common.py +529 -0
  288. torchrl/modules/tensordict_module/exploration.py +814 -0
  289. torchrl/modules/tensordict_module/probabilistic.py +321 -0
  290. torchrl/modules/tensordict_module/rnn.py +1639 -0
  291. torchrl/modules/tensordict_module/sequence.py +132 -0
  292. torchrl/modules/tensordict_module/world_models.py +34 -0
  293. torchrl/modules/utils/__init__.py +38 -0
  294. torchrl/modules/utils/mappings.py +9 -0
  295. torchrl/modules/utils/utils.py +89 -0
  296. torchrl/objectives/__init__.py +78 -0
  297. torchrl/objectives/a2c.py +659 -0
  298. torchrl/objectives/common.py +753 -0
  299. torchrl/objectives/cql.py +1346 -0
  300. torchrl/objectives/crossq.py +710 -0
  301. torchrl/objectives/ddpg.py +453 -0
  302. torchrl/objectives/decision_transformer.py +371 -0
  303. torchrl/objectives/deprecated.py +516 -0
  304. torchrl/objectives/dqn.py +683 -0
  305. torchrl/objectives/dreamer.py +488 -0
  306. torchrl/objectives/functional.py +48 -0
  307. torchrl/objectives/gail.py +258 -0
  308. torchrl/objectives/iql.py +996 -0
  309. torchrl/objectives/llm/__init__.py +30 -0
  310. torchrl/objectives/llm/grpo.py +846 -0
  311. torchrl/objectives/llm/sft.py +482 -0
  312. torchrl/objectives/multiagent/__init__.py +8 -0
  313. torchrl/objectives/multiagent/qmixer.py +396 -0
  314. torchrl/objectives/ppo.py +1669 -0
  315. torchrl/objectives/redq.py +683 -0
  316. torchrl/objectives/reinforce.py +530 -0
  317. torchrl/objectives/sac.py +1580 -0
  318. torchrl/objectives/td3.py +570 -0
  319. torchrl/objectives/td3_bc.py +625 -0
  320. torchrl/objectives/utils.py +782 -0
  321. torchrl/objectives/value/__init__.py +28 -0
  322. torchrl/objectives/value/advantages.py +1956 -0
  323. torchrl/objectives/value/functional.py +1459 -0
  324. torchrl/objectives/value/utils.py +360 -0
  325. torchrl/record/__init__.py +17 -0
  326. torchrl/record/loggers/__init__.py +23 -0
  327. torchrl/record/loggers/common.py +48 -0
  328. torchrl/record/loggers/csv.py +226 -0
  329. torchrl/record/loggers/mlflow.py +142 -0
  330. torchrl/record/loggers/tensorboard.py +139 -0
  331. torchrl/record/loggers/trackio.py +163 -0
  332. torchrl/record/loggers/utils.py +78 -0
  333. torchrl/record/loggers/wandb.py +214 -0
  334. torchrl/record/recorder.py +554 -0
  335. torchrl/services/__init__.py +79 -0
  336. torchrl/services/base.py +109 -0
  337. torchrl/services/ray_service.py +453 -0
  338. torchrl/testing/__init__.py +107 -0
  339. torchrl/testing/assertions.py +179 -0
  340. torchrl/testing/dist_utils.py +122 -0
  341. torchrl/testing/env_creators.py +227 -0
  342. torchrl/testing/env_helper.py +35 -0
  343. torchrl/testing/gym_helpers.py +156 -0
  344. torchrl/testing/llm_mocks.py +119 -0
  345. torchrl/testing/mocking_classes.py +2720 -0
  346. torchrl/testing/modules.py +295 -0
  347. torchrl/testing/mp_helpers.py +15 -0
  348. torchrl/testing/ray_helpers.py +293 -0
  349. torchrl/testing/utils.py +190 -0
  350. torchrl/trainers/__init__.py +42 -0
  351. torchrl/trainers/algorithms/__init__.py +11 -0
  352. torchrl/trainers/algorithms/configs/__init__.py +705 -0
  353. torchrl/trainers/algorithms/configs/collectors.py +216 -0
  354. torchrl/trainers/algorithms/configs/common.py +41 -0
  355. torchrl/trainers/algorithms/configs/data.py +308 -0
  356. torchrl/trainers/algorithms/configs/envs.py +104 -0
  357. torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
  358. torchrl/trainers/algorithms/configs/logging.py +80 -0
  359. torchrl/trainers/algorithms/configs/modules.py +570 -0
  360. torchrl/trainers/algorithms/configs/objectives.py +177 -0
  361. torchrl/trainers/algorithms/configs/trainers.py +340 -0
  362. torchrl/trainers/algorithms/configs/transforms.py +955 -0
  363. torchrl/trainers/algorithms/configs/utils.py +252 -0
  364. torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
  365. torchrl/trainers/algorithms/configs/weight_update.py +159 -0
  366. torchrl/trainers/algorithms/ppo.py +373 -0
  367. torchrl/trainers/algorithms/sac.py +308 -0
  368. torchrl/trainers/helpers/__init__.py +40 -0
  369. torchrl/trainers/helpers/collectors.py +416 -0
  370. torchrl/trainers/helpers/envs.py +573 -0
  371. torchrl/trainers/helpers/logger.py +33 -0
  372. torchrl/trainers/helpers/losses.py +132 -0
  373. torchrl/trainers/helpers/models.py +658 -0
  374. torchrl/trainers/helpers/replay_buffer.py +59 -0
  375. torchrl/trainers/helpers/trainers.py +301 -0
  376. torchrl/trainers/trainers.py +2052 -0
  377. torchrl/weight_update/__init__.py +33 -0
  378. torchrl/weight_update/_distributed.py +749 -0
  379. torchrl/weight_update/_mp.py +624 -0
  380. torchrl/weight_update/_noupdate.py +102 -0
  381. torchrl/weight_update/_ray.py +1032 -0
  382. torchrl/weight_update/_rpc.py +284 -0
  383. torchrl/weight_update/_shared.py +891 -0
  384. torchrl/weight_update/llm/__init__.py +32 -0
  385. torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
  386. torchrl/weight_update/llm/vllm_nccl.py +710 -0
  387. torchrl/weight_update/utils.py +73 -0
  388. torchrl/weight_update/weight_sync_schemes.py +1244 -0
  389. torchrl-0.11.0.dist-info/LICENSE +21 -0
  390. torchrl-0.11.0.dist-info/METADATA +1307 -0
  391. torchrl-0.11.0.dist-info/RECORD +394 -0
  392. torchrl-0.11.0.dist-info/WHEEL +5 -0
  393. torchrl-0.11.0.dist-info/entry_points.txt +2 -0
  394. torchrl-0.11.0.dist-info/top_level.txt +7 -0
torchrl/weight_update/_mp.py (new file)
@@ -0,0 +1,624 @@
+ from __future__ import annotations
+
+ from collections.abc import Callable
+ from typing import Any
+
+ import torch
+ from tensordict import TensorDictBase
+ from torch import multiprocessing as mp, nn
+ from torchrl.weight_update._shared import SharedMemWeightSyncScheme
+ from torchrl.weight_update.utils import _resolve_model
+
+ from torchrl.weight_update.weight_sync_schemes import TransportBackend
+
+
+ class MultiProcessWeightSyncScheme(SharedMemWeightSyncScheme):
+     """Weight synchronization for multiprocess operations using queues.
+
+     This scheme creates transports that communicate via multiprocessing queues.
+     Unlike the parent SharedMemWeightSyncScheme which uses shared memory for in-place
+     updates, this scheme sends actual weight copies through queues to workers.
+
+     A background thread on the receiver side listens for "receive" instructions
+     from the sender. When an instruction arrives, the thread receives the weights
+     from the weight queue and applies them to the model.
+
+     It follows the same two-phase pattern as SharedMemWeightSyncScheme:
+
+     1. **init_on_sender()**: Stores the recipe for creating device-specific weights
+        (model reference, devices, mapping functions) without creating actual copies
+     2. **synchronize_weights()**: Creates device-specific weight copies on-demand,
+        sends them sequentially to workers via queues, allowing garbage collection
+        between workers to minimize memory usage
+
+     This approach avoids holding multiple weight copies in memory simultaneously,
+     which is especially beneficial for large models with many workers.
+
+     Synchronization flow:
+     - **init_on_sender()**: Store configuration and register worker queues
+     - **synchronize_weights()**: Create and send initial weights on-demand
+     - **init_on_receiver()**: Create receiver that reads from queue
+     - **send()**: Extract and send weight updates, wait for acknowledgments
+
+     Args:
+         strategy: The weight transmission strategy (default: "tensordict").
+             Can be "tensordict" or "state_dict".
+         sync: If True (default), send() blocks until receiver acknowledges.
+             If False, send() returns immediately (use send_async/wait_async).
+
+     Example:
+         >>> # Basic usage with collector
+         >>> scheme = MultiProcessWeightSyncScheme()
+         >>> collector = MultiSyncCollector(
+         ...     create_env_fn=[lambda: GymEnv("CartPole-v1")] * 3,
+         ...     policy=policy,
+         ...     frames_per_batch=100,
+         ...     total_frames=1000,
+         ...     weight_sync_schemes={"policy": scheme},
+         ... )
+         >>> # scheme.collect() is called automatically by collector
+         >>> # Weights are created on-demand and sent to workers efficiently
+
+     Note:
+         The on-demand weight creation means that synchronize_weights() will be
+         slower than if weights were pre-computed, but memory usage is significantly
+         reduced, especially when workers use different devices or when the model
+         is large.
+     """
+
+     def __init__(self, strategy: str = "tensordict", sync: bool = True):
+         """Initialize the MultiProcessWeightSyncScheme.
+
+         Args:
+             strategy: The weight transmission strategy (default: "tensordict").
+             sync: If True (default), send() blocks until receiver acknowledges.
+         """
+         super().__init__(strategy, sync=sync)
+         # Override parent's shared transport - we don't use shared memory
+         self._shared_transport = None
+
+     def _init_on_sender_impl(
+         self,
+         *,
+         model_id: str | None = None,
+         context: Any = None,
+         weights: TensorDictBase | None = None,
+         model: nn.Module | None = None,
+         params_map: dict[int, TensorDictBase] | None = None,
+         devices: list[torch.device] | None = None,
+         device_map_fn: Callable[[int, TensorDictBase], TensorDictBase] | None = None,
+         num_workers: int | None = None,
+         ctx: Any = None,
+         **kwargs,
+     ) -> None:
+         """Initialize on the main process (sender side).
+
+         This method stores the configuration needed to create device-specific weight
+         copies during synchronization. Weight copies are created on-demand during
+         `synchronize_weights()` to reduce memory usage.
+
+         Similar to `SharedMemWeightSyncScheme`, this follows a two-phase pattern:
+         1. `init_on_sender()`: Store the recipe for creating weights
+         2. `synchronize_weights()`: Create and send weights on-demand
+
+         Args:
+             model_id: Identifier for the model being synchronized (e.g., "policy").
+                 Required when using context.
+             context: Optional context object (e.g., collector) providing:
+                 - num_workers: Number of worker processes
+                 - policy_device: List of devices for each worker
+                 When provided, model_id is used to resolve the model from context.
+             weights: Pre-extracted weights as TensorDict. Mutually exclusive with
+                 model and context. Used when weights are already available.
+             model: Model to extract weights from. Mutually exclusive with weights
+                 and context.
+             params_map: Pre-computed mapping of worker_idx to device-specific weights.
+                 Most explicit option. When provided, all other parameters must be None.
+             devices: List of devices for each worker. Used with weights or model to
+                 automatically create device-specific copies. Length must equal num_workers.
+             device_map_fn: Custom function (worker_idx, weights) -> device_weights.
+                 Allows full control over device mapping. Requires num_workers.
+             num_workers: Number of workers. Required with device_map_fn, inferred
+                 from devices length otherwise.
+             ctx: The multiprocessing context to use. Defaults to `multiprocessing.get_context()`.
+             **kwargs: Reserved for future use.
+
+         Examples:
+             Simple usage with collector context (most common):
+
+             >>> scheme = MultiProcessWeightSyncScheme()
+             >>> collector = MultiSyncCollector(
+             ...     create_env_fn=[lambda: GymEnv("CartPole-v1")] * 3,
+             ...     policy=policy,
+             ...     frames_per_batch=100,
+             ...     weight_sync_schemes={"policy": scheme},
+             ... )
+             >>> # scheme.init_on_sender() is called automatically by collector
+
+             Direct initialization with explicit devices:
+
+             >>> scheme = MultiProcessWeightSyncScheme()
+             >>> weights = TensorDict.from_module(policy)
+             >>> scheme.init_on_sender(
+             ...     weights=weights,
+             ...     devices=[torch.device("cpu"), torch.device("cuda:0")],
+             ...     num_workers=2,
+             ... )
+
+             Advanced: Pre-computed params_map:
+
+             >>> weights_cpu = TensorDict.from_module(policy)
+             >>> weights_cuda = weights_cpu.to("cuda")
+             >>> scheme.init_on_sender(
+             ...     params_map={0: weights_cpu, 1: weights_cuda, 2: weights_cuda},
+             ...     num_workers=3,
+             ... )
+         """
+         # Get params_map from parent class logic
+         params_map_result = self._get_params_map(
+             context=context,
+             model_id=model_id,
+             weights=weights,
+             model=model,
+             params_map=params_map,
+             devices=devices,
+             device_map_fn=device_map_fn,
+             num_workers=num_workers,
+         )
+
+         # Store the mapping recipe for later use in synchronize_weights
+         # Don't store params_map directly to save memory - we'll recompute on demand
+         # Note: We don't store context directly to avoid pickle issues -
+         # it's available via _context_ref
+         self._device_mapping_info = {
+             "model_id": model_id,
+             "weights": weights,
+             "model": model,
+             "params_map": params_map,
+             "devices": devices,
+             "device_map_fn": device_map_fn,
+             "num_workers": num_workers
+             if num_workers is not None
+             else len(params_map_result),
+         }
+
+         # Create per-worker queues for weight distribution
+         # Each worker gets its own queue for receiving weights
+         all_workers = list(params_map_result.keys())
+         if not hasattr(self, "_weight_init_queues"):
+             self._weight_init_queues = {}
+
+         if ctx is None:
+             ctx = mp.get_context()
+         for worker_idx in all_workers:
+             if worker_idx not in self._weight_init_queues:
+                 self._weight_init_queues[worker_idx] = ctx.Queue()
+             # Create instruction queues for background receiver
+             if worker_idx not in self._instruction_queues:
+                 self._instruction_queues[worker_idx] = ctx.Queue()
+             # Create ack queues for synchronous mode
+             if worker_idx not in self._ack_queues:
+                 self._ack_queues[worker_idx] = ctx.Queue()
+
+         # Store model_id and context on scheme
+         self.model_id = model_id
+         if context is not None:
+             self.context = context
+
+         # Register workers with their queues
+         for worker_idx in all_workers:
+             queue = self._weight_init_queues[worker_idx]
+             ack_queue = self._ack_queues[worker_idx]
+             # Create MPTransport for this worker with ack queue
+             transport = MPTransport(weight_queue=queue, ack_queue=ack_queue)
+             self._register_worker_sender(worker_idx=worker_idx, transport=transport)
+
+     def _init_on_receiver_impl(
+         self,
+         *,
+         model_id: str,
+         context: Any = None,
+         **kwargs,
+     ) -> None:
+         """Initialize on worker process (receiver side).
+
+         Args:
+             model_id: Identifier for the model being synchronized
+             context: Optional context object providing worker_idx and model
+             **kwargs: Alternative to context (worker_idx, model, etc.)
+         """
+         # Extract parameters from context or kwargs
+         if context is not None:
+             worker_idx = getattr(context, "worker_idx", None)
+             if hasattr(context, "get_model"):
+                 model = context.get_model(model_id)
+             else:
+                 model = _resolve_model(context, model_id)
+         else:
+             worker_idx = kwargs.get("worker_idx")
+             model = kwargs.get("model")
+
+         if worker_idx is None:
+             raise ValueError("worker_idx must be provided via context or kwargs")
+
+         # Get the queue for this worker
+         if worker_idx not in self._weight_init_queues:
+             raise ValueError(
+                 f"Worker {worker_idx} not registered. init_on_sender() must be called first."
+             )
+
+         queue = self._weight_init_queues[worker_idx]
+         ack_queue = self._ack_queues.get(worker_idx)
+
+         # Store on scheme directly
+         self.model_id = model_id
+         if context is not None:
+             self.context = context
+
+         # Store instruction and ack queue references for this worker
+         if worker_idx in self._instruction_queues:
+             self._receiver_instruction_queue = self._instruction_queues[worker_idx]
+         if worker_idx in self._ack_queues:
+             self._receiver_ack_queue = self._ack_queues[worker_idx]
+
+         # Create transport with the worker's queue and ack queue
+         transport = MPTransport(weight_queue=queue, ack_queue=ack_queue)
+         self._register_transport_receiver(transport=transport)
+
+         if model is not None:
+             self.model = model
+
+         # Store worker_idx for synchronize_weights
+         self.worker_idx = worker_idx
+
+     def send(
+         self,
+         weights: Any = None,
+         worker_ids: int | list[int] | None = None,
+     ) -> None:
+         """Send weights synchronously to workers.
+
+         This method:
+         1. Prepares weights (extracts from model if weights=None)
+         2. Sends weights to the weight queue
+         3. Sends "receive" instruction to workers' background threads
+         4. If sync=True, waits for acknowledgments from those workers
+
+         Args:
+             weights: Weights to send. Can be:
+                 - None: Extract from model via context.get_model(model_id)
+                 - nn.Module: Extract weights from module
+                 - TensorDict: Use directly
+                 - dict: Convert to TensorDict
+             worker_ids: Which workers to send to:
+                 - None: Send to all workers (default)
+                 - int: Send to single worker
+                 - list[int]: Send to specific workers
+
+         Note: If sync=True (default), this is a blocking call that ensures
+         specified workers are updated before returning.
+         """
+         if not self.initialized_on_sender:
+             raise RuntimeError("Must be initialized on sender before sending weights")
+         if not self.synchronized_on_sender:
+             raise RuntimeError("Must be synchronized on sender before sending weights")
+
+         model_id = self.model_id
+         context = self.context
+
+         # Let the scheme prepare the weights
+         prepared_weights = self.prepare_weights(
+             weights=weights,
+             model_id=model_id,
+             strategy=self._strategy,
+             context=context,
+         )
+
+         transports = list(self._iterate_transports(worker_ids))
+
+         # Send weights to all workers first via queue (non-blocking)
+         for transport in transports:
+             if hasattr(transport, "send_weights_async"):
+                 # For MPTransport, pass model_id; other transports don't need it
+                 transport.send_weights_async(prepared_weights, model_id=model_id)
+             else:
+                 # Fallback for transports that don't support async send
+                 transport.send_weights(prepared_weights)
+
+         # Send instruction to workers' background threads to receive the weights
+         self._send_instruction(instruction="receive", worker_ids=worker_ids)
+
+         # Wait for all acknowledgments if in synchronous mode
+         if self.sync:
+             self._wait_for_ack(worker_ids=worker_ids)
+
+     def _setup_connection_and_weights_on_sender_impl(
+         self,
+         *,
+         worker_idx: int | None = None,
+         weights: Any | None = None,
+     ) -> None:
+         """Synchronize weights with workers before collection starts.
+
+         Computes device-specific weight copies on-demand and sends them to workers
+         sequentially via queues. This is called once after workers are initialized
+         but before they start collecting data.
+
+         Unlike send(), this does not wait for acknowledgments since workers are still
+         in their initialization phase.
+
+         This approach creates weight copies on-demand and sends them sequentially,
+         allowing garbage collection between workers to reduce memory usage.
+
+         Raises:
+             RuntimeError: If init_on_sender() was not called first.
+         """
+         # Get the device mapping info stored during init_on_sender
+         if not hasattr(self, "_device_mapping_info"):
+             raise RuntimeError(
+                 "synchronize_weights() requires init_on_sender() to be called first"
+             )
+
+         mapping_info = self._device_mapping_info
+
+         # Get context from weakref
+         context = self.context
+
+         # Compute params_map on-demand
+         # Extract with explicit type casting for type checker
+         model_id = mapping_info["model_id"]
+         weights = mapping_info["weights"]
+         model = mapping_info["model"]
+         params_map_arg = mapping_info["params_map"]
+         devices = mapping_info["devices"]
+         device_map_fn = mapping_info["device_map_fn"]
+         num_workers = mapping_info["num_workers"]
+
+         params_map = self._get_params_map(
+             context=context,
+             model_id=model_id,
+             weights=weights,
+             model=model,
+             params_map=params_map_arg,
+             devices=devices,
+             device_map_fn=device_map_fn,
+             num_workers=num_workers,
+         )
+
+         # Send to workers sequentially via queues (no ACK - workers are still initializing)
+         # This allows GC to clean up each worker's weights before creating the next
+         for i, transport in enumerate(self._iterate_transports()):
+             if worker_idx is not None and i != worker_idx:
+                 continue
+             worker_weights = params_map[i]
+             if hasattr(transport, "send_weights_async"):
+                 transport.send_weights_async(worker_weights, model_id=self._model_id)
+             else:
+                 raise RuntimeError(
+                     f"Transport {type(transport)} does not support async send for synchronization"
+                 )
+
+         # Clean up the mapping info after synchronization
+         delattr(self, "_device_mapping_info")
+
+     def _setup_connection_and_weights_on_receiver_impl(
+         self, *, worker_idx: int | None = None
+     ) -> None:
+         """Receive initial weights and start background receiver thread.
+
+         This method:
+         1. Receives initial weights from the sender via queue
+         2. Applies them to the model
+         3. Starts a background thread that listens for "receive" instructions
+
+         Args:
+             worker_idx: The worker index.
+         """
+         # Use stored worker_idx if not provided
+         if worker_idx is None:
+             worker_idx = self._worker_idx
+
+         if worker_idx is None:
+             raise RuntimeError(
+                 "worker_idx must be provided for _setup_connection_and_weights_on_receiver_impl."
+             )
+
+         # Receive initial weights from queue via transport
+         if self._receiver_transport is None:
+             raise RuntimeError("Receiver transport not set.")
+
+         weights = self._receiver_transport.setup_connection_and_weights_on_receiver(
+             worker_idx=worker_idx,
+             weights=self.weights,
+             model=self.model,
+             strategy=self._strategy,
+         )
+
+         # Store received weights for later use
+         if weights is not None:
+             self._receiver_weights = weights
+
+         # Apply weights to model
+         if weights is not None and self.model is not None:
+             self._strategy.apply_weights(self.model, weights, inplace=False)
+
+         # Start background receiver thread
+         self._start_background_receiver()
+
+     def _background_receive_loop(self):
+         """Background thread loop that waits for instructions and receives weights.
+
+         This loop:
+         1. Waits for a "receive" instruction from the sender
+         2. Receives weights from the weight queue
+         3. Applies them to the model
+         4. Sends an acknowledgment back to the sender
+         5. Repeats until stop event is set or "stop" instruction received
+         """
+         from torchrl._utils import logger as torchrl_logger
+
+         while not self._stop_event.is_set():
+             try:
+                 instruction = self._wait_for_instruction()
+                 if instruction is None:
+                     # Stop event was set or timeout
+                     continue
+                 if instruction == "receive":
+                     # Receive weights from transport (blocking)
+                     if self._receiver_transport is not None:
+                         weights = self._receiver_transport.receive_weights(
+                             model=self.model,
+                             strategy=self._strategy,
+                         )
+
+                         if weights is not None:
+                             # Cascade weight update to sub-collectors if context supports it
+                             model_id = self._model_id or "policy"
+                             if self.context is not None and hasattr(
+                                 self.context, "update_policy_weights_"
+                             ):
+                                 self.context.update_policy_weights_(
+                                     model_id=model_id, policy_or_weights=weights
+                                 )
+
+                     # Send acknowledgment
+                     self._send_ack("updated")
+
+                 elif instruction == "stop":
+                     break
+                 else:
+                     torchrl_logger.warning(
+                         f"MultiProcessWeightSyncScheme: Unknown instruction: {instruction}"
+                     )
+             except Exception as e:
+                 if not self._stop_event.is_set():
+                     torchrl_logger.warning(
+                         f"MultiProcessWeightSyncScheme: Background receiver error: {e}"
+                     )
+
+     def create_transport(self, **kwargs) -> TransportBackend:
+         """Create an MPTransport using the provided queue.
+
+         Note:
+             This is used internally by init_on_sender/init_on_receiver.
+         """
+         queue = kwargs.get("queue")
+         return MPTransport(weight_queue=queue, ack_queue=None)
+
+
+ class MPTransport:
+     """Multiprocessing transport using queues.
+
+     This transport uses queues for weight distribution and synchronization.
+     Similar to SharedMemTransport's queue-based approach, MPTransport uses
+     queues to send initial weights to workers during synchronization.
+
+     Initialization flow:
+     - synchronize_weights() extracts weights and sends to all workers via queues
+     - Workers receive the initial weights via setup_connection_and_weights_on_receiver()
+     - Subsequent updates use send_weights_async() followed by acknowledgments
+
+     Args:
+         weight_queue (mp.Queue): The queue to use for sending weights.
+         ack_queue (mp.Queue): The queue to use for receiving acknowledgments.
+         timeout (float): The timeout for waiting for acknowledgment. Default is 10 seconds.
+     """
+
+     def __init__(self, weight_queue, ack_queue=None, timeout: float = 10.0):
+         self.timeout = timeout
+         self.weight_queue = weight_queue
+         self.ack_queue = ack_queue
+
+     def send_weights_async(self, weights: Any, model_id: str = "policy") -> None:
+         """Send weights through the queue without waiting for acknowledgment.
+
+         Use wait_ack() to wait for acknowledgment after sending to all workers.
+         """
+         # Send in format expected by worker loop: ((model_id, weights), "update_weights")
+         self.weight_queue.put(((model_id, weights), "update_weights"))
+
+     def receive_weights(
+         self,
+         timeout: float | None = None,
+         *,
+         weights: Any = None,
+         model: Any = None,
+         strategy: Any = None,
+     ) -> Any | None:
+         """Receive weights from the queue (used in worker process).
+
+         This method only handles weight update messages. Other messages
+         (like "close", "continue", etc.) are ignored and should be handled
+         by the main worker loop.
+
+         Args:
+             timeout: Maximum time to wait for weights (seconds).
+                 None means use the transport's default timeout.
+             weights: Ignored (weights come from queue).
+             model: The model to apply weights to.
+             strategy: Strategy for applying weights to the model.
+
+         Returns:
+             The received weights, or None if no data available.
+         """
+         # Use transport's default timeout if not specified
+         if timeout is None:
+             timeout = self.timeout
+         data_in, msg = self.weight_queue.get(timeout=timeout)
+         if msg == "update_weights":
+             # data_in is (model_id, weights) - we ignore model_id, scheme knows it
+             _model_id, received_weights = data_in
+
+             # Apply weights to model if provided
+             if model is not None and strategy is not None:
+                 strategy.apply_weights(model, received_weights)
+
+             return received_weights
+         else:
+             raise ValueError(f"Expected 'update_weights' but got {msg}")
+
+     def setup_connection_and_weights_on_sender(self) -> None:
+         """No-op for MPTransport - weights are sent via scheme's synchronize_weights().
+
+         The actual sending happens in MultiProcessWeightSyncScheme._setup_connection_and_weights_on_sender_impl(), which:
+         1. Extracts weights from the context (e.g., collector.policy)
+         2. Calls send_weights_async() on all worker transports
+         3. Sends initial weights through queues to all workers
+
+         This is similar to SharedMemTransport.setup_connection_and_weights_on_sender() which
+         sends shared memory buffer references via queues.
+         """
+
+     def setup_connection_and_weights_on_receiver(
+         self,
+         *,
+         worker_idx: int,
+         weights: Any = None,
+         model: Any = None,
+         strategy: Any = None,
+     ) -> Any:
+         """Receive initial weights from sender during worker initialization.
+
+         This method blocks waiting for the initial weights to be sent from the main process
+         via queue. Similar to SharedMemTransport.setup_connection_and_weights_on_receiver() which receives
+         shared memory buffer references via queues, this receives the actual weights via queues.
+
+         The received weights are then applied to the worker's model by the scheme's synchronize_weights().
+
+         Args:
+             worker_idx: The worker index (used for logging/debugging).
+             weights: Ignored (weights come from queue).
+             model: Ignored.
+             strategy: Ignored.
+
+         Returns:
+             The received weights if available, None otherwise (weights will come later via receive()).
+         """
+         # Wait for initial weights (blocking)
+         data_in, msg = self.weight_queue.get(timeout=self.timeout)
+         if msg == "update_weights":
+             # data_in is (model_id, weights), extract just the weights
+             _, received_weights = data_in
+             return received_weights
+         else:
+             raise ValueError(f"Expected 'update_weights' but got {msg}")
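The snippet below is a minimal, single-process sketch of the queue protocol MPTransport implements: send_weights_async() enqueues ((model_id, weights), "update_weights") and receive_weights() pops and unpacks that message. It assumes MPTransport is importable from torchrl.weight_update._mp (the module added in this diff); in real use the scheme creates the queues, the send side runs in the trainer process, and the receive side runs in a worker's background thread.

from tensordict import TensorDict
from torch import multiprocessing as mp, nn

# Assumed import path: the module shown in this diff.
from torchrl.weight_update._mp import MPTransport

if __name__ == "__main__":
    model = nn.Linear(4, 2)
    weights = TensorDict.from_module(model)

    ctx = mp.get_context()
    transport = MPTransport(weight_queue=ctx.Queue(), ack_queue=ctx.Queue())

    # Sender side: enqueue a weight copy tagged with the model id.
    transport.send_weights_async(weights, model_id="policy")

    # Receiver side: pop the ((model_id, weights), "update_weights") message.
    # Without `model`/`strategy`, the weights are returned as-is.
    received = transport.receive_weights(timeout=5.0)
    print(received["weight"].shape)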
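For the default strategy="tensordict", "applying weights" on the receiver amounts to writing a TensorDict of parameters back into the destination module. The strategy object itself is defined elsewhere in torchrl and is not part of this file, so the sketch below only mirrors the effect of strategy.apply_weights(model, received_weights) using plain tensordict calls.

import torch
from tensordict import TensorDict
from torch import nn

trainer_model = nn.Linear(4, 2)  # the copy being trained on the sender side
worker_model = nn.Linear(4, 2)   # the stale copy living in a worker

# Extract the sender's parameters as a TensorDict (the "tensordict" strategy).
weights = TensorDict.from_module(trainer_model)

# Writing them back into the worker module has the same effect as the
# receiver's strategy.apply_weights(model, received_weights) call.
weights.to_module(worker_model)

assert torch.equal(worker_model.weight, trainer_model.weight)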