PyPI - torchrl - Versions diffs - 0.11.0__cp314-cp314-win_amd64.whl - Mend

torchrl 0.11.0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (394) hide show

benchmarks/benchmark_batched_envs.py +104 -0
benchmarks/conftest.py +91 -0
benchmarks/ecosystem/gym_env_throughput.py +321 -0
benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
benchmarks/requirements.txt +7 -0
benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
benchmarks/test_collectors_benchmark.py +240 -0
benchmarks/test_compressed_storage_benchmark.py +145 -0
benchmarks/test_envs_benchmark.py +133 -0
benchmarks/test_llm.py +101 -0
benchmarks/test_non_tensor_env_benchmark.py +70 -0
benchmarks/test_objectives_benchmarks.py +1199 -0
benchmarks/test_replaybuffer_benchmark.py +254 -0
sota-check/README.md +35 -0
sota-implementations/README.md +142 -0
sota-implementations/a2c/README.md +39 -0
sota-implementations/a2c/a2c_atari.py +291 -0
sota-implementations/a2c/a2c_mujoco.py +273 -0
sota-implementations/a2c/utils_atari.py +240 -0
sota-implementations/a2c/utils_mujoco.py +160 -0
sota-implementations/bandits/README.md +7 -0
sota-implementations/bandits/dqn.py +126 -0
sota-implementations/cql/cql_offline.py +198 -0
sota-implementations/cql/cql_online.py +249 -0
sota-implementations/cql/discrete_cql_offline.py +180 -0
sota-implementations/cql/discrete_cql_online.py +227 -0
sota-implementations/cql/utils.py +471 -0
sota-implementations/crossq/crossq.py +271 -0
sota-implementations/crossq/utils.py +320 -0
sota-implementations/ddpg/ddpg.py +231 -0
sota-implementations/ddpg/utils.py +325 -0
sota-implementations/decision_transformer/dt.py +163 -0
sota-implementations/decision_transformer/lamb.py +167 -0
sota-implementations/decision_transformer/online_dt.py +178 -0
sota-implementations/decision_transformer/utils.py +562 -0
sota-implementations/discrete_sac/discrete_sac.py +243 -0
sota-implementations/discrete_sac/utils.py +324 -0
sota-implementations/dqn/README.md +30 -0
sota-implementations/dqn/dqn_atari.py +272 -0
sota-implementations/dqn/dqn_cartpole.py +236 -0
sota-implementations/dqn/utils_atari.py +132 -0
sota-implementations/dqn/utils_cartpole.py +90 -0
sota-implementations/dreamer/README.md +129 -0
sota-implementations/dreamer/dreamer.py +586 -0
sota-implementations/dreamer/dreamer_utils.py +1107 -0
sota-implementations/expert-iteration/README.md +352 -0
sota-implementations/expert-iteration/ei_utils.py +770 -0
sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
sota-implementations/gail/gail.py +327 -0
sota-implementations/gail/gail_utils.py +68 -0
sota-implementations/gail/ppo_utils.py +157 -0
sota-implementations/grpo/README.md +273 -0
sota-implementations/grpo/grpo-async.py +437 -0
sota-implementations/grpo/grpo-sync.py +435 -0
sota-implementations/grpo/grpo_utils.py +843 -0
sota-implementations/grpo/requirements_gsm8k.txt +11 -0
sota-implementations/grpo/requirements_ifeval.txt +16 -0
sota-implementations/impala/README.md +33 -0
sota-implementations/impala/impala_multi_node_ray.py +292 -0
sota-implementations/impala/impala_multi_node_submitit.py +284 -0
sota-implementations/impala/impala_single_node.py +261 -0
sota-implementations/impala/utils.py +184 -0
sota-implementations/iql/discrete_iql.py +230 -0
sota-implementations/iql/iql_offline.py +164 -0
sota-implementations/iql/iql_online.py +225 -0
sota-implementations/iql/utils.py +437 -0
sota-implementations/multiagent/README.md +74 -0
sota-implementations/multiagent/iql.py +237 -0
sota-implementations/multiagent/maddpg_iddpg.py +266 -0
sota-implementations/multiagent/mappo_ippo.py +267 -0
sota-implementations/multiagent/qmix_vdn.py +271 -0
sota-implementations/multiagent/sac.py +337 -0
sota-implementations/multiagent/utils/__init__.py +4 -0
sota-implementations/multiagent/utils/logging.py +151 -0
sota-implementations/multiagent/utils/utils.py +43 -0
sota-implementations/ppo/README.md +29 -0
sota-implementations/ppo/ppo_atari.py +305 -0
sota-implementations/ppo/ppo_mujoco.py +293 -0
sota-implementations/ppo/utils_atari.py +238 -0
sota-implementations/ppo/utils_mujoco.py +152 -0
sota-implementations/ppo_trainer/train.py +21 -0
sota-implementations/redq/README.md +7 -0
sota-implementations/redq/redq.py +199 -0
sota-implementations/redq/utils.py +1060 -0
sota-implementations/sac/sac-async.py +266 -0
sota-implementations/sac/sac.py +239 -0
sota-implementations/sac/utils.py +381 -0
sota-implementations/sac_trainer/train.py +16 -0
sota-implementations/td3/td3.py +254 -0
sota-implementations/td3/utils.py +319 -0
sota-implementations/td3_bc/td3_bc.py +177 -0
sota-implementations/td3_bc/utils.py +251 -0
torchrl/__init__.py +144 -0
torchrl/_extension.py +74 -0
torchrl/_torchrl.cp314-win_amd64.pyd +0 -0
torchrl/_utils.py +1431 -0
torchrl/collectors/__init__.py +48 -0
torchrl/collectors/_base.py +1058 -0
torchrl/collectors/_constants.py +88 -0
torchrl/collectors/_multi_async.py +324 -0
torchrl/collectors/_multi_base.py +1805 -0
torchrl/collectors/_multi_sync.py +464 -0
torchrl/collectors/_runner.py +581 -0
torchrl/collectors/_single.py +2009 -0
torchrl/collectors/_single_async.py +259 -0
torchrl/collectors/collectors.py +62 -0
torchrl/collectors/distributed/__init__.py +32 -0
torchrl/collectors/distributed/default_configs.py +133 -0
torchrl/collectors/distributed/generic.py +1306 -0
torchrl/collectors/distributed/ray.py +1092 -0
torchrl/collectors/distributed/rpc.py +1006 -0
torchrl/collectors/distributed/sync.py +731 -0
torchrl/collectors/distributed/utils.py +160 -0
torchrl/collectors/llm/__init__.py +10 -0
torchrl/collectors/llm/base.py +494 -0
torchrl/collectors/llm/ray_collector.py +275 -0
torchrl/collectors/llm/utils.py +36 -0
torchrl/collectors/llm/weight_update/__init__.py +10 -0
torchrl/collectors/llm/weight_update/vllm.py +348 -0
torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
torchrl/collectors/utils.py +433 -0
torchrl/collectors/weight_update.py +591 -0
torchrl/csrc/numpy_utils.h +38 -0
torchrl/csrc/pybind.cpp +27 -0
torchrl/csrc/segment_tree.h +458 -0
torchrl/csrc/torch_utils.h +34 -0
torchrl/csrc/utils.cpp +48 -0
torchrl/csrc/utils.h +31 -0
torchrl/data/__init__.py +187 -0
torchrl/data/datasets/__init__.py +58 -0
torchrl/data/datasets/atari_dqn.py +878 -0
torchrl/data/datasets/common.py +281 -0
torchrl/data/datasets/d4rl.py +489 -0
torchrl/data/datasets/d4rl_infos.py +187 -0
torchrl/data/datasets/gen_dgrl.py +375 -0
torchrl/data/datasets/minari_data.py +643 -0
torchrl/data/datasets/openml.py +177 -0
torchrl/data/datasets/openx.py +798 -0
torchrl/data/datasets/roboset.py +363 -0
torchrl/data/datasets/utils.py +11 -0
torchrl/data/datasets/vd4rl.py +432 -0
torchrl/data/llm/__init__.py +34 -0
torchrl/data/llm/dataset.py +491 -0
torchrl/data/llm/history.py +1378 -0
torchrl/data/llm/prompt.py +198 -0
torchrl/data/llm/reward.py +225 -0
torchrl/data/llm/topk.py +186 -0
torchrl/data/llm/utils.py +543 -0
torchrl/data/map/__init__.py +21 -0
torchrl/data/map/hash.py +185 -0
torchrl/data/map/query.py +204 -0
torchrl/data/map/tdstorage.py +363 -0
torchrl/data/map/tree.py +1434 -0
torchrl/data/map/utils.py +103 -0
torchrl/data/postprocs/__init__.py +8 -0
torchrl/data/postprocs/postprocs.py +391 -0
torchrl/data/replay_buffers/__init__.py +99 -0
torchrl/data/replay_buffers/checkpointers.py +622 -0
torchrl/data/replay_buffers/ray_buffer.py +292 -0
torchrl/data/replay_buffers/replay_buffers.py +2376 -0
torchrl/data/replay_buffers/samplers.py +2578 -0
torchrl/data/replay_buffers/scheduler.py +265 -0
torchrl/data/replay_buffers/storages.py +2412 -0
torchrl/data/replay_buffers/utils.py +1042 -0
torchrl/data/replay_buffers/writers.py +781 -0
torchrl/data/tensor_specs.py +7101 -0
torchrl/data/utils.py +334 -0
torchrl/envs/__init__.py +265 -0
torchrl/envs/async_envs.py +1105 -0
torchrl/envs/batched_envs.py +3093 -0
torchrl/envs/common.py +4241 -0
torchrl/envs/custom/__init__.py +11 -0
torchrl/envs/custom/chess.py +617 -0
torchrl/envs/custom/llm.py +214 -0
torchrl/envs/custom/pendulum.py +401 -0
torchrl/envs/custom/san_moves.txt +29274 -0
torchrl/envs/custom/tictactoeenv.py +288 -0
torchrl/envs/env_creator.py +263 -0
torchrl/envs/gym_like.py +752 -0
torchrl/envs/libs/__init__.py +68 -0
torchrl/envs/libs/_gym_utils.py +326 -0
torchrl/envs/libs/brax.py +846 -0
torchrl/envs/libs/dm_control.py +544 -0
torchrl/envs/libs/envpool.py +447 -0
torchrl/envs/libs/gym.py +2239 -0
torchrl/envs/libs/habitat.py +138 -0
torchrl/envs/libs/isaac_lab.py +87 -0
torchrl/envs/libs/isaacgym.py +203 -0
torchrl/envs/libs/jax_utils.py +166 -0
torchrl/envs/libs/jumanji.py +963 -0
torchrl/envs/libs/meltingpot.py +599 -0
torchrl/envs/libs/openml.py +153 -0
torchrl/envs/libs/openspiel.py +652 -0
torchrl/envs/libs/pettingzoo.py +1042 -0
torchrl/envs/libs/procgen.py +351 -0
torchrl/envs/libs/robohive.py +429 -0
torchrl/envs/libs/smacv2.py +645 -0
torchrl/envs/libs/unity_mlagents.py +891 -0
torchrl/envs/libs/utils.py +147 -0
torchrl/envs/libs/vmas.py +813 -0
torchrl/envs/llm/__init__.py +63 -0
torchrl/envs/llm/chat.py +730 -0
torchrl/envs/llm/datasets/README.md +4 -0
torchrl/envs/llm/datasets/__init__.py +17 -0
torchrl/envs/llm/datasets/gsm8k.py +353 -0
torchrl/envs/llm/datasets/ifeval.py +274 -0
torchrl/envs/llm/envs.py +789 -0
torchrl/envs/llm/libs/README.md +3 -0
torchrl/envs/llm/libs/__init__.py +8 -0
torchrl/envs/llm/libs/mlgym.py +869 -0
torchrl/envs/llm/reward/__init__.py +10 -0
torchrl/envs/llm/reward/gsm8k.py +324 -0
torchrl/envs/llm/reward/ifeval/README.md +13 -0
torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
torchrl/envs/llm/transforms/__init__.py +55 -0
torchrl/envs/llm/transforms/browser.py +292 -0
torchrl/envs/llm/transforms/dataloading.py +859 -0
torchrl/envs/llm/transforms/format.py +73 -0
torchrl/envs/llm/transforms/kl.py +1544 -0
torchrl/envs/llm/transforms/policy_version.py +189 -0
torchrl/envs/llm/transforms/reason.py +323 -0
torchrl/envs/llm/transforms/tokenizer.py +321 -0
torchrl/envs/llm/transforms/tools.py +1955 -0
torchrl/envs/model_based/__init__.py +9 -0
torchrl/envs/model_based/common.py +180 -0
torchrl/envs/model_based/dreamer.py +112 -0
torchrl/envs/transforms/__init__.py +147 -0
torchrl/envs/transforms/functional.py +48 -0
torchrl/envs/transforms/gym_transforms.py +203 -0
torchrl/envs/transforms/module.py +341 -0
torchrl/envs/transforms/r3m.py +372 -0
torchrl/envs/transforms/ray_service.py +663 -0
torchrl/envs/transforms/rb_transforms.py +214 -0
torchrl/envs/transforms/transforms.py +11835 -0
torchrl/envs/transforms/utils.py +94 -0
torchrl/envs/transforms/vc1.py +307 -0
torchrl/envs/transforms/vecnorm.py +845 -0
torchrl/envs/transforms/vip.py +407 -0
torchrl/envs/utils.py +1718 -0
torchrl/envs/vec_envs.py +11 -0
torchrl/modules/__init__.py +206 -0
torchrl/modules/distributions/__init__.py +73 -0
torchrl/modules/distributions/continuous.py +830 -0
torchrl/modules/distributions/discrete.py +908 -0
torchrl/modules/distributions/truncated_normal.py +187 -0
torchrl/modules/distributions/utils.py +233 -0
torchrl/modules/llm/__init__.py +62 -0
torchrl/modules/llm/backends/__init__.py +65 -0
torchrl/modules/llm/backends/vllm/__init__.py +94 -0
torchrl/modules/llm/backends/vllm/_models.py +46 -0
torchrl/modules/llm/backends/vllm/base.py +72 -0
torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
torchrl/modules/llm/policies/__init__.py +28 -0
torchrl/modules/llm/policies/common.py +1809 -0
torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
torchrl/modules/llm/utils.py +23 -0
torchrl/modules/mcts/__init__.py +21 -0
torchrl/modules/mcts/scores.py +579 -0
torchrl/modules/models/__init__.py +86 -0
torchrl/modules/models/batchrenorm.py +119 -0
torchrl/modules/models/decision_transformer.py +179 -0
torchrl/modules/models/exploration.py +731 -0
torchrl/modules/models/llm.py +156 -0
torchrl/modules/models/model_based.py +596 -0
torchrl/modules/models/models.py +1712 -0
torchrl/modules/models/multiagent.py +1067 -0
torchrl/modules/models/recipes/impala.py +185 -0
torchrl/modules/models/utils.py +162 -0
torchrl/modules/planners/__init__.py +10 -0
torchrl/modules/planners/cem.py +228 -0
torchrl/modules/planners/common.py +73 -0
torchrl/modules/planners/mppi.py +265 -0
torchrl/modules/tensordict_module/__init__.py +89 -0
torchrl/modules/tensordict_module/actors.py +2457 -0
torchrl/modules/tensordict_module/common.py +529 -0
torchrl/modules/tensordict_module/exploration.py +814 -0
torchrl/modules/tensordict_module/probabilistic.py +321 -0
torchrl/modules/tensordict_module/rnn.py +1639 -0
torchrl/modules/tensordict_module/sequence.py +132 -0
torchrl/modules/tensordict_module/world_models.py +34 -0
torchrl/modules/utils/__init__.py +38 -0
torchrl/modules/utils/mappings.py +9 -0
torchrl/modules/utils/utils.py +89 -0
torchrl/objectives/__init__.py +78 -0
torchrl/objectives/a2c.py +659 -0
torchrl/objectives/common.py +753 -0
torchrl/objectives/cql.py +1346 -0
torchrl/objectives/crossq.py +710 -0
torchrl/objectives/ddpg.py +453 -0
torchrl/objectives/decision_transformer.py +371 -0
torchrl/objectives/deprecated.py +516 -0
torchrl/objectives/dqn.py +683 -0
torchrl/objectives/dreamer.py +488 -0
torchrl/objectives/functional.py +48 -0
torchrl/objectives/gail.py +258 -0
torchrl/objectives/iql.py +996 -0
torchrl/objectives/llm/__init__.py +30 -0
torchrl/objectives/llm/grpo.py +846 -0
torchrl/objectives/llm/sft.py +482 -0
torchrl/objectives/multiagent/__init__.py +8 -0
torchrl/objectives/multiagent/qmixer.py +396 -0
torchrl/objectives/ppo.py +1669 -0
torchrl/objectives/redq.py +683 -0
torchrl/objectives/reinforce.py +530 -0
torchrl/objectives/sac.py +1580 -0
torchrl/objectives/td3.py +570 -0
torchrl/objectives/td3_bc.py +625 -0
torchrl/objectives/utils.py +782 -0
torchrl/objectives/value/__init__.py +28 -0
torchrl/objectives/value/advantages.py +1956 -0
torchrl/objectives/value/functional.py +1459 -0
torchrl/objectives/value/utils.py +360 -0
torchrl/record/__init__.py +17 -0
torchrl/record/loggers/__init__.py +23 -0
torchrl/record/loggers/common.py +48 -0
torchrl/record/loggers/csv.py +226 -0
torchrl/record/loggers/mlflow.py +142 -0
torchrl/record/loggers/tensorboard.py +139 -0
torchrl/record/loggers/trackio.py +163 -0
torchrl/record/loggers/utils.py +78 -0
torchrl/record/loggers/wandb.py +214 -0
torchrl/record/recorder.py +554 -0
torchrl/services/__init__.py +79 -0
torchrl/services/base.py +109 -0
torchrl/services/ray_service.py +453 -0
torchrl/testing/__init__.py +107 -0
torchrl/testing/assertions.py +179 -0
torchrl/testing/dist_utils.py +122 -0
torchrl/testing/env_creators.py +227 -0
torchrl/testing/env_helper.py +35 -0
torchrl/testing/gym_helpers.py +156 -0
torchrl/testing/llm_mocks.py +119 -0
torchrl/testing/mocking_classes.py +2720 -0
torchrl/testing/modules.py +295 -0
torchrl/testing/mp_helpers.py +15 -0
torchrl/testing/ray_helpers.py +293 -0
torchrl/testing/utils.py +190 -0
torchrl/trainers/__init__.py +42 -0
torchrl/trainers/algorithms/__init__.py +11 -0
torchrl/trainers/algorithms/configs/__init__.py +705 -0
torchrl/trainers/algorithms/configs/collectors.py +216 -0
torchrl/trainers/algorithms/configs/common.py +41 -0
torchrl/trainers/algorithms/configs/data.py +308 -0
torchrl/trainers/algorithms/configs/envs.py +104 -0
torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
torchrl/trainers/algorithms/configs/logging.py +80 -0
torchrl/trainers/algorithms/configs/modules.py +570 -0
torchrl/trainers/algorithms/configs/objectives.py +177 -0
torchrl/trainers/algorithms/configs/trainers.py +340 -0
torchrl/trainers/algorithms/configs/transforms.py +955 -0
torchrl/trainers/algorithms/configs/utils.py +252 -0
torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
torchrl/trainers/algorithms/configs/weight_update.py +159 -0
torchrl/trainers/algorithms/ppo.py +373 -0
torchrl/trainers/algorithms/sac.py +308 -0
torchrl/trainers/helpers/__init__.py +40 -0
torchrl/trainers/helpers/collectors.py +416 -0
torchrl/trainers/helpers/envs.py +573 -0
torchrl/trainers/helpers/logger.py +33 -0
torchrl/trainers/helpers/losses.py +132 -0
torchrl/trainers/helpers/models.py +658 -0
torchrl/trainers/helpers/replay_buffer.py +59 -0
torchrl/trainers/helpers/trainers.py +301 -0
torchrl/trainers/trainers.py +2052 -0
torchrl/weight_update/__init__.py +33 -0
torchrl/weight_update/_distributed.py +749 -0
torchrl/weight_update/_mp.py +624 -0
torchrl/weight_update/_noupdate.py +102 -0
torchrl/weight_update/_ray.py +1032 -0
torchrl/weight_update/_rpc.py +284 -0
torchrl/weight_update/_shared.py +891 -0
torchrl/weight_update/llm/__init__.py +32 -0
torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
torchrl/weight_update/llm/vllm_nccl.py +710 -0
torchrl/weight_update/utils.py +73 -0
torchrl/weight_update/weight_sync_schemes.py +1244 -0
torchrl-0.11.0.dist-info/LICENSE +21 -0
torchrl-0.11.0.dist-info/METADATA +1307 -0
torchrl-0.11.0.dist-info/RECORD +394 -0
torchrl-0.11.0.dist-info/WHEEL +5 -0
torchrl-0.11.0.dist-info/entry_points.txt +2 -0
torchrl-0.11.0.dist-info/top_level.txt +7 -0

torchrl/services/base.py ADDED Viewed

@@ -0,0 +1,109 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from collections.abc import Iterator
+from typing import Any
+class ServiceBase(ABC):
+    """Base class for distributed service registries.
+    A service registry manages distributed actors/services that can be accessed
+    across multiple workers. Common use cases include:
+    - Tokenizers shared across inference workers
+    - Replay buffers for distributed training
+    - Model registries for centralized model storage
+    - Metrics aggregators
+    The registry provides a dict-like interface for registering and accessing
+    services by name.
+    """
+    @abstractmethod
+    def register(self, name: str, service_factory: type, *args, **kwargs) -> Any:
+        """Register a service factory and create the service actor.
+        This method registers a service with the given name and immediately
+        creates the corresponding actor. The service becomes globally visible
+        to all workers in the cluster.
+        Args:
+            name: Unique identifier for the service. This name is used to
+                retrieve the service later.
+            service_factory: Class to instantiate as a remote actor.
+            *args: Positional arguments to pass to the service constructor.
+            **kwargs: Keyword arguments for both actor configuration and
+                service constructor. Actor configuration options are backend-specific
+                (e.g., num_cpus, num_gpus for Ray).
+        Returns:
+            The remote actor handle.
+        Raises:
+            ValueError: If a service with this name already exists.
+        """
+    @abstractmethod
+    def get(self, name: str) -> Any:
+        """Get a service by name.
+        Retrieves a previously registered service. If the service was registered
+        by another worker, this method will find it in the distributed registry.
+        Args:
+            name: Service identifier.
+        Returns:
+            The remote actor handle for the service.
+        Raises:
+            KeyError: If the service is not found.
+        """
+    @abstractmethod
+    def __contains__(self, name: str) -> bool:
+        """Check if a service is registered.
+        Args:
+            name: Service identifier.
+        Returns:
+            True if the service exists, False otherwise.
+        """
+    @abstractmethod
+    def list(self) -> list[str]:
+        """List all registered service names.
+        Returns:
+            List of service names currently registered in the cluster.
+        """
+    @abstractmethod
+    def reset(self) -> None:
+        """Reset the service registry.
+        This removes all registered services and cleans up associated resources.
+        After calling reset(), the registry will be empty and all service actors
+        will be terminated.
+        Warning:
+            This is a destructive operation. All services will be terminated and
+            any ongoing work will be interrupted.
+        """
+    def __getitem__(self, name: str) -> Any:
+        """Dict-like access: services["tokenizer"]."""
+        return self.get(name)
+    def __setitem__(self, name: str, service_factory: type) -> None:
+        """Dict-like registration: services["tokenizer"] = TokenizerClass.
+        Note: This only supports service_factory without additional arguments.
+        For full control, use register() method instead.
+        """
+        self.register(name, service_factory)

torchrl/services/ray_service.py ADDED Viewed

@@ -0,0 +1,453 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+from typing import Any
+from torchrl._utils import logger
+from torchrl.services.base import ServiceBase
+# Ray is an optional dependency. Record whether the import succeeded and keep
+# the original ImportError so RayService.__init__ can chain it when it raises.
+RAY_ERR = None
+try:
+    import ray
+    _has_ray = True
+except ImportError as err:
+    _has_ray = False
+    RAY_ERR = err
+class _ServiceRegistryActor:
+    """Internal actor that maintains the list of registered services.
+    This is a lightweight actor (1 CPU) that tracks which services have been
+    registered in a namespace. This ensures we only list our own services,
+    not other named actors in Ray.
+    """
+    def __init__(self):
+        self._services: set[str] = set()
+    def add(self, name: str) -> None:
+        """Add a service to the registry."""
+        self._services.add(name)
+    def remove(self, name: str) -> None:
+        """Remove a service from the registry."""
+        self._services.discard(name)
+    def list(self) -> list[str]:
+        """List all registered services."""
+        return sorted(self._services)
+    def clear(self) -> None:
+        """Clear all registered services."""
+        self._services.clear()
+    def contains(self, name: str) -> bool:
+        """Check if a service is registered."""
+        return name in self._services
+class RayService(ServiceBase):
+    """Ray-based distributed service registry.
+    This class uses Ray's named actors feature to provide truly distributed
+    service discovery. When a service is registered by any worker, it becomes
+    immediately accessible to all other workers in the Ray cluster.
+    Services are registered as Ray actors with globally unique names. This
+    ensures that:
+    1. Services persist independently of the registering worker
+    2. All workers see the same services instantly
+    3. No custom synchronization is needed
+    Args:
+        ray_init_config (dict, optional): Configuration for ray.init(). Only
+            used if Ray is not already initialized. Common options:
+            - address (str): Ray cluster address, or "auto" to auto-detect
+            - num_cpus (int): Number of CPUs to use
+            - num_gpus (int): Number of GPUs to use
+        namespace (str, optional): Ray namespace for service isolation. Services
+            in different namespaces are isolated from each other. Defaults to
+            "torchrl_services".
+    Examples:
+        >>> # Basic usage
+        >>> services = RayService()
+        >>> services.register("tokenizer", TokenizerClass, num_cpus=1)
+        >>> tokenizer = services["tokenizer"]
+        >>>
+        >>> # With Ray options for dynamic configuration
+        >>> actor = services.register(
+        ...     "model",
+        ...     ModelClass,
+        ...     num_cpus=2,
+        ...     num_gpus=1,
+        ...     memory=10 * 1024**3,
+        ...     max_concurrency=4
+        ... )
+        >>>
+        >>> # Check and retrieve
+        >>> if "tokenizer" in services:
+        ...     tok = services["tokenizer"]
+        >>>
+        >>> # List all services
+        >>> print(services.list())
+        ['model', 'tokenizer']
+    """
+    def __init__(
+        self,
+        ray_init_config: dict | None = None,
+        namespace: str = "torchrl_services",
+    ):
+        if not _has_ray:
+            raise ImportError(
+                "Ray is required for RayService. Install with: pip install ray"
+            ) from RAY_ERR
+        self._namespace = namespace
+        self._ensure_ray_initialized(ray_init_config)
+        self._registry_actor = self._get_or_create_registry_actor()
+    def _ensure_ray_initialized(self, ray_init_config: dict | None = None):
+        """Initialize Ray if not already initialized."""
+        if not ray.is_initialized():
+            config = ray_init_config or {}
+            # Ensure namespace is set
+            if "namespace" not in config:
+                config["namespace"] = self._namespace
+            logger.info(f"Initializing Ray with namespace '{self._namespace}'")
+            ray.init(**config)
+        else:
+            # Ray already initialized - check if namespace matches
+            context = ray.get_runtime_context()
+            current_namespace = context.namespace
+            if current_namespace != self._namespace:
+                logger.warning(
+                    f"Ray already initialized with namespace '{current_namespace}', "
+                    f"but RayService is using namespace '{self._namespace}'. "
+                    f"Services may not be visible across namespaces."
+                )
+    def _make_service_name(self, name: str) -> str:
+        """Create the full actor name with namespace prefix."""
+        return f"{self._namespace}::service::{name}"
+    def _get_registry_actor_name(self) -> str:
+        """Get the name of the registry actor for this namespace."""
+        return f"{self._namespace}::_registry"
+    def _get_or_create_registry_actor(self):
+        """Get or create the registry actor for this namespace."""
+        registry_name = self._get_registry_actor_name()
+        try:
+            # Try to get existing registry
+            registry = ray.get_actor(registry_name, namespace=self._namespace)
+            return registry
+        except ValueError:
+            # Registry doesn't exist, create it
+            RemoteRegistry = ray.remote(_ServiceRegistryActor)
+            registry = RemoteRegistry.options(
+                name=registry_name,
+                namespace=self._namespace,
+                lifetime="detached",
+                num_cpus=1,
+            ).remote()
+            logger.info(
+                f"Created service registry actor for namespace '{self._namespace}'"
+            )
+            return registry
+    def register(self, name: str, service_factory: type, *args, **kwargs) -> Any:
+        """Register a service and create a named Ray actor.
+        This method creates a Ray actor with a globally unique name. The actor
+        becomes immediately visible to all workers in the cluster.
+        Args:
+            name: Service identifier. Must be unique within the namespace.
+            service_factory: Class to instantiate as a Ray actor.
+            *args: Positional arguments for the service constructor.
+            **kwargs: Both Ray actor options (num_cpus, num_gpus, memory, etc.)
+                and service constructor arguments. Ray will filter out the actor
+                options it recognizes.
+        Returns:
+            The Ray actor handle.
+        Raises:
+            ValueError: If a service with this name already exists.
+        Examples:
+            >>> services = RayService()
+            >>>
+            >>> # Basic registration
+            >>> tokenizer = services.register("tokenizer", TokenizerClass)
+            >>>
+            >>> # With Ray resource specification
+            >>> buffer = services.register(
+            ...     "buffer",
+            ...     ReplayBuffer,
+            ...     num_cpus=2,
+            ...     num_gpus=0,
+            ...     size=1000000
+            ... )
+            >>>
+            >>> # With advanced Ray options
+            >>> model = services.register(
+            ...     "model",
+            ...     ModelClass,
+            ...     num_cpus=4,
+            ...     num_gpus=1,
+            ...     memory=20 * 1024**3,
+            ...     max_concurrency=10,
+            ...     max_restarts=3,
+            ... )
+        """
+        full_name = self._make_service_name(name)
+        # Check if service already exists in our registry
+        if ray.get(self._registry_actor.contains.remote(name)):
+            raise ValueError(
+                f"Service '{name}' already exists in namespace '{self._namespace}'. "
+                f"Use a different name or retrieve the existing service with get()."
+            )
+        # Create the Ray remote class
+        # First, make it a remote class
+        remote_cls = ray.remote(service_factory)
+        # Then apply options including the name
+        options = {
+            "name": full_name,
+            "namespace": self._namespace,
+            "lifetime": "detached",
+        }
+        # Extract Ray-specific options from kwargs
+        ray_options = [
+            "num_cpus",
+            "num_gpus",
+            "memory",
+            "object_store_memory",
+            "resources",
+            "accelerator_type",
+            "max_concurrency",
+            "max_restarts",
+            "max_task_retries",
+            "max_pending_calls",
+            "scheduling_strategy",
+        ]
+        for opt in ray_options:
+            if opt in kwargs:
+                options[opt] = kwargs.pop(opt)
+        # Apply options and create the actor
+        remote_actor = remote_cls.options(**options).remote(*args, **kwargs)
+        # Add to registry
+        ray.get(self._registry_actor.add.remote(name))
+        logger.info(
+            f"Registered service '{name}' as Ray actor '{full_name}' "
+            f"with options: {options}"
+        )
+        return remote_actor
+    def get(self, name: str) -> Any:
+        """Get a service by name.
+        Retrieves a service actor by name. The service can have been registered
+        by any worker in the cluster.
+        Args:
+            name: Service identifier.
+        Returns:
+            The Ray actor handle.
+        Raises:
+            KeyError: If the service is not found.
+        Examples:
+            >>> services = RayService()
+            >>> tokenizer = services.get("tokenizer")
+            >>> # Use the actor
+            >>> result = ray.get(tokenizer.encode.remote("Hello world"))
+        """
+        # Check registry first
+        if not ray.get(self._registry_actor.contains.remote(name)):
+            raise KeyError(
+                f"Service '{name}' not found in namespace '{self._namespace}'. "
+                f"Available services: {self.list()}"
+            )
+        full_name = self._make_service_name(name)
+        try:
+            actor = ray.get_actor(full_name, namespace=self._namespace)
+            return actor
+        except ValueError as e:
+            # Service in registry but actor missing - inconsistency
+            logger.warning(
+                f"Service '{name}' in registry but actor not found. "
+                f"Removing from registry."
+            )
+            ray.get(self._registry_actor.remove.remote(name))
+            raise KeyError(
+                f"Service '{name}' actor not found (removed from registry). "
+                f"Available services: {self.list()}"
+            ) from e
+    def __contains__(self, name: str) -> bool:
+        """Check if a service is registered.
+        Args:
+            name: Service identifier.
+        Returns:
+            True if the service exists, False otherwise.
+        Examples:
+            >>> services = RayService()
+            >>> if "tokenizer" in services:
+            ...     tokenizer = services["tokenizer"]
+            ... else:
+            ...     services.register("tokenizer", TokenizerClass)
+        """
+        return ray.get(self._registry_actor.contains.remote(name))
+    def list(self) -> list[str]:
+        """List all registered service names.
+        Returns a list of all services in the current namespace. This includes
+        services registered by any worker.
+        Returns:
+            List of service names (without namespace prefix).
+        Examples:
+            >>> services = RayService()
+            >>> services.register("tokenizer", TokenizerClass)
+            >>> services.register("buffer", ReplayBuffer)
+            >>> print(services.list())
+            ['buffer', 'tokenizer']
+        """
+        return ray.get(self._registry_actor.list.remote())
+    def reset(self) -> None:
+        """Reset the service registry by terminating all actors.
+        This method:
+        1. Terminates all service actors in the current namespace
+        2. Clears the registry actor's internal state
+        After calling reset(), all services will be removed and their actors
+        will be killed. Any ongoing work will be interrupted.
+        Warning:
+            This is a destructive operation that affects all workers in the
+            namespace. Use with caution.
+        Examples:
+            >>> services = RayService(namespace="experiment")
+            >>> services.register("tokenizer", TokenizerClass)
+            >>> print(services.list())
+            ['tokenizer']
+            >>> services.reset()
+            >>> print(services.list())
+            []
+        """
+        service_names = self.list()
+        for name in service_names:
+            full_name = self._make_service_name(name)
+            try:
+                actor = ray.get_actor(full_name, namespace=self._namespace)
+                ray.kill(actor)
+                logger.info(f"Terminated service '{name}' (actor: {full_name})")
+            except ValueError:
+                # Actor already gone or doesn't exist
+                logger.warning(f"Service '{name}' not found during reset")
+            except Exception as e:
+                logger.warning(f"Failed to terminate service '{name}': {e}")
+        # Clear the registry
+        ray.get(self._registry_actor.clear.remote())
+        logger.info(
+            f"Reset complete for namespace '{self._namespace}'. Terminated {len(service_names)} services."
+        )
+    def shutdown(self, raise_on_error: bool = True) -> None:
+        """Shutdown the RayService by shutting down the Ray cluster."""
+        try:
+            self.reset()
+            # kill the registry actor
+            registry_actor = ray.get_actor(
+                self._get_registry_actor_name(), namespace=self._namespace
+            )
+            ray.kill(registry_actor, no_restart=True)
+        except Exception as e:
+            if raise_on_error:
+                raise e
+            else:
+                logger.warning(f"Error shutting down RayService: {e}")
+    def register_with_options(
+        self,
+        name: str,
+        service_factory: type,
+        actor_options: dict[str, Any],
+        **constructor_kwargs,
+    ) -> Any:
+        """Register a service with explicit separation of Ray options and constructor args.
+        This is a convenience method that makes it explicit which arguments are for
+        Ray actor configuration vs. the service constructor. It's functionally
+        equivalent to `register()` but more readable for complex configurations.
+        Args:
+            name: Service identifier.
+            service_factory: Class to instantiate as a Ray actor.
+            actor_options: Dictionary of Ray actor options (num_cpus, num_gpus, etc.).
+            **constructor_kwargs: Arguments to pass to the service constructor.
+        Returns:
+            The Ray actor handle.
+        Examples:
+            >>> services = RayService()
+            >>>
+            >>> # Explicit separation of concerns
+            >>> model = services.register_with_options(
+            ...     "model",
+            ...     ModelClass,
+            ...     actor_options={
+            ...         "num_cpus": 4,
+            ...         "num_gpus": 1,
+            ...         "memory": 20 * 1024**3,
+            ...         "max_concurrency": 10
+            ...     },
+            ...     model_path="/path/to/checkpoint",
+            ...     batch_size=32
+            ... )
+            >>>
+            >>> # Equivalent to:
+            >>> # services.register(
+            >>> #     "model", ModelClass,
+            >>> #     num_cpus=4, num_gpus=1, memory=20*1024**3, max_concurrency=10,
+            >>> #     model_path="/path/to/checkpoint", batch_size=32
+            >>> # )
+        """
+        # Merge actor_options into kwargs for register()
+        merged_kwargs = {**actor_options, **constructor_kwargs}
+        return self.register(name, service_factory, **merged_kwargs)

torchrl/testing/__init__.py ADDED Viewed

@@ -0,0 +1,107 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""Testing utilities for TorchRL.
+This module provides helper classes and utilities for testing TorchRL functionality,
+particularly for distributed and Ray-based tests that require importable classes.
+"""
+from torchrl.testing.assertions import (
+    check_rollout_consistency_multikey_env,
+    rand_reset,
+    rollout_consistency_assertion,
+)
+from torchrl.testing.env_creators import (
+    get_transform_out,
+    make_envs,
+    make_multithreaded_env,
+)
+from torchrl.testing.gym_helpers import (
+    BREAKOUT_VERSIONED,
+    CARTPOLE_VERSIONED,
+    CLIFFWALKING_VERSIONED,
+    HALFCHEETAH_VERSIONED,
+    PENDULUM_VERSIONED,
+    PONG_VERSIONED,
+)
+from torchrl.testing.llm_mocks import (
+    DummyStrDataLoader,
+    DummyTensorDataLoader,
+    MockTransformerConfig,
+    MockTransformerModel,
+    MockTransformerOutput,
+)
+from torchrl.testing.modules import (
+    BiasModule,
+    call_value_nets,
+    LSTMNet,
+    NonSerializableBiasModule,
+)
+from torchrl.testing.ray_helpers import (
+    WorkerTransformerDoubleBuffer,
+    WorkerTransformerNCCL,
+    WorkerVLLMDoubleBuffer,
+    WorkerVLLMNCCL,
+)
+from torchrl.testing.utils import (
+    capture_log_records,
+    dtype_fixture,
+    generate_seeds,
+    get_available_devices,
+    get_default_devices,
+    IS_WIN,
+    make_tc,
+    mp_ctx,
+    PYTHON_3_9,
+    retry,
+    set_global_var,
+)
+# Public API of torchrl.testing: every name imported above is re-exported
+# here, grouped by the submodule it comes from.
+__all__ = [
+    # Assertions
+    "check_rollout_consistency_multikey_env",
+    "rand_reset",
+    "rollout_consistency_assertion",
+    # Environment creators
+    "get_transform_out",
+    "make_envs",
+    "make_multithreaded_env",
+    # Gym helpers
+    "BREAKOUT_VERSIONED",
+    "CARTPOLE_VERSIONED",
+    "CLIFFWALKING_VERSIONED",
+    "HALFCHEETAH_VERSIONED",
+    "PENDULUM_VERSIONED",
+    "PONG_VERSIONED",
+    # LLM mocks
+    "DummyStrDataLoader",
+    "DummyTensorDataLoader",
+    "MockTransformerConfig",
+    "MockTransformerModel",
+    "MockTransformerOutput",
+    # Modules
+    "BiasModule",
+    "call_value_nets",
+    "LSTMNet",
+    "NonSerializableBiasModule",
+    # Ray helpers
+    "WorkerTransformerDoubleBuffer",
+    "WorkerTransformerNCCL",
+    "WorkerVLLMDoubleBuffer",
+    "WorkerVLLMNCCL",
+    # Utils
+    "capture_log_records",
+    "dtype_fixture",
+    "generate_seeds",
+    "get_available_devices",
+    "get_default_devices",
+    "IS_WIN",
+    "make_tc",
+    "mp_ctx",
+    "PYTHON_3_9",
+    "retry",
+    "set_global_var",
+]