PyPI - torchrl - Versions diffs - 0.11.0__cp314-cp314-macosx_11_0_arm64.whl - Mend

torchrl 0.11.0__cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (395) hide show

benchmarks/benchmark_batched_envs.py +104 -0
benchmarks/conftest.py +91 -0
benchmarks/ecosystem/gym_env_throughput.py +321 -0
benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
benchmarks/requirements.txt +7 -0
benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
benchmarks/test_collectors_benchmark.py +240 -0
benchmarks/test_compressed_storage_benchmark.py +145 -0
benchmarks/test_envs_benchmark.py +133 -0
benchmarks/test_llm.py +101 -0
benchmarks/test_non_tensor_env_benchmark.py +70 -0
benchmarks/test_objectives_benchmarks.py +1199 -0
benchmarks/test_replaybuffer_benchmark.py +254 -0
sota-check/README.md +35 -0
sota-implementations/README.md +142 -0
sota-implementations/a2c/README.md +39 -0
sota-implementations/a2c/a2c_atari.py +291 -0
sota-implementations/a2c/a2c_mujoco.py +273 -0
sota-implementations/a2c/utils_atari.py +240 -0
sota-implementations/a2c/utils_mujoco.py +160 -0
sota-implementations/bandits/README.md +7 -0
sota-implementations/bandits/dqn.py +126 -0
sota-implementations/cql/cql_offline.py +198 -0
sota-implementations/cql/cql_online.py +249 -0
sota-implementations/cql/discrete_cql_offline.py +180 -0
sota-implementations/cql/discrete_cql_online.py +227 -0
sota-implementations/cql/utils.py +471 -0
sota-implementations/crossq/crossq.py +271 -0
sota-implementations/crossq/utils.py +320 -0
sota-implementations/ddpg/ddpg.py +231 -0
sota-implementations/ddpg/utils.py +325 -0
sota-implementations/decision_transformer/dt.py +163 -0
sota-implementations/decision_transformer/lamb.py +167 -0
sota-implementations/decision_transformer/online_dt.py +178 -0
sota-implementations/decision_transformer/utils.py +562 -0
sota-implementations/discrete_sac/discrete_sac.py +243 -0
sota-implementations/discrete_sac/utils.py +324 -0
sota-implementations/dqn/README.md +30 -0
sota-implementations/dqn/dqn_atari.py +272 -0
sota-implementations/dqn/dqn_cartpole.py +236 -0
sota-implementations/dqn/utils_atari.py +132 -0
sota-implementations/dqn/utils_cartpole.py +90 -0
sota-implementations/dreamer/README.md +129 -0
sota-implementations/dreamer/dreamer.py +586 -0
sota-implementations/dreamer/dreamer_utils.py +1107 -0
sota-implementations/expert-iteration/README.md +352 -0
sota-implementations/expert-iteration/ei_utils.py +770 -0
sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
sota-implementations/gail/gail.py +327 -0
sota-implementations/gail/gail_utils.py +68 -0
sota-implementations/gail/ppo_utils.py +157 -0
sota-implementations/grpo/README.md +273 -0
sota-implementations/grpo/grpo-async.py +437 -0
sota-implementations/grpo/grpo-sync.py +435 -0
sota-implementations/grpo/grpo_utils.py +843 -0
sota-implementations/grpo/requirements_gsm8k.txt +11 -0
sota-implementations/grpo/requirements_ifeval.txt +16 -0
sota-implementations/impala/README.md +33 -0
sota-implementations/impala/impala_multi_node_ray.py +292 -0
sota-implementations/impala/impala_multi_node_submitit.py +284 -0
sota-implementations/impala/impala_single_node.py +261 -0
sota-implementations/impala/utils.py +184 -0
sota-implementations/iql/discrete_iql.py +230 -0
sota-implementations/iql/iql_offline.py +164 -0
sota-implementations/iql/iql_online.py +225 -0
sota-implementations/iql/utils.py +437 -0
sota-implementations/multiagent/README.md +74 -0
sota-implementations/multiagent/iql.py +237 -0
sota-implementations/multiagent/maddpg_iddpg.py +266 -0
sota-implementations/multiagent/mappo_ippo.py +267 -0
sota-implementations/multiagent/qmix_vdn.py +271 -0
sota-implementations/multiagent/sac.py +337 -0
sota-implementations/multiagent/utils/__init__.py +4 -0
sota-implementations/multiagent/utils/logging.py +151 -0
sota-implementations/multiagent/utils/utils.py +43 -0
sota-implementations/ppo/README.md +29 -0
sota-implementations/ppo/ppo_atari.py +305 -0
sota-implementations/ppo/ppo_mujoco.py +293 -0
sota-implementations/ppo/utils_atari.py +238 -0
sota-implementations/ppo/utils_mujoco.py +152 -0
sota-implementations/ppo_trainer/train.py +21 -0
sota-implementations/redq/README.md +7 -0
sota-implementations/redq/redq.py +199 -0
sota-implementations/redq/utils.py +1060 -0
sota-implementations/sac/sac-async.py +266 -0
sota-implementations/sac/sac.py +239 -0
sota-implementations/sac/utils.py +381 -0
sota-implementations/sac_trainer/train.py +16 -0
sota-implementations/td3/td3.py +254 -0
sota-implementations/td3/utils.py +319 -0
sota-implementations/td3_bc/td3_bc.py +177 -0
sota-implementations/td3_bc/utils.py +251 -0
torchrl/.dylibs/libc++.1.0.dylib +0 -0
torchrl/__init__.py +144 -0
torchrl/_extension.py +74 -0
torchrl/_torchrl.cpython-314-darwin.so +0 -0
torchrl/_utils.py +1431 -0
torchrl/collectors/__init__.py +48 -0
torchrl/collectors/_base.py +1058 -0
torchrl/collectors/_constants.py +88 -0
torchrl/collectors/_multi_async.py +324 -0
torchrl/collectors/_multi_base.py +1805 -0
torchrl/collectors/_multi_sync.py +464 -0
torchrl/collectors/_runner.py +581 -0
torchrl/collectors/_single.py +2009 -0
torchrl/collectors/_single_async.py +259 -0
torchrl/collectors/collectors.py +62 -0
torchrl/collectors/distributed/__init__.py +32 -0
torchrl/collectors/distributed/default_configs.py +133 -0
torchrl/collectors/distributed/generic.py +1306 -0
torchrl/collectors/distributed/ray.py +1092 -0
torchrl/collectors/distributed/rpc.py +1006 -0
torchrl/collectors/distributed/sync.py +731 -0
torchrl/collectors/distributed/utils.py +160 -0
torchrl/collectors/llm/__init__.py +10 -0
torchrl/collectors/llm/base.py +494 -0
torchrl/collectors/llm/ray_collector.py +275 -0
torchrl/collectors/llm/utils.py +36 -0
torchrl/collectors/llm/weight_update/__init__.py +10 -0
torchrl/collectors/llm/weight_update/vllm.py +348 -0
torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
torchrl/collectors/utils.py +433 -0
torchrl/collectors/weight_update.py +591 -0
torchrl/csrc/numpy_utils.h +38 -0
torchrl/csrc/pybind.cpp +27 -0
torchrl/csrc/segment_tree.h +458 -0
torchrl/csrc/torch_utils.h +34 -0
torchrl/csrc/utils.cpp +48 -0
torchrl/csrc/utils.h +31 -0
torchrl/data/__init__.py +187 -0
torchrl/data/datasets/__init__.py +58 -0
torchrl/data/datasets/atari_dqn.py +878 -0
torchrl/data/datasets/common.py +281 -0
torchrl/data/datasets/d4rl.py +489 -0
torchrl/data/datasets/d4rl_infos.py +187 -0
torchrl/data/datasets/gen_dgrl.py +375 -0
torchrl/data/datasets/minari_data.py +643 -0
torchrl/data/datasets/openml.py +177 -0
torchrl/data/datasets/openx.py +798 -0
torchrl/data/datasets/roboset.py +363 -0
torchrl/data/datasets/utils.py +11 -0
torchrl/data/datasets/vd4rl.py +432 -0
torchrl/data/llm/__init__.py +34 -0
torchrl/data/llm/dataset.py +491 -0
torchrl/data/llm/history.py +1378 -0
torchrl/data/llm/prompt.py +198 -0
torchrl/data/llm/reward.py +225 -0
torchrl/data/llm/topk.py +186 -0
torchrl/data/llm/utils.py +543 -0
torchrl/data/map/__init__.py +21 -0
torchrl/data/map/hash.py +185 -0
torchrl/data/map/query.py +204 -0
torchrl/data/map/tdstorage.py +363 -0
torchrl/data/map/tree.py +1434 -0
torchrl/data/map/utils.py +103 -0
torchrl/data/postprocs/__init__.py +8 -0
torchrl/data/postprocs/postprocs.py +391 -0
torchrl/data/replay_buffers/__init__.py +99 -0
torchrl/data/replay_buffers/checkpointers.py +622 -0
torchrl/data/replay_buffers/ray_buffer.py +292 -0
torchrl/data/replay_buffers/replay_buffers.py +2376 -0
torchrl/data/replay_buffers/samplers.py +2578 -0
torchrl/data/replay_buffers/scheduler.py +265 -0
torchrl/data/replay_buffers/storages.py +2412 -0
torchrl/data/replay_buffers/utils.py +1042 -0
torchrl/data/replay_buffers/writers.py +781 -0
torchrl/data/tensor_specs.py +7101 -0
torchrl/data/utils.py +334 -0
torchrl/envs/__init__.py +265 -0
torchrl/envs/async_envs.py +1105 -0
torchrl/envs/batched_envs.py +3093 -0
torchrl/envs/common.py +4241 -0
torchrl/envs/custom/__init__.py +11 -0
torchrl/envs/custom/chess.py +617 -0
torchrl/envs/custom/llm.py +214 -0
torchrl/envs/custom/pendulum.py +401 -0
torchrl/envs/custom/san_moves.txt +29274 -0
torchrl/envs/custom/tictactoeenv.py +288 -0
torchrl/envs/env_creator.py +263 -0
torchrl/envs/gym_like.py +752 -0
torchrl/envs/libs/__init__.py +68 -0
torchrl/envs/libs/_gym_utils.py +326 -0
torchrl/envs/libs/brax.py +846 -0
torchrl/envs/libs/dm_control.py +544 -0
torchrl/envs/libs/envpool.py +447 -0
torchrl/envs/libs/gym.py +2239 -0
torchrl/envs/libs/habitat.py +138 -0
torchrl/envs/libs/isaac_lab.py +87 -0
torchrl/envs/libs/isaacgym.py +203 -0
torchrl/envs/libs/jax_utils.py +166 -0
torchrl/envs/libs/jumanji.py +963 -0
torchrl/envs/libs/meltingpot.py +599 -0
torchrl/envs/libs/openml.py +153 -0
torchrl/envs/libs/openspiel.py +652 -0
torchrl/envs/libs/pettingzoo.py +1042 -0
torchrl/envs/libs/procgen.py +351 -0
torchrl/envs/libs/robohive.py +429 -0
torchrl/envs/libs/smacv2.py +645 -0
torchrl/envs/libs/unity_mlagents.py +891 -0
torchrl/envs/libs/utils.py +147 -0
torchrl/envs/libs/vmas.py +813 -0
torchrl/envs/llm/__init__.py +63 -0
torchrl/envs/llm/chat.py +730 -0
torchrl/envs/llm/datasets/README.md +4 -0
torchrl/envs/llm/datasets/__init__.py +17 -0
torchrl/envs/llm/datasets/gsm8k.py +353 -0
torchrl/envs/llm/datasets/ifeval.py +274 -0
torchrl/envs/llm/envs.py +789 -0
torchrl/envs/llm/libs/README.md +3 -0
torchrl/envs/llm/libs/__init__.py +8 -0
torchrl/envs/llm/libs/mlgym.py +869 -0
torchrl/envs/llm/reward/__init__.py +10 -0
torchrl/envs/llm/reward/gsm8k.py +324 -0
torchrl/envs/llm/reward/ifeval/README.md +13 -0
torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
torchrl/envs/llm/transforms/__init__.py +55 -0
torchrl/envs/llm/transforms/browser.py +292 -0
torchrl/envs/llm/transforms/dataloading.py +859 -0
torchrl/envs/llm/transforms/format.py +73 -0
torchrl/envs/llm/transforms/kl.py +1544 -0
torchrl/envs/llm/transforms/policy_version.py +189 -0
torchrl/envs/llm/transforms/reason.py +323 -0
torchrl/envs/llm/transforms/tokenizer.py +321 -0
torchrl/envs/llm/transforms/tools.py +1955 -0
torchrl/envs/model_based/__init__.py +9 -0
torchrl/envs/model_based/common.py +180 -0
torchrl/envs/model_based/dreamer.py +112 -0
torchrl/envs/transforms/__init__.py +147 -0
torchrl/envs/transforms/functional.py +48 -0
torchrl/envs/transforms/gym_transforms.py +203 -0
torchrl/envs/transforms/module.py +341 -0
torchrl/envs/transforms/r3m.py +372 -0
torchrl/envs/transforms/ray_service.py +663 -0
torchrl/envs/transforms/rb_transforms.py +214 -0
torchrl/envs/transforms/transforms.py +11835 -0
torchrl/envs/transforms/utils.py +94 -0
torchrl/envs/transforms/vc1.py +307 -0
torchrl/envs/transforms/vecnorm.py +845 -0
torchrl/envs/transforms/vip.py +407 -0
torchrl/envs/utils.py +1718 -0
torchrl/envs/vec_envs.py +11 -0
torchrl/modules/__init__.py +206 -0
torchrl/modules/distributions/__init__.py +73 -0
torchrl/modules/distributions/continuous.py +830 -0
torchrl/modules/distributions/discrete.py +908 -0
torchrl/modules/distributions/truncated_normal.py +187 -0
torchrl/modules/distributions/utils.py +233 -0
torchrl/modules/llm/__init__.py +62 -0
torchrl/modules/llm/backends/__init__.py +65 -0
torchrl/modules/llm/backends/vllm/__init__.py +94 -0
torchrl/modules/llm/backends/vllm/_models.py +46 -0
torchrl/modules/llm/backends/vllm/base.py +72 -0
torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
torchrl/modules/llm/policies/__init__.py +28 -0
torchrl/modules/llm/policies/common.py +1809 -0
torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
torchrl/modules/llm/utils.py +23 -0
torchrl/modules/mcts/__init__.py +21 -0
torchrl/modules/mcts/scores.py +579 -0
torchrl/modules/models/__init__.py +86 -0
torchrl/modules/models/batchrenorm.py +119 -0
torchrl/modules/models/decision_transformer.py +179 -0
torchrl/modules/models/exploration.py +731 -0
torchrl/modules/models/llm.py +156 -0
torchrl/modules/models/model_based.py +596 -0
torchrl/modules/models/models.py +1712 -0
torchrl/modules/models/multiagent.py +1067 -0
torchrl/modules/models/recipes/impala.py +185 -0
torchrl/modules/models/utils.py +162 -0
torchrl/modules/planners/__init__.py +10 -0
torchrl/modules/planners/cem.py +228 -0
torchrl/modules/planners/common.py +73 -0
torchrl/modules/planners/mppi.py +265 -0
torchrl/modules/tensordict_module/__init__.py +89 -0
torchrl/modules/tensordict_module/actors.py +2457 -0
torchrl/modules/tensordict_module/common.py +529 -0
torchrl/modules/tensordict_module/exploration.py +814 -0
torchrl/modules/tensordict_module/probabilistic.py +321 -0
torchrl/modules/tensordict_module/rnn.py +1639 -0
torchrl/modules/tensordict_module/sequence.py +132 -0
torchrl/modules/tensordict_module/world_models.py +34 -0
torchrl/modules/utils/__init__.py +38 -0
torchrl/modules/utils/mappings.py +9 -0
torchrl/modules/utils/utils.py +89 -0
torchrl/objectives/__init__.py +78 -0
torchrl/objectives/a2c.py +659 -0
torchrl/objectives/common.py +753 -0
torchrl/objectives/cql.py +1346 -0
torchrl/objectives/crossq.py +710 -0
torchrl/objectives/ddpg.py +453 -0
torchrl/objectives/decision_transformer.py +371 -0
torchrl/objectives/deprecated.py +516 -0
torchrl/objectives/dqn.py +683 -0
torchrl/objectives/dreamer.py +488 -0
torchrl/objectives/functional.py +48 -0
torchrl/objectives/gail.py +258 -0
torchrl/objectives/iql.py +996 -0
torchrl/objectives/llm/__init__.py +30 -0
torchrl/objectives/llm/grpo.py +846 -0
torchrl/objectives/llm/sft.py +482 -0
torchrl/objectives/multiagent/__init__.py +8 -0
torchrl/objectives/multiagent/qmixer.py +396 -0
torchrl/objectives/ppo.py +1669 -0
torchrl/objectives/redq.py +683 -0
torchrl/objectives/reinforce.py +530 -0
torchrl/objectives/sac.py +1580 -0
torchrl/objectives/td3.py +570 -0
torchrl/objectives/td3_bc.py +625 -0
torchrl/objectives/utils.py +782 -0
torchrl/objectives/value/__init__.py +28 -0
torchrl/objectives/value/advantages.py +1956 -0
torchrl/objectives/value/functional.py +1459 -0
torchrl/objectives/value/utils.py +360 -0
torchrl/record/__init__.py +17 -0
torchrl/record/loggers/__init__.py +23 -0
torchrl/record/loggers/common.py +48 -0
torchrl/record/loggers/csv.py +226 -0
torchrl/record/loggers/mlflow.py +142 -0
torchrl/record/loggers/tensorboard.py +139 -0
torchrl/record/loggers/trackio.py +163 -0
torchrl/record/loggers/utils.py +78 -0
torchrl/record/loggers/wandb.py +214 -0
torchrl/record/recorder.py +554 -0
torchrl/services/__init__.py +79 -0
torchrl/services/base.py +109 -0
torchrl/services/ray_service.py +453 -0
torchrl/testing/__init__.py +107 -0
torchrl/testing/assertions.py +179 -0
torchrl/testing/dist_utils.py +122 -0
torchrl/testing/env_creators.py +227 -0
torchrl/testing/env_helper.py +35 -0
torchrl/testing/gym_helpers.py +156 -0
torchrl/testing/llm_mocks.py +119 -0
torchrl/testing/mocking_classes.py +2720 -0
torchrl/testing/modules.py +295 -0
torchrl/testing/mp_helpers.py +15 -0
torchrl/testing/ray_helpers.py +293 -0
torchrl/testing/utils.py +190 -0
torchrl/trainers/__init__.py +42 -0
torchrl/trainers/algorithms/__init__.py +11 -0
torchrl/trainers/algorithms/configs/__init__.py +705 -0
torchrl/trainers/algorithms/configs/collectors.py +216 -0
torchrl/trainers/algorithms/configs/common.py +41 -0
torchrl/trainers/algorithms/configs/data.py +308 -0
torchrl/trainers/algorithms/configs/envs.py +104 -0
torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
torchrl/trainers/algorithms/configs/logging.py +80 -0
torchrl/trainers/algorithms/configs/modules.py +570 -0
torchrl/trainers/algorithms/configs/objectives.py +177 -0
torchrl/trainers/algorithms/configs/trainers.py +340 -0
torchrl/trainers/algorithms/configs/transforms.py +955 -0
torchrl/trainers/algorithms/configs/utils.py +252 -0
torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
torchrl/trainers/algorithms/configs/weight_update.py +159 -0
torchrl/trainers/algorithms/ppo.py +373 -0
torchrl/trainers/algorithms/sac.py +308 -0
torchrl/trainers/helpers/__init__.py +40 -0
torchrl/trainers/helpers/collectors.py +416 -0
torchrl/trainers/helpers/envs.py +573 -0
torchrl/trainers/helpers/logger.py +33 -0
torchrl/trainers/helpers/losses.py +132 -0
torchrl/trainers/helpers/models.py +658 -0
torchrl/trainers/helpers/replay_buffer.py +59 -0
torchrl/trainers/helpers/trainers.py +301 -0
torchrl/trainers/trainers.py +2052 -0
torchrl/weight_update/__init__.py +33 -0
torchrl/weight_update/_distributed.py +749 -0
torchrl/weight_update/_mp.py +624 -0
torchrl/weight_update/_noupdate.py +102 -0
torchrl/weight_update/_ray.py +1032 -0
torchrl/weight_update/_rpc.py +284 -0
torchrl/weight_update/_shared.py +891 -0
torchrl/weight_update/llm/__init__.py +32 -0
torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
torchrl/weight_update/llm/vllm_nccl.py +710 -0
torchrl/weight_update/utils.py +73 -0
torchrl/weight_update/weight_sync_schemes.py +1244 -0
torchrl-0.11.0.dist-info/METADATA +1308 -0
torchrl-0.11.0.dist-info/RECORD +395 -0
torchrl-0.11.0.dist-info/WHEEL +5 -0
torchrl-0.11.0.dist-info/entry_points.txt +2 -0
torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
torchrl-0.11.0.dist-info/top_level.txt +7 -0

torchrl/data/datasets/vd4rl.py ADDED Viewed

@@ -0,0 +1,432 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+import functools
+import importlib
+import json
+import os
+import pathlib
+import shutil
+import tempfile
+from collections import defaultdict
+from collections.abc import Callable
+from pathlib import Path
+import numpy as np
+import torch
+from tensordict import PersistentTensorDict, TensorDict
+from torch import multiprocessing as mp
+from torchrl._utils import KeyDependentDefaultDict, logger as torchrl_logger
+from torchrl.data.datasets.common import BaseDatasetExperienceReplay
+from torchrl.data.datasets.utils import _get_root_dir
+from torchrl.data.replay_buffers.samplers import Sampler
+from torchrl.data.replay_buffers.storages import TensorStorage
+from torchrl.data.replay_buffers.writers import ImmutableDatasetWriter, Writer
+from torchrl.envs.transforms import Compose, Resize, ToTensorImage
+from torchrl.envs.utils import _classproperty
+_has_tqdm = importlib.util.find_spec("tqdm", None) is not None
+_has_h5py = importlib.util.find_spec("h5py", None) is not None
+_has_hf_hub = importlib.util.find_spec("huggingface_hub", None) is not None
+THIS_DIR = pathlib.Path(__file__).parent
+class VD4RLExperienceReplay(BaseDatasetExperienceReplay):
+    """V-D4RL experience replay dataset.
+    This class downloads the H5/npz data from V-D4RL and processes it in a mmap
+    format, which makes indexing (and therefore sampling) faster.
+    Learn more about V-D4RL here: https://arxiv.org/abs/2206.04779
+    The `"pixels"` entry is located at the root of the data, and all the data
+    that is not reward, done-state, action or pixels is moved under a `"state"`
+    node.
+    The data format follows the :ref:`TED convention <TED-format>`.
+    Args:
+        dataset_id (str): the dataset to be downloaded. Must be part of
+            VD4RLExperienceReplay.available_datasets.
+        batch_size (int): Batch-size used during sampling. Can be overridden by
+            `data.sample(batch_size)` if necessary.
+    Keyword Args:
+        root (Path or str, optional): The V-D4RL dataset root directory.
+            The actual dataset memory-mapped files will be saved under
+            `<root>/<dataset_id>`. If none is provided, it defaults to
+            `~/.cache/torchrl/vd4rl`.
+        download (bool or str, optional): Whether the dataset should be downloaded if
+            not found. Defaults to ``True``. Download can also be passed as ``"force"``,
+            in which case the downloaded data will be overwritten.
+        sampler (Sampler, optional): the sampler to be used. If none is provided
+            a default RandomSampler() will be used.
+        writer (Writer, optional): the writer to be used. If none is provided
+            a default :class:`~torchrl.data.replay_buffers.writers.ImmutableDatasetWriter` will be used.
+        collate_fn (callable, optional): merges a list of samples to form a
+            mini-batch of Tensor(s)/outputs.  Used when using batched
+            loading from a map-style dataset.
+        pin_memory (bool): whether pin_memory() should be called on the rb
+            samples.
+        prefetch (int, optional): number of next batches to be prefetched
+            using multithreading.
+        transform (Transform, optional): Transform to be executed when sample() is called.
+            To chain transforms use the :class:`~torchrl.envs.transforms.transforms.Compose` class.
+        split_trajs (bool, optional): if ``True``, the trajectories will be split
+            along the first dimension and padded to have a matching shape.
+            To split the trajectories, the ``"done"`` signal will be used, which
+            is recovered via ``done = truncated | terminated``. In other words,
+            it is assumed that any ``truncated`` or ``terminated`` signal is
+            equivalent to the end of a trajectory. For some datasets from
+            ``D4RL``, this may not be true. It is up to the user to make
+            accurate choices regarding this usage of ``split_trajs``.
+            Defaults to ``False``.
+        totensor (bool, optional): if ``True``, a :class:`~torchrl.envs.transforms.ToTensorImage`
+            transform will be included in the transform list (if not automatically
+            detected). Defaults to ``True``.
+        image_size (int, list of ints or None): if not ``None``, this argument
+            will be used to create a :class:`~torchrl.envs.transforms.Resize`
+            transform that will be appended to the transform list. Supports
+            `int` types (square resizing) or a list/tuple of `int` (rectangular
+            resizing). Defaults to ``None`` (no resizing).
+        num_workers (int, optional): the number of workers to download the files.
+            Defaults to ``0`` (no multiprocessing).
+    Attributes:
+        available_datasets: a list of accepted entries to be downloaded. These
+            names correspond to the directory path in the huggingface dataset
+            repository. If possible, the list will be dynamically retrieved from
+            huggingface. If no internet connection is available, a cached
+            version will be used.
+    .. note:: Since not all experience replay datasets have start and stop
+        signals, we do not mark the episodes in the retrieved dataset.
+    Examples:
+        >>> import torch
+        >>> torch.manual_seed(0)
+        >>> from torchrl.data.datasets import VD4RLExperienceReplay
+        >>> d = VD4RLExperienceReplay("main/walker_walk/random/64px", batch_size=32,
+        ...     image_size=50)
+        >>> for batch in d:
+        ...     break
+        >>> print(batch)
+        TensorDict(
+            fields={
+                action: Tensor(shape=torch.Size([32, 6]), device=cpu, dtype=torch.float32, is_shared=False),
+                done: Tensor(shape=torch.Size([32, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                index: Tensor(shape=torch.Size([32]), device=cpu, dtype=torch.int64, is_shared=False),
+                is_init: Tensor(shape=torch.Size([32]), device=cpu, dtype=torch.bool, is_shared=False),
+                next: TensorDict(
+                    fields={
+                        done: Tensor(shape=torch.Size([32, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                        observation: TensorDict(
+                            fields={
+                                height: Tensor(shape=torch.Size([32]), device=cpu, dtype=torch.float32, is_shared=False),
+                                orientations: Tensor(shape=torch.Size([32, 14]), device=cpu, dtype=torch.float32, is_shared=False),
+                                velocity: Tensor(shape=torch.Size([32, 9]), device=cpu, dtype=torch.float32, is_shared=False)},
+                            batch_size=torch.Size([32]),
+                            device=cpu,
+                            is_shared=False),
+                        pixels: Tensor(shape=torch.Size([32, 3, 50, 50]), device=cpu, dtype=torch.float32, is_shared=False),
+                        reward: Tensor(shape=torch.Size([32, 1]), device=cpu, dtype=torch.float32, is_shared=False),
+                        terminated: Tensor(shape=torch.Size([32, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                        truncated: Tensor(shape=torch.Size([32, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
+                    batch_size=torch.Size([32]),
+                    device=cpu,
+                    is_shared=False),
+                observation: TensorDict(
+                    fields={
+                        height: Tensor(shape=torch.Size([32]), device=cpu, dtype=torch.float32, is_shared=False),
+                        orientations: Tensor(shape=torch.Size([32, 14]), device=cpu, dtype=torch.float32, is_shared=False),
+                        velocity: Tensor(shape=torch.Size([32, 9]), device=cpu, dtype=torch.float32, is_shared=False)},
+                    batch_size=torch.Size([32]),
+                    device=cpu,
+                    is_shared=False),
+                pixels: Tensor(shape=torch.Size([32, 3, 50, 50]), device=cpu, dtype=torch.float32, is_shared=False),
+                terminated: Tensor(shape=torch.Size([32, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                truncated: Tensor(shape=torch.Size([32, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
+            batch_size=torch.Size([32]),
+            device=cpu,
+            is_shared=False)
+    """
+    def __init__(
+        self,
+        dataset_id,
+        batch_size: int,
+        *,
+        root: str | Path | None = None,
+        download: bool = True,
+        sampler: Sampler | None = None,
+        writer: Writer | None = None,
+        collate_fn: Callable | None = None,
+        pin_memory: bool = False,
+        prefetch: int | None = None,
+        transform: torchrl.envs.Transform | None = None,  # noqa-F821
+        split_trajs: bool = False,
+        totensor: bool = True,
+        image_size: int | list[int] | None = None,
+        num_workers: int = 0,
+        **env_kwargs,
+    ):
+        if not _has_h5py or not _has_hf_hub:
+            raise ImportError(
+                "h5py and huggingface_hub are required for V-D4RL datasets."
+            )
+        if dataset_id not in self.available_datasets:
+            raise ValueError(
+                f"The dataset_id {dataset_id} isn't part of the accepted datasets. "
+                f"To check which dataset can be downloaded, call `{type(self)}.available_datasets`."
+            )
+        self.dataset_id = dataset_id
+        if root is None:
+            root = _get_root_dir("vd4rl")
+            os.makedirs(root, exist_ok=True)
+        self.root = root
+        self.split_trajs = split_trajs
+        self.download = download
+        self.num_workers = num_workers
+        if self.download == "force" or (self.download and not self._is_downloaded()):
+            if self.download == "force":
+                try:
+                    if os.path.exists(self.data_path_root):
+                        shutil.rmtree(self.data_path_root)
+                    if self.data_path != self.data_path_root:
+                        shutil.rmtree(self.data_path)
+                except FileNotFoundError:
+                    pass
+            storage = self._download_and_preproc(
+                dataset_id, data_path=self.data_path, num_workers=self.num_workers
+            )
+        elif self.split_trajs and not os.path.exists(self.data_path):
+            storage = self._make_split()
+        else:
+            storage = self._load()
+        if totensor and transform is None:
+            transform = ToTensorImage(
+                in_keys=["pixels", ("next", "pixels")], shape_tolerant=True
+            )
+        elif totensor and (
+            not isinstance(transform, Compose)
+            or not any(isinstance(t, ToTensorImage) for t in transform)
+        ):
+            transform = Compose(
+                transform,
+                ToTensorImage(
+                    in_keys=["pixels", ("next", "pixels")], shape_tolerant=True
+                ),
+            )
+        if image_size is not None:
+            transform = Compose(
+                transform, Resize(image_size, in_keys=["pixels", ("next", "pixels")])
+            )
+        storage = TensorStorage(storage)
+        if writer is None:
+            writer = ImmutableDatasetWriter()
+        super().__init__(
+            storage=storage,
+            sampler=sampler,
+            writer=writer,
+            collate_fn=collate_fn,
+            pin_memory=pin_memory,
+            prefetch=prefetch,
+            transform=transform,
+            batch_size=batch_size,
+        )
+    @classmethod
+    def _parse_datasets(cls):
+        from huggingface_hub import HfApi
+        dataset = HfApi().dataset_info("conglu/vd4rl")
+        sibs = defaultdict(list)
+        for sib in dataset.siblings:
+            if sib.rfilename.endswith("npz") or sib.rfilename.endswith("hdf5"):
+                path = Path(sib.rfilename)
+                sibs[path.parent].append(path)
+        return sibs
+    @classmethod
+    def _hf_hub_download(cls, subfolder, filename, *, tmpdir):
+        from huggingface_hub import hf_hub_download
+        return hf_hub_download(
+            "conglu/vd4rl",
+            subfolder=subfolder,
+            filename=filename,
+            repo_type="dataset",
+            cache_dir=str(tmpdir),
+        )
+    @classmethod
+    def _download_and_preproc(cls, dataset_id, data_path, num_workers):
+        tds = []
+        with tempfile.TemporaryDirectory() as tmpdir:
+            sibs = cls._parse_datasets()
+            total_steps = 0
+            paths_to_proc = []
+            files_to_proc = []
+            for path in sibs:
+                if dataset_id not in str(path):
+                    continue
+                for file in sibs[path]:
+                    paths_to_proc.append(str(path))
+                    files_to_proc.append(str(file.parts[-1]))
+            func = functools.partial(cls._hf_hub_download, tmpdir=tmpdir)
+            if num_workers > 0:
+                with mp.Pool(num_workers) as pool:
+                    files = pool.starmap(
+                        func,
+                        zip(paths_to_proc, files_to_proc),
+                    )
+                    files = list(files)
+            else:
+                files = [
+                    func(subfolder, filename)
+                    for (subfolder, filename) in zip(paths_to_proc, files_to_proc)
+                ]
+            torchrl_logger.info("Downloaded, processing files")
+            if _has_tqdm:
+                import tqdm
+                pbar = tqdm.tqdm(files)
+            else:
+                pbar = files
+            for local_path in pbar:
+                if _has_tqdm:
+                    pbar.set_description(f"file={local_path}")
+                # we memmap temporarily the files for faster access later
+                if local_path.endswith("hdf5"):
+                    td = (
+                        PersistentTensorDict.from_h5(local_path)
+                        .to_tensordict()
+                        .memmap(num_threads=32)
+                    )
+                else:
+                    td = _from_npz(local_path).memmap(num_threads=32)
+                td.unlock_()
+                if total_steps == 0:
+                    tdc = cls._process_data(td.clone())
+                    td_save = tdc[0]
+                tds.append(td)
+                total_steps += td.shape[0]
+        # From this point, the local paths are non needed anymore
+        td_save = td_save.expand(total_steps).memmap_like(data_path, num_threads=32)
+        torchrl_logger.info(f"Saved tensordict: {td_save}")
+        idx0 = 0
+        idx1 = 0
+        while len(files):
+            _ = files.pop(0)
+            td = tds.pop(0)
+            td = cls._process_data(td)
+            idx1 += td.shape[0]
+            td_save[idx0:idx1] = td
+            idx0 = idx1
+        return td_save
+    @classmethod
+    def _process_data(cls, td: TensorDict):
+        """Rename the entries of a raw tensordict and build its ``"next"`` entries.
+
+        The tensordict is modified through ``rename_key_`` / ``set`` calls and
+        then returned.
+
+        Args:
+            td (TensorDict): data as loaded from one npz / hdf5 file.
+
+        Returns:
+            the same ``td`` object, with renamed keys, reward / done entries
+            given a trailing singleton dimension, and ``("next", "pixels")``
+            (plus ``("next", "state")`` when a ``"state"`` entry exists)
+            filled from the following step.
+        """
+        # list(...) takes a snapshot of the keys: the loop renames entries
+        for name in list(td.keys()):
+            # move remaining data
+            if name not in _NAME_MATCH:
+                # keys without an entry in _NAME_MATCH are nested under "state"
+                td.rename_key_(name, ("state", name))
+            elif name != _NAME_MATCH[name]:
+                td.rename_key_(name, _NAME_MATCH[name])
+        if ("next", "reward") in td.keys(True):
+            # add a trailing singleton dimension to the reward
+            td.set(("next", "reward"), td.get(("next", "reward")).unsqueeze(-1))
+        if ("next", "done") in td.keys(True) and ("next", "terminated") in td.keys(
+            True
+        ):
+            # first unsqueeze
+            td.set(("next", "done"), td.get(("next", "done")).unsqueeze(-1))
+            td.set(("next", "terminated"), td.get(("next", "terminated")).unsqueeze(-1))
+            # create root vals
+            td.set("done", torch.zeros_like(td.get(("next", "done"))))
+            td.set("terminated", torch.zeros_like(td.get(("next", "terminated"))))
+            # Add truncated
+            # (a step is truncated when it is done without being terminated)
+            td.set(
+                ("next", "truncated"),
+                td.get(("next", "done")) & ~td.get(("next", "terminated")),
+            )
+            td.set("truncated", torch.zeros_like(td.get(("next", "truncated"))))
+        pixels = td.get("pixels")
+        # every step but the last receives the pixels of the step that follows
+        # it; presumably the sub-tensordict writes through to ``td`` - confirm
+        subtd = td._get_sub_tensordict(slice(0, -1))
+        subtd.set(("next", "pixels"), pixels[1:], inplace=True)
+        state = td.get("state", None)
+        if state is not None:
+            # same one-step shift for the entries gathered under "state"
+            subtd.set(("next", "state"), state[1:], inplace=True)
+        return td
+    @_classproperty
+    def available_datasets(cls):
+        return cls._available_datasets()
+    @classmethod
+    def _available_datasets(cls):
+        # try to gather paths from hf
+        try:
+            sibs = cls._parse_datasets()
+            return [str(path)[6:] for path in sibs]
+        except Exception:
+            # return the default datasets
+            with open(THIS_DIR / "vd4rl.json") as file:
+                return json.load(file)
+    def _make_split(self):
+        from torchrl.collectors.utils import split_trajectories
+        td_data = TensorDict.load_memmap(self.data_path_root)
+        td_data = split_trajectories(td_data).memmap_(self.data_path)
+        return td_data
+    def _load(self):
+        return TensorDict.load_memmap(self.data_path)
+    @property
+    def data_path(self):
+        if self.split_trajs:
+            return Path(self.root) / (self.dataset_id + "_split")
+        return self.data_path_root
+    @property
+    def data_path_root(self):
+        return Path(self.root) / self.dataset_id
+    def _is_downloaded(self):
+        return os.path.exists(self.data_path_root)
+def _from_npz(npz_path):
+    npz = np.load(npz_path)
+    npz_dict = {file: npz[file] for file in npz.files}
+    return TensorDict.from_dict(npz_dict, auto_batch_size=True)
+# Maps the key names found in the raw files to the names used in the processed
+# tensordict (a tuple is a nested key). ``_process_data`` renames a key only
+# when it is listed here under a different name, and nests every unlisted key
+# under "state".
+# NOTE(review): the ``lambda x: x`` factory presumably makes a missing key map
+# to itself on item access - confirm against KeyDependentDefaultDict.
+_NAME_MATCH = KeyDependentDefaultDict(lambda x: x)
+_NAME_MATCH.update(
+    {
+        "is_first": "is_init",
+        "is_last": ("next", "done"),
+        "is_terminal": ("next", "terminated"),
+        "reward": ("next", "reward"),
+        # both "image" and "observation" end up under "pixels"
+        "image": "pixels",
+        "observation": "pixels",
+        "discount": "discount",
+        "action": "action",
+    }
+)

torchrl/data/llm/__init__.py ADDED Viewed

@@ -0,0 +1,34 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from .dataset import (
+    create_infinite_iterator,
+    get_dataloader,
+    TensorDictTokenizer,
+    TokenizedDatasetLoader,
+)
+from .history import add_chat_template, ContentBase, History
+from .prompt import PromptData, PromptTensorDictTokenizer
+from .reward import PairwiseDataset, RewardData
+from .topk import TopKRewardSelector
+from .utils import AdaptiveKLController, ConstantKLController, RolloutFromModel
+# Names exported by ``from torchrl.data.llm import *``; each of them is
+# imported from one of the submodules above.
+__all__ = [
+    "AdaptiveKLController",
+    "ConstantKLController",
+    "ContentBase",
+    "History",
+    "PairwiseDataset",
+    "PromptData",
+    "add_chat_template",
+    "PromptTensorDictTokenizer",
+    "RewardData",
+    "RolloutFromModel",
+    "TensorDictTokenizer",
+    "TokenizedDatasetLoader",
+    "create_infinite_iterator",
+    "get_dataloader",
+    "TopKRewardSelector",
+]