torchrl 0.11.0__cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395) hide show
  1. benchmarks/benchmark_batched_envs.py +104 -0
  2. benchmarks/conftest.py +91 -0
  3. benchmarks/ecosystem/gym_env_throughput.py +321 -0
  4. benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
  5. benchmarks/requirements.txt +7 -0
  6. benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
  7. benchmarks/test_collectors_benchmark.py +240 -0
  8. benchmarks/test_compressed_storage_benchmark.py +145 -0
  9. benchmarks/test_envs_benchmark.py +133 -0
  10. benchmarks/test_llm.py +101 -0
  11. benchmarks/test_non_tensor_env_benchmark.py +70 -0
  12. benchmarks/test_objectives_benchmarks.py +1199 -0
  13. benchmarks/test_replaybuffer_benchmark.py +254 -0
  14. sota-check/README.md +35 -0
  15. sota-implementations/README.md +142 -0
  16. sota-implementations/a2c/README.md +39 -0
  17. sota-implementations/a2c/a2c_atari.py +291 -0
  18. sota-implementations/a2c/a2c_mujoco.py +273 -0
  19. sota-implementations/a2c/utils_atari.py +240 -0
  20. sota-implementations/a2c/utils_mujoco.py +160 -0
  21. sota-implementations/bandits/README.md +7 -0
  22. sota-implementations/bandits/dqn.py +126 -0
  23. sota-implementations/cql/cql_offline.py +198 -0
  24. sota-implementations/cql/cql_online.py +249 -0
  25. sota-implementations/cql/discrete_cql_offline.py +180 -0
  26. sota-implementations/cql/discrete_cql_online.py +227 -0
  27. sota-implementations/cql/utils.py +471 -0
  28. sota-implementations/crossq/crossq.py +271 -0
  29. sota-implementations/crossq/utils.py +320 -0
  30. sota-implementations/ddpg/ddpg.py +231 -0
  31. sota-implementations/ddpg/utils.py +325 -0
  32. sota-implementations/decision_transformer/dt.py +163 -0
  33. sota-implementations/decision_transformer/lamb.py +167 -0
  34. sota-implementations/decision_transformer/online_dt.py +178 -0
  35. sota-implementations/decision_transformer/utils.py +562 -0
  36. sota-implementations/discrete_sac/discrete_sac.py +243 -0
  37. sota-implementations/discrete_sac/utils.py +324 -0
  38. sota-implementations/dqn/README.md +30 -0
  39. sota-implementations/dqn/dqn_atari.py +272 -0
  40. sota-implementations/dqn/dqn_cartpole.py +236 -0
  41. sota-implementations/dqn/utils_atari.py +132 -0
  42. sota-implementations/dqn/utils_cartpole.py +90 -0
  43. sota-implementations/dreamer/README.md +129 -0
  44. sota-implementations/dreamer/dreamer.py +586 -0
  45. sota-implementations/dreamer/dreamer_utils.py +1107 -0
  46. sota-implementations/expert-iteration/README.md +352 -0
  47. sota-implementations/expert-iteration/ei_utils.py +770 -0
  48. sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
  49. sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
  50. sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
  51. sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
  52. sota-implementations/gail/gail.py +327 -0
  53. sota-implementations/gail/gail_utils.py +68 -0
  54. sota-implementations/gail/ppo_utils.py +157 -0
  55. sota-implementations/grpo/README.md +273 -0
  56. sota-implementations/grpo/grpo-async.py +437 -0
  57. sota-implementations/grpo/grpo-sync.py +435 -0
  58. sota-implementations/grpo/grpo_utils.py +843 -0
  59. sota-implementations/grpo/requirements_gsm8k.txt +11 -0
  60. sota-implementations/grpo/requirements_ifeval.txt +16 -0
  61. sota-implementations/impala/README.md +33 -0
  62. sota-implementations/impala/impala_multi_node_ray.py +292 -0
  63. sota-implementations/impala/impala_multi_node_submitit.py +284 -0
  64. sota-implementations/impala/impala_single_node.py +261 -0
  65. sota-implementations/impala/utils.py +184 -0
  66. sota-implementations/iql/discrete_iql.py +230 -0
  67. sota-implementations/iql/iql_offline.py +164 -0
  68. sota-implementations/iql/iql_online.py +225 -0
  69. sota-implementations/iql/utils.py +437 -0
  70. sota-implementations/multiagent/README.md +74 -0
  71. sota-implementations/multiagent/iql.py +237 -0
  72. sota-implementations/multiagent/maddpg_iddpg.py +266 -0
  73. sota-implementations/multiagent/mappo_ippo.py +267 -0
  74. sota-implementations/multiagent/qmix_vdn.py +271 -0
  75. sota-implementations/multiagent/sac.py +337 -0
  76. sota-implementations/multiagent/utils/__init__.py +4 -0
  77. sota-implementations/multiagent/utils/logging.py +151 -0
  78. sota-implementations/multiagent/utils/utils.py +43 -0
  79. sota-implementations/ppo/README.md +29 -0
  80. sota-implementations/ppo/ppo_atari.py +305 -0
  81. sota-implementations/ppo/ppo_mujoco.py +293 -0
  82. sota-implementations/ppo/utils_atari.py +238 -0
  83. sota-implementations/ppo/utils_mujoco.py +152 -0
  84. sota-implementations/ppo_trainer/train.py +21 -0
  85. sota-implementations/redq/README.md +7 -0
  86. sota-implementations/redq/redq.py +199 -0
  87. sota-implementations/redq/utils.py +1060 -0
  88. sota-implementations/sac/sac-async.py +266 -0
  89. sota-implementations/sac/sac.py +239 -0
  90. sota-implementations/sac/utils.py +381 -0
  91. sota-implementations/sac_trainer/train.py +16 -0
  92. sota-implementations/td3/td3.py +254 -0
  93. sota-implementations/td3/utils.py +319 -0
  94. sota-implementations/td3_bc/td3_bc.py +177 -0
  95. sota-implementations/td3_bc/utils.py +251 -0
  96. torchrl/.dylibs/libc++.1.0.dylib +0 -0
  97. torchrl/__init__.py +144 -0
  98. torchrl/_extension.py +74 -0
  99. torchrl/_torchrl.cpython-314-darwin.so +0 -0
  100. torchrl/_utils.py +1431 -0
  101. torchrl/collectors/__init__.py +48 -0
  102. torchrl/collectors/_base.py +1058 -0
  103. torchrl/collectors/_constants.py +88 -0
  104. torchrl/collectors/_multi_async.py +324 -0
  105. torchrl/collectors/_multi_base.py +1805 -0
  106. torchrl/collectors/_multi_sync.py +464 -0
  107. torchrl/collectors/_runner.py +581 -0
  108. torchrl/collectors/_single.py +2009 -0
  109. torchrl/collectors/_single_async.py +259 -0
  110. torchrl/collectors/collectors.py +62 -0
  111. torchrl/collectors/distributed/__init__.py +32 -0
  112. torchrl/collectors/distributed/default_configs.py +133 -0
  113. torchrl/collectors/distributed/generic.py +1306 -0
  114. torchrl/collectors/distributed/ray.py +1092 -0
  115. torchrl/collectors/distributed/rpc.py +1006 -0
  116. torchrl/collectors/distributed/sync.py +731 -0
  117. torchrl/collectors/distributed/utils.py +160 -0
  118. torchrl/collectors/llm/__init__.py +10 -0
  119. torchrl/collectors/llm/base.py +494 -0
  120. torchrl/collectors/llm/ray_collector.py +275 -0
  121. torchrl/collectors/llm/utils.py +36 -0
  122. torchrl/collectors/llm/weight_update/__init__.py +10 -0
  123. torchrl/collectors/llm/weight_update/vllm.py +348 -0
  124. torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
  125. torchrl/collectors/utils.py +433 -0
  126. torchrl/collectors/weight_update.py +591 -0
  127. torchrl/csrc/numpy_utils.h +38 -0
  128. torchrl/csrc/pybind.cpp +27 -0
  129. torchrl/csrc/segment_tree.h +458 -0
  130. torchrl/csrc/torch_utils.h +34 -0
  131. torchrl/csrc/utils.cpp +48 -0
  132. torchrl/csrc/utils.h +31 -0
  133. torchrl/data/__init__.py +187 -0
  134. torchrl/data/datasets/__init__.py +58 -0
  135. torchrl/data/datasets/atari_dqn.py +878 -0
  136. torchrl/data/datasets/common.py +281 -0
  137. torchrl/data/datasets/d4rl.py +489 -0
  138. torchrl/data/datasets/d4rl_infos.py +187 -0
  139. torchrl/data/datasets/gen_dgrl.py +375 -0
  140. torchrl/data/datasets/minari_data.py +643 -0
  141. torchrl/data/datasets/openml.py +177 -0
  142. torchrl/data/datasets/openx.py +798 -0
  143. torchrl/data/datasets/roboset.py +363 -0
  144. torchrl/data/datasets/utils.py +11 -0
  145. torchrl/data/datasets/vd4rl.py +432 -0
  146. torchrl/data/llm/__init__.py +34 -0
  147. torchrl/data/llm/dataset.py +491 -0
  148. torchrl/data/llm/history.py +1378 -0
  149. torchrl/data/llm/prompt.py +198 -0
  150. torchrl/data/llm/reward.py +225 -0
  151. torchrl/data/llm/topk.py +186 -0
  152. torchrl/data/llm/utils.py +543 -0
  153. torchrl/data/map/__init__.py +21 -0
  154. torchrl/data/map/hash.py +185 -0
  155. torchrl/data/map/query.py +204 -0
  156. torchrl/data/map/tdstorage.py +363 -0
  157. torchrl/data/map/tree.py +1434 -0
  158. torchrl/data/map/utils.py +103 -0
  159. torchrl/data/postprocs/__init__.py +8 -0
  160. torchrl/data/postprocs/postprocs.py +391 -0
  161. torchrl/data/replay_buffers/__init__.py +99 -0
  162. torchrl/data/replay_buffers/checkpointers.py +622 -0
  163. torchrl/data/replay_buffers/ray_buffer.py +292 -0
  164. torchrl/data/replay_buffers/replay_buffers.py +2376 -0
  165. torchrl/data/replay_buffers/samplers.py +2578 -0
  166. torchrl/data/replay_buffers/scheduler.py +265 -0
  167. torchrl/data/replay_buffers/storages.py +2412 -0
  168. torchrl/data/replay_buffers/utils.py +1042 -0
  169. torchrl/data/replay_buffers/writers.py +781 -0
  170. torchrl/data/tensor_specs.py +7101 -0
  171. torchrl/data/utils.py +334 -0
  172. torchrl/envs/__init__.py +265 -0
  173. torchrl/envs/async_envs.py +1105 -0
  174. torchrl/envs/batched_envs.py +3093 -0
  175. torchrl/envs/common.py +4241 -0
  176. torchrl/envs/custom/__init__.py +11 -0
  177. torchrl/envs/custom/chess.py +617 -0
  178. torchrl/envs/custom/llm.py +214 -0
  179. torchrl/envs/custom/pendulum.py +401 -0
  180. torchrl/envs/custom/san_moves.txt +29274 -0
  181. torchrl/envs/custom/tictactoeenv.py +288 -0
  182. torchrl/envs/env_creator.py +263 -0
  183. torchrl/envs/gym_like.py +752 -0
  184. torchrl/envs/libs/__init__.py +68 -0
  185. torchrl/envs/libs/_gym_utils.py +326 -0
  186. torchrl/envs/libs/brax.py +846 -0
  187. torchrl/envs/libs/dm_control.py +544 -0
  188. torchrl/envs/libs/envpool.py +447 -0
  189. torchrl/envs/libs/gym.py +2239 -0
  190. torchrl/envs/libs/habitat.py +138 -0
  191. torchrl/envs/libs/isaac_lab.py +87 -0
  192. torchrl/envs/libs/isaacgym.py +203 -0
  193. torchrl/envs/libs/jax_utils.py +166 -0
  194. torchrl/envs/libs/jumanji.py +963 -0
  195. torchrl/envs/libs/meltingpot.py +599 -0
  196. torchrl/envs/libs/openml.py +153 -0
  197. torchrl/envs/libs/openspiel.py +652 -0
  198. torchrl/envs/libs/pettingzoo.py +1042 -0
  199. torchrl/envs/libs/procgen.py +351 -0
  200. torchrl/envs/libs/robohive.py +429 -0
  201. torchrl/envs/libs/smacv2.py +645 -0
  202. torchrl/envs/libs/unity_mlagents.py +891 -0
  203. torchrl/envs/libs/utils.py +147 -0
  204. torchrl/envs/libs/vmas.py +813 -0
  205. torchrl/envs/llm/__init__.py +63 -0
  206. torchrl/envs/llm/chat.py +730 -0
  207. torchrl/envs/llm/datasets/README.md +4 -0
  208. torchrl/envs/llm/datasets/__init__.py +17 -0
  209. torchrl/envs/llm/datasets/gsm8k.py +353 -0
  210. torchrl/envs/llm/datasets/ifeval.py +274 -0
  211. torchrl/envs/llm/envs.py +789 -0
  212. torchrl/envs/llm/libs/README.md +3 -0
  213. torchrl/envs/llm/libs/__init__.py +8 -0
  214. torchrl/envs/llm/libs/mlgym.py +869 -0
  215. torchrl/envs/llm/reward/__init__.py +10 -0
  216. torchrl/envs/llm/reward/gsm8k.py +324 -0
  217. torchrl/envs/llm/reward/ifeval/README.md +13 -0
  218. torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
  219. torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
  220. torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
  221. torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
  222. torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
  223. torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
  224. torchrl/envs/llm/transforms/__init__.py +55 -0
  225. torchrl/envs/llm/transforms/browser.py +292 -0
  226. torchrl/envs/llm/transforms/dataloading.py +859 -0
  227. torchrl/envs/llm/transforms/format.py +73 -0
  228. torchrl/envs/llm/transforms/kl.py +1544 -0
  229. torchrl/envs/llm/transforms/policy_version.py +189 -0
  230. torchrl/envs/llm/transforms/reason.py +323 -0
  231. torchrl/envs/llm/transforms/tokenizer.py +321 -0
  232. torchrl/envs/llm/transforms/tools.py +1955 -0
  233. torchrl/envs/model_based/__init__.py +9 -0
  234. torchrl/envs/model_based/common.py +180 -0
  235. torchrl/envs/model_based/dreamer.py +112 -0
  236. torchrl/envs/transforms/__init__.py +147 -0
  237. torchrl/envs/transforms/functional.py +48 -0
  238. torchrl/envs/transforms/gym_transforms.py +203 -0
  239. torchrl/envs/transforms/module.py +341 -0
  240. torchrl/envs/transforms/r3m.py +372 -0
  241. torchrl/envs/transforms/ray_service.py +663 -0
  242. torchrl/envs/transforms/rb_transforms.py +214 -0
  243. torchrl/envs/transforms/transforms.py +11835 -0
  244. torchrl/envs/transforms/utils.py +94 -0
  245. torchrl/envs/transforms/vc1.py +307 -0
  246. torchrl/envs/transforms/vecnorm.py +845 -0
  247. torchrl/envs/transforms/vip.py +407 -0
  248. torchrl/envs/utils.py +1718 -0
  249. torchrl/envs/vec_envs.py +11 -0
  250. torchrl/modules/__init__.py +206 -0
  251. torchrl/modules/distributions/__init__.py +73 -0
  252. torchrl/modules/distributions/continuous.py +830 -0
  253. torchrl/modules/distributions/discrete.py +908 -0
  254. torchrl/modules/distributions/truncated_normal.py +187 -0
  255. torchrl/modules/distributions/utils.py +233 -0
  256. torchrl/modules/llm/__init__.py +62 -0
  257. torchrl/modules/llm/backends/__init__.py +65 -0
  258. torchrl/modules/llm/backends/vllm/__init__.py +94 -0
  259. torchrl/modules/llm/backends/vllm/_models.py +46 -0
  260. torchrl/modules/llm/backends/vllm/base.py +72 -0
  261. torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
  262. torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
  263. torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
  264. torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
  265. torchrl/modules/llm/policies/__init__.py +28 -0
  266. torchrl/modules/llm/policies/common.py +1809 -0
  267. torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
  268. torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
  269. torchrl/modules/llm/utils.py +23 -0
  270. torchrl/modules/mcts/__init__.py +21 -0
  271. torchrl/modules/mcts/scores.py +579 -0
  272. torchrl/modules/models/__init__.py +86 -0
  273. torchrl/modules/models/batchrenorm.py +119 -0
  274. torchrl/modules/models/decision_transformer.py +179 -0
  275. torchrl/modules/models/exploration.py +731 -0
  276. torchrl/modules/models/llm.py +156 -0
  277. torchrl/modules/models/model_based.py +596 -0
  278. torchrl/modules/models/models.py +1712 -0
  279. torchrl/modules/models/multiagent.py +1067 -0
  280. torchrl/modules/models/recipes/impala.py +185 -0
  281. torchrl/modules/models/utils.py +162 -0
  282. torchrl/modules/planners/__init__.py +10 -0
  283. torchrl/modules/planners/cem.py +228 -0
  284. torchrl/modules/planners/common.py +73 -0
  285. torchrl/modules/planners/mppi.py +265 -0
  286. torchrl/modules/tensordict_module/__init__.py +89 -0
  287. torchrl/modules/tensordict_module/actors.py +2457 -0
  288. torchrl/modules/tensordict_module/common.py +529 -0
  289. torchrl/modules/tensordict_module/exploration.py +814 -0
  290. torchrl/modules/tensordict_module/probabilistic.py +321 -0
  291. torchrl/modules/tensordict_module/rnn.py +1639 -0
  292. torchrl/modules/tensordict_module/sequence.py +132 -0
  293. torchrl/modules/tensordict_module/world_models.py +34 -0
  294. torchrl/modules/utils/__init__.py +38 -0
  295. torchrl/modules/utils/mappings.py +9 -0
  296. torchrl/modules/utils/utils.py +89 -0
  297. torchrl/objectives/__init__.py +78 -0
  298. torchrl/objectives/a2c.py +659 -0
  299. torchrl/objectives/common.py +753 -0
  300. torchrl/objectives/cql.py +1346 -0
  301. torchrl/objectives/crossq.py +710 -0
  302. torchrl/objectives/ddpg.py +453 -0
  303. torchrl/objectives/decision_transformer.py +371 -0
  304. torchrl/objectives/deprecated.py +516 -0
  305. torchrl/objectives/dqn.py +683 -0
  306. torchrl/objectives/dreamer.py +488 -0
  307. torchrl/objectives/functional.py +48 -0
  308. torchrl/objectives/gail.py +258 -0
  309. torchrl/objectives/iql.py +996 -0
  310. torchrl/objectives/llm/__init__.py +30 -0
  311. torchrl/objectives/llm/grpo.py +846 -0
  312. torchrl/objectives/llm/sft.py +482 -0
  313. torchrl/objectives/multiagent/__init__.py +8 -0
  314. torchrl/objectives/multiagent/qmixer.py +396 -0
  315. torchrl/objectives/ppo.py +1669 -0
  316. torchrl/objectives/redq.py +683 -0
  317. torchrl/objectives/reinforce.py +530 -0
  318. torchrl/objectives/sac.py +1580 -0
  319. torchrl/objectives/td3.py +570 -0
  320. torchrl/objectives/td3_bc.py +625 -0
  321. torchrl/objectives/utils.py +782 -0
  322. torchrl/objectives/value/__init__.py +28 -0
  323. torchrl/objectives/value/advantages.py +1956 -0
  324. torchrl/objectives/value/functional.py +1459 -0
  325. torchrl/objectives/value/utils.py +360 -0
  326. torchrl/record/__init__.py +17 -0
  327. torchrl/record/loggers/__init__.py +23 -0
  328. torchrl/record/loggers/common.py +48 -0
  329. torchrl/record/loggers/csv.py +226 -0
  330. torchrl/record/loggers/mlflow.py +142 -0
  331. torchrl/record/loggers/tensorboard.py +139 -0
  332. torchrl/record/loggers/trackio.py +163 -0
  333. torchrl/record/loggers/utils.py +78 -0
  334. torchrl/record/loggers/wandb.py +214 -0
  335. torchrl/record/recorder.py +554 -0
  336. torchrl/services/__init__.py +79 -0
  337. torchrl/services/base.py +109 -0
  338. torchrl/services/ray_service.py +453 -0
  339. torchrl/testing/__init__.py +107 -0
  340. torchrl/testing/assertions.py +179 -0
  341. torchrl/testing/dist_utils.py +122 -0
  342. torchrl/testing/env_creators.py +227 -0
  343. torchrl/testing/env_helper.py +35 -0
  344. torchrl/testing/gym_helpers.py +156 -0
  345. torchrl/testing/llm_mocks.py +119 -0
  346. torchrl/testing/mocking_classes.py +2720 -0
  347. torchrl/testing/modules.py +295 -0
  348. torchrl/testing/mp_helpers.py +15 -0
  349. torchrl/testing/ray_helpers.py +293 -0
  350. torchrl/testing/utils.py +190 -0
  351. torchrl/trainers/__init__.py +42 -0
  352. torchrl/trainers/algorithms/__init__.py +11 -0
  353. torchrl/trainers/algorithms/configs/__init__.py +705 -0
  354. torchrl/trainers/algorithms/configs/collectors.py +216 -0
  355. torchrl/trainers/algorithms/configs/common.py +41 -0
  356. torchrl/trainers/algorithms/configs/data.py +308 -0
  357. torchrl/trainers/algorithms/configs/envs.py +104 -0
  358. torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
  359. torchrl/trainers/algorithms/configs/logging.py +80 -0
  360. torchrl/trainers/algorithms/configs/modules.py +570 -0
  361. torchrl/trainers/algorithms/configs/objectives.py +177 -0
  362. torchrl/trainers/algorithms/configs/trainers.py +340 -0
  363. torchrl/trainers/algorithms/configs/transforms.py +955 -0
  364. torchrl/trainers/algorithms/configs/utils.py +252 -0
  365. torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
  366. torchrl/trainers/algorithms/configs/weight_update.py +159 -0
  367. torchrl/trainers/algorithms/ppo.py +373 -0
  368. torchrl/trainers/algorithms/sac.py +308 -0
  369. torchrl/trainers/helpers/__init__.py +40 -0
  370. torchrl/trainers/helpers/collectors.py +416 -0
  371. torchrl/trainers/helpers/envs.py +573 -0
  372. torchrl/trainers/helpers/logger.py +33 -0
  373. torchrl/trainers/helpers/losses.py +132 -0
  374. torchrl/trainers/helpers/models.py +658 -0
  375. torchrl/trainers/helpers/replay_buffer.py +59 -0
  376. torchrl/trainers/helpers/trainers.py +301 -0
  377. torchrl/trainers/trainers.py +2052 -0
  378. torchrl/weight_update/__init__.py +33 -0
  379. torchrl/weight_update/_distributed.py +749 -0
  380. torchrl/weight_update/_mp.py +624 -0
  381. torchrl/weight_update/_noupdate.py +102 -0
  382. torchrl/weight_update/_ray.py +1032 -0
  383. torchrl/weight_update/_rpc.py +284 -0
  384. torchrl/weight_update/_shared.py +891 -0
  385. torchrl/weight_update/llm/__init__.py +32 -0
  386. torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
  387. torchrl/weight_update/llm/vllm_nccl.py +710 -0
  388. torchrl/weight_update/utils.py +73 -0
  389. torchrl/weight_update/weight_sync_schemes.py +1244 -0
  390. torchrl-0.11.0.dist-info/METADATA +1308 -0
  391. torchrl-0.11.0.dist-info/RECORD +395 -0
  392. torchrl-0.11.0.dist-info/WHEEL +5 -0
  393. torchrl-0.11.0.dist-info/entry_points.txt +2 -0
  394. torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
  395. torchrl-0.11.0.dist-info/top_level.txt +7 -0
@@ -0,0 +1,845 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+ from __future__ import annotations
6
+
7
+ import math
8
+ import uuid
9
+ import warnings
10
+ from collections import OrderedDict
11
+ from collections.abc import Sequence
12
+ from copy import copy
13
+
14
+ from typing import Any
15
+
16
+ import torch
17
+ from tensordict import NestedKey, TensorDict, TensorDictBase, unravel_key
18
+ from tensordict.utils import _zip_strict
19
+ from torch import multiprocessing as mp
20
+ from torchrl.data.tensor_specs import Bounded, Composite, Unbounded
21
+
22
+ from torchrl.envs.common import EnvBase
23
+ from torchrl.envs.transforms.transforms import Compose, ObservationNorm, Transform
24
+
25
+ from torchrl.envs.transforms.utils import _set_missing_tolerance
26
+
27
+
28
+ class VecNormV2(Transform):
29
+ """A class for normalizing vectorized observations and rewards in reinforcement learning environments.
30
+
31
+ `VecNormV2` can operate in either a stateful or stateless mode. In stateful mode, it maintains
32
+ internal statistics (mean and variance) to normalize inputs. In stateless mode, it requires
33
+ external statistics to be provided for normalization.
34
+
35
+ .. note:: This class is designed to be an almost drop-in replacement for :class:`~torchrl.envs.transforms.VecNorm`.
36
+ It should not be constructed directly, but rather with the :class:`~torchrl.envs.transforms.VecNorm`
37
+ transform using the `new_api=True` keyword argument. In v0.10, the :class:`~torchrl.envs.transforms.VecNorm`
38
+ transform will be switched to the new api by default.
39
+
40
+ Stateful vs. Stateless:
41
+ Stateful Mode (`stateful=True`):
42
+
43
+ - Maintains internal statistics (`loc`, `var`, `count`) for normalization.
44
+ - Updates statistics with each call unless frozen.
45
+ - `state_dict` returns the current statistics.
46
+ - `load_state_dict` updates the internal statistics with the provided state.
47
+
48
+ Stateless Mode (`stateful=False`):
49
+
50
+ - Requires external statistics to be provided for normalization.
51
+ - Does not maintain or update internal statistics.
52
+ - `state_dict` returns an empty dictionary.
53
+ - `load_state_dict` does not affect internal state.
54
+
55
+ Args:
56
+ in_keys (Sequence[NestedKey]): The input keys for the data to be normalized.
57
+ out_keys (Sequence[NestedKey] | None): The output keys for the normalized data. Defaults to `in_keys` if
58
+ not provided.
59
+ lock (mp.Lock, optional): A lock for thread safety.
60
+ stateful (bool, optional): Whether the `VecNorm` is stateful. Stateless versions of this
61
+ transform requires the data to be carried within the input/output tensordicts.
62
+ Defaults to `True`.
63
+ decay (float, optional): The decay rate for updating statistics. Defaults to `0.9999`.
64
+ If `decay=1` is used, the normalizing statistics have an infinite memory (each item is weighed
65
+ identically). Lower values weigh recent data more than old ones.
66
+ eps (float, optional): A small value to prevent division by zero. Defaults to `1e-4`.
67
+ shared_data (TensorDictBase | None, optional): Shared data for initialization. Defaults to `None`.
68
+ reduce_batch_dims (bool, optional): If `True`, the batch dimensions are reduced by averaging the data
69
+ before updating the statistics. This is useful when samples are received in batches, as it allows
70
+ the moving average to be computed over the entire batch rather than individual elements. Note that
71
+ this option is only supported in stateful mode (`stateful=True`). Defaults to `False`.
72
+
73
+ Attributes:
74
+ stateful (bool): Indicates whether the VecNormV2 is stateful or stateless.
75
+ lock (mp.Lock): A multiprocessing lock to ensure thread safety when updating statistics.
76
+ decay (float): The decay rate for updating statistics.
77
+ eps (float): A small value to prevent division by zero during normalization.
78
+ frozen (bool): Indicates whether the VecNormV2 is frozen, preventing updates to statistics.
79
+ _cast_int_to_float (bool): Indicates whether integer inputs should be cast to float.
80
+
81
+ Methods:
82
+ freeze(): Freezes the VecNorm, preventing updates to statistics.
83
+ unfreeze(): Unfreezes the VecNorm, allowing updates to statistics.
84
+ frozen_copy(): Returns a frozen copy of the VecNorm.
85
+ clone(): Returns a clone of the VecNorm.
86
+ transform_observation_spec(observation_spec): Transforms the observation specification.
87
+ transform_reward_spec(reward_spec, observation_spec): Transforms the reward specification.
88
+ transform_output_spec(output_spec): Transforms the output specification.
89
+ to_observation_norm(): Converts the VecNorm to an ObservationNorm transform.
90
+ set_extra_state(state): Sets the extra state for the VecNorm.
91
+ get_extra_state(): Gets the extra state of the VecNorm.
92
+ loc: Returns the location (mean) for normalization.
93
+ scale: Returns the scale (standard deviation) for normalization.
94
+ standard_normal: Indicates whether the normalization follows the standard normal distribution.
95
+
96
+ State Dict Behavior:
97
+
98
+ - In stateful mode, `state_dict` returns a dictionary containing the current `loc`, `var`, and `count`.
99
+ These can be used to share the tensors across processes (this method is automatically triggered by
100
+ :class:`~torchrl.envs.VecNorm` to share the VecNorm states across processes).
101
+ - In stateless mode, `state_dict` returns an empty dictionary as no internal state is maintained.
102
+
103
+ Load State Dict Behavior:
104
+
105
+ - In stateful mode, `load_state_dict` updates the internal `loc`, `var`, and `count` with the provided state.
106
+ - In stateless mode, `load_state_dict` does not modify any internal state as there is none to update.
107
+
108
+ .. seealso:: :class:`~torchrl.envs.transforms.VecNorm` for the first version of this transform.
109
+
110
+ Examples:
111
+ >>> import torch
112
+ >>> from torchrl.envs import EnvCreator, GymEnv, ParallelEnv, SerialEnv, VecNormV2
113
+ >>>
114
+ >>> torch.manual_seed(0)
115
+ >>> env = GymEnv("Pendulum-v1")
116
+ >>> env_trsf = env.append_transform(
117
+ >>> VecNormV2(in_keys=["observation", "reward"], out_keys=["observation_norm", "reward_norm"])
118
+ >>> )
119
+ >>> r = env_trsf.rollout(10)
120
+ >>> print("Unnormalized rewards", r["next", "reward"])
121
+ Unnormalized rewards tensor([[ -1.7967],
122
+ [ -2.1238],
123
+ [ -2.5911],
124
+ [ -3.5275],
125
+ [ -4.8585],
126
+ [ -6.5028],
127
+ [ -8.2505],
128
+ [-10.3169],
129
+ [-12.1332],
130
+ [-13.1235]])
131
+ >>> print("Normalized rewards", r["next", "reward_norm"])
132
+ Normalized rewards tensor([[-1.6596e-04],
133
+ [-8.3072e-02],
134
+ [-1.9170e-01],
135
+ [-3.9255e-01],
136
+ [-5.9131e-01],
137
+ [-7.4671e-01],
138
+ [-8.3760e-01],
139
+ [-9.2058e-01],
140
+ [-9.3484e-01],
141
+ [-8.6185e-01]])
142
+ >>> # Aggregate values when using batched envs
143
+ >>> env = SerialEnv(2, [lambda: GymEnv("Pendulum-v1")] * 2)
144
+ >>> env_trsf = env.append_transform(
145
+ >>> VecNormV2(
146
+ >>> in_keys=["observation", "reward"],
147
+ >>> out_keys=["observation_norm", "reward_norm"],
148
+ >>> # Use reduce_batch_dims=True to aggregate values across batch elements
149
+ >>> reduce_batch_dims=True, )
150
+ >>> )
151
+ >>> r = env_trsf.rollout(10)
152
+ >>> print("Unnormalized rewards", r["next", "reward"])
153
+ Unnormalized rewards tensor([[[-0.1456],
154
+ [-0.1862],
155
+ [-0.2053],
156
+ [-0.2605],
157
+ [-0.4046],
158
+ [-0.5185],
159
+ [-0.8023],
160
+ [-1.1364],
161
+ [-1.6183],
162
+ [-2.5406]],
163
+
164
+ [[-0.0920],
165
+ [-0.1492],
166
+ [-0.2702],
167
+ [-0.3917],
168
+ [-0.5001],
169
+ [-0.7947],
170
+ [-1.0160],
171
+ [-1.3347],
172
+ [-1.9082],
173
+ [-2.9679]]])
174
+ >>> print("Normalized rewards", r["next", "reward_norm"])
175
+ Normalized rewards tensor([[[-0.2199],
176
+ [-0.2918],
177
+ [-0.1668],
178
+ [-0.2083],
179
+ [-0.4981],
180
+ [-0.5046],
181
+ [-0.7950],
182
+ [-0.9791],
183
+ [-1.1484],
184
+ [-1.4182]],
185
+
186
+ [[ 0.2201],
187
+ [-0.0403],
188
+ [-0.5206],
189
+ [-0.7791],
190
+ [-0.8282],
191
+ [-1.2306],
192
+ [-1.2279],
193
+ [-1.2907],
194
+ [-1.4929],
195
+ [-1.7793]]])
196
+ >>> print("Loc / scale", env_trsf.transform.loc["reward"], env_trsf.transform.scale["reward"])
197
+ Loc / scale tensor([-0.8626]) tensor([1.1832])
198
+ >>>
199
+ >>> # Share values between workers
200
+ >>> def make_env():
201
+ ... env = GymEnv("Pendulum-v1")
202
+ ... env_trsf = env.append_transform(
203
+ ... VecNormV2(in_keys=["observation", "reward"], out_keys=["observation_norm", "reward_norm"])
204
+ ... )
205
+ ... return env_trsf
206
+ ...
207
+ ...
208
+ >>> if __name__ == "__main__":
209
+ ... # EnvCreator will share the loc/scale vals
210
+ ... make_env = EnvCreator(make_env)
211
+ ... # Create a local env to track the loc/scale
212
+ ... local_env = make_env()
213
+ ... env = ParallelEnv(2, [make_env] * 2)
214
+ ... r = env.rollout(10)
215
+ ... # Non-zero loc and scale testify that the sub-envs share their summary stats with us
216
+ ... print("Remotely updated loc / scale", local_env.transform.loc["reward"], local_env.transform.scale["reward"])
217
+ Remotely updated loc / scale tensor([-0.4307]) tensor([0.9613])
218
+ ... env.close()
219
+
220
+ """
221
+
222
+ # TODO:
223
+ # - test 2 different vecnorms, one for reward one for obs and that they don't collide
224
+ # - test that collision is spotted
225
+ # - customize the vecnorm keys in stateless
226
+ def __init__(
227
+ self,
228
+ in_keys: Sequence[NestedKey],
229
+ out_keys: Sequence[NestedKey] | None = None,
230
+ *,
231
+ lock: mp.Lock = None,
232
+ stateful: bool = True,
233
+ decay: float = 0.9999,
234
+ eps: float = 1e-4,
235
+ shared_data: TensorDictBase | None = None,
236
+ reduce_batch_dims: bool = False,
237
+ ) -> None:
238
+ self.stateful = stateful
239
+ if lock is None:
240
+ lock = mp.Lock()
241
+ if out_keys is None:
242
+ out_keys = copy(in_keys)
243
+ super().__init__(in_keys=in_keys, out_keys=out_keys)
244
+
245
+ self.lock = lock
246
+ self.decay = decay
247
+ self.eps = eps
248
+ self.frozen = False
249
+ self._cast_int_to_float = False
250
+ if self.stateful:
251
+ self.register_buffer("initialized", torch.zeros((), dtype=torch.bool))
252
+ if shared_data:
253
+ self._loc = shared_data["loc"]
254
+ self._var = shared_data["var"]
255
+ self._count = shared_data["count"]
256
+ else:
257
+ self._loc = None
258
+ self._var = None
259
+ self._count = None
260
+ else:
261
+ self.initialized = False
262
+ if shared_data:
263
+ # FIXME
264
+ raise NotImplementedError
265
+ if reduce_batch_dims and not stateful:
266
+ raise RuntimeError(
267
+ "reduce_batch_dims=True and stateful=False are not supported."
268
+ )
269
+ self.reduce_batch_dims = reduce_batch_dims
270
+
271
+ @property
272
+ def in_keys(self) -> Sequence[NestedKey]:
273
+ in_keys = self._in_keys
274
+ if not self.stateful:
275
+ in_keys = in_keys + [
276
+ f"{self.prefix}_count",
277
+ f"{self.prefix}_loc",
278
+ f"{self.prefix}_var",
279
+ ]
280
+ return in_keys
281
+
282
+ @in_keys.setter
283
+ def in_keys(self, in_keys: Sequence[NestedKey]):
284
+ self._in_keys = in_keys
285
+
def set_container(self, container: Transform | EnvBase) -> None:
    """Attach the transform to its container and eagerly set up state.

    In stateful mode, the running statistics are initialized from the parent
    env's fake tensordict; in stateless mode only the key prefix is resolved
    against the parent's output spec.
    """
    super().set_container(container)
    if self.stateful:
        parent = getattr(self, "parent", None)
        if parent is not None and isinstance(parent, EnvBase):
            if not parent.batch_locked:
                warnings.warn(
                    f"Support of {type(self).__name__} for unbatched container is experimental and subject to change."
                )
            if parent.batch_size:
                warnings.warn(
                    f"Support of {type(self).__name__} for containers with non-empty batch-size is experimental and subject to change."
                )
            # init
            data = parent.fake_tensordict().get("next")
            self._maybe_stateful_init(data)
    else:
        parent = getattr(self, "parent", None)
        if parent is not None and isinstance(parent, EnvBase):
            # Resolve (and cache) a collision-free prefix for the stat keys.
            self._make_prefix(parent.output_spec)
def freeze(self) -> VecNormV2:
    """Stop updating the running statistics on subsequent calls.

    See :meth:`~.unfreeze`.
    """
    self.frozen = True
    return self
def unfreeze(self) -> VecNormV2:
    """Resume updating the running statistics.

    See :meth:`~.freeze`.
    """
    self.frozen = False
    return self
def frozen_copy(self):
    """Returns a copy of the Transform that keeps track of the stats but does not update them.

    Raises:
        RuntimeError: if the transform is stateless, or if it has not been
            initialized yet (there are no statistics to copy).
    """
    if not self.stateful:
        # Fix: error message previously read "statelss".
        raise RuntimeError("Cannot create a frozen copy of a stateless VecNorm.")
    if self._loc is None:
        raise RuntimeError(
            "Make sure the VecNorm has been initialized before creating a frozen copy."
        )
    clone = self.clone()
    if self.stateful:
        # replace values
        clone._var = self._var.clone()
        clone._loc = self._loc.clone()
        clone._count = self._count.clone()
    # freeze
    return clone.freeze()
def clone(self) -> VecNormV2:
    """Return a copy of the transform, duplicating its running statistics."""
    other = super().clone()
    if self.stateful:
        # `initialized` is a registered buffer: re-register a detached clone
        # so the copy does not share storage with `self`.
        delattr(other, "initialized")
        other.register_buffer("initialized", self.initialized.clone())
        if self._loc is not None:
            other.initialized.fill_(True)
            other._loc = self._loc.clone()
            other._var = self._var.clone()
            other._count = self._count.clone()
    return other
def _apply(self, fn, recurse=True):
    """Apply device/dtype transformation to the module and its TensorDict state.

    This method is called internally by PyTorch when using .to(), .cuda(), .cpu(), etc.
    In stateful mode, we manually apply the transformation to _loc, _var, and _count
    since they are TensorDict instances, not registered buffers.
    """
    super()._apply(fn, recurse=recurse)

    if self.stateful and self._loc is not None:
        self._loc = self._loc.apply(fn)
        self._var = self._var.apply(fn)
        # Move _count to same device as _loc, but preserve its int dtype.
        # We extract the device from an actual leaf tensor because TensorDict.device
        # can be stale after .apply(fn) moves the leaves.
        leaf_tensor = next(
            t for t in self._loc.values(True, True) if isinstance(t, torch.Tensor)
        )
        # Fix: the original if/else on the type of _count had two byte-identical
        # branches; both TensorDictBase and plain tensors accept .to(device=...),
        # so a single call suffices.
        self._count = self._count.to(device=leaf_tensor.device)

    return self
def _reset(
    self, tensordict: TensorDictBase, tensordict_reset: TensorDictBase
) -> TensorDictBase:
    """Update stats with, and normalize, the reset data via :meth:`_step`.

    Missing keys are tolerated during reset (e.g. rewards are absent), hence
    the temporary missing-tolerance context.
    """
    # TODO: remove this decorator when trackers are in data
    with _set_missing_tolerance(self, True):
        # Fix: a trailing `return tensordict_reset` after this statement was
        # unreachable and has been removed.
        return self._step(tensordict_reset, tensordict_reset)
def _step(
    self, tensordict: TensorDictBase, next_tensordict: TensorDictBase
) -> TensorDictBase:
    """Update the running statistics with ``next_tensordict`` and write the normalized values back.

    The full update/normalize sequence runs under ``self.lock`` so that
    workers sharing the statistics do not interleave their updates.
    """
    if self.lock is not None:
        self.lock.acquire()
    try:
        if self.stateful:
            self._maybe_stateful_init(next_tensordict)
            next_tensordict_select = next_tensordict.select(
                *self.in_keys, strict=not self.missing_tolerance
            )
            if self.missing_tolerance and next_tensordict_select.is_empty():
                # Nothing to normalize at this step (e.g. keys absent on reset).
                return next_tensordict
            self._stateful_update(next_tensordict_select)
            next_tensordict_norm = self._stateful_norm(next_tensordict_select)
        else:
            self._maybe_stateless_init(tensordict)
            next_tensordict_select = next_tensordict.select(
                *self._in_keys_safe, strict=not self.missing_tolerance
            )
            if self.missing_tolerance and next_tensordict_select.is_empty():
                return next_tensordict
            # Stateless mode: the statistics travel with the input tensordict.
            loc = tensordict[f"{self.prefix}_loc"]
            var = tensordict[f"{self.prefix}_var"]
            count = tensordict[f"{self.prefix}_count"]

            loc, var, count = self._stateless_update(
                next_tensordict_select, loc, var, count
            )
            next_tensordict_norm = self._stateless_norm(
                next_tensordict_select, loc, var, count
            )
            # updates have been done in-place, we're good
            next_tensordict_norm.set(f"{self.prefix}_loc", loc)
            next_tensordict_norm.set(f"{self.prefix}_var", var)
            next_tensordict_norm.set(f"{self.prefix}_count", count)

        next_tensordict.update(next_tensordict_norm)
    finally:
        # Release the lock on every path, including the early returns above.
        if self.lock is not None:
            self.lock.release()

    return next_tensordict
431
+ def _maybe_cast_to_float(self, data):
432
+ if self._cast_int_to_float:
433
+ dtype = torch.get_default_dtype()
434
+ data = data.apply(
435
+ lambda x: x.to(dtype) if not x.dtype.is_floating_point else x
436
+ )
437
+ return data
438
+
439
+ @staticmethod
440
+ def _maybe_make_float(x):
441
+ if x.dtype.is_floating_point:
442
+ return x
443
+ return x.to(torch.get_default_dtype())
444
+
def _maybe_stateful_init(self, data):
    """Lazily create ``_loc``/``_var``/``_count`` the first time data is seen."""
    if not self.initialized:
        self.initialized.copy_(True)
        # Some keys (specifically rewards) may be missing, but we can use the
        # specs for them
        try:
            data_select = data.select(*self._in_keys_safe, strict=True)
        except KeyError:
            # Fill the missing entries from zeroed spec values, then overlay
            # whatever `data` actually provides.
            data_select = self.parent.full_observation_spec.zero().update(
                self.parent.full_reward_spec.zero()
            )
            data_select = data_select.update(data)
            data_select = data_select.select(*self._in_keys_safe, strict=True)
        if self.reduce_batch_dims and data_select.ndim:
            # collapse the batch-dims
            data_select = data_select.mean(dim=tuple(range(data.ndim)))
        # For the count, we must use a TD because some keys (eg Reward) may be missing at some steps (eg, reset)
        # We use mean() to eliminate all dims - since it's local we don't need to expand the shape
        count = (
            torch.zeros_like(data_select, dtype=torch.float32)
            .mean()
            .to(torch.int64)
        )
        # create loc
        loc = torch.zeros_like(data_select.apply(self._maybe_make_float))
        # create var
        var = torch.zeros_like(data_select.apply(self._maybe_make_float))
        self._loc = loc
        self._var = var
        self._count = count
476
+ @property
477
+ def _in_keys_safe(self):
478
+ if not self.stateful:
479
+ return self.in_keys[:-3]
480
+ return self.in_keys
481
+
def _norm(self, data, loc, var, count):
    """Return ``(data - loc) / scale`` with EMA bias-correction applied.

    Note that ``var`` stores a running estimate of E[x^2]; the variance is
    recovered as ``E[x^2] - E[x]^2`` before the square root is taken.
    """
    if self.missing_tolerance:
        # Restrict the stats to the entries actually present in `data`.
        loc = loc.select(*data.keys(True, True))
        var = var.select(*data.keys(True, True))
        count = count.select(*data.keys(True, True))
        if loc.is_empty():
            return data

    if self.decay < 1.0:
        # EMA bias correction: 1 - decay**count, computed via log for stability.
        bias_correction = 1 - (count * math.log(self.decay)).exp()
        bias_correction = bias_correction.apply(lambda x, y: x.to(y.dtype), data)
    else:
        bias_correction = 1

    var = var - loc.pow(2)
    loc = loc / bias_correction
    var = var / bias_correction

    scale = var.sqrt().clamp_min(self.eps)

    data_update = (data - loc) / scale
    if self.out_keys[: len(self.in_keys)] != self.in_keys:
        # map names
        for in_key, out_key in _zip_strict(self._in_keys_safe, self.out_keys):
            if in_key in data_update:
                data_update.rename_key_(in_key, out_key)
            else:
                # Key absent under missing tolerance: nothing to rename.
                pass
    return data_update
512
+ def _stateful_norm(self, data):
513
+ return self._norm(data, self._loc, self._var, self._count)
514
+
def _stateful_update(self, data):
    """Fold ``data`` into the running moments in-place; no-op when frozen."""
    if self.frozen:
        return
    if self.missing_tolerance:
        # Views restricted to the entries present in `data`; the lerp_ below
        # still mutates the underlying self._loc/_var/_count storage.
        var = self._var.select(*data.keys(True, True))
        loc = self._loc.select(*data.keys(True, True))
        count = self._count.select(*data.keys(True, True))
    else:
        var = self._var
        loc = self._loc
        count = self._count
    data = self._maybe_cast_to_float(data)
    if self.reduce_batch_dims and data.ndim:
        # The naive way to do this would be to convert the data to a list and iterate over it, but (1) that is
        # slow, and (2) it makes the value of the loc/var conditioned on the order we take to iterate over the data.
        # The second approach would be to average the data, but that would mean that having one vecnorm per batched
        # env or one per sub-env will lead to different results as a batch of N elements will actually be
        # considered as a single one.
        # What we go for instead is to average the data (and its squared value) then do the moving average with
        # adapted decay.
        n = data.numel()
        count += n
        data2 = data.pow(2).mean(dim=tuple(range(data.ndim)))
        data_mean = data.mean(dim=tuple(range(data.ndim)))
        if self.decay != 1.0:
            # n samples folded at once => effective decay is decay**n.
            weight = 1 - self.decay**n
        else:
            weight = n / count
    else:
        count += 1
        data2 = data.pow(2)
        data_mean = data
        if self.decay != 1.0:
            weight = 1 - self.decay
        else:
            # decay == 1.0 degenerates to a running arithmetic mean.
            weight = 1 / count
    loc.lerp_(end=data_mean, weight=weight)
    var.lerp_(end=data2, weight=weight)
def _maybe_stateless_init(self, data):
    """Seed zeroed count/loc/var entries into ``data`` on first use (stateless mode)."""
    if not self.initialized or f"{self.prefix}_loc" not in data.keys():
        self.initialized = True
        # select all except vecnorm
        # Some keys (specifically rewards) may be missing, but we can use the
        # specs for them
        try:
            data_select = data.select(*self._in_keys_safe, strict=True)
        except KeyError:
            # Fill missing entries from zeroed specs, then overlay `data`.
            data_select = self.parent.full_observation_spec.zero().update(
                self.parent.full_reward_spec.zero()
            )
            data_select = data_select.update(data)
            data_select = data_select.select(*self._in_keys_safe, strict=True)

        data[f"{self.prefix}_count"] = torch.zeros_like(
            data_select, dtype=torch.int64
        )
        # create loc
        loc = torch.zeros_like(data_select.apply(self._maybe_make_float))
        # create var
        var = torch.zeros_like(data_select.apply(self._maybe_make_float))
        data[f"{self.prefix}_loc"] = loc
        data[f"{self.prefix}_var"] = var
579
+ def _stateless_norm(self, data, loc, var, count):
580
+ data = self._norm(data, loc, var, count)
581
+ return data
582
+
583
+ def _stateless_update(self, data, loc, var, count):
584
+ if self.frozen:
585
+ return loc, var, count
586
+ count = count + 1
587
+ data = self._maybe_cast_to_float(data)
588
+ if self.decay != 1.0:
589
+ weight = 1 - self.decay
590
+ else:
591
+ weight = 1 / count
592
+ loc = loc.lerp(end=data, weight=weight)
593
+ var = var.lerp(end=data.pow(2), weight=weight)
594
+ return loc, var, count
595
+
def transform_observation_spec(self, observation_spec: Composite) -> Composite:
    """Adapt the observation spec to reflect the normalized entries."""
    return self._transform_spec(observation_spec)
def transform_reward_spec(
    self, reward_spec: Composite, observation_spec
) -> Composite:
    """Adapt the reward spec; the observation spec receives the stat entries."""
    return self._transform_spec(reward_spec, observation_spec)
def transform_output_spec(self, output_spec: Composite) -> Composite:
    # This is a copy-paste of the parent method to ensure that we correct the reward spec properly
    """Transform the full output spec, routing the reward spec through the observation spec.

    The observation spec is computed first and passed to
    :meth:`transform_reward_spec` because, in stateless mode, the stat
    entries are hosted by the observation spec.
    """
    output_spec = output_spec.clone()
    observation_spec = self.transform_observation_spec(
        output_spec["full_observation_spec"]
    )
    if "full_reward_spec" in output_spec.keys():
        output_spec["full_reward_spec"] = self.transform_reward_spec(
            output_spec["full_reward_spec"], observation_spec
        )
    output_spec["full_observation_spec"] = observation_spec
    if "full_done_spec" in output_spec.keys():
        output_spec["full_done_spec"] = self.transform_done_spec(
            output_spec["full_done_spec"]
        )
    # Sanity check: every out_key that is not also an in_key must appear in
    # the resulting spec, otherwise rollouts cannot be built consistently.
    output_spec_keys = [
        unravel_key(k[1:]) for k in output_spec.keys(True) if isinstance(k, tuple)
    ]
    out_keys = {unravel_key(k) for k in self.out_keys}
    in_keys = {unravel_key(k) for k in self.in_keys}
    for key in out_keys - in_keys:
        if unravel_key(key) not in output_spec_keys:
            warnings.warn(
                f"The key '{key}' is unaccounted for by the transform (expected keys {output_spec_keys}). "
                f"Every new entry in the tensordict resulting from a call to a transform must be "
                f"registered in the specs for torchrl rollouts to be consistently built. "
                f"Make sure transform_output_spec/transform_observation_spec/... is coded correctly. "
                "This warning will trigger a KeyError in v0.9, make sure to adapt your code accordingly.",
                category=FutureWarning,
            )
    return output_spec
def _maybe_convert_bounded(self, in_spec):
    """Recursively rewrite a spec for normalized output.

    Integer dtypes become the default float dtype and ``Bounded`` specs become
    ``Unbounded`` (normalization breaks the original bounds). Side effect:
    sets ``self._cast_int_to_float`` when an integer spec is encountered so
    incoming data is cast accordingly.
    """
    if isinstance(in_spec, Composite):
        return Composite(
            {
                key: self._maybe_convert_bounded(value)
                for key, value in in_spec.items()
            }
        )
    dtype = in_spec.dtype
    if dtype is not None and not dtype.is_floating_point:
        # we need to cast the tensor and spec to a float type
        in_spec = in_spec.clone()
        in_spec.dtype = torch.get_default_dtype()
        self._cast_int_to_float = True

    if isinstance(in_spec, Bounded):
        in_spec = Unbounded(
            shape=in_spec.shape, device=in_spec.device, dtype=in_spec.dtype
        )
    return in_spec
@property
def prefix(self):
    """Key prefix of the count/loc/var entries (default ``"_vecnorm"``)."""
    return getattr(self, "_prefix", "_vecnorm")
662
+ def _make_prefix(self, output_spec):
663
+ prefix = getattr(self, "_prefix", None)
664
+ if prefix is not None:
665
+ return prefix
666
+ if (
667
+ "_vecnorm_loc" in output_spec["full_observation_spec"].keys()
668
+ or "_vecnorm_loc" in output_spec["full_reward_spec"].keys()
669
+ ):
670
+ prefix = "_vecnorm" + str(uuid.uuid1())
671
+ else:
672
+ prefix = "_vecnorm"
673
+ self._prefix = prefix
674
+ return prefix
675
+
def _proc_count_spec(self, count_spec, parent_shape=None):
    """Recursively rewrite leaf specs as int64 ``Unbounded`` specs for the count entries."""
    if isinstance(count_spec, Composite):
        for key, spec in count_spec.items():
            spec = self._proc_count_spec(spec, parent_shape=count_spec.shape)
            count_spec[key] = spec
        return count_spec
    if count_spec.dtype:
        # Replace the leaf with an integer counter spec of the same shape/device.
        count_spec = Unbounded(
            shape=count_spec.shape, dtype=torch.int64, device=count_spec.device
        )
    return count_spec
def _transform_spec(
    self, spec: Composite, obs_spec: Composite | None = None
) -> Composite:
    """Rewrite ``spec`` for normalized outputs; register stat specs in stateless mode.

    ``obs_spec`` is the spec that hosts the loc/var/count entries; it
    defaults to ``spec`` itself (i.e. when transforming observations).
    """
    in_specs = {}
    for in_key, out_key in zip(self._in_keys_safe, self.out_keys):
        if unravel_key(in_key) in spec.keys(True):
            in_spec = spec.get(in_key).clone()
            in_spec = self._maybe_convert_bounded(in_spec)
            spec.set(out_key, in_spec)
            in_specs[in_key] = in_spec
    if not self.stateful and in_specs:
        if obs_spec is None:
            obs_spec = spec
        loc_spec = obs_spec.get(f"{self.prefix}_loc", default=None)
        var_spec = obs_spec.get(f"{self.prefix}_var", default=None)
        count_spec = obs_spec.get(f"{self.prefix}_count", default=None)
        if loc_spec is None:
            # First registration: create empty composite holders.
            loc_spec = Composite(shape=obs_spec.shape, device=obs_spec.device)
            var_spec = Composite(shape=obs_spec.shape, device=obs_spec.device)
            count_spec = Composite(shape=obs_spec.shape, device=obs_spec.device)
        loc_spec.update(in_specs)
        # should we clone?
        var_spec.update(in_specs)
        count_spec = count_spec.update(in_specs)
        count_spec = self._proc_count_spec(count_spec)
        obs_spec[f"{self.prefix}_loc"] = loc_spec
        obs_spec[f"{self.prefix}_var"] = var_spec
        obs_spec[f"{self.prefix}_count"] = count_spec
    return spec
def to_observation_norm(self) -> Compose | ObservationNorm:
    """Export the current statistics as (a composition of) ObservationNorm transforms.

    Returns a single :class:`ObservationNorm` for one key, otherwise a
    :class:`Compose` of one per key. Only available in stateful mode.
    """
    if not self.stateful:
        # FIXME
        raise NotImplementedError()
    result = []

    loc, scale = self._get_loc_scale()

    for key, key_out in _zip_strict(self.in_keys, self.out_keys):
        local_result = ObservationNorm(
            loc=loc.get(key),
            scale=scale.get(key),
            standard_normal=True,
            in_keys=key,
            out_keys=key_out,
            eps=self.eps,
        )
        result += [local_result]
    if len(self.in_keys) > 1:
        return Compose(*result)
    # NOTE(review): with empty in_keys this would raise NameError on
    # `local_result` — presumably in_keys is never empty here; confirm.
    return local_result
def _get_loc_scale(self, loc_only: bool = False) -> tuple:
    """Return bias-corrected ``(loc, scale)``; ``scale`` is ``None`` when ``loc_only``.

    Raises:
        RuntimeError: if the transform is stateless.
    """
    if self.stateful:
        loc = self._loc
        count = self._count
        if self.decay != 1.0:
            # EMA bias correction: 1 - decay**count, computed via log.
            bias_correction = 1 - (count * math.log(self.decay)).exp()
            bias_correction = bias_correction.apply(lambda x, y: x.to(y.dtype), loc)
        else:
            bias_correction = 1
        if loc_only:
            return loc / bias_correction, None
        var = self._var
        # _var stores E[x^2]; recover the variance before the sqrt.
        var = var - loc.pow(2)
        loc = loc / bias_correction
        var = var / bias_correction
        scale = var.sqrt().clamp_min(self.eps)
        return loc, scale
    else:
        raise RuntimeError("_get_loc_scale() called on stateless vecnorm.")
def __getstate__(self) -> dict[str, Any]:
    """Drop the unpicklable multiprocessing lock, leaving a placeholder marker."""
    state = super().__getstate__()
    if state.pop("lock", None) is not None:
        state["lock_placeholder"] = None
    return state
def __setstate__(self, state: dict[str, Any]):
    """Restore state, recreating a fresh multiprocessing lock if one was dropped."""
    if "lock_placeholder" in state:
        del state["lock_placeholder"]
        state["lock"] = mp.Lock()
    super().__setstate__(state)
774
+ SEP = ".-|-."
775
def set_extra_state(self, state: OrderedDict) -> None:
    """Restore the running statistics from a flattened state-dict payload.

    Counterpart of :meth:`get_extra_state`; invoked by
    ``nn.Module.load_state_dict``. No-op in stateless mode or when both the
    state and the instance are uninitialized.

    Raises:
        RuntimeError: if an empty state is loaded into an initialized instance.
    """
    if not self.stateful:
        return
    if not state:
        if self._loc is None:
            # we're good, not init yet
            return
        raise RuntimeError(
            "set_extra_state() called with a void state-dict while the instance is initialized."
        )
    td = TensorDict(state).unflatten_keys(self.SEP)
    if self._loc is None and not all(v.is_shared() for v in td.values(True, True)):
        warnings.warn(
            "VecNorm wasn't initialized and the tensordict is not shared. In single "
            "process settings, this is ok, but if you need to share the statistics "
            "between workers this should require some attention. "
            "Make sure that the content of VecNorm is transmitted to the workers "
            "after calling load_state_dict and not before, as other workers "
            "may not have access to the loaded TensorDict."
        )
        td.share_memory_()
    self._loc = td["loc"]
    self._var = td["var"]
    self._count = td["count"]
def get_extra_state(self) -> OrderedDict:
    """Return the running statistics as a flat dict for ``nn.Module.state_dict``.

    Keys are joined with :attr:`SEP`; an empty dict is returned when the
    transform is stateless or not yet initialized.
    """
    if not self.stateful:
        return {}
    if self._loc is None:
        warnings.warn(
            "Querying state_dict on an uninitialized VecNorm transform will "
            "return a `None` value for the summary statistics. "
            "Loading such a state_dict on an initialized VecNorm will result in "
            "an error."
        )
        return {}
    td = TensorDict(
        loc=self._loc,
        var=self._var,
        count=self._count,
    )
    return td.flatten_keys(self.SEP).to_dict()
@property
def loc(self):
    """Returns a TensorDict with the loc to be used for an affine transform."""
    if not self.stateful:
        raise RuntimeError("loc cannot be computed with stateless vecnorm.")
    # Never cached: another process may update the shared stats at any time.
    location, _ = self._get_loc_scale(loc_only=True)
    return location
@property
def scale(self):
    """Returns a TensorDict with the scale to be used for an affine transform."""
    if not self.stateful:
        raise RuntimeError("scale cannot be computed with stateless vecnorm.")
    # Never cached: another process may update the shared stats at any time.
    _, spread = self._get_loc_scale()
    return spread
@property
def standard_normal(self):
    """Indicates that normalization follows the ``(x - loc) / scale`` form.

    Mirrors the ``standard_normal`` attribute of
    :class:`~torchrl.envs.ObservationNorm`; always ``True`` here.
    """
    return True