torchrl 0.11.0__cp314-cp314-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (394)
  1. benchmarks/benchmark_batched_envs.py +104 -0
  2. benchmarks/conftest.py +91 -0
  3. benchmarks/ecosystem/gym_env_throughput.py +321 -0
  4. benchmarks/ecosystem/vmas_rllib_vs_torchrl_sampling_performance.py +231 -0
  5. benchmarks/requirements.txt +7 -0
  6. benchmarks/storage/benchmark_sample_latency_over_rpc.py +193 -0
  7. benchmarks/test_collectors_benchmark.py +240 -0
  8. benchmarks/test_compressed_storage_benchmark.py +145 -0
  9. benchmarks/test_envs_benchmark.py +133 -0
  10. benchmarks/test_llm.py +101 -0
  11. benchmarks/test_non_tensor_env_benchmark.py +70 -0
  12. benchmarks/test_objectives_benchmarks.py +1199 -0
  13. benchmarks/test_replaybuffer_benchmark.py +254 -0
  14. sota-check/README.md +35 -0
  15. sota-implementations/README.md +142 -0
  16. sota-implementations/a2c/README.md +39 -0
  17. sota-implementations/a2c/a2c_atari.py +291 -0
  18. sota-implementations/a2c/a2c_mujoco.py +273 -0
  19. sota-implementations/a2c/utils_atari.py +240 -0
  20. sota-implementations/a2c/utils_mujoco.py +160 -0
  21. sota-implementations/bandits/README.md +7 -0
  22. sota-implementations/bandits/dqn.py +126 -0
  23. sota-implementations/cql/cql_offline.py +198 -0
  24. sota-implementations/cql/cql_online.py +249 -0
  25. sota-implementations/cql/discrete_cql_offline.py +180 -0
  26. sota-implementations/cql/discrete_cql_online.py +227 -0
  27. sota-implementations/cql/utils.py +471 -0
  28. sota-implementations/crossq/crossq.py +271 -0
  29. sota-implementations/crossq/utils.py +320 -0
  30. sota-implementations/ddpg/ddpg.py +231 -0
  31. sota-implementations/ddpg/utils.py +325 -0
  32. sota-implementations/decision_transformer/dt.py +163 -0
  33. sota-implementations/decision_transformer/lamb.py +167 -0
  34. sota-implementations/decision_transformer/online_dt.py +178 -0
  35. sota-implementations/decision_transformer/utils.py +562 -0
  36. sota-implementations/discrete_sac/discrete_sac.py +243 -0
  37. sota-implementations/discrete_sac/utils.py +324 -0
  38. sota-implementations/dqn/README.md +30 -0
  39. sota-implementations/dqn/dqn_atari.py +272 -0
  40. sota-implementations/dqn/dqn_cartpole.py +236 -0
  41. sota-implementations/dqn/utils_atari.py +132 -0
  42. sota-implementations/dqn/utils_cartpole.py +90 -0
  43. sota-implementations/dreamer/README.md +129 -0
  44. sota-implementations/dreamer/dreamer.py +586 -0
  45. sota-implementations/dreamer/dreamer_utils.py +1107 -0
  46. sota-implementations/expert-iteration/README.md +352 -0
  47. sota-implementations/expert-iteration/ei_utils.py +770 -0
  48. sota-implementations/expert-iteration/expert-iteration-async.py +512 -0
  49. sota-implementations/expert-iteration/expert-iteration-sync.py +508 -0
  50. sota-implementations/expert-iteration/requirements_gsm8k.txt +13 -0
  51. sota-implementations/expert-iteration/requirements_ifeval.txt +16 -0
  52. sota-implementations/gail/gail.py +327 -0
  53. sota-implementations/gail/gail_utils.py +68 -0
  54. sota-implementations/gail/ppo_utils.py +157 -0
  55. sota-implementations/grpo/README.md +273 -0
  56. sota-implementations/grpo/grpo-async.py +437 -0
  57. sota-implementations/grpo/grpo-sync.py +435 -0
  58. sota-implementations/grpo/grpo_utils.py +843 -0
  59. sota-implementations/grpo/requirements_gsm8k.txt +11 -0
  60. sota-implementations/grpo/requirements_ifeval.txt +16 -0
  61. sota-implementations/impala/README.md +33 -0
  62. sota-implementations/impala/impala_multi_node_ray.py +292 -0
  63. sota-implementations/impala/impala_multi_node_submitit.py +284 -0
  64. sota-implementations/impala/impala_single_node.py +261 -0
  65. sota-implementations/impala/utils.py +184 -0
  66. sota-implementations/iql/discrete_iql.py +230 -0
  67. sota-implementations/iql/iql_offline.py +164 -0
  68. sota-implementations/iql/iql_online.py +225 -0
  69. sota-implementations/iql/utils.py +437 -0
  70. sota-implementations/multiagent/README.md +74 -0
  71. sota-implementations/multiagent/iql.py +237 -0
  72. sota-implementations/multiagent/maddpg_iddpg.py +266 -0
  73. sota-implementations/multiagent/mappo_ippo.py +267 -0
  74. sota-implementations/multiagent/qmix_vdn.py +271 -0
  75. sota-implementations/multiagent/sac.py +337 -0
  76. sota-implementations/multiagent/utils/__init__.py +4 -0
  77. sota-implementations/multiagent/utils/logging.py +151 -0
  78. sota-implementations/multiagent/utils/utils.py +43 -0
  79. sota-implementations/ppo/README.md +29 -0
  80. sota-implementations/ppo/ppo_atari.py +305 -0
  81. sota-implementations/ppo/ppo_mujoco.py +293 -0
  82. sota-implementations/ppo/utils_atari.py +238 -0
  83. sota-implementations/ppo/utils_mujoco.py +152 -0
  84. sota-implementations/ppo_trainer/train.py +21 -0
  85. sota-implementations/redq/README.md +7 -0
  86. sota-implementations/redq/redq.py +199 -0
  87. sota-implementations/redq/utils.py +1060 -0
  88. sota-implementations/sac/sac-async.py +266 -0
  89. sota-implementations/sac/sac.py +239 -0
  90. sota-implementations/sac/utils.py +381 -0
  91. sota-implementations/sac_trainer/train.py +16 -0
  92. sota-implementations/td3/td3.py +254 -0
  93. sota-implementations/td3/utils.py +319 -0
  94. sota-implementations/td3_bc/td3_bc.py +177 -0
  95. sota-implementations/td3_bc/utils.py +251 -0
  96. torchrl/__init__.py +144 -0
  97. torchrl/_extension.py +74 -0
  98. torchrl/_torchrl.cpython-314-aarch64-linux-gnu.so +0 -0
  99. torchrl/_utils.py +1431 -0
  100. torchrl/collectors/__init__.py +48 -0
  101. torchrl/collectors/_base.py +1058 -0
  102. torchrl/collectors/_constants.py +88 -0
  103. torchrl/collectors/_multi_async.py +324 -0
  104. torchrl/collectors/_multi_base.py +1805 -0
  105. torchrl/collectors/_multi_sync.py +464 -0
  106. torchrl/collectors/_runner.py +581 -0
  107. torchrl/collectors/_single.py +2009 -0
  108. torchrl/collectors/_single_async.py +259 -0
  109. torchrl/collectors/collectors.py +62 -0
  110. torchrl/collectors/distributed/__init__.py +32 -0
  111. torchrl/collectors/distributed/default_configs.py +133 -0
  112. torchrl/collectors/distributed/generic.py +1306 -0
  113. torchrl/collectors/distributed/ray.py +1092 -0
  114. torchrl/collectors/distributed/rpc.py +1006 -0
  115. torchrl/collectors/distributed/sync.py +731 -0
  116. torchrl/collectors/distributed/utils.py +160 -0
  117. torchrl/collectors/llm/__init__.py +10 -0
  118. torchrl/collectors/llm/base.py +494 -0
  119. torchrl/collectors/llm/ray_collector.py +275 -0
  120. torchrl/collectors/llm/utils.py +36 -0
  121. torchrl/collectors/llm/weight_update/__init__.py +10 -0
  122. torchrl/collectors/llm/weight_update/vllm.py +348 -0
  123. torchrl/collectors/llm/weight_update/vllm_v2.py +311 -0
  124. torchrl/collectors/utils.py +433 -0
  125. torchrl/collectors/weight_update.py +591 -0
  126. torchrl/csrc/numpy_utils.h +38 -0
  127. torchrl/csrc/pybind.cpp +27 -0
  128. torchrl/csrc/segment_tree.h +458 -0
  129. torchrl/csrc/torch_utils.h +34 -0
  130. torchrl/csrc/utils.cpp +48 -0
  131. torchrl/csrc/utils.h +31 -0
  132. torchrl/data/__init__.py +187 -0
  133. torchrl/data/datasets/__init__.py +58 -0
  134. torchrl/data/datasets/atari_dqn.py +878 -0
  135. torchrl/data/datasets/common.py +281 -0
  136. torchrl/data/datasets/d4rl.py +489 -0
  137. torchrl/data/datasets/d4rl_infos.py +187 -0
  138. torchrl/data/datasets/gen_dgrl.py +375 -0
  139. torchrl/data/datasets/minari_data.py +643 -0
  140. torchrl/data/datasets/openml.py +177 -0
  141. torchrl/data/datasets/openx.py +798 -0
  142. torchrl/data/datasets/roboset.py +363 -0
  143. torchrl/data/datasets/utils.py +11 -0
  144. torchrl/data/datasets/vd4rl.py +432 -0
  145. torchrl/data/llm/__init__.py +34 -0
  146. torchrl/data/llm/dataset.py +491 -0
  147. torchrl/data/llm/history.py +1378 -0
  148. torchrl/data/llm/prompt.py +198 -0
  149. torchrl/data/llm/reward.py +225 -0
  150. torchrl/data/llm/topk.py +186 -0
  151. torchrl/data/llm/utils.py +543 -0
  152. torchrl/data/map/__init__.py +21 -0
  153. torchrl/data/map/hash.py +185 -0
  154. torchrl/data/map/query.py +204 -0
  155. torchrl/data/map/tdstorage.py +363 -0
  156. torchrl/data/map/tree.py +1434 -0
  157. torchrl/data/map/utils.py +103 -0
  158. torchrl/data/postprocs/__init__.py +8 -0
  159. torchrl/data/postprocs/postprocs.py +391 -0
  160. torchrl/data/replay_buffers/__init__.py +99 -0
  161. torchrl/data/replay_buffers/checkpointers.py +622 -0
  162. torchrl/data/replay_buffers/ray_buffer.py +292 -0
  163. torchrl/data/replay_buffers/replay_buffers.py +2376 -0
  164. torchrl/data/replay_buffers/samplers.py +2578 -0
  165. torchrl/data/replay_buffers/scheduler.py +265 -0
  166. torchrl/data/replay_buffers/storages.py +2412 -0
  167. torchrl/data/replay_buffers/utils.py +1042 -0
  168. torchrl/data/replay_buffers/writers.py +781 -0
  169. torchrl/data/tensor_specs.py +7101 -0
  170. torchrl/data/utils.py +334 -0
  171. torchrl/envs/__init__.py +265 -0
  172. torchrl/envs/async_envs.py +1105 -0
  173. torchrl/envs/batched_envs.py +3093 -0
  174. torchrl/envs/common.py +4241 -0
  175. torchrl/envs/custom/__init__.py +11 -0
  176. torchrl/envs/custom/chess.py +617 -0
  177. torchrl/envs/custom/llm.py +214 -0
  178. torchrl/envs/custom/pendulum.py +401 -0
  179. torchrl/envs/custom/san_moves.txt +29274 -0
  180. torchrl/envs/custom/tictactoeenv.py +288 -0
  181. torchrl/envs/env_creator.py +263 -0
  182. torchrl/envs/gym_like.py +752 -0
  183. torchrl/envs/libs/__init__.py +68 -0
  184. torchrl/envs/libs/_gym_utils.py +326 -0
  185. torchrl/envs/libs/brax.py +846 -0
  186. torchrl/envs/libs/dm_control.py +544 -0
  187. torchrl/envs/libs/envpool.py +447 -0
  188. torchrl/envs/libs/gym.py +2239 -0
  189. torchrl/envs/libs/habitat.py +138 -0
  190. torchrl/envs/libs/isaac_lab.py +87 -0
  191. torchrl/envs/libs/isaacgym.py +203 -0
  192. torchrl/envs/libs/jax_utils.py +166 -0
  193. torchrl/envs/libs/jumanji.py +963 -0
  194. torchrl/envs/libs/meltingpot.py +599 -0
  195. torchrl/envs/libs/openml.py +153 -0
  196. torchrl/envs/libs/openspiel.py +652 -0
  197. torchrl/envs/libs/pettingzoo.py +1042 -0
  198. torchrl/envs/libs/procgen.py +351 -0
  199. torchrl/envs/libs/robohive.py +429 -0
  200. torchrl/envs/libs/smacv2.py +645 -0
  201. torchrl/envs/libs/unity_mlagents.py +891 -0
  202. torchrl/envs/libs/utils.py +147 -0
  203. torchrl/envs/libs/vmas.py +813 -0
  204. torchrl/envs/llm/__init__.py +63 -0
  205. torchrl/envs/llm/chat.py +730 -0
  206. torchrl/envs/llm/datasets/README.md +4 -0
  207. torchrl/envs/llm/datasets/__init__.py +17 -0
  208. torchrl/envs/llm/datasets/gsm8k.py +353 -0
  209. torchrl/envs/llm/datasets/ifeval.py +274 -0
  210. torchrl/envs/llm/envs.py +789 -0
  211. torchrl/envs/llm/libs/README.md +3 -0
  212. torchrl/envs/llm/libs/__init__.py +8 -0
  213. torchrl/envs/llm/libs/mlgym.py +869 -0
  214. torchrl/envs/llm/reward/__init__.py +10 -0
  215. torchrl/envs/llm/reward/gsm8k.py +324 -0
  216. torchrl/envs/llm/reward/ifeval/README.md +13 -0
  217. torchrl/envs/llm/reward/ifeval/__init__.py +10 -0
  218. torchrl/envs/llm/reward/ifeval/_instructions.py +1667 -0
  219. torchrl/envs/llm/reward/ifeval/_instructions_main.py +131 -0
  220. torchrl/envs/llm/reward/ifeval/_instructions_registry.py +100 -0
  221. torchrl/envs/llm/reward/ifeval/_instructions_util.py +1677 -0
  222. torchrl/envs/llm/reward/ifeval/_scorer.py +454 -0
  223. torchrl/envs/llm/transforms/__init__.py +55 -0
  224. torchrl/envs/llm/transforms/browser.py +292 -0
  225. torchrl/envs/llm/transforms/dataloading.py +859 -0
  226. torchrl/envs/llm/transforms/format.py +73 -0
  227. torchrl/envs/llm/transforms/kl.py +1544 -0
  228. torchrl/envs/llm/transforms/policy_version.py +189 -0
  229. torchrl/envs/llm/transforms/reason.py +323 -0
  230. torchrl/envs/llm/transforms/tokenizer.py +321 -0
  231. torchrl/envs/llm/transforms/tools.py +1955 -0
  232. torchrl/envs/model_based/__init__.py +9 -0
  233. torchrl/envs/model_based/common.py +180 -0
  234. torchrl/envs/model_based/dreamer.py +112 -0
  235. torchrl/envs/transforms/__init__.py +147 -0
  236. torchrl/envs/transforms/functional.py +48 -0
  237. torchrl/envs/transforms/gym_transforms.py +203 -0
  238. torchrl/envs/transforms/module.py +341 -0
  239. torchrl/envs/transforms/r3m.py +372 -0
  240. torchrl/envs/transforms/ray_service.py +663 -0
  241. torchrl/envs/transforms/rb_transforms.py +214 -0
  242. torchrl/envs/transforms/transforms.py +11835 -0
  243. torchrl/envs/transforms/utils.py +94 -0
  244. torchrl/envs/transforms/vc1.py +307 -0
  245. torchrl/envs/transforms/vecnorm.py +845 -0
  246. torchrl/envs/transforms/vip.py +407 -0
  247. torchrl/envs/utils.py +1718 -0
  248. torchrl/envs/vec_envs.py +11 -0
  249. torchrl/modules/__init__.py +206 -0
  250. torchrl/modules/distributions/__init__.py +73 -0
  251. torchrl/modules/distributions/continuous.py +830 -0
  252. torchrl/modules/distributions/discrete.py +908 -0
  253. torchrl/modules/distributions/truncated_normal.py +187 -0
  254. torchrl/modules/distributions/utils.py +233 -0
  255. torchrl/modules/llm/__init__.py +62 -0
  256. torchrl/modules/llm/backends/__init__.py +65 -0
  257. torchrl/modules/llm/backends/vllm/__init__.py +94 -0
  258. torchrl/modules/llm/backends/vllm/_models.py +46 -0
  259. torchrl/modules/llm/backends/vllm/base.py +72 -0
  260. torchrl/modules/llm/backends/vllm/vllm_async.py +2075 -0
  261. torchrl/modules/llm/backends/vllm/vllm_plugin.py +22 -0
  262. torchrl/modules/llm/backends/vllm/vllm_sync.py +446 -0
  263. torchrl/modules/llm/backends/vllm/vllm_utils.py +129 -0
  264. torchrl/modules/llm/policies/__init__.py +28 -0
  265. torchrl/modules/llm/policies/common.py +1809 -0
  266. torchrl/modules/llm/policies/transformers_wrapper.py +2756 -0
  267. torchrl/modules/llm/policies/vllm_wrapper.py +2241 -0
  268. torchrl/modules/llm/utils.py +23 -0
  269. torchrl/modules/mcts/__init__.py +21 -0
  270. torchrl/modules/mcts/scores.py +579 -0
  271. torchrl/modules/models/__init__.py +86 -0
  272. torchrl/modules/models/batchrenorm.py +119 -0
  273. torchrl/modules/models/decision_transformer.py +179 -0
  274. torchrl/modules/models/exploration.py +731 -0
  275. torchrl/modules/models/llm.py +156 -0
  276. torchrl/modules/models/model_based.py +596 -0
  277. torchrl/modules/models/models.py +1712 -0
  278. torchrl/modules/models/multiagent.py +1067 -0
  279. torchrl/modules/models/recipes/impala.py +185 -0
  280. torchrl/modules/models/utils.py +162 -0
  281. torchrl/modules/planners/__init__.py +10 -0
  282. torchrl/modules/planners/cem.py +228 -0
  283. torchrl/modules/planners/common.py +73 -0
  284. torchrl/modules/planners/mppi.py +265 -0
  285. torchrl/modules/tensordict_module/__init__.py +89 -0
  286. torchrl/modules/tensordict_module/actors.py +2457 -0
  287. torchrl/modules/tensordict_module/common.py +529 -0
  288. torchrl/modules/tensordict_module/exploration.py +814 -0
  289. torchrl/modules/tensordict_module/probabilistic.py +321 -0
  290. torchrl/modules/tensordict_module/rnn.py +1639 -0
  291. torchrl/modules/tensordict_module/sequence.py +132 -0
  292. torchrl/modules/tensordict_module/world_models.py +34 -0
  293. torchrl/modules/utils/__init__.py +38 -0
  294. torchrl/modules/utils/mappings.py +9 -0
  295. torchrl/modules/utils/utils.py +89 -0
  296. torchrl/objectives/__init__.py +78 -0
  297. torchrl/objectives/a2c.py +659 -0
  298. torchrl/objectives/common.py +753 -0
  299. torchrl/objectives/cql.py +1346 -0
  300. torchrl/objectives/crossq.py +710 -0
  301. torchrl/objectives/ddpg.py +453 -0
  302. torchrl/objectives/decision_transformer.py +371 -0
  303. torchrl/objectives/deprecated.py +516 -0
  304. torchrl/objectives/dqn.py +683 -0
  305. torchrl/objectives/dreamer.py +488 -0
  306. torchrl/objectives/functional.py +48 -0
  307. torchrl/objectives/gail.py +258 -0
  308. torchrl/objectives/iql.py +996 -0
  309. torchrl/objectives/llm/__init__.py +30 -0
  310. torchrl/objectives/llm/grpo.py +846 -0
  311. torchrl/objectives/llm/sft.py +482 -0
  312. torchrl/objectives/multiagent/__init__.py +8 -0
  313. torchrl/objectives/multiagent/qmixer.py +396 -0
  314. torchrl/objectives/ppo.py +1669 -0
  315. torchrl/objectives/redq.py +683 -0
  316. torchrl/objectives/reinforce.py +530 -0
  317. torchrl/objectives/sac.py +1580 -0
  318. torchrl/objectives/td3.py +570 -0
  319. torchrl/objectives/td3_bc.py +625 -0
  320. torchrl/objectives/utils.py +782 -0
  321. torchrl/objectives/value/__init__.py +28 -0
  322. torchrl/objectives/value/advantages.py +1956 -0
  323. torchrl/objectives/value/functional.py +1459 -0
  324. torchrl/objectives/value/utils.py +360 -0
  325. torchrl/record/__init__.py +17 -0
  326. torchrl/record/loggers/__init__.py +23 -0
  327. torchrl/record/loggers/common.py +48 -0
  328. torchrl/record/loggers/csv.py +226 -0
  329. torchrl/record/loggers/mlflow.py +142 -0
  330. torchrl/record/loggers/tensorboard.py +139 -0
  331. torchrl/record/loggers/trackio.py +163 -0
  332. torchrl/record/loggers/utils.py +78 -0
  333. torchrl/record/loggers/wandb.py +214 -0
  334. torchrl/record/recorder.py +554 -0
  335. torchrl/services/__init__.py +79 -0
  336. torchrl/services/base.py +109 -0
  337. torchrl/services/ray_service.py +453 -0
  338. torchrl/testing/__init__.py +107 -0
  339. torchrl/testing/assertions.py +179 -0
  340. torchrl/testing/dist_utils.py +122 -0
  341. torchrl/testing/env_creators.py +227 -0
  342. torchrl/testing/env_helper.py +35 -0
  343. torchrl/testing/gym_helpers.py +156 -0
  344. torchrl/testing/llm_mocks.py +119 -0
  345. torchrl/testing/mocking_classes.py +2720 -0
  346. torchrl/testing/modules.py +295 -0
  347. torchrl/testing/mp_helpers.py +15 -0
  348. torchrl/testing/ray_helpers.py +293 -0
  349. torchrl/testing/utils.py +190 -0
  350. torchrl/trainers/__init__.py +42 -0
  351. torchrl/trainers/algorithms/__init__.py +11 -0
  352. torchrl/trainers/algorithms/configs/__init__.py +705 -0
  353. torchrl/trainers/algorithms/configs/collectors.py +216 -0
  354. torchrl/trainers/algorithms/configs/common.py +41 -0
  355. torchrl/trainers/algorithms/configs/data.py +308 -0
  356. torchrl/trainers/algorithms/configs/envs.py +104 -0
  357. torchrl/trainers/algorithms/configs/envs_libs.py +361 -0
  358. torchrl/trainers/algorithms/configs/logging.py +80 -0
  359. torchrl/trainers/algorithms/configs/modules.py +570 -0
  360. torchrl/trainers/algorithms/configs/objectives.py +177 -0
  361. torchrl/trainers/algorithms/configs/trainers.py +340 -0
  362. torchrl/trainers/algorithms/configs/transforms.py +955 -0
  363. torchrl/trainers/algorithms/configs/utils.py +252 -0
  364. torchrl/trainers/algorithms/configs/weight_sync_schemes.py +191 -0
  365. torchrl/trainers/algorithms/configs/weight_update.py +159 -0
  366. torchrl/trainers/algorithms/ppo.py +373 -0
  367. torchrl/trainers/algorithms/sac.py +308 -0
  368. torchrl/trainers/helpers/__init__.py +40 -0
  369. torchrl/trainers/helpers/collectors.py +416 -0
  370. torchrl/trainers/helpers/envs.py +573 -0
  371. torchrl/trainers/helpers/logger.py +33 -0
  372. torchrl/trainers/helpers/losses.py +132 -0
  373. torchrl/trainers/helpers/models.py +658 -0
  374. torchrl/trainers/helpers/replay_buffer.py +59 -0
  375. torchrl/trainers/helpers/trainers.py +301 -0
  376. torchrl/trainers/trainers.py +2052 -0
  377. torchrl/weight_update/__init__.py +33 -0
  378. torchrl/weight_update/_distributed.py +749 -0
  379. torchrl/weight_update/_mp.py +624 -0
  380. torchrl/weight_update/_noupdate.py +102 -0
  381. torchrl/weight_update/_ray.py +1032 -0
  382. torchrl/weight_update/_rpc.py +284 -0
  383. torchrl/weight_update/_shared.py +891 -0
  384. torchrl/weight_update/llm/__init__.py +32 -0
  385. torchrl/weight_update/llm/vllm_double_buffer.py +370 -0
  386. torchrl/weight_update/llm/vllm_nccl.py +710 -0
  387. torchrl/weight_update/utils.py +73 -0
  388. torchrl/weight_update/weight_sync_schemes.py +1244 -0
  389. torchrl-0.11.0.dist-info/METADATA +1308 -0
  390. torchrl-0.11.0.dist-info/RECORD +394 -0
  391. torchrl-0.11.0.dist-info/WHEEL +5 -0
  392. torchrl-0.11.0.dist-info/entry_points.txt +2 -0
  393. torchrl-0.11.0.dist-info/licenses/LICENSE +21 -0
  394. torchrl-0.11.0.dist-info/top_level.txt +7 -0
torchrl/data/map/tree.py
@@ -0,0 +1,1434 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import annotations

import weakref
from collections import deque
from collections.abc import Callable
from typing import Any, Literal

import torch
from tensordict import (
    merge_tensordicts,
    NestedKey,
    TensorClass,
    TensorDict,
    TensorDictBase,
    unravel_key,
)

from torchrl.data.map.tdstorage import TensorDictMap
from torchrl.data.map.utils import _plot_plotly_box, _plot_plotly_tree
from torchrl.data.replay_buffers.storages import ListStorage
from torchrl.data.tensor_specs import Composite
from torchrl.envs.common import EnvBase

class Tree(TensorClass["nocast"]):
    """Representation of a single MCTS (Monte Carlo Tree Search) Tree.

    This class encapsulates the data and behavior of a tree node in an MCTS algorithm.
    It includes attributes for storing information about the node, such as its children,
    visit count, and rollout data. Methods are provided for traversing the tree,
    computing statistics, and visualizing the tree structure.

    A ``Tree`` is mostly interchangeable with a node or a vertex: we use the term "tree" when
    talking about a node with children, and "node" or "vertex" when talking about a place in
    the tree where a branching occurs.
    A node in the tree is defined primarily by its ``hash`` value. Usually, a ``hash`` is determined by a unique
    combination of state (or observation) and action. If one observation (found in the ``node_data`` attribute) has more than
    one action associated, each branch will be stored in the ``subtree`` attribute as a stack of ``Tree`` instances.

    Attributes:
        count (int): The number of visits to this node.
        index (torch.Tensor): Indices of the child nodes in the data map.
        hash (torch.Tensor): A hash value for this node.
            It may be the case that ``hash`` is ``None`` in the specific case where the root of the tree
            has more than one action associated. In that case, each subtree branch will have a different action
            associated and a hash corresponding to the ``(observation, action)`` pair.
        node_id (int): A unique identifier for this node.
        rollout (TensorDict): Rollout data following the observation encoded in this node, in a TED format.
            If there are multiple actions taken at this node, subtrees are stored in the corresponding
            entry. Rollouts can be reconstructed using the :meth:`rollout_from_path` method.
        node_data (TensorDict): Data defining this node (e.g., observations) before the next branching.
            Entries usually match the ``in_keys`` in ``MCTSForest.node_map``.
        subtree (Tree): A stack of subtrees produced when actions are taken.
        num_children (int): The number of child nodes (read-only).
        is_terminal (bool): whether the node ends an episode (read-only).
            ``True`` if the last step of the node's ``rollout`` is flagged as done; ``False``
            for rollout-less (root or floating) nodes.

    Methods:
        __contains__: Whether another tree can be found in the tree.
        vertices: Returns a dictionary containing all vertices in the tree. Keys can be paths, ids or hashes.
        num_vertices: Returns the total number of vertices in the tree, with or without duplicates.
        edges: Returns a list of edges in the tree.
        valid_paths: Yields all valid paths in the tree.
        max_length: Returns the maximum length of any path in the tree.
        rollout_from_path: Reconstructs a rollout from a given path.
        plot: Visualizes the tree using a specified backend and figure type.
        get_vertex_by_id: returns the vertex given by its id in the tree.
        get_vertex_by_hash: returns the vertex given by its hash in the forest.

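    Examples:
        ``Tree`` objects are usually produced by :meth:`MCTSForest.get_tree` rather than
        built by hand; see :class:`~torchrl.data.MCTSForest` for a complete example. A
        single leaf node can nonetheless be created directly. A minimal sketch, with an
        illustrative observation:

        >>> import torch
        >>> from tensordict import TensorDict
        >>> from torchrl.data import Tree
        >>> leaf = Tree.make_node(TensorDict({"observation": torch.zeros(3)}))
        >>> leaf.num_children
        0
        >>> leaf.is_terminal
        False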
+ """
75
+
76
+ count: int | torch.Tensor = None
77
+ wins: int | torch.Tensor = None
78
+
79
+ index: torch.Tensor | None = None
80
+ # The hash is None if the node has more than one action associated
81
+ hash: int | None = None
82
+ node_id: int | None = None
83
+
84
+ # rollout following the observation encoded in node, in a TorchRL (TED) format
85
+ rollout: TensorDict | None = None
86
+
87
+ # The data specifying the node (typically an observation or a set of observations)
88
+ node_data: TensorDict | None = None
89
+
90
+ # Stack of subtrees. A subtree is produced when an action is taken.
91
+ subtree: Tree = None
92
+
93
+ # weakrefs to the parent(s) of the node
94
+ _parent: weakref.ref | list[weakref.ref] | None = None
95
+
96
+ # Specs: contains information such as action or observation keys and spaces.
97
+ # If present, they should be structured like env specs are:
98
+ # Composite(input_spec=Composite(full_state_spec=..., full_action_spec=...),
99
+ # output_spec=Composite(full_observation_spec=..., full_reward_spec=..., full_done_spec=...))
100
+ # where every leaf component is optional.
101
+ specs: Composite | None = None
102

    @classmethod
    def make_node(
        cls,
        data: TensorDictBase,
        *,
        device: torch.device | None = None,
        batch_size: torch.Size | None = None,
        specs: Composite | None = None,
    ) -> Tree:
        """Creates a new node given some data.
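
        Examples:
            A minimal sketch with illustrative keys and shapes. When ``data`` contains
            a ``"next"`` entry, it is interpreted as a (batched) rollout and a subtree
            holding the terminal state is created:

            >>> import torch
            >>> from tensordict import TensorDict
            >>> from torchrl.data import Tree
            >>> step = TensorDict(
            ...     {
            ...         "observation": torch.zeros(1, 3),
            ...         "action": torch.zeros(1, 1),
            ...         "next": TensorDict({"observation": torch.ones(1, 3)}, [1]),
            ...     },
            ...     [1],
            ... )
            >>> node = Tree.make_node(step)
            >>> node.rollout is not None
            True
            >>> node.num_children
            1
        """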
+ if "next" in data.keys():
114
+ rollout = data
115
+ if not rollout.ndim:
116
+ rollout = rollout.unsqueeze(0)
117
+ subtree = TensorDict.lazy_stack([cls.make_node(data["next"][..., -1])])
118
+ else:
119
+ rollout = None
120
+ subtree = None
121
+ if device is None:
122
+ device = data.device
123
+ return cls(
124
+ count=torch.zeros(()),
125
+ wins=torch.zeros(()),
126
+ node_data=data.exclude("action", "next"),
127
+ rollout=rollout,
128
+ subtree=subtree,
129
+ device=device,
130
+ batch_size=batch_size,
131
+ )
132

    # Specs
    @property
    def full_observation_spec(self):
        """The observation spec of the tree.

        This is an alias for `Tree.specs['output_spec', 'full_observation_spec']`.
        """
        return self.specs["output_spec", "full_observation_spec"]

    @property
    def full_reward_spec(self):
        """The reward spec of the tree.

        This is an alias for `Tree.specs['output_spec', 'full_reward_spec']`.
        """
        return self.specs["output_spec", "full_reward_spec"]

    @property
    def full_done_spec(self):
        """The done spec of the tree.

        This is an alias for `Tree.specs['output_spec', 'full_done_spec']`.
        """
        return self.specs["output_spec", "full_done_spec"]

    @property
    def full_state_spec(self):
        """The state spec of the tree.

        This is an alias for `Tree.specs['input_spec', 'full_state_spec']`.
        """
        return self.specs["input_spec", "full_state_spec"]

    @property
    def full_action_spec(self):
        """The action spec of the tree.

        This is an alias for `Tree.specs['input_spec', 'full_action_spec']`.
        """
        return self.specs["input_spec", "full_action_spec"]

    @property
    def selected_actions(self) -> torch.Tensor | TensorDictBase | None:
        """Returns a tensor containing all the selected actions branching out from this node."""
        if self.subtree is None:
            return None
        return self.subtree.rollout[..., 0]["action"]

    @property
    def prev_action(self) -> torch.Tensor | TensorDictBase | None:
        """The action undertaken just before this node's observation was generated.

        Returns:
            a tensor, tensordict or None if the node has no parent.

        .. seealso:: This will be equal to :attr:`~torchrl.data.Tree.branching_action` whenever the rollout data contains a single step.

        .. seealso:: :attr:`~torchrl.data.Tree.selected_actions` lists all actions associated with a given node (or observation) in the tree.

        """
        if self.rollout is None:
            return None
        return self.rollout[..., -1]["action"]

    @property
    def branching_action(self) -> torch.Tensor | TensorDictBase | None:
        """Returns the action that branched out to this particular node.

        Returns:
            a tensor, tensordict or None if the node has no parent.

        .. seealso:: This will be equal to :attr:`~torchrl.data.Tree.prev_action` whenever the rollout data contains a single step.

        .. seealso:: :attr:`~torchrl.data.Tree.selected_actions` lists all actions associated with a given node (or observation) in the tree.

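        Examples:
            A sketch following the data pattern of :meth:`to_string`, with illustrative
            integer observations; the stored branch starts with action ``3`` and ends
            with action ``1``:

            >>> from tensordict import TensorDict
            >>> from torchrl.data import MCTSForest
            >>> forest = MCTSForest()
            >>> td_root = TensorDict({"observation": 0})
            >>> td = td_root.clone().unsqueeze(0)
            >>> for action, obs in [(3, 123), (1, 456)]:
            ...     td = td.update(TensorDict({
            ...         "action": [action],
            ...         "next": TensorDict({"observation": [obs]}, [1]),
            ...     }, [1]))
            ...     forest.extend(td)
            ...     td = td["next"].clone()
            >>> branch = forest.get_tree(td_root).subtree[0]
            >>> branch.branching_action  # first action of the branch
            tensor(3)
            >>> branch.prev_action  # last action of the branch
            tensor(1)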
+ """
209
+ if self.rollout is None:
210
+ return None
211
+ return self.rollout[..., 0]["action"]
212

    @property
    def node_observation(self) -> torch.Tensor | TensorDictBase:
        """Returns the observation associated with this particular node.

        This is the observation (or bag of observations) that defines the node before a branching occurs.
        If the node contains a :attr:`rollout` attribute, the node observation is typically identical to the
        observation resulting from the last action undertaken, i.e., ``node.rollout[..., -1]["next", "observation"]``.

        If more than one observation key is associated with the tree specs, a :class:`~tensordict.TensorDict` instance
        is returned instead.

        For a more consistent representation, see :attr:`~.node_observations`.

        """
        # TODO: implement specs
        return self.node_data["observation"]

    @property
    def node_observations(self) -> torch.Tensor | TensorDictBase:
        """Returns the observations associated with this particular node in a TensorDict format.

        This is the observation (or bag of observations) that defines the node before a branching occurs.
        If the node contains a :attr:`rollout` attribute, the node observation is typically identical to the
        observation resulting from the last action undertaken, i.e., ``node.rollout[..., -1]["next", "observation"]``.

        The observations are returned in a :class:`~tensordict.TensorDict` instance, even when a single
        observation key is present.

        For a single-tensor representation, see :attr:`~.node_observation`.

        """
        # TODO: implement specs
        return self.node_data.select("observation")

    @property
    def visits(self) -> int | torch.Tensor:
        """Returns the number of visits associated with this particular node.

        This is an alias for the :attr:`~.count` attribute.

        """
        return self.count

    @visits.setter
    def visits(self, count):
        self.count = count

    def __setattr__(self, name: str, value: Any) -> None:
        if name == "subtree" and value is not None:
            wr = weakref.ref(self._tensordict)
            if value._parent is None:
                value._parent = wr
            elif isinstance(value._parent, list):
                value._parent.append(wr)
            else:
                value._parent = [value._parent, wr]
        return super().__setattr__(name, value)

    @property
    def parent(self) -> Tree | None:
        """The parent of the node.

        If the node has a parent and this object is still present in the python workspace, it will be returned by this
        property.

        For re-branching trees, this property may return a stack of trees where every index of the stack corresponds to
        a different parent.

        .. note:: the ``parent`` attribute will match in content but not in identity: the tensorclass object is reconstructed
            using the same tensors (i.e., tensors that point to the same memory locations).

        Returns:
            A ``Tree`` containing the parent data or ``None`` if the parent data is out of scope or the node is the root.
        """
        parent = self._parent
        if parent is not None:
            # Check that all parents match
            queue = [parent]

            def maybe_flatten_list(maybe_nested_list):
                if isinstance(maybe_nested_list, list):
                    for p in maybe_nested_list:
                        if isinstance(p, list):
                            queue.append(p)
                        else:
                            yield p()
                else:
                    yield maybe_nested_list()

            parent_result = None
            while len(queue):
                local_result = None
                for r in maybe_flatten_list(queue.pop()):
                    if local_result is None:
                        local_result = r
                    elif r is not None and r is not local_result:
                        if isinstance(local_result, list):
                            local_result.append(r)
                        else:
                            local_result = [local_result, r]
                if local_result is None:
                    continue
                # replicate logic at macro level
                if parent_result is None:
                    parent_result = local_result
                else:
                    if isinstance(local_result, list):
                        local_result = [
                            r for r in local_result if r not in parent_result
                        ]
                    else:
                        local_result = [local_result]
                    if isinstance(parent_result, list):
                        parent_result.extend(local_result)
                    else:
                        parent_result = [parent_result, *local_result]
            if isinstance(parent_result, list):
                return TensorDict.lazy_stack(
                    [self._from_tensordict(r) for r in parent_result]
                )
            return self._from_tensordict(parent_result)

    @property
    def num_children(self) -> int:
        """Number of children of this node.

        Equates to the number of elements in the ``self.subtree`` stack.
        """
        return len(self.subtree) if self.subtree is not None else 0

    @property
    def is_terminal(self) -> bool | torch.Tensor:
        """Returns ``True`` if this node ends an episode, i.e., the last step of its rollout is flagged as done."""
        if self.rollout is not None:
            return self.rollout[..., -1]["next", "done"].squeeze(-1)
        # If there is no rollout, there is no preceding data - either this is a root or it's a floating node.
        # In either case, we assume that the node is not terminal.
        return False

    def fully_expanded(self, env: EnvBase) -> bool:
        """Returns True if the number of children is equal to the environment cardinality."""
        cardinality = env.cardinality(self.node_data)
        num_actions = self.num_children
        return cardinality == num_actions

    def get_vertex_by_id(self, id: int) -> Tree:
        """Goes through the tree and returns the node corresponding to the given id."""
        q = deque()
        q.append(self)
        while len(q):
            tree = q.popleft()
            if tree.node_id == id:
                return tree
            if tree.subtree is not None:
                q.extend(tree.subtree.unbind(0))
        raise ValueError(f"Node with id {id} not found.")

    def get_vertex_by_hash(self, hash: int) -> Tree:
        """Goes through the tree and returns the node corresponding to the given hash."""
        q = deque()
        q.append(self)
        while len(q):
            tree = q.popleft()
            if tree.hash == hash:
                return tree
            if tree.subtree is not None:
                q.extend(tree.subtree.unbind(0))
        raise ValueError(f"Node with hash {hash} not found.")

    def __contains__(self, other: Tree) -> bool:
        hash = other.hash
        for vertex in self.vertices().values():
            if vertex.hash == hash:
                return True
        else:
            return False

    def vertices(
        self, *, key_type: Literal["id", "hash", "path"] = "hash"
    ) -> dict[int | tuple[int], Tree]:
        """Returns a map containing the vertices of the Tree.

        Keyword args:
            key_type (Literal["id", "hash", "path"], optional): Specifies the type of key to use for the vertices.

                - "id": Use the vertex ID as the key.
                - "hash": Use a hash of the vertex as the key.
                - "path": Use the path to the vertex as the key. This may lead to a dictionary with a longer length than
                  when ``"id"`` or ``"hash"`` are used, as the same node may be part of multiple trajectories.

                Defaults to ``"hash"``.

        Returns:
            Dict[int | Tuple[int], Tree]: A dictionary mapping keys to Tree vertices.

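        Examples:
            A sketch following the data pattern of :meth:`to_string`, with two rollouts
            diverging at the root (observations are illustrative):

            >>> from tensordict import TensorDict
            >>> from torchrl.data import MCTSForest
            >>> forest = MCTSForest()
            >>> td_root = TensorDict({"observation": 0})
            >>> for rollout_data in [[(3, 123), (1, 456)], [(2, 359)]]:
            ...     td = td_root.clone().unsqueeze(0)
            ...     for action, obs in rollout_data:
            ...         td = td.update(TensorDict({
            ...             "action": [action],
            ...             "next": TensorDict({"observation": [obs]}, [1]),
            ...         }, [1]))
            ...         forest.extend(td)
            ...         td = td["next"].clone()
            >>> tree = forest.get_tree(td_root)
            >>> sorted(tree.vertices(key_type="path").keys())
            [(), (0,), (1,)]
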
+ """
410
+ memo = set()
411
+ result = {}
412
+ q = deque()
413
+ cur_path = ()
414
+ q.append((self, cur_path))
415
+ use_hash = key_type == "hash"
416
+ use_id = key_type == "id"
417
+ use_path = key_type == "path"
418
+ while len(q):
419
+ tree, cur_path = q.popleft()
420
+ h = tree.hash
421
+ if h in memo and not use_path:
422
+ continue
423
+ memo.add(h)
424
+ if use_path:
425
+ result[cur_path] = tree
426
+ elif use_id:
427
+ result[tree.node_id] = tree
428
+ elif use_hash:
429
+ result[tree.node_id] = tree
430
+ else:
431
+ raise ValueError(
432
+ f"key_type must be either 'hash', 'id' or 'path'. Got {key_type}."
433
+ )
434
+
435
+ n = int(tree.num_children)
436
+ for i in range(n):
437
+ cur_path_tree = cur_path + (i,)
438
+ q.append((tree.subtree[i], cur_path_tree))
439
+ return result
440

    def num_vertices(self, *, count_repeat: bool = False) -> int:
        """Returns the number of unique vertices in the Tree.

        Keyword Args:
            count_repeat (bool, optional): Determines whether to count repeated vertices.

                - If ``False``, counts each unique vertex only once.
                - If ``True``, counts vertices multiple times if they appear in different paths.

                Defaults to ``False``.

        Returns:
            int: The number of unique vertices in the Tree.

        """
        return len(
            {
                v.node_id
                for v in self.vertices(
                    key_type="hash" if not count_repeat else "path"
                ).values()
            }
        )

    def edges(self) -> list[tuple[int, int]]:
        """Retrieves a list of edges in the tree.

        Each edge is represented as a tuple of two node IDs: the parent node ID and the child node ID.
        The tree is traversed using Breadth-First Search (BFS) to ensure all edges are visited.

        Returns:
            A list of tuples, where each tuple contains a parent node ID and a child node ID.
        """
        result = []
        q = deque()
        parent = self.node_id
        q.append((self, parent))
        while len(q):
            tree, parent = q.popleft()
            n = int(tree.num_children)
            for i in range(n):
                node = tree.subtree[i]
                node_id = node.node_id
                result.append((parent, node_id))
                q.append((node, node_id))
        return result

    def valid_paths(self):
        """Generates all valid paths in the tree.

        A valid path is a sequence of child indices that starts at the root node and ends at a leaf node.
        Each path is represented as a tuple of integers, where each integer corresponds to the index of a child node.

        Yields:
            tuple: A valid path in the tree.
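
        Examples:
            A sketch following the data pattern of :meth:`to_string` (observations are
            illustrative). Two rollouts share their first step and then diverge, so the
            compact tree has a single branch at the root followed by two leaves:

            >>> from tensordict import TensorDict
            >>> from torchrl.data import MCTSForest
            >>> forest = MCTSForest()
            >>> td_root = TensorDict({"observation": 0})
            >>> for rollout_data in [[(3, 123), (1, 456)], [(3, 123), (0, 789)]]:
            ...     td = td_root.clone().unsqueeze(0)
            ...     for action, obs in rollout_data:
            ...         td = td.update(TensorDict({
            ...             "action": [action],
            ...             "next": TensorDict({"observation": [obs]}, [1]),
            ...         }, [1]))
            ...         forest.extend(td)
            ...         td = td["next"].clone()
            >>> list(forest.get_tree(td_root).valid_paths())
            [(0, 0), (0, 1)]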
+ """
498
+ # Initialize a queue with the current tree node and an empty path
499
+ q = deque()
500
+ cur_path = ()
501
+ q.append((self, cur_path))
502
+ # Perform BFS traversal of the tree
503
+ while len(q):
504
+ # Dequeue the next tree node and its current path
505
+ tree, cur_path = q.popleft()
506
+ # Get the number of child nodes
507
+ n = int(tree.num_children)
508
+ # If this is a leaf node, yield the current path
509
+ if not n:
510
+ yield cur_path
511
+ # Iterate over the child nodes
512
+ for i in range(n):
513
+ cur_path_tree = cur_path + (i,)
514
+ q.append((tree.subtree[i], cur_path_tree))
515

    def max_length(self):
        """Returns the maximum length of all valid paths in the tree.

        The length of a path is defined as the number of nodes in the path.
        If the tree is empty, returns 0.

        Returns:
            int: The maximum length of all valid paths in the tree.

        """
        lengths = tuple(len(path) for path in self.valid_paths())
        if len(lengths) == 0:
            return 0
        elif len(lengths) == 1:
            return lengths[0]
        return max(*lengths)

    def rollout_from_path(self, path: tuple[int]) -> TensorDictBase | None:
        """Retrieves the rollout data along a given path in the tree.

        The rollout data is concatenated along the last dimension (dim=-1) for each node in the path.
        If no rollout data is found along the path, returns ``None``.

        Args:
            path: A tuple of integers representing the path in the tree.

        Returns:
            The concatenated rollout data along the path, or None if no data is found.

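        Examples:
            A sketch with a single two-step rollout, following the data pattern of
            :meth:`to_string` (observations are illustrative):

            >>> from tensordict import TensorDict
            >>> from torchrl.data import MCTSForest
            >>> forest = MCTSForest()
            >>> td_root = TensorDict({"observation": 0})
            >>> td = td_root.clone().unsqueeze(0)
            >>> for action, obs in [(3, 123), (1, 456)]:
            ...     td = td.update(TensorDict({
            ...         "action": [action],
            ...         "next": TensorDict({"observation": [obs]}, [1]),
            ...     }, [1]))
            ...     forest.extend(td)
            ...     td = td["next"].clone()
            >>> tree = forest.get_tree(td_root)
            >>> tree.rollout_from_path((0,))["next", "observation"].tolist()
            [123, 456]
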
+ """
546
+ r = self.rollout
547
+ tree = self
548
+ rollouts = []
549
+ if r is not None:
550
+ rollouts.append(r)
551
+ for i in path:
552
+ tree = tree.subtree[i]
553
+ r = tree.rollout
554
+ if r is not None:
555
+ rollouts.append(r)
556
+ if rollouts:
557
+ return torch.cat(rollouts, dim=-1)
558

    @staticmethod
    def _label(info: list[str], tree: Tree, root=False):
        labels = []
        for key in info:
            if key == "hash":
                hash = tree.hash
                if hash is not None:
                    hash = hash.item()
                v = f"hash={hash}"
            elif root:
                v = f"{key}=None"
            else:
                v = f"{key}={tree.rollout[key].mean().item()}"

            labels.append(v)
        return ", ".join(labels)

    def plot(
        self: Tree,
        backend: str = "plotly",
        figure: str = "tree",
        info: list[str] = None,
        make_labels: Callable[[Any, ...], Any] | None = None,
    ):
        """Plots a visualization of the tree using the specified backend and figure type.

        Args:
            backend: The plotting backend to use. Currently only supports 'plotly'.
            figure: The type of figure to plot. Can be either 'tree' or 'box'.
            info: A list of additional information to include in the plot (not currently used).
            make_labels: An optional function to generate custom labels for the plot.

        Raises:
            NotImplementedError: If an unsupported backend or figure type is specified.
        """
        if backend == "plotly":
            if figure == "box":
                _plot_plotly_box(self)
                return
            elif figure == "tree":
                _plot_plotly_tree(self, make_labels=make_labels)
                return
        raise NotImplementedError(
            f"Unknown plotting backend {backend} with figure {figure}."
        )

    def to_string(self, node_format_fn=lambda tree: tree.node_data.to_dict()):
        """Generates a string representation of the tree.

        This function can pull out information from each of the nodes in a tree,
        so it can be useful for debugging. The nodes are listed line-by-line.
        Each line contains the path to the node, followed by the string
        representation of that node generated with ``node_format_fn``. Each
        line is indented according to the number of steps in the path required
        to get to the corresponding node.

        Args:
            node_format_fn (Callable, optional): User-defined function to
                generate a string for each node of the tree. The signature must
                be ``(Tree) -> Any``, and the output must be convertible to a
                string. If this argument is not given, the generated string is
                the node's :attr:`Tree.node_data` attribute converted to a dict.

        Examples:
            >>> from torchrl.data import MCTSForest
            >>> from tensordict import TensorDict
            >>> forest = MCTSForest()
            >>> td_root = TensorDict({"observation": 0,})
            >>> rollouts_data = [
            ...     # [(action, obs), ...]
            ...     [(3, 123), (1, 456)],
            ...     [(2, 359), (2, 3094)],
            ...     [(3, 123), (9, 392), (6, 989), (20, 809), (21, 847)],
            ...     [(1, 75)],
            ...     [(3, 123), (0, 948)],
            ...     [(2, 359), (2, 3094), (10, 68)],
            ...     [(2, 359), (2, 3094), (11, 9045)],
            ... ]
            >>> for rollout_data in rollouts_data:
            ...     td = td_root.clone().unsqueeze(0)
            ...     for action, obs in rollout_data:
            ...         td = td.update(TensorDict({
            ...             "action": [action],
            ...             "next": TensorDict({"observation": [obs]}, [1]),
            ...         }, [1]))
            ...         forest.extend(td)
            ...         td = td["next"].clone()
            ...
            >>> tree = forest.get_tree(td_root)
            >>> print(tree.to_string())
            (0,) {'observation': tensor(123)}
             (0, 0) {'observation': tensor(456)}
             (0, 1) {'observation': tensor(847)}
             (0, 2) {'observation': tensor(948)}
            (1,) {'observation': tensor(3094)}
             (1, 0) {'observation': tensor(68)}
             (1, 1) {'observation': tensor(9045)}
            (2,) {'observation': tensor(75)}
        """
        queue = [
            # tree, path
            (self, ()),
        ]

        strings = []

        while len(queue) > 0:
            self, path = queue.pop()
            if self.subtree is not None:
                for subtree_idx, subtree in reversed(list(enumerate(self.subtree))):
                    queue.append((subtree, path + (subtree_idx,)))

            if self.rollout is not None:
                level = len(path)
                string = node_format_fn(self)
                strings.append(f"{' ' * (level - 1)}{path} {string}")

        return "\n".join(strings)


class MCTSForest:
    """A collection of MCTS trees.

    .. warning:: This class is currently under active development. Expect frequent API changes.

    The class is aimed at storing rollouts in a storage, and at producing trees based on a given root
    in that dataset.

    Keyword Args:
        data_map (TensorDictMap, optional): the storage to use to store the data
            (observation, reward, states etc). If not provided, it is lazily
            initialized using :meth:`~torchrl.data.map.tdstorage.TensorDictMap.from_tensordict_pair`
            using the list of :attr:`observation_keys` and :attr:`action_keys` as ``in_keys``.
        node_map (TensorDictMap, optional): a map from the observation space to the index space.
            Internally, the node map is used to gather all possible branches coming out of
            a given node. For example, if an observation has two associated actions and outcomes
            in the data map, then the :attr:`node_map` will return a data structure containing the
            two indices in the :attr:`data_map` that correspond to these two outcomes.
            If not provided, it is lazily initialized using
            :meth:`~torchrl.data.map.tdstorage.TensorDictMap.from_tensordict_pair` using the list of
            :attr:`observation_keys` as ``in_keys`` and the :class:`~torchrl.data.QueryModule` as
            ``out_keys``.
        max_size (int, optional): the size of the maps.
            If not provided, defaults to ``data_map.max_size`` if this can be found, then
            ``node_map.max_size``. If none of these are provided, defaults to `1000`.
        done_keys (list of NestedKey, optional): the done keys of the environment. If not provided,
            defaults to ``("done", "terminated", "truncated")``.
            The :meth:`get_keys_from_env` method can be used to automatically determine the keys.
        action_keys (list of NestedKey, optional): the action keys of the environment. If not provided,
            defaults to ``("action",)``.
            The :meth:`get_keys_from_env` method can be used to automatically determine the keys.
        reward_keys (list of NestedKey, optional): the reward keys of the environment. If not provided,
            defaults to ``("reward",)``.
            The :meth:`get_keys_from_env` method can be used to automatically determine the keys.
        observation_keys (list of NestedKey, optional): the observation keys of the environment. If not provided,
            defaults to ``("observation",)``.
            The :meth:`get_keys_from_env` method can be used to automatically determine the keys.
        excluded_keys (list of NestedKey, optional): a list of keys to exclude from the data storage.
        consolidated (bool, optional): if ``True``, the data_map storage will be consolidated on disk.
            Defaults to ``False``.

    Examples:
        >>> from torchrl.envs import GymEnv
        >>> import torch
        >>> from tensordict import TensorDict, LazyStackedTensorDict
        >>> from torchrl.data import TensorDictMap, ListStorage
        >>> from torchrl.data.map.tree import MCTSForest
        >>>
        >>> from torchrl.envs import PendulumEnv, CatTensors, UnsqueezeTransform, StepCounter
        >>> # Create the MCTS Forest
        >>> forest = MCTSForest()
        >>> # Create an environment. We're using a stateless env to be able to query it at any given state (like an oracle)
        >>> env = PendulumEnv()
        >>> obs_keys = list(env.observation_spec.keys(True, True))
        >>> state_keys = set(env.full_state_spec.keys(True, True)) - set(obs_keys)
        >>> # Appending transforms to get an "observation" key that concatenates the observations together
        >>> env = env.append_transform(
        ...     UnsqueezeTransform(
        ...         in_keys=obs_keys,
        ...         out_keys=[("unsqueeze", key) for key in obs_keys],
        ...         dim=-1,
        ...     )
        ... )
        >>> env = env.append_transform(
        ...     CatTensors([("unsqueeze", key) for key in obs_keys], "observation")
        ... )
        >>> env = env.append_transform(StepCounter())
        >>> env.set_seed(0)
        >>> # Get a reset state, then make a rollout out of it
        >>> reset_state = env.reset()
        >>> rollout0 = env.rollout(6, auto_reset=False, tensordict=reset_state.clone())
        >>> # Append the rollout to the forest. We're removing the state entries for clarity
        >>> rollout0 = rollout0.copy()
        >>> rollout0.exclude(*state_keys, inplace=True).get("next").exclude(*state_keys, inplace=True)
        >>> forest.extend(rollout0)
        >>> # The forest should have 6 elements (the length of the rollout)
        >>> assert len(forest) == 6
        >>> # Let's make another rollout from the same reset state
        >>> rollout1 = env.rollout(6, auto_reset=False, tensordict=reset_state.clone())
        >>> rollout1.exclude(*state_keys, inplace=True).get("next").exclude(*state_keys, inplace=True)
        >>> forest.extend(rollout1)
        >>> assert len(forest) == 12
        >>> # Let's make another final rollout from an intermediate step in the second rollout
        >>> rollout1b = env.rollout(6, auto_reset=False, tensordict=rollout1[3].exclude("next"))
        >>> rollout1b.exclude(*state_keys, inplace=True)
        >>> rollout1b.get("next").exclude(*state_keys, inplace=True)
        >>> forest.extend(rollout1b)
        >>> assert len(forest) == 18
        >>> # Since we have 2 rollouts starting at the same state, our tree should have two
        >>> # branches if we produce it from the reset entry. Take the state, and call `get_tree`:
        >>> r = rollout0[0]
        >>> # Let's get the compact tree that follows the initial reset. A compact tree is
        >>> # a tree where nodes that have a single child are collapsed.
        >>> tree = forest.get_tree(r)
        >>> print(tree.max_length())
        2
        >>> print(list(tree.valid_paths()))
        [(0,), (1, 0), (1, 1)]
        >>> from tensordict import assert_close
        >>> # We can manually rebuild the tree
        >>> assert_close(
        ...     rollout1,
        ...     torch.cat([tree.subtree[1].rollout, tree.subtree[1].subtree[0].rollout]),
        ...     intersection=True,
        ... )
        True
        >>> # Or we can rebuild it using the dedicated method
        >>> assert_close(
        ...     rollout1,
        ...     tree.rollout_from_path((1, 0)),
        ...     intersection=True,
        ... )
        True
        >>> tree.plot()
        >>> tree = forest.get_tree(r, compact=False)
        >>> print(tree.max_length())
        9
        >>> print(list(tree.valid_paths()))
        [(0, 0, 0, 0, 0, 0), (1, 0, 0, 0, 0, 0), (1, 0, 0, 1, 0, 0, 0, 0, 0)]
        >>> assert_close(
        ...     rollout1,
        ...     tree.rollout_from_path((1, 0, 0, 0, 0, 0)),
        ...     intersection=True,
        ... )
        True
    """

    def __init__(
        self,
        *,
        data_map: TensorDictMap | None = None,
        node_map: TensorDictMap | None = None,
        max_size: int | None = None,
        done_keys: list[NestedKey] | None = None,
        reward_keys: list[NestedKey] = None,
        observation_keys: list[NestedKey] = None,
        action_keys: list[NestedKey] = None,
        excluded_keys: list[NestedKey] = None,
        consolidated: bool | None = None,
    ):

        self.data_map = data_map

        self.node_map = node_map

        if max_size is None:
            if data_map is not None:
                max_size = data_map.max_size
                if max_size != getattr(node_map, "max_size", max_size):
                    raise ValueError(
                        f"Conflicting max_size: got data_map.max_size={data_map.max_size} and node_map.max_size={node_map.max_size}."
                    )
            elif node_map is not None:
                max_size = node_map.max_size
            else:
                max_size = None
        elif data_map is not None and max_size != getattr(
            data_map, "max_size", max_size
        ):
            raise ValueError(
                f"Conflicting max_size: got data_map.max_size={data_map.max_size} and max_size={max_size}."
            )
        elif node_map is not None and max_size != getattr(
            node_map, "max_size", max_size
        ):
            raise ValueError(
                f"Conflicting max_size: got node_map.max_size={node_map.max_size} and max_size={max_size}."
            )
        self.max_size = max_size

        self.done_keys = done_keys
        self.action_keys = action_keys
        self.reward_keys = reward_keys
        self.observation_keys = observation_keys
        self.excluded_keys = excluded_keys
        self.consolidated = consolidated

    @property
    def done_keys(self) -> list[NestedKey]:
        """Done Keys.

        Returns the keys used to indicate that an episode has ended.
        The default done keys are "done", "terminated", and "truncated". These keys can be
        used in the environment's output to signal the end of an episode.

        Returns:
            A list of strings representing the done keys.

        """
        done_keys = getattr(self, "_done_keys", None)
        if done_keys is None:
            self._done_keys = done_keys = ["done", "terminated", "truncated"]
        return done_keys

    @done_keys.setter
    def done_keys(self, value):
        self._done_keys = _make_list_of_nestedkeys(value, "done_keys")

    @property
    def reward_keys(self) -> list[NestedKey]:
        """Reward Keys.

        Returns the keys used to retrieve rewards from the environment's output.
        The default reward key is "reward".

        Returns:
            A list of strings or tuples representing the reward keys.

        """
        reward_keys = getattr(self, "_reward_keys", None)
        if reward_keys is None:
            self._reward_keys = reward_keys = ["reward"]
        return reward_keys

    @reward_keys.setter
    def reward_keys(self, value):
        self._reward_keys = _make_list_of_nestedkeys(value, "reward_keys")

    @property
    def action_keys(self) -> list[NestedKey]:
        """Action Keys.

        Returns the keys used to retrieve actions from the environment's input.
        The default action key is "action".

        Returns:
            A list of strings or tuples representing the action keys.

        """
        action_keys = getattr(self, "_action_keys", None)
        if action_keys is None:
            self._action_keys = action_keys = ["action"]
        return action_keys

    @action_keys.setter
    def action_keys(self, value):
        self._action_keys = _make_list_of_nestedkeys(value, "action_keys")

    @property
    def observation_keys(self) -> list[NestedKey]:
        """Observation Keys.

        Returns the keys used to retrieve observations from the environment's output.
        The default observation key is "observation".

        Returns:
            A list of strings or tuples representing the observation keys.
        """
        observation_keys = getattr(self, "_observation_keys", None)
        if observation_keys is None:
            self._observation_keys = observation_keys = ["observation"]
        return observation_keys

    @observation_keys.setter
    def observation_keys(self, value):
        self._observation_keys = _make_list_of_nestedkeys(value, "observation_keys")

    @property
    def excluded_keys(self) -> list[NestedKey] | None:
        return self._excluded_keys

    @excluded_keys.setter
    def excluded_keys(self, value):
        self._excluded_keys = _make_list_of_nestedkeys(value, "excluded_keys")

    def get_keys_from_env(self, env: EnvBase):
        """Writes missing done, action and reward keys to the Forest given an environment.

        Existing keys are not overwritten.
+ """
951
+ if getattr(self, "_reward_keys", None) is None:
952
+ self.reward_keys = env.reward_keys
953
+ if getattr(self, "_done_keys", None) is None:
954
+ self.done_keys = env.done_keys
955
+ if getattr(self, "_action_keys", None) is None:
956
+ self.action_keys = env.action_keys
957
+ if getattr(self, "_observation_keys", None) is None:
958
+ self.observation_keys = env.observation_keys
959
+
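The accessors above resolve lazily: defaults are created on first read, and each setter funnels its input through `_make_list_of_nestedkeys` so that a bare string or tuple becomes a one-element list. A minimal sketch of configuring a forest's keys (the key names below are illustrative, not library defaults):

```python
from torchrl.data import MCTSForest

forest = MCTSForest()
# Defaults materialize on first access:
print(forest.done_keys)    # ['done', 'terminated', 'truncated']
print(forest.action_keys)  # ['action']

# A single NestedKey or a list of them is accepted; bare strings and
# tuples are wrapped into a one-element list by the setter.
forest.observation_keys = ["pixels", ("stats", "step_count")]  # illustrative names
forest.reward_keys = "reward"
print(forest.observation_keys)  # ['pixels', ('stats', 'step_count')]
```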
+     @classmethod
+     def _write_fn_stack(cls, new, old=None):
+         # This function updates the old values by adding the new ones,
+         # if and only if the new ones are not already there.
+         # If the old value is not provided, we assume there is none and
+         # `new` is just prepared.
+         # This involves unsqueezing the last dim (since we'll be stacking tensors
+         # and calling unique).
+         # The update involves calling cat along the last dim + unique,
+         # which keeps only the new values that were unknown to
+         # the storage.
+         # We use this method to track all the indices that are associated with
+         # an observation. Every time a new index is obtained, it is stacked alongside
+         # the others.
+         if old is None:
+             # we unsqueeze the values to stack them along dim -1
+             result = new.apply(lambda x: x.unsqueeze(-1), filter_empty=False)
+             result.set(
+                 "count", torch.ones(result.shape, dtype=torch.int, device=result.device)
+             )
+         else:
+
+             def cat(name, x, y):
+                 if name == "count":
+                     return x
+                 if y.ndim < x.ndim:
+                     y = y.unsqueeze(-1)
+                 result = torch.cat([x, y], -1)
+                 # unique along a dim is not supported on mps: round-trip through cpu
+                 if result.device.type == "mps":
+                     result = result.cpu()
+                     result = result.unique(dim=-1, sorted=False)
+                     result = result.to("mps")
+                 else:
+                     result = result.unique(dim=-1, sorted=False)
+                 return result
+
+             result = old.named_apply(cat, new, default=None)
+             result.set_("count", old.get("count") + 1)
+         return result
+
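The cat-plus-unique bookkeeping is easier to see on a bare tensor than on a TensorDict. A minimal sketch of the same pattern, assuming two writes carrying the same storage indices:

```python
import torch

# Each node accumulates the storage indices seen for it, stacked along dim -1.
old = torch.tensor([[3], [7]])              # indices recorded at the first write
new = torch.tensor([[3], [7]])              # the same indices observed again
merged = torch.cat([old, new], dim=-1)      # tensor([[3, 3], [7, 7]])
# unique(dim=-1) deduplicates whole slices along that dim, so the
# repeated write collapses back to a single column:
merged = merged.unique(dim=-1, sorted=False)
print(merged)                               # tensor([[3], [7]])
```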
+     def _make_data_map(self, source, dest):
+         try:
+             kwargs = {}
+             if self.max_size is not None:
+                 kwargs["max_size"] = self.max_size
+             self.data_map = TensorDictMap.from_tensordict_pair(
+                 source,
+                 dest,
+                 in_keys=[*self.observation_keys, *self.action_keys],
+                 consolidated=self.consolidated,
+                 **kwargs,
+             )
+             if self.max_size is None:
+                 self.max_size = self.data_map.max_size
+         except KeyError as err:
+             raise KeyError(
+                 "A KeyError occurred during data map creation. This could be due to the wrong setting of a key in the MCTSForest constructor. Scroll up for more info."
+             ) from err
+
+     def _make_node_map(self, source, dest):
+         kwargs = {}
+         if self.max_size is not None:
+             kwargs["max_size"] = self.max_size
+         self.node_map = TensorDictMap.from_tensordict_pair(
+             source,
+             dest,
+             in_keys=[*self.observation_keys],
+             out_keys=[
+                 *self.data_map.query_module.out_keys,  # hash and index
+                 # *self.action_keys,
+                 # *[("next", rk) for rk in self.reward_keys],
+                 "count",
+             ],
+             storage_constructor=ListStorage,
+             collate_fn=TensorDict.lazy_stack,
+             write_fn=self._write_fn_stack,
+             **kwargs,
+         )
+         if self.max_size is None:
+             self.max_size = self.node_map.max_size
+
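Both maps are built from the same primitive, `TensorDictMap.from_tensordict_pair`. A standalone sketch of that primitive outside the forest, assuming `TensorDictMap` is importable from `torchrl.data` (the tensor values are made up for illustration):

```python
import torch
from tensordict import TensorDict
from torchrl.data import TensorDictMap

# Map (observation, action) pairs to the resulting step, as the data map does.
source = TensorDict({"observation": torch.arange(3), "action": torch.arange(3)}, [3])
dest = TensorDict({"next": TensorDict({"observation": torch.arange(1, 4)}, [3])}, [3])

tdmap = TensorDictMap.from_tensordict_pair(
    source, dest, in_keys=["observation", "action"]
)
tdmap[source] = dest  # writes are keyed by a hash of the in_keys
print(tdmap[source]["next", "observation"])  # tensor([1, 2, 3])
```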
+     def extend(self, rollout, *, return_node: bool = False):
+         """Add a rollout to the forest.
+
+         Nodes are only added to a tree at points where rollouts diverge from
+         each other and at the endpoints of rollouts.
+
+         If there is no existing tree that matches the first steps of the
+         rollout, a new tree is added. Only one node is created, for the final
+         step.
+
+         If there is an existing tree that matches, the rollout is added to that
+         tree. If the rollout diverges from all other rollouts in the tree at
+         some step, a new node is created before the step where the rollouts
+         diverge, and a leaf node is created for the final step of the rollout.
+         If all of the rollout's steps match a previously added rollout,
+         nothing changes. If the rollout matches up to a leaf node of a tree but
+         continues beyond it, that node is extended to the end of the rollout,
+         and no new nodes are created.
+
+         Args:
+             rollout (TensorDict): The rollout to add to the forest.
+             return_node (bool, optional): If ``True``, the method returns the
+                 added node. Default is ``False``.
+
+         Returns:
+             Tree: The node that was added to the forest. This is only
+             returned if ``return_node`` is ``True``.
+
+         Examples:
+             >>> from torchrl.data import MCTSForest
+             >>> from tensordict import TensorDict
+             >>> import torch
+             >>> forest = MCTSForest()
+             >>> r0 = TensorDict({
+             ...     'action': torch.tensor([1, 2, 3, 4, 5]),
+             ...     'next': {'observation': torch.tensor([123, 392, 989, 809, 847])},
+             ...     'observation': torch.tensor([ 0, 123, 392, 989, 809])
+             ... }, [5])
+             >>> r1 = TensorDict({
+             ...     'action': torch.tensor([1, 2, 6, 7]),
+             ...     'next': {'observation': torch.tensor([123, 392, 235, 38])},
+             ...     'observation': torch.tensor([ 0, 123, 392, 235])
+             ... }, [4])
+             >>> td_root = r0[0].exclude("next")
+             >>> forest.extend(r0)
+             >>> forest.extend(r1)
+             >>> tree = forest.get_tree(td_root)
+             >>> print(tree)
+             Tree(
+                 count=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.int32, is_shared=False),
+                 index=Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.int64, is_shared=False),
+                 node_data=TensorDict(
+                     fields={
+                         observation: Tensor(shape=torch.Size([]), device=cpu, dtype=torch.int64, is_shared=False)},
+                     batch_size=torch.Size([]),
+                     device=cpu,
+                     is_shared=False),
+                 node_id=NonTensorData(data=0, batch_size=torch.Size([]), device=None),
+                 rollout=TensorDict(
+                     fields={
+                         action: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.int64, is_shared=False),
+                         next: TensorDict(
+                             fields={
+                                 observation: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.int64, is_shared=False)},
+                             batch_size=torch.Size([2]),
+                             device=cpu,
+                             is_shared=False),
+                         observation: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.int64, is_shared=False)},
+                     batch_size=torch.Size([2]),
+                     device=cpu,
+                     is_shared=False),
+                 subtree=Tree(
+                     _parent=NonTensorStack(
+                         [<weakref at 0x716eeb78fbf0; to 'TensorDict' at 0x...,
+                         batch_size=torch.Size([2]),
+                         device=None),
+                     count=Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.int32, is_shared=False),
+                     hash=NonTensorStack(
+                         [4341220243998689835, 6745467818783115365],
+                         batch_size=torch.Size([2]),
+                         device=None),
+                     node_data=LazyStackedTensorDict(
+                         fields={
+                             observation: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.int64, is_shared=False)},
+                         exclusive_fields={
+                         },
+                         batch_size=torch.Size([2]),
+                         device=cpu,
+                         is_shared=False,
+                         stack_dim=0),
+                     node_id=NonTensorStack(
+                         [1, 2],
+                         batch_size=torch.Size([2]),
+                         device=None),
+                     rollout=LazyStackedTensorDict(
+                         fields={
+                             action: Tensor(shape=torch.Size([2, -1]), device=cpu, dtype=torch.int64, is_shared=False),
+                             next: LazyStackedTensorDict(
+                                 fields={
+                                     observation: Tensor(shape=torch.Size([2, -1]), device=cpu, dtype=torch.int64, is_shared=False)},
+                                 exclusive_fields={
+                                 },
+                                 batch_size=torch.Size([2, -1]),
+                                 device=cpu,
+                                 is_shared=False,
+                                 stack_dim=0),
+                             observation: Tensor(shape=torch.Size([2, -1]), device=cpu, dtype=torch.int64, is_shared=False)},
+                         exclusive_fields={
+                         },
+                         batch_size=torch.Size([2, -1]),
+                         device=cpu,
+                         is_shared=False,
+                         stack_dim=0),
+                     wins=Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
+                     index=None,
+                     subtree=None,
+                     specs=None,
+                     batch_size=torch.Size([2]),
+                     device=None,
+                     is_shared=False),
+                 wins=Tensor(shape=torch.Size([]), device=cpu, dtype=torch.float32, is_shared=False),
+                 hash=None,
+                 _parent=None,
+                 specs=None,
+                 batch_size=torch.Size([]),
+                 device=None,
+                 is_shared=False)
+         """
+         source, dest = (
+             rollout.exclude("next").copy(),
+             rollout.select("next", *self.action_keys).copy(),
+         )
+         if self.excluded_keys is not None:
+             dest = dest.exclude(*self.excluded_keys, inplace=True)
+             dest.get("next").exclude(*self.excluded_keys, inplace=True)
+
+         if self.data_map is None:
+             self._make_data_map(source, dest)
+
+         # We need to set the action somewhere to keep track of what action led to what child
+         # # Set the action in the 'next'
+         # dest[1:] = source[:-1].exclude(*self.done_keys)
+
+         # Add ('observation', 'action') -> ('next', 'observation')
+         self.data_map[source] = dest
+         value = source
+         if self.node_map is None:
+             self._make_node_map(source, dest)
+         # map ('observation',) -> ('indices',)
+         self.node_map[source] = TensorDict.lazy_stack(value.unbind(0))
+         if return_node:
+             return self.get_tree(rollout)
+
+     def add(self, step, *, return_node: bool = False):
+         """Add a single step to the forest.
+
+         Args:
+             step (TensorDict): The step to add to the forest.
+             return_node (bool, optional): If ``True``, the method returns the
+                 added node. Default is ``False``.
+         """
+         source, dest = (
+             step.exclude("next").copy(),
+             step.select("next", *self.action_keys).copy(),
+         )
+
+         if self.data_map is None:
+             self._make_data_map(source, dest)
+
+         # We need to set the action somewhere to keep track of what action led to what child
+         # # Set the action in the 'next'
+         # dest[1:] = source[:-1].exclude(*self.done_keys)
+
+         # Add ('observation', 'action') -> ('next', 'observation')
+         self.data_map[source] = dest
+         value = source
+         if self.node_map is None:
+             self._make_node_map(source, dest)
+         # map ('observation',) -> ('indices',)
+         self.node_map[source] = value
+         if return_node:
+             return self.get_tree(step)
+
+     def get_child(self, root: TensorDictBase) -> TensorDictBase:
+         return self.data_map[root]
+
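`add()` mirrors `extend()` for a single step, and `get_child()` exposes the raw dynamics lookup both rely on. A sketch of that lookup, reusing `forest` and `r0` from the `extend()` docstring above (the batch dimension is kept so the query matches the stored layout):

```python
# (observation, action) -> next step, straight from the data map.
first_step = r0[:1].exclude("next")   # batched query carrying observation and action
child = forest.get_child(first_step)
print(child["next", "observation"])   # tensor([123])
```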
+     def _make_local_tree(
+         self,
+         root: TensorDictBase,
+         index: torch.Tensor | None = None,
+         compact: bool = True,
+     ) -> tuple[Tree, torch.Tensor | None, torch.Tensor | None]:
+         root = root.select(*self.node_map.in_keys)
+         node_meta = None
+         if root in self.node_map:
+             node_meta = self.node_map[root]
+         if index is None:
+             if node_meta is None:
+                 node_meta = self.node_map[root]
+             index = node_meta["_index"]
+         steps = []
+         while index.numel() <= 1:
+             index = index.squeeze()
+             d = self.data_map.storage[index]
+
+             # Rebuild rollout step
+             steps.append(merge_tensordicts(d, root, callback_exist=lambda *x: None))
+             d = d["next"]
+             if d in self.node_map:
+                 root = d.select(*self.node_map.in_keys)
+                 node_meta = self.node_map[root]
+                 index = node_meta["_index"]
+                 if not compact:
+                     break
+             else:
+                 # If the root is provided and not gathered from the storage, it could be that its
+                 # device doesn't match the data_map storage device.
+                 root = steps[-1]["next"].select(*self.node_map.in_keys)
+                 device = getattr(self.data_map.storage, "device", None)
+                 if root.device != device:
+                     if device is not None:
+                         root = root.to(self.data_map.storage.device)
+                     else:
+                         root.clear_device_()
+                 index = None
+                 break
+         rollout = None
+         if steps:
+             rollout = torch.stack(steps, -1)
+         hash = node_meta["_hash"]
+         return (
+             Tree(
+                 rollout=rollout,
+                 count=torch.zeros((), dtype=torch.int32),
+                 wins=torch.zeros(()),
+                 node_data=root,
+                 index=index,
+                 # Will be populated later by the caller
+                 hash=None,
+                 # We do this to avoid raising an exception, as rollout and subtree must be provided together
+                 subtree=None,
+             ),
+             index,
+             hash,
+         )
+
+     # The recursive implementation is slower and less compatible with compile
+     # def _make_tree(self, root: TensorDictBase, index: torch.Tensor | None = None) -> Tree:
+     #     tree, indices = self._make_local_tree(root, index=index)
+     #     subtrees = []
+     #     if indices is not None:
+     #         for i in indices:
+     #             subtree = self._make_tree(tree.node, index=i)
+     #             subtrees.append(subtree)
+     #         subtrees = TensorDict.lazy_stack(subtrees)
+     #         tree.subtree = subtrees
+     #     return tree
+     def _make_tree_iter(
+         self, root, index=None, max_depth: int | None = None, compact: bool = True
+     ):
+         q = deque()
+         memo = {}
+         tree, indices, hash = self._make_local_tree(root, index=index, compact=compact)
+         tree.node_id = 0
+
+         result = tree
+         depth = 0
+         counter = 1
+         if indices is not None:
+             q.append((tree, indices, hash, depth))
+
+         while len(q):
+             tree, indices, hash, depth = q.popleft()
+             extend = max_depth is None or depth < max_depth
+             subtrees = []
+             for i, h in zip(indices, hash):
+                 # TODO: remove the .item()
+                 h = h.item()
+                 subtree, subtree_indices, subtree_hash = memo.get(h, (None,) * 3)
+                 if subtree is None:
+                     subtree, subtree_indices, subtree_hash = self._make_local_tree(
+                         tree.node_data,
+                         index=i,
+                         compact=compact,
+                     )
+                     subtree.node_id = counter
+                     counter += 1
+                     subtree.hash = h
+                     memo[h] = (subtree, subtree_indices, subtree_hash)
+                 else:
+                     # We just need to save the two (or more) rollouts
+                     subtree_bis, _, _ = self._make_local_tree(
+                         tree.node_data,
+                         index=i,
+                         compact=compact,
+                     )
+                     if subtree.rollout.ndim == subtree_bis.rollout.ndim:
+                         subtree.rollout = TensorDict.stack(
+                             [subtree.rollout, subtree_bis.rollout]
+                         )
+                     else:
+                         subtree.rollout = TensorDict.stack(
+                             [*subtree.rollout, subtree_bis.rollout]
+                         )
+
+                 subtrees.append(subtree)
+                 if extend and subtree_indices is not None:
+                     q.append((subtree, subtree_indices, subtree_hash, depth + 1))
+             subtrees = TensorDict.lazy_stack(subtrees)
+             tree.subtree = subtrees
+
+         return result
+
+     def get_tree(
+         self,
+         root,
+         *,
+         max_depth: int | None = None,
+         compact: bool = True,
+     ) -> Tree:
+         return self._make_tree_iter(root=root, max_depth=max_depth, compact=compact)
+
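`get_tree` is the public entry point to the breadth-first builder above. `max_depth` bounds how many levels of subtrees are expanded, and `compact` controls whether linear (non-branching) chains are collapsed into a single rollout, as done in `_make_local_tree`. A sketch, with `forest` and `td_root` as in the earlier examples:

```python
full = forest.get_tree(td_root)                     # expand down to every leaf
shallow = forest.get_tree(td_root, max_depth=1)     # root plus one level of subtrees
expanded = forest.get_tree(td_root, compact=False)  # one node per stored step
```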
+     @classmethod
+     def valid_paths(cls, tree: Tree):
+         yield from tree.valid_paths()
+
+     def __len__(self):
+         return len(self.data_map)
+
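Path enumeration and size queries are cheap. A sketch continuing the running example:

```python
tree = forest.get_tree(td_root)
print(len(forest))  # number of stored (observation, action) entries in the data map
for path in forest.valid_paths(tree):
    # Each path is a tuple of subtree indices from the root to a leaf,
    # the same scheme used by the paths printed by to_string() below.
    print(path)
```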
+     def to_string(self, td_root, node_format_fn=lambda tree: tree.node_data.to_dict()):
+         """Generates a string representation of a tree in the forest.
+
+         This function can pull out information from each of the nodes in a tree,
+         so it can be useful for debugging. The nodes are listed line-by-line.
+         Each line contains the path to the node, followed by the string
+         representation of that node generated with ``node_format_fn``. Each
+         line is indented according to the number of steps in the path required
+         to reach the corresponding node.
+
+         Args:
+             td_root (TensorDict): Root of the tree.
+             node_format_fn (Callable, optional): User-defined function to
+                 generate a string for each node of the tree. The signature must
+                 be ``(Tree) -> Any``, and the output must be convertible to a
+                 string. If this argument is not given, the generated string is
+                 the node's :attr:`Tree.node_data` attribute converted to a dict.
+
+         Examples:
+             >>> from torchrl.data import MCTSForest
+             >>> from tensordict import TensorDict
+             >>> forest = MCTSForest()
+             >>> td_root = TensorDict({"observation": 0,})
+             >>> rollouts_data = [
+             ...     # [(action, obs), ...]
+             ...     [(3, 123), (1, 456)],
+             ...     [(2, 359), (2, 3094)],
+             ...     [(3, 123), (9, 392), (6, 989), (20, 809), (21, 847)],
+             ...     [(1, 75)],
+             ...     [(3, 123), (0, 948)],
+             ...     [(2, 359), (2, 3094), (10, 68)],
+             ...     [(2, 359), (2, 3094), (11, 9045)],
+             ... ]
+             >>> for rollout_data in rollouts_data:
+             ...     td = td_root.clone().unsqueeze(0)
+             ...     for action, obs in rollout_data:
+             ...         td = td.update(TensorDict({
+             ...             "action": [action],
+             ...             "next": TensorDict({"observation": [obs]}, [1]),
+             ...         }, [1]))
+             ...         forest.extend(td)
+             ...         td = td["next"].clone()
+             ...
+             >>> print(forest.to_string(td_root))
+             (0,) {'observation': tensor(123)}
+                 (0, 0) {'observation': tensor(456)}
+                 (0, 1) {'observation': tensor(847)}
+                 (0, 2) {'observation': tensor(948)}
+             (1,) {'observation': tensor(3094)}
+                 (1, 0) {'observation': tensor(68)}
+                 (1, 1) {'observation': tensor(9045)}
+             (2,) {'observation': tensor(75)}
+         """
+         tree = self.get_tree(td_root)
+         return tree.to_string(node_format_fn)
+
+
+ def _make_list_of_nestedkeys(obj: Any, attr: str) -> list[NestedKey] | None:
+     if obj is None:
+         return obj
+     if isinstance(obj, (str, tuple)):
+         return [obj]
+     if not isinstance(obj, list):
+         raise ValueError(
+             f"{attr} must be a list of NestedKeys or a NestedKey, got {obj}."
+         )
+     return [unravel_key(key) for key in obj]
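For reference, the normalization this helper performs (expected results shown as comments):

```python
_make_list_of_nestedkeys(None, "excluded_keys")         # None (attribute left unset)
_make_list_of_nestedkeys("done", "done_keys")           # ['done']
_make_list_of_nestedkeys(("next", "reward"), "k")       # [('next', 'reward')]
_make_list_of_nestedkeys(["done", ("next", "r")], "k")  # ['done', ('next', 'r')]
_make_list_of_nestedkeys(42, "k")                       # raises ValueError
```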