trinity-rft 0.1.1__tar.gz → 0.2.1.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. trinity_rft-0.2.1.dev0/PKG-INFO +497 -0
  2. trinity_rft-0.2.1.dev0/README.md +430 -0
  3. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/pyproject.toml +18 -3
  4. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/__init__.py +1 -1
  5. trinity_rft-0.2.1.dev0/trinity/algorithm/__init__.py +24 -0
  6. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/__init__.py +25 -0
  7. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/add_strategy.py +230 -0
  8. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/correct_bias_add_strategy.py +54 -0
  9. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/duplicate_add_strategy.py +72 -0
  10. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/step_wise_add_strategy.py +123 -0
  11. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/__init__.py +20 -0
  12. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/advantage_fn.py +29 -0
  13. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/grpo_advantage.py +83 -0
  14. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/opmd_advantage.py +96 -0
  15. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/ppo_advantage.py +86 -0
  16. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/reinforce_plus_plus_advantage.py +66 -0
  17. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/remax_advantage.py +66 -0
  18. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/rloo_advantage.py +75 -0
  19. trinity_rft-0.2.1.dev0/trinity/algorithm/algorithm.py +245 -0
  20. trinity_rft-0.2.1.dev0/trinity/algorithm/algorithm_manager.py +34 -0
  21. trinity_rft-0.2.1.dev0/trinity/algorithm/entropy_loss_fn/__init__.py +9 -0
  22. trinity_rft-0.2.1.dev0/trinity/algorithm/entropy_loss_fn/entropy_loss_fn.py +104 -0
  23. trinity_rft-0.2.1.dev0/trinity/algorithm/key_mapper.py +29 -0
  24. trinity_rft-0.2.1.dev0/trinity/algorithm/kl_fn/__init__.py +3 -0
  25. trinity_rft-0.2.1.dev0/trinity/algorithm/kl_fn/kl_fn.py +165 -0
  26. trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/__init__.py +26 -0
  27. trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/chord_policy_loss.py +257 -0
  28. trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/dpo_loss.py +67 -0
  29. trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/gspo_policy_loss.py +76 -0
  30. trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/mix_policy_loss.py +126 -0
  31. trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/opmd_policy_loss.py +31 -0
  32. trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/policy_loss_fn.py +121 -0
  33. trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/ppo_policy_loss.py +65 -0
  34. trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/sft_loss.py +33 -0
  35. trinity_rft-0.2.1.dev0/trinity/algorithm/sample_strategy/__init__.py +15 -0
  36. trinity_rft-0.2.1.dev0/trinity/algorithm/sample_strategy/mix_sample_strategy.py +98 -0
  37. trinity_rft-0.2.1.dev0/trinity/algorithm/sample_strategy/sample_strategy.py +92 -0
  38. trinity_rft-0.2.1.dev0/trinity/algorithm/sample_strategy/utils.py +42 -0
  39. trinity_rft-0.2.1.dev0/trinity/algorithm/utils.py +62 -0
  40. trinity_rft-0.2.1.dev0/trinity/buffer/__init__.py +15 -0
  41. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/buffer.py +5 -3
  42. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/buffer_reader.py +6 -0
  43. trinity_rft-0.2.1.dev0/trinity/buffer/buffer_writer.py +31 -0
  44. trinity_rft-0.2.1.dev0/trinity/buffer/queue.py +195 -0
  45. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/ray_wrapper.py +140 -6
  46. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/reader/file_reader.py +140 -55
  47. trinity_rft-0.2.1.dev0/trinity/buffer/reader/queue_reader.py +52 -0
  48. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/reader/sql_reader.py +8 -0
  49. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/schema/sql_schema.py +12 -19
  50. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/writer/file_writer.py +16 -3
  51. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/writer/queue_writer.py +10 -7
  52. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/writer/sql_writer.py +18 -5
  53. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/cli/client.py +2 -2
  54. trinity_rft-0.2.1.dev0/trinity/cli/launcher.py +280 -0
  55. trinity_rft-0.2.1.dev0/trinity/common/config.py +910 -0
  56. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/constants.py +42 -28
  57. trinity_rft-0.2.1.dev0/trinity/common/experience.py +547 -0
  58. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/models/__init__.py +15 -15
  59. trinity_rft-0.2.1.dev0/trinity/common/models/api/vllm_patch.py +374 -0
  60. trinity_rft-0.2.1.dev0/trinity/common/models/model.py +214 -0
  61. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/models/utils.py +37 -16
  62. trinity_rft-0.1.1/trinity/common/models/vllm_async_model.py → trinity_rft-0.2.1.dev0/trinity/common/models/vllm_model.py +62 -57
  63. trinity_rft-0.2.1.dev0/trinity/common/models/vllm_worker.py +74 -0
  64. trinity_rft-0.2.1.dev0/trinity/common/rewards/__init__.py +25 -0
  65. trinity_rft-0.2.1.dev0/trinity/common/rewards/accuracy_reward.py +68 -0
  66. trinity_rft-0.2.1.dev0/trinity/common/rewards/countdown_reward.py +58 -0
  67. trinity_rft-0.2.1.dev0/trinity/common/rewards/dapo_reward.py +67 -0
  68. trinity_rft-0.2.1.dev0/trinity/common/rewards/format_reward.py +28 -0
  69. trinity_rft-0.2.1.dev0/trinity/common/rewards/math_reward.py +70 -0
  70. trinity_rft-0.2.1.dev0/trinity/common/rewards/reward_fn.py +107 -0
  71. trinity_rft-0.2.1.dev0/trinity/common/rewards/utils.py +22 -0
  72. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/verl_config.py +74 -53
  73. trinity_rft-0.2.1.dev0/trinity/common/workflows/__init__.py +32 -0
  74. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/workflows/customized_math_workflows.py +24 -20
  75. trinity_rft-0.2.1.dev0/trinity/common/workflows/customized_toolcall_workflows.py +265 -0
  76. trinity_rft-0.2.1.dev0/trinity/common/workflows/envs/agentscope/agentscope_react_workflow.py +162 -0
  77. trinity_rft-0.2.1.dev0/trinity/common/workflows/envs/alfworld/RAFT_alfworld_workflow.py +225 -0
  78. trinity_rft-0.2.1.dev0/trinity/common/workflows/envs/alfworld/RAFT_reflect_alfworld_workflow.py +310 -0
  79. trinity_rft-0.2.1.dev0/trinity/common/workflows/envs/alfworld/RAFT_utils.py +196 -0
  80. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/workflows/envs/alfworld/alfworld_workflow.py +121 -5
  81. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/workflows/envs/sciworld/sciworld_workflow.py +1 -1
  82. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/workflows/envs/webshop/webshop_workflow.py +1 -1
  83. trinity_rft-0.2.1.dev0/trinity/common/workflows/eval_workflow.py +90 -0
  84. trinity_rft-0.2.1.dev0/trinity/common/workflows/math_rm_workflow.py +56 -0
  85. trinity_rft-0.2.1.dev0/trinity/common/workflows/step_wise_workflow.py +147 -0
  86. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/workflows/workflow.py +66 -28
  87. trinity_rft-0.2.1.dev0/trinity/data/controllers/active_iterator.py +394 -0
  88. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/controllers/task_parser.py +14 -35
  89. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/core/dataset.py +58 -26
  90. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/processors/cleaner.py +5 -3
  91. trinity_rft-0.2.1.dev0/trinity/data/server.py +81 -0
  92. trinity_rft-0.2.1.dev0/trinity/data/utils.py +72 -0
  93. trinity_rft-0.2.1.dev0/trinity/explorer/__init__.py +3 -0
  94. trinity_rft-0.2.1.dev0/trinity/explorer/explorer.py +395 -0
  95. trinity_rft-0.2.1.dev0/trinity/explorer/scheduler.py +448 -0
  96. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/explorer/workflow_runner.py +50 -22
  97. trinity_rft-0.2.1.dev0/trinity/manager/__init__.py +7 -0
  98. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_manager.py +178 -111
  99. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_registry/__init__.py +2 -0
  100. trinity_rft-0.2.1.dev0/trinity/manager/config_registry/algorithm_config_manager.py +371 -0
  101. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_registry/buffer_config_manager.py +77 -13
  102. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_registry/explorer_config_manager.py +31 -8
  103. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_registry/model_config_manager.py +6 -92
  104. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_registry/trainer_config_manager.py +34 -97
  105. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/manager.py +16 -4
  106. trinity_rft-0.2.1.dev0/trinity/manager/synchronizer.py +325 -0
  107. trinity_rft-0.2.1.dev0/trinity/trainer/trainer.py +215 -0
  108. trinity_rft-0.2.1.dev0/trinity/trainer/verl/dp_actor.py +234 -0
  109. trinity_rft-0.2.1.dev0/trinity/trainer/verl/fsdp_checkpoint_manager.py +363 -0
  110. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/trainer/verl/fsdp_workers.py +430 -655
  111. trinity_rft-0.2.1.dev0/trinity/trainer/verl/utils.py +152 -0
  112. trinity_rft-0.2.1.dev0/trinity/trainer/verl_trainer.py +426 -0
  113. trinity_rft-0.2.1.dev0/trinity/utils/distributed.py +74 -0
  114. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/utils/dlc_utils.py +25 -14
  115. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/utils/eval_utils.py +2 -130
  116. trinity_rft-0.2.1.dev0/trinity/utils/math_eval_utils.py +558 -0
  117. trinity_rft-0.2.1.dev0/trinity/utils/monitor.py +220 -0
  118. trinity_rft-0.2.1.dev0/trinity/utils/plugin_loader.py +87 -0
  119. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/utils/registry.py +19 -16
  120. trinity_rft-0.2.1.dev0/trinity/utils/timer.py +18 -0
  121. trinity_rft-0.2.1.dev0/trinity_rft.egg-info/PKG-INFO +497 -0
  122. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity_rft.egg-info/SOURCES.txt +56 -9
  123. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity_rft.egg-info/requires.txt +16 -2
  124. trinity_rft-0.1.1/PKG-INFO +0 -412
  125. trinity_rft-0.1.1/README.md +0 -357
  126. trinity_rft-0.1.1/trinity/buffer/__init__.py +0 -7
  127. trinity_rft-0.1.1/trinity/buffer/buffer_writer.py +0 -15
  128. trinity_rft-0.1.1/trinity/buffer/queue.py +0 -75
  129. trinity_rft-0.1.1/trinity/buffer/reader/queue_reader.py +0 -37
  130. trinity_rft-0.1.1/trinity/cli/launcher.py +0 -249
  131. trinity_rft-0.1.1/trinity/common/config.py +0 -602
  132. trinity_rft-0.1.1/trinity/common/experience.py +0 -295
  133. trinity_rft-0.1.1/trinity/common/models/model.py +0 -136
  134. trinity_rft-0.1.1/trinity/common/models/openai_api.py +0 -79
  135. trinity_rft-0.1.1/trinity/common/models/vllm_model.py +0 -287
  136. trinity_rft-0.1.1/trinity/common/models/vllm_worker.py +0 -74
  137. trinity_rft-0.1.1/trinity/common/rewards/__init__.py +0 -11
  138. trinity_rft-0.1.1/trinity/common/rewards/accuracy_reward.py +0 -33
  139. trinity_rft-0.1.1/trinity/common/rewards/base.py +0 -24
  140. trinity_rft-0.1.1/trinity/common/rewards/composite_reward.py +0 -24
  141. trinity_rft-0.1.1/trinity/common/rewards/format_reward.py +0 -29
  142. trinity_rft-0.1.1/trinity/common/rewards/reward_fn.py +0 -229
  143. trinity_rft-0.1.1/trinity/common/schema.py +0 -148
  144. trinity_rft-0.1.1/trinity/common/workflows/__init__.py +0 -19
  145. trinity_rft-0.1.1/trinity/data/controllers/active_iterator.py +0 -290
  146. trinity_rft-0.1.1/trinity/data/core/dataset_db.py +0 -84
  147. trinity_rft-0.1.1/trinity/data/server.py +0 -27
  148. trinity_rft-0.1.1/trinity/explorer/__init__.py +0 -4
  149. trinity_rft-0.1.1/trinity/explorer/explorer.py +0 -303
  150. trinity_rft-0.1.1/trinity/explorer/runner_pool.py +0 -287
  151. trinity_rft-0.1.1/trinity/manager/__init__.py +0 -7
  152. trinity_rft-0.1.1/trinity/trainer/trainer.py +0 -175
  153. trinity_rft-0.1.1/trinity/trainer/verl/core_algos.py +0 -717
  154. trinity_rft-0.1.1/trinity/trainer/verl/dp_actor.py +0 -538
  155. trinity_rft-0.1.1/trinity/trainer/verl/ray_trainer.py +0 -1160
  156. trinity_rft-0.1.1/trinity/trainer/verl_trainer.py +0 -552
  157. trinity_rft-0.1.1/trinity/utils/distributed.py +0 -82
  158. trinity_rft-0.1.1/trinity/utils/monitor.py +0 -112
  159. trinity_rft-0.1.1/trinity/utils/plugin_loader.py +0 -65
  160. trinity_rft-0.1.1/trinity_rft.egg-info/PKG-INFO +0 -412
  161. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/LICENSE +0 -0
  162. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/setup.cfg +0 -0
  163. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/setup.py +0 -0
  164. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/reader/__init__.py +0 -0
  165. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/schema/__init__.py +0 -0
  166. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/utils.py +0 -0
  167. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/writer/__init__.py +0 -0
  168. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/cli/server.py +0 -0
  169. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/__init__.py +0 -0
  170. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/rewards/agents_reward.py +0 -0
  171. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/rewards/human_reward.py +0 -0
  172. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/rewards/tool_reward.py +0 -0
  173. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/controllers/default_ops.py +0 -0
  174. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/core/comparator.py +0 -0
  175. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/core/formatter.py +0 -0
  176. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/processors/base.py +0 -0
  177. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/processors/human_annotator.py +0 -0
  178. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/processors/synthesizer.py +0 -0
  179. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_registry/config_registry.py +0 -0
  180. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/plugins/__init__.py +0 -0
  181. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/trainer/__init__.py +0 -0
  182. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/trainer/verl/__init__.py +0 -0
  183. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/utils/__init__.py +0 -0
  184. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/utils/log.py +0 -0
  185. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity_rft.egg-info/dependency_links.txt +0 -0
  186. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity_rft.egg-info/entry_points.txt +0 -0
  187. {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity_rft.egg-info/top_level.txt +0 -0
@@ -0,0 +1,497 @@
1
+ Metadata-Version: 2.4
2
+ Name: trinity-rft
3
+ Version: 0.2.1.dev0
4
+ Summary: Trinity-RFT: A Framework for Training Large Language Models with Reinforcement Fine-Tuning
5
+ Author-email: Trinity-RFT Team <trinity-rft@outlook.com>
6
+ Project-URL: Homepage, https://github.com/modelscope/Trinity-RFT
7
+ Project-URL: Documentation, https://modelscope.github.io/Trinity-RFT/
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3 :: Only
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: >=3.10
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: verl==0.4.1
19
+ Requires-Dist: ray[default]>=2.45.0
20
+ Requires-Dist: vllm<=0.10.0,>=0.9.1
21
+ Requires-Dist: tensordict==0.6.2
22
+ Requires-Dist: wandb
23
+ Requires-Dist: omegaconf
24
+ Requires-Dist: sqlalchemy
25
+ Requires-Dist: psycopg2-binary
26
+ Requires-Dist: networkx
27
+ Requires-Dist: latex2sympy2_extended
28
+ Requires-Dist: math_verify
29
+ Requires-Dist: ninja
30
+ Requires-Dist: fire
31
+ Requires-Dist: streamlit
32
+ Requires-Dist: flask
33
+ Requires-Dist: requests
34
+ Requires-Dist: tensorboard
35
+ Requires-Dist: openai
36
+ Requires-Dist: jsonlines
37
+ Requires-Dist: sortedcontainers
38
+ Requires-Dist: word2number
39
+ Requires-Dist: transformers<4.54.0
40
+ Provides-Extra: data
41
+ Requires-Dist: py-data-juicer; extra == "data"
42
+ Provides-Extra: agent
43
+ Requires-Dist: agentscope; extra == "agent"
44
+ Provides-Extra: rm-gallery
45
+ Requires-Dist: rm-gallery>=0.1.1; extra == "rm-gallery"
46
+ Provides-Extra: dev
47
+ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
48
+ Requires-Dist: black>=23.7.0; extra == "dev"
49
+ Requires-Dist: flake8>=6.1.0; extra == "dev"
50
+ Requires-Dist: flake8-docstrings>=1.6.0; extra == "dev"
51
+ Requires-Dist: isort>=5.12.0; extra == "dev"
52
+ Requires-Dist: mypy>=1.7.0; extra == "dev"
53
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
54
+ Requires-Dist: pytest-json-ctrf; extra == "dev"
55
+ Requires-Dist: parameterized; extra == "dev"
56
+ Requires-Dist: matplotlib; extra == "dev"
57
+ Provides-Extra: doc
58
+ Requires-Dist: sphinx; extra == "doc"
59
+ Requires-Dist: sphinx-autobuild; extra == "doc"
60
+ Requires-Dist: sphinx_rtd_theme; extra == "doc"
61
+ Requires-Dist: myst-parser; extra == "doc"
62
+ Requires-Dist: sphinxcontrib-apidoc; extra == "doc"
63
+ Requires-Dist: sphinx-multiversion; extra == "doc"
64
+ Provides-Extra: flash-attn
65
+ Requires-Dist: flash-attn==2.8.0.post2; extra == "flash-attn"
66
+ Dynamic: license-file
67
+
68
+ [**中文主页**](https://github.com/modelscope/Trinity-RFT/blob/main/README_zh.md) | [**Tutorial**](https://modelscope.github.io/Trinity-RFT/) | [**FAQ**](./docs/sphinx_doc/source/tutorial/faq.md)
69
+
70
+ <div align="center">
71
+ <img src="https://img.alicdn.com/imgextra/i1/O1CN01lvLpfw25Pl4ohGZnU_!!6000000007519-2-tps-1628-490.png" alt="Trinity-RFT" style="height: 120px;">
72
+ </div>
73
+
74
+
75
+
76
+ <h2 align="center">Trinity-RFT: A General-Purpose and Unified Framework for Reinforcement Fine-Tuning of Large Language Models</h2>
77
+
78
+
79
+ <div align="center">
80
+
81
+ [![paper](http://img.shields.io/badge/cs.LG-2505.17826-B31B1B?logo=arxiv&logoColor=red)](https://arxiv.org/abs/2505.17826)
82
+ [![doc](https://img.shields.io/badge/Docs-blue?logo=markdown)](https://modelscope.github.io/Trinity-RFT/)
83
+ [![pypi](https://img.shields.io/pypi/v/trinity-rft?logo=pypi&color=026cad)](https://pypi.org/project/trinity-rft/)
84
+ ![license](https://img.shields.io/badge/license-Apache--2.0-000000.svg)
85
+
86
+ </div>
87
+
88
+
89
+
90
+ ## 🚀 News
91
+
92
+ * [2025-08] ✨ Trinity-RFT v0.2.1 is released with enhanced features for Agentic RL and Async RL.
93
+ * [2025-08] 🎵 We introduce [CHORD](https://github.com/modelscope/Trinity-RFT/tree/main/examples/mix_chord), a dynamic integration of SFT and RL for enhanced LLM fine-tuning ([paper](https://arxiv.org/pdf/2508.11408)).
94
+ * [2025-08] We now support training on general multi-step workflows! Please check out examples for [ALFWorld](./docs/sphinx_doc/source/tutorial/example_step_wise.md) and [ReAct](./docs/sphinx_doc/source/tutorial/example_react.md).
95
+ * [2025-07] Trinity-RFT v0.2.0 is released.
96
+ * [2025-07] We update the [technical report](https://arxiv.org/abs/2505.17826) (arXiv v2) with new features, examples, and experiments.
97
+ * [2025-06] Trinity-RFT v0.1.1 is released.
98
+ * [2025-05] We release Trinity-RFT v0.1.0 and a technical report.
99
+ * [2025-04] The initial codebase of Trinity-RFT is open-sourced.
100
+
101
+
102
+ ## 💡 What is Trinity-RFT?
103
+
104
+
105
+
106
+ Trinity-RFT is a general-purpose, flexible and easy-to-use framework for reinforcement fine-tuning (RFT) of large language models (LLMs).
107
+ It is designed to support diverse application scenarios and serve as a unified platform for exploring advanced RL paradigms in the [era of experience](https://storage.googleapis.com/deepmind-media/Era-of-Experience%20/The%20Era%20of%20Experience%20Paper.pdf).
108
+
109
+
110
+
111
+ ## ✨ Key Features
112
+
113
+ * **Unified RFT Core:**
114
+
115
+ Supports *synchronous/asynchronous*, *on-policy/off-policy*, and *online/offline* training. Rollout and training can run separately and scale independently on different devices.
116
+
117
+ * **First-Class Agent-Environment Interaction:**
118
+
119
+ Handles lagged feedback, long-tailed latencies, and agent/env failures gracefully. Supports multi-turn agent-env interaction.
120
+
121
+ * **Optimized Data Pipelines:**
122
+
123
+ Treats rollout tasks and experiences as dynamic assets, enabling active management (prioritization, cleaning, augmentation) throughout the RFT lifecycle.
124
+
125
+ * **User-Friendly Design:**
126
+
127
+ Modular and decoupled architecture for easy adoption and development, plus rich graphical user interfaces for low-code usage.
128
+
129
+
130
+ <p align="center">
131
+ <img src="https://img.alicdn.com/imgextra/i2/O1CN01H3UbpF1yP7E1OCLbi_!!6000000006570-2-tps-1334-638.png" alt="Trinity-RFT">
132
+ <em>Figure: The high-level design of Trinity-RFT</em>
133
+ </p>
134
+
135
+
136
+ <details>
137
+ <summary>Figure: The architecture of RFT-core</summary>
138
+
139
+
140
+ <p align="center">
141
+ <img src="https://img.alicdn.com/imgextra/i1/O1CN01BFCZRV1zS9T1PoH49_!!6000000006712-2-tps-922-544.png" alt="Trinity-RFT-core-architecture">
142
+ </p>
143
+
144
+ </details>
145
+
146
+
147
+ <details>
148
+ <summary>Figure: Some RFT modes supported by Trinity-RFT</summary>
149
+
150
+ <p align="center">
151
+ <img src="https://img.alicdn.com/imgextra/i3/O1CN01E7NskS1FFoTI9jlaQ_!!6000000000458-2-tps-1458-682.png" alt="Trinity-RFT-modes">
152
+ </p>
153
+
154
+ </details>
155
+
156
+
157
+ <details>
158
+ <summary>Figure: Concatenated and general multi-step workflows</summary>
159
+
160
+ <p align="center">
161
+ <img src="https://img.alicdn.com/imgextra/i1/O1CN01z1i7kk1jlMEVa8ZHV_!!6000000004588-2-tps-1262-695.png" alt="Trinity-RFT-multi-step">
162
+ </p>
163
+
164
+ </details>
165
+
166
+
167
+ <details>
168
+ <summary>Figure: The architecture of data processors</summary>
169
+
170
+ <p align="center">
171
+ <img src="https://img.alicdn.com/imgextra/i3/O1CN01hR1LCh25kpJMKmYR4_!!6000000007565-2-tps-1474-740.png" alt="Trinity-RFT-data-pipeline-buffer">
172
+ </p>
173
+
174
+ </details>
175
+
176
+
177
+ <details>
178
+ <summary>Figure: The high-level design of data pipelines in Trinity-RFT</summary>
179
+
180
+ <p align="center">
181
+ <img src="https://img.alicdn.com/imgextra/i4/O1CN01UvyfcZ1WoTv5t3pCp_!!6000000002835-2-tps-1166-274.png" alt="Trinity-RFT-data-pipelines">
182
+ </p>
183
+
184
+ </details>
185
+
186
+
187
+
188
+ ## 🛠️ What can I use Trinity-RFT for?
189
+
190
+
191
+ * **Adaptation to New Scenarios:**
192
+
193
+ Implement agent-environment interaction logic in a single `Workflow` or `MultiTurnWorkflow` class. ([Example](./docs/sphinx_doc/source/tutorial/example_multi_turn.md))
194
+
195
+
196
+ * **RL Algorithm Development:**
197
+
198
+ Develop custom RL algorithms (loss design, sampling, data processing) in compact, plug-and-play classes. ([Example](./docs/sphinx_doc/source/tutorial/example_mix_algo.md))
199
+
200
+
201
+ * **Low-Code Usage:**
202
+
203
+ Use graphical interfaces for easy monitoring and tracking of the learning process.
204
+
205
+
206
+ ---
207
+
208
+ ## Table of contents
209
+
210
+
211
+ - [Getting started](#getting-started)
212
+ - [Step 1: installation](#step-1-installation)
213
+ - [Step 2: prepare dataset and model](#step-2-prepare-dataset-and-model)
214
+ - [Step 3: configurations](#step-3-configurations)
215
+ - [Step 4: run the RFT process](#step-4-run-the-rft-process)
216
+ - [Further tutorials](#further-tutorials)
217
+ - [Upcoming features](#upcoming-features)
218
+ - [Contribution guide](#contribution-guide)
219
+ - [Acknowledgements](#acknowledgements)
220
+ - [Citation](#citation)
221
+
222
+
223
+
224
+ ## Getting started
225
+
226
+
227
+ > [!NOTE]
228
+ > This project is currently under active development. Comments and suggestions are welcome!
229
+
230
+
231
+ ### Step 1: installation
232
+
233
+
234
+ Requirements:
235
+ - Python version >= 3.10, <= 3.12
236
+ - CUDA version >= 12.4, <= 12.8
237
+ - At least 2 GPUs
238
+
239
+
240
+ Installation from source **(recommended)**:
241
+
242
+ ```shell
243
+ # Pull the source code from GitHub
244
+ git clone https://github.com/modelscope/Trinity-RFT
245
+ cd Trinity-RFT
246
+
247
+ # Create a new environment using Conda or venv
248
+ # Option 1: Conda
249
+ conda create -n trinity python=3.10
250
+ conda activate trinity
251
+
252
+ # Option 2: venv
253
+ python3.10 -m venv .venv
254
+ source .venv/bin/activate
255
+
256
+ # Install the package in editable mode
257
+ # for bash
258
+ pip install -e .[dev]
259
+ # for zsh
260
+ pip install -e .\[dev\]
261
+
262
+ # Install flash-attn after all dependencies are installed
263
+ # Note: flash-attn will take a long time to compile, please be patient.
264
+ # for bash
265
+ pip install -e .[flash_attn]
266
+ # for zsh
267
+ pip install -e .\[flash_attn\]
268
+ # Try the following command if you encounter errors during flash-attn installation
269
+ # pip install flash-attn==2.8.0.post2 -v --no-build-isolation
270
+ ```
271
+
272
+ Installation using pip:
273
+
274
+ ```shell
275
+ pip install trinity-rft==0.2.0
276
+ # install flash-attn separately
277
+ pip install flash-attn==2.8.0.post2
278
+ ```
279
+
280
+ Installation from docker:
281
+ We have provided a Dockerfile for Trinity-RFT (trinity):
282
+
283
+ ```shell
284
+ git clone https://github.com/modelscope/Trinity-RFT
285
+ cd Trinity-RFT
286
+
287
+ # build the docker image
288
+ # Note: you can edit the dockerfile to customize the environment
289
+ # e.g., use pip mirrors or set api key
290
+ docker build -f scripts/docker/Dockerfile -t trinity-rft:latest .
291
+
292
+ # run the docker image
293
+ docker run -it --gpus all --shm-size="64g" --rm -v $PWD:/workspace -v <root_path_of_data_and_checkpoints>:/data trinity-rft:latest
294
+ ```
295
+
296
+ ### Step 2: prepare dataset and model
297
+
298
+
299
+ Trinity-RFT supports most datasets and models from Huggingface and ModelScope.
300
+
301
+
302
+ **Prepare the model** in the local directory `$MODEL_PATH/{model_name}`:
303
+
304
+ ```bash
305
+ # Using Huggingface
306
+ huggingface-cli download {model_name} --local-dir $MODEL_PATH/{model_name}
307
+
308
+ # Using Modelscope
309
+ modelscope download {model_name} --local_dir $MODEL_PATH/{model_name}
310
+ ```
311
+
312
+ For more details about model downloading, see [Huggingface](https://huggingface.co/docs/huggingface_hub/main/en/guides/cli) or [ModelScope](https://modelscope.cn/docs/models/download).
313
+
314
+
315
+
316
+ **Prepare the dataset** in the local directory `$DATASET_PATH/{dataset_name}`:
317
+
318
+ ```bash
319
+ # Using Huggingface
320
+ huggingface-cli download {dataset_name} --repo-type dataset --local-dir $DATASET_PATH/{dataset_name}
321
+
322
+ # Using Modelscope
323
+ modelscope download --dataset {dataset_name} --local_dir $DATASET_PATH/{dataset_name}
324
+ ```
325
+
326
+ For more details about dataset downloading, see [Huggingface](https://huggingface.co/docs/huggingface_hub/main/en/guides/cli#download-a-dataset-or-a-space) or [ModelScope](https://modelscope.cn/docs/datasets/download).
327
+
328
+
329
+
330
+ ### Step 3: configurations
331
+
332
+
333
+ Trinity-RFT provides a web interface for configuring your RFT process.
334
+
335
+ > [!NOTE]
336
+ > This is an experimental feature, and we will continue to improve it.
337
+
338
+
339
+ To launch the web interface for minimal configurations, you can run
340
+
341
+ ```bash
342
+ trinity studio --port 8080
343
+ ```
344
+
345
+ Then you can configure your RFT process in the web page and generate a config file. You can save the config file for later use or run it directly as described in the following section.
346
+
347
+ Advanced users can also edit the config file directly.
348
+ We provide example config files in [`examples`](examples/).
349
+
350
+ For complete GUI features, please refer to the monorepo for [Trinity-Studio](https://github.com/modelscope/Trinity-Studio).
351
+
352
+
353
+ <details>
354
+
355
+ <summary> Example: config manager GUI </summary>
356
+
357
+ ![config-manager](https://img.alicdn.com/imgextra/i1/O1CN01yhYrV01lGKchtywSH_!!6000000004791-2-tps-1480-844.png)
358
+
359
+
360
+ </details>
361
+
362
+
363
+
364
+
365
+ ### Step 4: run the RFT process
366
+
367
+
368
+ Start a ray cluster:
369
+
370
+ ```shell
371
+ # On master node
372
+ ray start --head
373
+
374
+ # On worker nodes
375
+ ray start --address=<master_address>
376
+ ```
377
+
378
+ (Optional) Log in to [wandb](https://docs.wandb.ai/quickstart/) for better monitoring:
379
+
380
+ ```shell
381
+ export WANDB_API_KEY=<your_api_key>
382
+ wandb login
383
+ ```
384
+
385
+ For command-line users, run the RFT process:
386
+
387
+ ```shell
388
+ trinity run --config <config_path>
389
+ ```
390
+
391
+ For example, below is the command for fine-tuning Qwen2.5-1.5B-Instruct on GSM8k with GRPO:
392
+
393
+ ```shell
394
+ trinity run --config examples/grpo_gsm8k/gsm8k.yaml
395
+ ```
396
+
397
+ For studio users, click "Run" in the web interface.
398
+
399
+
400
+ ## Further tutorials
401
+
402
+
403
+ Tutorials for running different RFT modes:
404
+
405
+ + [Quick example: GRPO on GSM8k](./docs/sphinx_doc/source/tutorial/example_reasoning_basic.md)
406
+ + [Off-policy RFT](./docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md)
407
+ + [Fully asynchronous RFT](./docs/sphinx_doc/source/tutorial/example_async_mode.md)
408
+ + [Offline learning by DPO or SFT](./docs/sphinx_doc/source/tutorial/example_dpo.md)
409
+
410
+
411
+ Tutorials for adapting Trinity-RFT to a new multi-turn agentic scenario:
412
+
413
+ + [Concatenated Multi-turn tasks](./docs/sphinx_doc/source/tutorial/example_multi_turn.md)
414
+
415
+ Tutorials for adapting Trinity-RFT to a general multi-step agentic scenario:
416
+
417
+ + [General Multi-Step tasks](./docs/sphinx_doc/source/tutorial/example_step_wise.md)
418
+ + [ReAct agent tasks](./docs/sphinx_doc/source/tutorial/example_react.md)
419
+
420
+
421
+ Tutorials for data-related functionalities:
422
+
423
+ + [Advanced data processing & human-in-the-loop](./docs/sphinx_doc/source/tutorial/example_data_functionalities.md)
424
+
425
+
426
+ Tutorials for RL algorithm development/research with Trinity-RFT:
427
+
428
+ + [RL algorithm development with Trinity-RFT](./docs/sphinx_doc/source/tutorial/example_mix_algo.md)
429
+
430
+
431
+ Guidelines for full configurations: see [this document](./docs/sphinx_doc/source/tutorial/trinity_configs.md)
432
+
433
+
434
+ Guidelines for developers and researchers:
435
+
436
+ + [Build new RL scenarios](./docs/sphinx_doc/source/tutorial/trinity_programming_guide.md#workflows-for-rl-environment-developers)
437
+ + [Implement new RL algorithms](./docs/sphinx_doc/source/tutorial/trinity_programming_guide.md#algorithms-for-rl-algorithm-developers)
438
+
439
+
440
+
441
+
442
+
443
+ ## Upcoming features
444
+
445
+ A tentative roadmap: [#51](https://github.com/modelscope/Trinity-RFT/issues/51)
446
+
447
+
448
+
449
+ ## Contribution guide
450
+
451
+
452
+ This project is currently under active development, and we welcome contributions from the community!
453
+
454
+
455
+ Code style check:
456
+
457
+ ```shell
458
+ pre-commit run --all-files
459
+ ```
460
+
461
+
462
+
463
+ Unit tests:
464
+
465
+ ```shell
466
+ python -m pytest tests
467
+ ```
468
+
469
+
470
+
471
+ ## Acknowledgements
472
+
473
+
474
+ This project is built upon many excellent open-source projects, including:
475
+
476
+ + [verl](https://github.com/volcengine/verl) and [PyTorch's FSDP](https://pytorch.org/docs/stable/fsdp.html) for LLM training;
477
+ + [vLLM](https://github.com/vllm-project/vllm) for LLM inference;
478
+ + [Data-Juicer](https://github.com/modelscope/data-juicer?tab=readme-ov-file) for data processing pipelines;
479
+ + [AgentScope](https://github.com/modelscope/agentscope) for agentic workflows;
480
+ + [Ray](https://github.com/ray-project/ray) for distributed systems;
481
+ + we have also drawn inspiration from RL frameworks like [OpenRLHF](https://github.com/OpenRLHF/OpenRLHF), [TRL](https://github.com/huggingface/trl) and [ChatLearn](https://github.com/alibaba/ChatLearn);
482
+ + ......
483
+
484
+ ## Citation
485
+
486
+
487
+ ```plain
488
+ @misc{trinity-rft,
489
+ title={Trinity-RFT: A General-Purpose and Unified Framework for Reinforcement Fine-Tuning of Large Language Models},
490
+ author={Xuchen Pan and Yanxi Chen and Yushuo Chen and Yuchang Sun and Daoyuan Chen and Wenhao Zhang and Yuexiang Xie and Yilun Huang and Yilei Zhang and Dawei Gao and Yaliang Li and Bolin Ding and Jingren Zhou},
491
+ year={2025},
492
+ eprint={2505.17826},
493
+ archivePrefix={arXiv},
494
+ primaryClass={cs.LG},
495
+ url={https://arxiv.org/abs/2505.17826},
496
+ }
497
+ ```