trinity-rft 0.2.1.dev0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. trinity_rft-0.3.1/PKG-INFO +453 -0
  2. trinity_rft-0.3.1/README.md +380 -0
  3. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/pyproject.toml +23 -13
  4. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/__init__.py +1 -1
  5. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/__init__.py +0 -3
  6. trinity_rft-0.3.1/trinity/algorithm/advantage_fn/__init__.py +43 -0
  7. trinity_rft-0.3.1/trinity/algorithm/advantage_fn/advantage_fn.py +89 -0
  8. trinity_rft-0.3.1/trinity/algorithm/advantage_fn/asymre_advantage.py +122 -0
  9. trinity_rft-0.3.1/trinity/algorithm/advantage_fn/grpo_advantage.py +236 -0
  10. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/step_wise_add_strategy.py → trinity_rft-0.3.1/trinity/algorithm/advantage_fn/multi_step_grpo_advantage.py +68 -25
  11. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/advantage_fn/opmd_advantage.py +52 -3
  12. trinity_rft-0.3.1/trinity/algorithm/advantage_fn/rec_advantage.py +100 -0
  13. trinity_rft-0.3.1/trinity/algorithm/advantage_fn/reinforce_advantage.py +36 -0
  14. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/algorithm.py +165 -24
  15. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/__init__.py +8 -0
  16. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/chord_policy_loss.py +19 -18
  17. trinity_rft-0.3.1/trinity/algorithm/policy_loss_fn/cispo_policy_loss.py +91 -0
  18. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/gspo_policy_loss.py +9 -5
  19. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/mix_policy_loss.py +4 -2
  20. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/opmd_policy_loss.py +7 -4
  21. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/ppo_policy_loss.py +7 -2
  22. trinity_rft-0.3.1/trinity/algorithm/policy_loss_fn/rec_policy_loss.py +132 -0
  23. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/sft_loss.py +6 -8
  24. trinity_rft-0.3.1/trinity/algorithm/policy_loss_fn/sppo_loss_fn.py +57 -0
  25. trinity_rft-0.3.1/trinity/algorithm/policy_loss_fn/topr_policy_loss.py +77 -0
  26. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/sample_strategy/mix_sample_strategy.py +18 -3
  27. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/sample_strategy/sample_strategy.py +14 -38
  28. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/utils.py +43 -0
  29. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/__init__.py +0 -2
  30. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/buffer.py +11 -32
  31. trinity_rft-0.3.1/trinity/buffer/buffer_reader.py +15 -0
  32. trinity_rft-0.3.1/trinity/buffer/operators/__init__.py +16 -0
  33. trinity_rft-0.3.1/trinity/buffer/operators/data_juicer_operator.py +50 -0
  34. trinity_rft-0.3.1/trinity/buffer/operators/experience_operator.py +50 -0
  35. trinity_rft-0.3.1/trinity/buffer/operators/filters/reward_filter.py +54 -0
  36. trinity_rft-0.3.1/trinity/buffer/operators/mappers/reward_shaping_mapper.py +112 -0
  37. trinity_rft-0.3.1/trinity/buffer/pipelines/__init__.py +11 -0
  38. trinity_rft-0.3.1/trinity/buffer/pipelines/experience_pipeline.py +144 -0
  39. trinity_rft-0.3.1/trinity/buffer/pipelines/task_pipeline.py +72 -0
  40. trinity_rft-0.3.1/trinity/buffer/reader/file_reader.py +157 -0
  41. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/reader/queue_reader.py +5 -16
  42. trinity_rft-0.3.1/trinity/buffer/reader/sql_reader.py +34 -0
  43. trinity_rft-0.3.1/trinity/buffer/schema/__init__.py +4 -0
  44. trinity_rft-0.3.1/trinity/buffer/schema/formatter.py +395 -0
  45. trinity_rft-0.3.1/trinity/buffer/schema/sql_schema.py +136 -0
  46. trinity_rft-0.3.1/trinity/buffer/storage/file.py +84 -0
  47. {trinity_rft-0.2.1.dev0/trinity/buffer → trinity_rft-0.3.1/trinity/buffer/storage}/queue.py +162 -9
  48. trinity_rft-0.3.1/trinity/buffer/storage/sql.py +281 -0
  49. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/utils.py +3 -2
  50. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/writer/file_writer.py +2 -2
  51. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/writer/queue_writer.py +2 -5
  52. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/writer/sql_writer.py +5 -5
  53. trinity_rft-0.3.1/trinity/cli/launcher.py +329 -0
  54. trinity_rft-0.3.1/trinity/common/__init__.py +0 -0
  55. trinity_rft-0.3.1/trinity/common/config.py +1147 -0
  56. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/constants.py +21 -44
  57. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/experience.py +125 -40
  58. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/models/__init__.py +57 -4
  59. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/models/api/vllm_patch.py +3 -2
  60. trinity_rft-0.3.1/trinity/common/models/mm_utils.py +73 -0
  61. trinity_rft-0.3.1/trinity/common/models/model.py +374 -0
  62. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/models/utils.py +186 -14
  63. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/models/vllm_model.py +220 -71
  64. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/models/vllm_worker.py +24 -8
  65. trinity_rft-0.3.1/trinity/common/rewards/__init__.py +25 -0
  66. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/accuracy_reward.py +9 -9
  67. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/countdown_reward.py +0 -3
  68. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/dapo_reward.py +0 -3
  69. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/format_reward.py +0 -3
  70. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/math_reward.py +0 -3
  71. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/reward_fn.py +0 -4
  72. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/verl_config.py +162 -19
  73. trinity_rft-0.3.1/trinity/common/workflows/__init__.py +97 -0
  74. trinity_rft-0.3.1/trinity/common/workflows/agentscope/__init__.py +1 -0
  75. trinity_rft-0.3.1/trinity/common/workflows/agentscope/react/__init__.py +0 -0
  76. trinity_rft-0.3.1/trinity/common/workflows/agentscope/react/react_agent.py +63 -0
  77. trinity_rft-0.3.1/trinity/common/workflows/agentscope/react/react_workflow.py +107 -0
  78. trinity_rft-0.3.1/trinity/common/workflows/agentscope/react/templates.py +59 -0
  79. trinity_rft-0.3.1/trinity/common/workflows/agentscope_workflow.py +83 -0
  80. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/customized_math_workflows.py +48 -6
  81. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/customized_toolcall_workflows.py +10 -9
  82. trinity_rft-0.2.1.dev0/trinity/common/workflows/envs/agentscope/agentscope_react_workflow.py → trinity_rft-0.3.1/trinity/common/workflows/envs/agentscope/agentscopev0_react_workflow.py +21 -21
  83. trinity_rft-0.3.1/trinity/common/workflows/envs/agentscope/agentscopev1_react_workflow.py +172 -0
  84. trinity_rft-0.3.1/trinity/common/workflows/envs/agentscope/agentscopev1_search_workflow.py +245 -0
  85. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/envs/alfworld/RAFT_alfworld_workflow.py +17 -9
  86. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/envs/alfworld/RAFT_reflect_alfworld_workflow.py +22 -17
  87. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/envs/alfworld/alfworld_workflow.py +13 -9
  88. trinity_rft-0.3.1/trinity/common/workflows/envs/email_searcher/prepare_data.py +279 -0
  89. trinity_rft-0.3.1/trinity/common/workflows/envs/email_searcher/react_agent.py +127 -0
  90. trinity_rft-0.3.1/trinity/common/workflows/envs/email_searcher/utils.py +333 -0
  91. trinity_rft-0.3.1/trinity/common/workflows/envs/email_searcher/workflow.py +191 -0
  92. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/envs/sciworld/sciworld_workflow.py +12 -8
  93. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/envs/webshop/webshop_workflow.py +10 -8
  94. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/eval_workflow.py +27 -3
  95. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/math_rm_workflow.py +33 -5
  96. trinity_rft-0.3.1/trinity/common/workflows/math_ruler_workflow.py +204 -0
  97. trinity_rft-0.3.1/trinity/common/workflows/math_trainable_ruler_workflow.py +219 -0
  98. trinity_rft-0.3.1/trinity/common/workflows/rubric_judge_workflow.py +172 -0
  99. trinity_rft-0.3.1/trinity/common/workflows/simple_mm_workflow.py +112 -0
  100. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/step_wise_workflow.py +104 -12
  101. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/workflow.py +62 -19
  102. trinity_rft-0.3.1/trinity/explorer/api/__init__.py +0 -0
  103. trinity_rft-0.3.1/trinity/explorer/api/api.py +65 -0
  104. trinity_rft-0.3.1/trinity/explorer/api/service.py +160 -0
  105. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/explorer/explorer.py +140 -72
  106. trinity_rft-0.3.1/trinity/explorer/explorer_client.py +49 -0
  107. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/explorer/scheduler.py +15 -10
  108. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/explorer/workflow_runner.py +77 -29
  109. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/__init__.py +2 -2
  110. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_manager.py +115 -67
  111. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/algorithm_config_manager.py +11 -4
  112. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/buffer_config_manager.py +28 -93
  113. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/model_config_manager.py +14 -4
  114. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/trainer_config_manager.py +134 -45
  115. trinity_rft-0.3.1/trinity/manager/state_manager.py +159 -0
  116. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/synchronizer.py +63 -8
  117. trinity_rft-0.3.1/trinity/service/__init__.py +3 -0
  118. trinity_rft-0.3.1/trinity/service/data_juicer/__init__.py +0 -0
  119. trinity_rft-0.3.1/trinity/service/data_juicer/client.py +150 -0
  120. trinity_rft-0.3.1/trinity/service/data_juicer/server/__init__.py +0 -0
  121. trinity_rft-0.3.1/trinity/service/data_juicer/server/server.py +144 -0
  122. trinity_rft-0.3.1/trinity/service/data_juicer/server/session.py +125 -0
  123. trinity_rft-0.3.1/trinity/service/data_juicer/server/utils.py +196 -0
  124. trinity_rft-0.3.1/trinity/trainer/trainer.py +261 -0
  125. trinity_rft-0.3.1/trinity/trainer/verl/__init__.py +0 -0
  126. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/trainer/verl/dp_actor.py +27 -87
  127. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/trainer/verl/fsdp_checkpoint_manager.py +153 -107
  128. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/trainer/verl/fsdp_workers.py +117 -63
  129. trinity_rft-0.3.1/trinity/trainer/verl/megatron_actor.py +444 -0
  130. trinity_rft-0.3.1/trinity/trainer/verl/megatron_checkpoint_manager.py +281 -0
  131. trinity_rft-0.3.1/trinity/trainer/verl/megatron_workers.py +1010 -0
  132. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/trainer/verl/utils.py +28 -1
  133. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/trainer/verl_trainer.py +294 -107
  134. trinity_rft-0.3.1/trinity/utils/__init__.py +0 -0
  135. trinity_rft-0.3.1/trinity/utils/annotations.py +19 -0
  136. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/distributed.py +18 -0
  137. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/dlc_utils.py +11 -4
  138. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/eval_utils.py +36 -3
  139. trinity_rft-0.3.1/trinity/utils/log.py +103 -0
  140. trinity_rft-0.3.1/trinity/utils/lora_utils.py +26 -0
  141. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/math_eval_utils.py +1 -0
  142. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/monitor.py +8 -7
  143. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/plugin_loader.py +2 -5
  144. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/registry.py +0 -10
  145. trinity_rft-0.3.1/trinity_rft.egg-info/PKG-INFO +453 -0
  146. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity_rft.egg-info/SOURCES.txt +58 -22
  147. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity_rft.egg-info/requires.txt +18 -11
  148. trinity_rft-0.2.1.dev0/PKG-INFO +0 -497
  149. trinity_rft-0.2.1.dev0/README.md +0 -430
  150. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/__init__.py +0 -25
  151. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/add_strategy.py +0 -230
  152. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/correct_bias_add_strategy.py +0 -54
  153. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/duplicate_add_strategy.py +0 -72
  154. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/__init__.py +0 -20
  155. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/advantage_fn.py +0 -29
  156. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/grpo_advantage.py +0 -83
  157. trinity_rft-0.2.1.dev0/trinity/algorithm/algorithm_manager.py +0 -34
  158. trinity_rft-0.2.1.dev0/trinity/buffer/buffer_reader.py +0 -21
  159. trinity_rft-0.2.1.dev0/trinity/buffer/ray_wrapper.py +0 -308
  160. trinity_rft-0.2.1.dev0/trinity/buffer/reader/file_reader.py +0 -359
  161. trinity_rft-0.2.1.dev0/trinity/buffer/reader/sql_reader.py +0 -35
  162. trinity_rft-0.2.1.dev0/trinity/buffer/schema/__init__.py +0 -3
  163. trinity_rft-0.2.1.dev0/trinity/buffer/schema/sql_schema.py +0 -142
  164. trinity_rft-0.2.1.dev0/trinity/cli/launcher.py +0 -280
  165. trinity_rft-0.2.1.dev0/trinity/common/config.py +0 -910
  166. trinity_rft-0.2.1.dev0/trinity/common/models/model.py +0 -214
  167. trinity_rft-0.2.1.dev0/trinity/common/rewards/__init__.py +0 -25
  168. trinity_rft-0.2.1.dev0/trinity/common/workflows/__init__.py +0 -32
  169. trinity_rft-0.2.1.dev0/trinity/data/controllers/active_iterator.py +0 -394
  170. trinity_rft-0.2.1.dev0/trinity/data/controllers/default_ops.py +0 -77
  171. trinity_rft-0.2.1.dev0/trinity/data/controllers/task_parser.py +0 -282
  172. trinity_rft-0.2.1.dev0/trinity/data/core/comparator.py +0 -84
  173. trinity_rft-0.2.1.dev0/trinity/data/core/dataset.py +0 -168
  174. trinity_rft-0.2.1.dev0/trinity/data/core/formatter.py +0 -151
  175. trinity_rft-0.2.1.dev0/trinity/data/processors/base.py +0 -143
  176. trinity_rft-0.2.1.dev0/trinity/data/processors/cleaner.py +0 -231
  177. trinity_rft-0.2.1.dev0/trinity/data/processors/human_annotator.py +0 -47
  178. trinity_rft-0.2.1.dev0/trinity/data/processors/synthesizer.py +0 -107
  179. trinity_rft-0.2.1.dev0/trinity/data/server.py +0 -81
  180. trinity_rft-0.2.1.dev0/trinity/data/utils.py +0 -72
  181. trinity_rft-0.2.1.dev0/trinity/manager/manager.py +0 -81
  182. trinity_rft-0.2.1.dev0/trinity/trainer/trainer.py +0 -215
  183. trinity_rft-0.2.1.dev0/trinity/utils/log.py +0 -65
  184. trinity_rft-0.2.1.dev0/trinity_rft.egg-info/PKG-INFO +0 -497
  185. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/LICENSE +0 -0
  186. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/setup.cfg +0 -0
  187. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/setup.py +0 -0
  188. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/advantage_fn/ppo_advantage.py +0 -0
  189. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/advantage_fn/reinforce_plus_plus_advantage.py +0 -0
  190. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/advantage_fn/remax_advantage.py +0 -0
  191. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/advantage_fn/rloo_advantage.py +0 -0
  192. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/entropy_loss_fn/__init__.py +0 -0
  193. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/entropy_loss_fn/entropy_loss_fn.py +0 -0
  194. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/key_mapper.py +0 -0
  195. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/kl_fn/__init__.py +0 -0
  196. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/kl_fn/kl_fn.py +0 -0
  197. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/dpo_loss.py +0 -0
  198. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/policy_loss_fn.py +0 -0
  199. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/sample_strategy/__init__.py +0 -0
  200. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/sample_strategy/utils.py +0 -0
  201. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/buffer_writer.py +0 -0
  202. {trinity_rft-0.2.1.dev0/trinity/buffer/reader → trinity_rft-0.3.1/trinity/buffer/operators/filters}/__init__.py +0 -0
  203. {trinity_rft-0.2.1.dev0/trinity/buffer/writer → trinity_rft-0.3.1/trinity/buffer/operators/mappers}/__init__.py +0 -0
  204. {trinity_rft-0.2.1.dev0/trinity/common → trinity_rft-0.3.1/trinity/buffer/reader}/__init__.py +0 -0
  205. {trinity_rft-0.2.1.dev0/trinity/trainer/verl → trinity_rft-0.3.1/trinity/buffer/storage}/__init__.py +0 -0
  206. {trinity_rft-0.2.1.dev0/trinity/utils → trinity_rft-0.3.1/trinity/buffer/writer}/__init__.py +0 -0
  207. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/cli/client.py +0 -0
  208. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/cli/server.py +0 -0
  209. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/agents_reward.py +0 -0
  210. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/human_reward.py +0 -0
  211. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/tool_reward.py +0 -0
  212. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/utils.py +0 -0
  213. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/envs/alfworld/RAFT_utils.py +0 -0
  214. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/explorer/__init__.py +0 -0
  215. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/__init__.py +0 -0
  216. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/config_registry.py +0 -0
  217. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/explorer_config_manager.py +0 -0
  218. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/plugins/__init__.py +0 -0
  219. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/trainer/__init__.py +0 -0
  220. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/timer.py +0 -0
  221. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity_rft.egg-info/dependency_links.txt +0 -0
  222. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity_rft.egg-info/entry_points.txt +0 -0
  223. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity_rft.egg-info/top_level.txt +0 -0
@@ -0,0 +1,453 @@
1
+ Metadata-Version: 2.4
2
+ Name: trinity-rft
3
+ Version: 0.3.1
4
+ Summary: Trinity-RFT: A Framework for Training Large Language Models with Reinforcement Fine-Tuning
5
+ Author-email: Trinity-RFT Team <trinity-rft@outlook.com>
6
+ Project-URL: Homepage, https://github.com/modelscope/Trinity-RFT
7
+ Project-URL: Documentation, https://modelscope.github.io/Trinity-RFT/
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3 :: Only
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: <3.13,>=3.10
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: verl==0.5.0
19
+ Requires-Dist: ray[default]>=2.48.0
20
+ Requires-Dist: vllm<=0.10.2,>=0.9.1
21
+ Requires-Dist: tensordict
22
+ Requires-Dist: wandb
23
+ Requires-Dist: omegaconf
24
+ Requires-Dist: sqlalchemy
25
+ Requires-Dist: psycopg2-binary
26
+ Requires-Dist: networkx
27
+ Requires-Dist: latex2sympy2_extended
28
+ Requires-Dist: math_verify>=0.8.0
29
+ Requires-Dist: ninja
30
+ Requires-Dist: fire
31
+ Requires-Dist: streamlit
32
+ Requires-Dist: flask
33
+ Requires-Dist: requests
34
+ Requires-Dist: tensorboard
35
+ Requires-Dist: openai
36
+ Requires-Dist: jsonlines
37
+ Requires-Dist: sortedcontainers
38
+ Requires-Dist: word2number
39
+ Requires-Dist: transformers
40
+ Provides-Extra: data
41
+ Requires-Dist: py-data-juicer>=1.4.3; extra == "data"
42
+ Provides-Extra: agent
43
+ Requires-Dist: agentscope>=1.0.5; extra == "agent"
44
+ Provides-Extra: rm-gallery
45
+ Requires-Dist: rm-gallery>=0.1.5; extra == "rm-gallery"
46
+ Provides-Extra: dev
47
+ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
48
+ Requires-Dist: black>=23.7.0; extra == "dev"
49
+ Requires-Dist: flake8>=6.1.0; extra == "dev"
50
+ Requires-Dist: flake8-docstrings>=1.6.0; extra == "dev"
51
+ Requires-Dist: isort>=5.12.0; extra == "dev"
52
+ Requires-Dist: mypy>=1.7.0; extra == "dev"
53
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
54
+ Requires-Dist: pytest-json-ctrf; extra == "dev"
55
+ Requires-Dist: parameterized; extra == "dev"
56
+ Requires-Dist: matplotlib; extra == "dev"
57
+ Requires-Dist: viztracer; extra == "dev"
58
+ Provides-Extra: megatron
59
+ Requires-Dist: megatron-core[mlm]==0.13.1; extra == "megatron"
60
+ Requires-Dist: transformer_engine[pytorch]==2.6.0.post1; extra == "megatron"
61
+ Requires-Dist: mbridge>=0.13.0; extra == "megatron"
62
+ Provides-Extra: doc
63
+ Requires-Dist: sphinx; extra == "doc"
64
+ Requires-Dist: sphinx-autobuild; extra == "doc"
65
+ Requires-Dist: sphinx-book-theme; extra == "doc"
66
+ Requires-Dist: myst-parser; extra == "doc"
67
+ Requires-Dist: sphinxcontrib-apidoc; extra == "doc"
68
+ Requires-Dist: sphinx-multiversion; extra == "doc"
69
+ Requires-Dist: sphinxcontrib-mermaid; extra == "doc"
70
+ Provides-Extra: flash-attn
71
+ Requires-Dist: flash-attn==2.8.1; extra == "flash-attn"
72
+ Dynamic: license-file
73
+
74
+ [**中文主页**](https://github.com/modelscope/Trinity-RFT/blob/main/README_zh.md) | [**Tutorial**](https://modelscope.github.io/Trinity-RFT/) | [**FAQ**](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/faq.html)
75
+
76
+ <div align="center">
77
+ <img src="https://img.alicdn.com/imgextra/i1/O1CN01lvLpfw25Pl4ohGZnU_!!6000000007519-2-tps-1628-490.png" alt="Trinity-RFT" style="height: 120px;">
78
+ </div>
79
+
80
+
81
+ <h2 align="center">Trinity-RFT: A General-Purpose and Unified Framework for Reinforcement Fine-Tuning of Large Language Models</h2>
82
+
83
+
84
+ <div align="center">
85
+
86
+ [![paper](https://img.shields.io/badge/cs.LG-2505.17826-B31B1B?logo=arxiv&logoColor=red)](https://arxiv.org/abs/2505.17826)
87
+ [![doc](https://img.shields.io/badge/Docs-blue?logo=markdown)](https://modelscope.github.io/Trinity-RFT/)
88
+ [![pypi](https://img.shields.io/pypi/v/trinity-rft?logo=pypi&color=026cad)](https://pypi.org/project/trinity-rft/)
89
+ ![license](https://img.shields.io/badge/license-Apache--2.0-000000.svg)
90
+
91
+ </div>
92
+
93
+ ## 💡 What is Trinity-RFT?
94
+
95
+ Trinity-RFT is a flexible, general-purpose framework for reinforcement fine-tuning (RFT) of large language models (LLMs). It decouples the RFT process into three key components: **Explorer**, **Trainer**, and **Buffer**, and provides functionalities for users with different backgrounds and objectives:
96
+
97
+
98
+ * 🤖 For agent application developers. [[tutorial]](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/develop_workflow.html)
99
+ - Train agent applications to improve their ability to complete tasks in specific environments.
100
+ - Examples: [Multi-Turn Interaction](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_multi_turn.html), [ReAct Agent](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_react.html)
101
+
102
+ * 🧠 For RL algorithm researchers. [[tutorial]](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/develop_algorithm.html)
103
+ - Design and validate new reinforcement learning algorithms using compact, plug-and-play modules.
104
+ - Example: [Mixture of SFT and GRPO](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_mix_algo.html)
105
+
106
+ * 📊 For data engineers. [[tutorial]](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/develop_operator.html)
107
+ - Create task-specific datasets and build data pipelines for cleaning, augmentation, and human-in-the-loop scenarios.
108
+ - Example: [Data Processing](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_data_functionalities.html)
109
+
110
+
111
+ ## 🌟 Key Features
112
+
113
+ * **Flexible RFT Modes:**
114
+ - Supports synchronous/asynchronous, on-policy/off-policy, and online/offline training. Rollout and training can run separately and scale independently across devices.
115
+
116
+ <img src="https://img.alicdn.com/imgextra/i3/O1CN01E7NskS1FFoTI9jlaQ_!!6000000000458-2-tps-1458-682.png" alt="RFT modes supported by Trinity-RFT" width="600" />
117
+
118
+ * **General Agentic-RL Support:**
119
+ - Supports both concatenated and general multi-turn agentic workflows, and can directly train agent applications developed with agent frameworks such as AgentScope.
120
+
121
+ <img src="https://img.alicdn.com/imgextra/i1/O1CN01z1i7kk1jlMEVa8ZHV_!!6000000004588-2-tps-1262-695.png" alt="Agentic workflows" width="600" />
122
+
123
+ * **Full Lifecycle Data Pipelines:**
124
+ - Enables pipeline processing of rollout and experience data, supporting active management (prioritization, cleaning, augmentation) throughout the RFT lifecycle.
125
+
126
+ <img src="https://img.alicdn.com/imgextra/i2/O1CN01BfeHp61sXSlGjH7zQ_!!6000000005776-2-tps-1734-473.png" alt="Data pipeline design" width="600" />
127
+
128
+ * **User-Friendly Design:**
129
+ - Modular, decoupled architecture for easy adoption and development. Rich graphical user interfaces enable low-code usage.
130
+
131
+ <img src="https://img.alicdn.com/imgextra/i1/O1CN01Ti0o4320RywoAuyhN_!!6000000006847-2-tps-3840-2134.png" alt="System architecture" width="600" />
132
+
133
+
134
+ ## 🚀 News
135
+
136
+ * [2025-10] ✨ [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.3.1)] Trinity-RFT v0.3.1 released: multi-stage training support, improved agentic RL examples, LoRA support, debug mode and new RL algorithms.
137
+ * [2025-09] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.3.0)] Trinity-RFT v0.3.0 released: enhanced Buffer, FSDP2 & Megatron support, multi-modal models, and new RL algorithms/examples.
138
+ * [2025-08] Introducing [CHORD](https://github.com/modelscope/Trinity-RFT/tree/main/examples/mix_chord): dynamic SFT + RL integration for advanced LLM fine-tuning ([paper](https://arxiv.org/pdf/2508.11408)).
139
+ * [2025-08] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.2.1)] Trinity-RFT v0.2.1 released.
140
+ * [2025-07] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.2.0)] Trinity-RFT v0.2.0 released.
141
+ * [2025-07] Technical report (arXiv v2) updated with new features, examples, and experiments: [link](https://arxiv.org/abs/2505.17826).
142
+ * [2025-06] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.1.1)] Trinity-RFT v0.1.1 released.
143
+ * [2025-05] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.1.0)] Trinity-RFT v0.1.0 released, plus [technical report](https://arxiv.org/abs/2505.17826).
144
+ * [2025-04] Trinity-RFT open sourced.
145
+
146
+
147
+ ---
148
+
149
+ ## Table of contents
150
+
151
+
152
+ - [Quick Start](#quick-start)
153
+ - [Step 1: installation](#step-1-installation)
154
+ - [Step 2: prepare dataset and model](#step-2-prepare-dataset-and-model)
155
+ - [Step 3: configurations](#step-3-configurations)
156
+ - [Step 4: run the RFT process](#step-4-run-the-rft-process)
157
+ - [Further tutorials](#further-tutorials)
158
+ - [Upcoming features](#upcoming-features)
159
+ - [Contribution guide](#contribution-guide)
160
+ - [Acknowledgements](#acknowledgements)
161
+ - [Citation](#citation)
162
+
163
+
164
+
165
+ ## Quick Start
166
+
167
+
168
+ > [!NOTE]
169
+ > This project is currently under active development. Comments and suggestions are welcome!
170
+
171
+
172
+ ### Step 1: installation
173
+
174
+ Before installing, make sure your system meets the following requirements:
175
+
176
+ - **Python**: version 3.10 to 3.12 (inclusive)
177
+ - **CUDA**: version 12.4 to 12.8 (inclusive)
178
+ - **GPUs**: at least 2 GPUs
179
+
180
+
181
+ #### From Source (Recommended)
182
+
183
+ If you plan to customize or contribute to Trinity-RFT, this is the best option.
184
+
185
+ ##### 1. Clone the Repository
186
+
187
+ ```bash
188
+ git clone https://github.com/modelscope/Trinity-RFT
189
+ cd Trinity-RFT
190
+ ```
191
+
192
+ ##### 2. Set Up a Virtual Environment
193
+
194
+ Choose one of the following options:
195
+
196
+ ###### Using Conda
197
+
198
+ ```bash
199
+ conda create -n trinity python=3.10
200
+ conda activate trinity
201
+
202
+ pip install -e ".[dev]"
203
+ pip install -e ".[flash_attn]"
204
+ # if you encounter issues when installing flash-attn, try:
205
+ # pip install flash-attn==2.8.1 --no-build-isolation
206
+ ```
207
+
208
+ ###### Using venv
209
+
210
+ ```bash
211
+ python3.10 -m venv .venv
212
+ source .venv/bin/activate
213
+
214
+ pip install -e ".[dev]"
215
+ pip install -e ".[flash_attn]"
216
+ # if you encounter issues when installing flash-attn, try:
217
+ # pip install flash-attn==2.8.1 --no-build-isolation
218
+ ```
219
+
220
+ ###### Using `uv`
221
+
222
+ [`uv`](https://github.com/astral-sh/uv) is a modern Python package installer.
223
+
224
+ ```bash
225
+ uv sync --extra dev --extra flash_attn
226
+ ```
227
+
228
+
229
+ #### Via PyPI
230
+
231
+ If you just want to use the package without modifying the code:
232
+
233
+ ```bash
234
+ pip install trinity-rft==0.3.1
235
+ pip install flash-attn==2.8.1
236
+ ```
237
+
238
+ Or with `uv`:
239
+
240
+ ```bash
241
+ uv pip install trinity-rft==0.3.1
242
+ uv pip install flash-attn==2.8.1
243
+ ```
244
+
245
+
246
+ #### Using Docker
247
+
248
+ We provide a Docker setup for hassle-free environment configuration.
249
+
250
+ ```bash
251
+ git clone https://github.com/modelscope/Trinity-RFT
252
+ cd Trinity-RFT
253
+
254
+ # Build the Docker image
255
+ ## Tip: You can modify the Dockerfile to add mirrors or set API keys
256
+ docker build -f scripts/docker/Dockerfile -t trinity-rft:latest .
257
+
258
+ # Run the container, replacing <path_to_your_data_and_checkpoints> with your actual path
259
+ docker run -it \
260
+ --gpus all \
261
+ --shm-size="64g" \
262
+ --rm \
263
+ -v $PWD:/workspace \
264
+ -v <path_to_your_data_and_checkpoints>:/data \
265
+ trinity-rft:latest
266
+ ```
267
+
268
+ > For training with **Megatron-LM**, please refer to [Megatron-LM Backend](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_megatron.html).
269
+
270
+ ### Step 2: prepare dataset and model
271
+
272
+
273
+ Trinity-RFT supports most datasets and models from Huggingface and ModelScope.
274
+
275
+
276
+ **Prepare the model** in the local directory `$MODEL_PATH/{model_name}`:
277
+
278
+ ```bash
279
+ # Using Huggingface
280
+ huggingface-cli download {model_name} --local-dir $MODEL_PATH/{model_name}
281
+
282
+ # Using Modelscope
283
+ modelscope download {model_name} --local_dir $MODEL_PATH/{model_name}
284
+ ```
285
+
286
+ For more details about model downloading, see [Huggingface](https://huggingface.co/docs/huggingface_hub/main/en/guides/cli) or [ModelScope](https://modelscope.cn/docs/models/download).
287
+
288
+
289
+
290
+ **Prepare the dataset** in the local directory `$DATASET_PATH/{dataset_name}`:
291
+
292
+ ```bash
293
+ # Using Huggingface
294
+ huggingface-cli download {dataset_name} --repo-type dataset --local-dir $DATASET_PATH/{dataset_name}
295
+
296
+ # Using Modelscope
297
+ modelscope download --dataset {dataset_name} --local_dir $DATASET_PATH/{dataset_name}
298
+ ```
299
+
300
+ For more details about dataset downloading, see [Huggingface](https://huggingface.co/docs/huggingface_hub/main/en/guides/cli#download-a-dataset-or-a-space) or [ModelScope](https://modelscope.cn/docs/datasets/download).
301
+
302
+
303
+
304
+ ### Step 3: configurations
305
+
306
+
307
+ Trinity-RFT provides a web interface for configuring your RFT process.
308
+
309
+ > [!NOTE]
310
+ > This is an experimental feature, and we will continue to improve it.
311
+
312
+
313
+ To launch the web interface for minimal configurations, you can run
314
+
315
+ ```bash
316
+ trinity studio --port 8080
317
+ ```
318
+
319
+ Then you can configure your RFT process on the web page and generate a config file. You can save the config file for later use or run it directly as described in the following section.
320
+
321
+ Advanced users can also edit the config file directly.
322
+ We provide example config files in [`examples`](examples/).
323
+
324
+ For complete GUI features, please refer to the monorepo for [Trinity-Studio](https://github.com/modelscope/Trinity-Studio).
325
+
326
+
327
+ <details>
328
+
329
+ <summary> Example: config manager GUI </summary>
330
+
331
+ ![config-manager](https://img.alicdn.com/imgextra/i1/O1CN01yhYrV01lGKchtywSH_!!6000000004791-2-tps-1480-844.png)
332
+
333
+
334
+ </details>
335
+
336
+
337
+
338
+
339
+ ### Step 4: run the RFT process
340
+
341
+
342
+ Start a ray cluster:
343
+
344
+ ```shell
345
+ # On master node
346
+ ray start --head
347
+
348
+ # On worker nodes
349
+ ray start --address=<master_address>
350
+ ```
351
+
352
+ (Optional) Log in to [wandb](https://docs.wandb.ai/quickstart/) for better monitoring:
353
+
354
+ ```shell
355
+ export WANDB_API_KEY=<your_api_key>
356
+ wandb login
357
+ ```
358
+
359
+ For command-line users, run the RFT process:
360
+
361
+ ```shell
362
+ trinity run --config <config_path>
363
+ ```
364
+
365
+ For example, below is the command for fine-tuning Qwen2.5-1.5B-Instruct on GSM8k with GRPO:
366
+
367
+ ```shell
368
+ trinity run --config examples/grpo_gsm8k/gsm8k.yaml
369
+ ```
370
+
371
+ For studio users, click "Run" in the web interface.
372
+
373
+
374
+ ## Further tutorials
375
+
376
+ > [!NOTE]
377
+ > For more tutorials, please refer to the [Trinity-RFT Documentation](https://modelscope.github.io/Trinity-RFT/).
378
+
379
+
380
+ Tutorials for running different RFT modes:
381
+
382
+ + [Quick example: GRPO on GSM8k](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_reasoning_basic.html)
383
+ + [Off-policy RFT](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_reasoning_advanced.html)
384
+ + [Fully asynchronous RFT](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_async_mode.html)
385
+ + [Offline learning by DPO or SFT](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_dpo.html)
386
+
387
+
388
+ Tutorials for adapting Trinity-RFT to multi-step agentic scenarios:
389
+
390
+ + [Concatenated multi-turn workflow](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_multi_turn.html)
391
+ + [General multi-step workflow](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_step_wise.html)
392
+ + [ReAct workflow with an agent framework](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_react.html)
393
+
394
+
395
+ Tutorials for data-related functionalities:
396
+
397
+ + [Advanced data processing & human-in-the-loop](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_data_functionalities.html)
398
+
399
+
400
+ Tutorials for RL algorithm development/research with Trinity-RFT:
401
+
402
+ + [RL algorithm development with Trinity-RFT](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_mix_algo.html)
403
+
404
+
405
+ Guidelines for full configurations:
406
+
407
+ + See [this document](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/trinity_configs.html)
408
+
409
+
410
+ Guidelines for developers and researchers:
411
+
412
+ + [Benchmark Toolkit for quick verification and experimentation](./benchmark/README.md)
413
+ + [Understand the coordination between explorer and trainer](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/synchronizer.html)
414
+
415
+
416
+ ## Upcoming features
417
+
418
+ A tentative roadmap: [#51](https://github.com/modelscope/Trinity-RFT/issues/51)
419
+
420
+
421
+ ## Contribution guide
422
+
423
+ This project is currently under active development, and we welcome contributions from the community!
424
+
425
+ See [CONTRIBUTING.md](./CONTRIBUTING.md) for detailed contribution guidelines.
426
+
427
+
428
+ ## Acknowledgements
429
+
430
+ This project is built upon many excellent open-source projects, including:
431
+
432
+ + [verl](https://github.com/volcengine/verl) and [PyTorch's FSDP](https://pytorch.org/docs/stable/fsdp.html) for LLM training;
433
+ + [vLLM](https://github.com/vllm-project/vllm) for LLM inference;
434
+ + [Data-Juicer](https://github.com/modelscope/data-juicer?tab=readme-ov-file) for data processing pipelines;
435
+ [AgentScope](https://github.com/agentscope-ai/agentscope) for agentic workflows;
436
+ + [Ray](https://github.com/ray-project/ray) for distributed systems;
437
+ we have also drawn inspiration from RL frameworks like [OpenRLHF](https://github.com/OpenRLHF/OpenRLHF), [TRL](https://github.com/huggingface/trl) and [ChatLearn](https://github.com/alibaba/ChatLearn);
438
+ + ......
439
+
440
+
441
+ ## Citation
442
+
443
+ ```bibtex
444
+ @misc{trinity-rft,
445
+ title={Trinity-RFT: A General-Purpose and Unified Framework for Reinforcement Fine-Tuning of Large Language Models},
446
+ author={Xuchen Pan and Yanxi Chen and Yushuo Chen and Yuchang Sun and Daoyuan Chen and Wenhao Zhang and Yuexiang Xie and Yilun Huang and Yilei Zhang and Dawei Gao and Yaliang Li and Bolin Ding and Jingren Zhou},
447
+ year={2025},
448
+ eprint={2505.17826},
449
+ archivePrefix={arXiv},
450
+ primaryClass={cs.LG},
451
+ url={https://arxiv.org/abs/2505.17826},
452
+ }
453
+ ```