trinity-rft 0.2.1.dev0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. trinity_rft-0.3.0/PKG-INFO +477 -0
  2. trinity_rft-0.3.0/README.md +406 -0
  3. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/pyproject.toml +15 -7
  4. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/__init__.py +1 -1
  5. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/__init__.py +0 -3
  6. trinity_rft-0.3.0/trinity/algorithm/advantage_fn/__init__.py +41 -0
  7. trinity_rft-0.3.0/trinity/algorithm/advantage_fn/advantage_fn.py +89 -0
  8. trinity_rft-0.3.0/trinity/algorithm/advantage_fn/asymre_advantage.py +122 -0
  9. trinity_rft-0.3.0/trinity/algorithm/advantage_fn/grpo_advantage.py +204 -0
  10. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/step_wise_add_strategy.py → trinity_rft-0.3.0/trinity/algorithm/advantage_fn/multi_step_grpo_advantage.py +24 -22
  11. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/advantage_fn/opmd_advantage.py +52 -3
  12. trinity_rft-0.3.0/trinity/algorithm/advantage_fn/reinforce_advantage.py +36 -0
  13. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/algorithm.py +115 -20
  14. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/policy_loss_fn/__init__.py +6 -0
  15. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/policy_loss_fn/chord_policy_loss.py +1 -0
  16. trinity_rft-0.3.0/trinity/algorithm/policy_loss_fn/cispo_policy_loss.py +88 -0
  17. trinity_rft-0.3.0/trinity/algorithm/policy_loss_fn/sppo_loss_fn.py +54 -0
  18. trinity_rft-0.3.0/trinity/algorithm/policy_loss_fn/topr_policy_loss.py +74 -0
  19. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/buffer/__init__.py +0 -2
  20. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/buffer/buffer.py +11 -32
  21. trinity_rft-0.3.0/trinity/buffer/buffer_reader.py +15 -0
  22. trinity_rft-0.3.0/trinity/buffer/operators/__init__.py +16 -0
  23. trinity_rft-0.3.0/trinity/buffer/operators/data_juicer_operator.py +50 -0
  24. trinity_rft-0.3.0/trinity/buffer/operators/experience_operator.py +50 -0
  25. trinity_rft-0.3.0/trinity/buffer/operators/filters/reward_filter.py +54 -0
  26. trinity_rft-0.3.0/trinity/buffer/operators/mappers/reward_shaping_mapper.py +112 -0
  27. trinity_rft-0.3.0/trinity/buffer/pipelines/__init__.py +11 -0
  28. trinity_rft-0.3.0/trinity/buffer/pipelines/experience_pipeline.py +143 -0
  29. trinity_rft-0.3.0/trinity/buffer/pipelines/task_pipeline.py +72 -0
  30. trinity_rft-0.3.0/trinity/buffer/reader/file_reader.py +159 -0
  31. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/buffer/reader/queue_reader.py +5 -16
  32. trinity_rft-0.3.0/trinity/buffer/reader/sql_reader.py +34 -0
  33. trinity_rft-0.3.0/trinity/buffer/schema/__init__.py +4 -0
  34. trinity_rft-0.3.0/trinity/buffer/schema/formatter.py +317 -0
  35. trinity_rft-0.3.0/trinity/buffer/schema/sql_schema.py +136 -0
  36. trinity_rft-0.3.0/trinity/buffer/storage/file.py +84 -0
  37. {trinity_rft-0.2.1.dev0/trinity/buffer → trinity_rft-0.3.0/trinity/buffer/storage}/queue.py +109 -2
  38. trinity_rft-0.3.0/trinity/buffer/storage/sql.py +282 -0
  39. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/buffer/utils.py +3 -2
  40. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/buffer/writer/file_writer.py +2 -2
  41. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/buffer/writer/queue_writer.py +2 -5
  42. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/buffer/writer/sql_writer.py +5 -5
  43. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/cli/launcher.py +73 -125
  44. trinity_rft-0.3.0/trinity/common/__init__.py +0 -0
  45. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/config.py +226 -171
  46. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/constants.py +7 -43
  47. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/experience.py +123 -38
  48. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/models/__init__.py +1 -1
  49. trinity_rft-0.3.0/trinity/common/models/mm_utils.py +78 -0
  50. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/models/model.py +82 -9
  51. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/models/utils.py +165 -10
  52. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/models/vllm_model.py +135 -25
  53. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/models/vllm_worker.py +24 -8
  54. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/rewards/accuracy_reward.py +2 -3
  55. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/rewards/countdown_reward.py +0 -3
  56. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/rewards/dapo_reward.py +0 -3
  57. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/rewards/format_reward.py +0 -3
  58. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/rewards/math_reward.py +0 -3
  59. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/rewards/reward_fn.py +0 -4
  60. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/verl_config.py +80 -8
  61. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/__init__.py +6 -0
  62. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/customized_math_workflows.py +3 -6
  63. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/customized_toolcall_workflows.py +4 -7
  64. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/envs/agentscope/agentscope_react_workflow.py +10 -13
  65. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/envs/alfworld/alfworld_workflow.py +1 -1
  66. trinity_rft-0.3.0/trinity/common/workflows/envs/email_searcher/prepare_data.py +279 -0
  67. trinity_rft-0.3.0/trinity/common/workflows/envs/email_searcher/react_agent.py +104 -0
  68. trinity_rft-0.3.0/trinity/common/workflows/envs/email_searcher/utils.py +333 -0
  69. trinity_rft-0.3.0/trinity/common/workflows/envs/email_searcher/workflow.py +224 -0
  70. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/eval_workflow.py +0 -3
  71. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/math_rm_workflow.py +2 -5
  72. trinity_rft-0.3.0/trinity/common/workflows/math_ruler_workflow.py +153 -0
  73. trinity_rft-0.3.0/trinity/common/workflows/simple_mm_workflow.py +76 -0
  74. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/workflow.py +14 -10
  75. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/explorer/explorer.py +75 -52
  76. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/explorer/scheduler.py +4 -3
  77. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/explorer/workflow_runner.py +16 -18
  78. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/manager/__init__.py +2 -2
  79. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/manager/config_manager.py +112 -37
  80. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/manager/config_registry/buffer_config_manager.py +28 -30
  81. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/manager/config_registry/model_config_manager.py +14 -4
  82. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/manager/config_registry/trainer_config_manager.py +134 -45
  83. trinity_rft-0.2.1.dev0/trinity/manager/manager.py → trinity_rft-0.3.0/trinity/manager/state_manager.py +23 -20
  84. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/manager/synchronizer.py +20 -7
  85. trinity_rft-0.3.0/trinity/service/__init__.py +3 -0
  86. trinity_rft-0.3.0/trinity/service/data_juicer/__init__.py +0 -0
  87. trinity_rft-0.3.0/trinity/service/data_juicer/client.py +150 -0
  88. trinity_rft-0.3.0/trinity/service/data_juicer/server/__init__.py +0 -0
  89. trinity_rft-0.3.0/trinity/service/data_juicer/server/server.py +144 -0
  90. trinity_rft-0.3.0/trinity/service/data_juicer/server/session.py +84 -0
  91. trinity_rft-0.3.0/trinity/service/data_juicer/server/utils.py +194 -0
  92. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/trainer/trainer.py +25 -3
  93. trinity_rft-0.3.0/trinity/trainer/verl/__init__.py +0 -0
  94. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/trainer/verl/dp_actor.py +27 -87
  95. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/trainer/verl/fsdp_checkpoint_manager.py +18 -0
  96. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/trainer/verl/fsdp_workers.py +62 -31
  97. trinity_rft-0.3.0/trinity/trainer/verl/megatron_actor.py +444 -0
  98. trinity_rft-0.3.0/trinity/trainer/verl/megatron_checkpoint_manager.py +289 -0
  99. trinity_rft-0.3.0/trinity/trainer/verl/megatron_workers.py +991 -0
  100. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/trainer/verl/utils.py +13 -1
  101. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/trainer/verl_trainer.py +52 -7
  102. trinity_rft-0.3.0/trinity/utils/__init__.py +0 -0
  103. trinity_rft-0.3.0/trinity/utils/annotations.py +19 -0
  104. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/utils/distributed.py +18 -0
  105. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/utils/eval_utils.py +12 -0
  106. trinity_rft-0.3.0/trinity/utils/log.py +100 -0
  107. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/utils/math_eval_utils.py +1 -0
  108. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/utils/monitor.py +8 -7
  109. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/utils/plugin_loader.py +2 -5
  110. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/utils/registry.py +0 -10
  111. trinity_rft-0.3.0/trinity_rft.egg-info/PKG-INFO +477 -0
  112. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity_rft.egg-info/SOURCES.txt +40 -20
  113. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity_rft.egg-info/requires.txt +10 -5
  114. trinity_rft-0.2.1.dev0/PKG-INFO +0 -497
  115. trinity_rft-0.2.1.dev0/README.md +0 -430
  116. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/__init__.py +0 -25
  117. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/add_strategy.py +0 -230
  118. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/correct_bias_add_strategy.py +0 -54
  119. trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/duplicate_add_strategy.py +0 -72
  120. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/__init__.py +0 -20
  121. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/advantage_fn.py +0 -29
  122. trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/grpo_advantage.py +0 -83
  123. trinity_rft-0.2.1.dev0/trinity/buffer/buffer_reader.py +0 -21
  124. trinity_rft-0.2.1.dev0/trinity/buffer/ray_wrapper.py +0 -308
  125. trinity_rft-0.2.1.dev0/trinity/buffer/reader/file_reader.py +0 -359
  126. trinity_rft-0.2.1.dev0/trinity/buffer/reader/sql_reader.py +0 -35
  127. trinity_rft-0.2.1.dev0/trinity/buffer/schema/__init__.py +0 -3
  128. trinity_rft-0.2.1.dev0/trinity/buffer/schema/sql_schema.py +0 -142
  129. trinity_rft-0.2.1.dev0/trinity/data/controllers/active_iterator.py +0 -394
  130. trinity_rft-0.2.1.dev0/trinity/data/controllers/default_ops.py +0 -77
  131. trinity_rft-0.2.1.dev0/trinity/data/controllers/task_parser.py +0 -282
  132. trinity_rft-0.2.1.dev0/trinity/data/core/comparator.py +0 -84
  133. trinity_rft-0.2.1.dev0/trinity/data/core/dataset.py +0 -168
  134. trinity_rft-0.2.1.dev0/trinity/data/core/formatter.py +0 -151
  135. trinity_rft-0.2.1.dev0/trinity/data/processors/base.py +0 -143
  136. trinity_rft-0.2.1.dev0/trinity/data/processors/cleaner.py +0 -231
  137. trinity_rft-0.2.1.dev0/trinity/data/processors/human_annotator.py +0 -47
  138. trinity_rft-0.2.1.dev0/trinity/data/processors/synthesizer.py +0 -107
  139. trinity_rft-0.2.1.dev0/trinity/data/server.py +0 -81
  140. trinity_rft-0.2.1.dev0/trinity/data/utils.py +0 -72
  141. trinity_rft-0.2.1.dev0/trinity/utils/log.py +0 -65
  142. trinity_rft-0.2.1.dev0/trinity_rft.egg-info/PKG-INFO +0 -497
  143. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/LICENSE +0 -0
  144. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/setup.cfg +0 -0
  145. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/setup.py +0 -0
  146. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/advantage_fn/ppo_advantage.py +0 -0
  147. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/advantage_fn/reinforce_plus_plus_advantage.py +0 -0
  148. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/advantage_fn/remax_advantage.py +0 -0
  149. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/advantage_fn/rloo_advantage.py +0 -0
  150. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/algorithm_manager.py +0 -0
  151. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/entropy_loss_fn/__init__.py +0 -0
  152. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/entropy_loss_fn/entropy_loss_fn.py +0 -0
  153. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/key_mapper.py +0 -0
  154. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/kl_fn/__init__.py +0 -0
  155. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/kl_fn/kl_fn.py +0 -0
  156. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/policy_loss_fn/dpo_loss.py +0 -0
  157. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/policy_loss_fn/gspo_policy_loss.py +0 -0
  158. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/policy_loss_fn/mix_policy_loss.py +0 -0
  159. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/policy_loss_fn/opmd_policy_loss.py +0 -0
  160. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/policy_loss_fn/policy_loss_fn.py +0 -0
  161. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/policy_loss_fn/ppo_policy_loss.py +0 -0
  162. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/policy_loss_fn/sft_loss.py +0 -0
  163. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/sample_strategy/__init__.py +0 -0
  164. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/sample_strategy/mix_sample_strategy.py +0 -0
  165. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/sample_strategy/sample_strategy.py +0 -0
  166. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/sample_strategy/utils.py +0 -0
  167. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/algorithm/utils.py +0 -0
  168. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/buffer/buffer_writer.py +0 -0
  169. {trinity_rft-0.2.1.dev0/trinity/buffer/reader → trinity_rft-0.3.0/trinity/buffer/operators/filters}/__init__.py +0 -0
  170. {trinity_rft-0.2.1.dev0/trinity/buffer/writer → trinity_rft-0.3.0/trinity/buffer/operators/mappers}/__init__.py +0 -0
  171. {trinity_rft-0.2.1.dev0/trinity/common → trinity_rft-0.3.0/trinity/buffer/reader}/__init__.py +0 -0
  172. {trinity_rft-0.2.1.dev0/trinity/trainer/verl → trinity_rft-0.3.0/trinity/buffer/storage}/__init__.py +0 -0
  173. {trinity_rft-0.2.1.dev0/trinity/utils → trinity_rft-0.3.0/trinity/buffer/writer}/__init__.py +0 -0
  174. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/cli/client.py +0 -0
  175. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/cli/server.py +0 -0
  176. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/models/api/vllm_patch.py +0 -0
  177. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/rewards/__init__.py +0 -0
  178. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/rewards/agents_reward.py +0 -0
  179. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/rewards/human_reward.py +0 -0
  180. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/rewards/tool_reward.py +0 -0
  181. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/rewards/utils.py +0 -0
  182. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/envs/alfworld/RAFT_alfworld_workflow.py +0 -0
  183. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/envs/alfworld/RAFT_reflect_alfworld_workflow.py +0 -0
  184. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/envs/alfworld/RAFT_utils.py +0 -0
  185. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/envs/sciworld/sciworld_workflow.py +0 -0
  186. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/envs/webshop/webshop_workflow.py +0 -0
  187. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/common/workflows/step_wise_workflow.py +0 -0
  188. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/explorer/__init__.py +0 -0
  189. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/manager/config_registry/__init__.py +0 -0
  190. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/manager/config_registry/algorithm_config_manager.py +0 -0
  191. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/manager/config_registry/config_registry.py +0 -0
  192. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/manager/config_registry/explorer_config_manager.py +0 -0
  193. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/plugins/__init__.py +0 -0
  194. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/trainer/__init__.py +0 -0
  195. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/utils/dlc_utils.py +0 -0
  196. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity/utils/timer.py +0 -0
  197. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity_rft.egg-info/dependency_links.txt +0 -0
  198. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity_rft.egg-info/entry_points.txt +0 -0
  199. {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.0}/trinity_rft.egg-info/top_level.txt +0 -0
@@ -0,0 +1,477 @@
1
+ Metadata-Version: 2.4
2
+ Name: trinity-rft
3
+ Version: 0.3.0
4
+ Summary: Trinity-RFT: A Framework for Training Large Language Models with Reinforcement Fine-Tuning
5
+ Author-email: Trinity-RFT Team <trinity-rft@outlook.com>
6
+ Project-URL: Homepage, https://github.com/modelscope/Trinity-RFT
7
+ Project-URL: Documentation, https://modelscope.github.io/Trinity-RFT/
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3 :: Only
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: <3.13,>=3.10
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: verl==0.5.0
19
+ Requires-Dist: ray[default]>=2.45.0
20
+ Requires-Dist: vllm<=0.10.0,>=0.9.1
21
+ Requires-Dist: tensordict
22
+ Requires-Dist: wandb
23
+ Requires-Dist: omegaconf
24
+ Requires-Dist: sqlalchemy
25
+ Requires-Dist: psycopg2-binary
26
+ Requires-Dist: networkx
27
+ Requires-Dist: latex2sympy2_extended
28
+ Requires-Dist: math_verify
29
+ Requires-Dist: ninja
30
+ Requires-Dist: fire
31
+ Requires-Dist: streamlit
32
+ Requires-Dist: flask
33
+ Requires-Dist: requests
34
+ Requires-Dist: tensorboard
35
+ Requires-Dist: openai
36
+ Requires-Dist: jsonlines
37
+ Requires-Dist: sortedcontainers
38
+ Requires-Dist: word2number
39
+ Requires-Dist: transformers<4.54.0
40
+ Provides-Extra: data
41
+ Requires-Dist: py-data-juicer>=1.4; extra == "data"
42
+ Provides-Extra: agent
43
+ Requires-Dist: agentscope; extra == "agent"
44
+ Provides-Extra: rm-gallery
45
+ Requires-Dist: rm-gallery>=0.1.1; extra == "rm-gallery"
46
+ Provides-Extra: dev
47
+ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
48
+ Requires-Dist: black>=23.7.0; extra == "dev"
49
+ Requires-Dist: flake8>=6.1.0; extra == "dev"
50
+ Requires-Dist: flake8-docstrings>=1.6.0; extra == "dev"
51
+ Requires-Dist: isort>=5.12.0; extra == "dev"
52
+ Requires-Dist: mypy>=1.7.0; extra == "dev"
53
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
54
+ Requires-Dist: pytest-json-ctrf; extra == "dev"
55
+ Requires-Dist: parameterized; extra == "dev"
56
+ Requires-Dist: matplotlib; extra == "dev"
57
+ Provides-Extra: megatron
58
+ Requires-Dist: megatron-core[mlm]==0.13.1; extra == "megatron"
59
+ Requires-Dist: transformer_engine[pytorch]==2.6.0.post1; extra == "megatron"
60
+ Requires-Dist: mbridge>=0.13.0; extra == "megatron"
61
+ Provides-Extra: doc
62
+ Requires-Dist: sphinx; extra == "doc"
63
+ Requires-Dist: sphinx-autobuild; extra == "doc"
64
+ Requires-Dist: sphinx-book-theme; extra == "doc"
65
+ Requires-Dist: myst-parser; extra == "doc"
66
+ Requires-Dist: sphinxcontrib-apidoc; extra == "doc"
67
+ Requires-Dist: sphinx-multiversion; extra == "doc"
68
+ Provides-Extra: flash-attn
69
+ Requires-Dist: flash-attn==2.8.1; extra == "flash-attn"
70
+ Dynamic: license-file
71
+
72
+ [**中文主页**](https://github.com/modelscope/Trinity-RFT/blob/main/README_zh.md) | [**Tutorial**](https://modelscope.github.io/Trinity-RFT/) | [**FAQ**](https://github.com/modelscope/Trinity-RFT/blob/main/docs/sphinx_doc/source/tutorial/faq.md)
73
+
74
+ <div align="center">
75
+ <img src="https://img.alicdn.com/imgextra/i1/O1CN01lvLpfw25Pl4ohGZnU_!!6000000007519-2-tps-1628-490.png" alt="Trinity-RFT" style="height: 120px;">
76
+ </div>
77
+
78
+
79
+
80
+ <h2 align="center">Trinity-RFT: A General-Purpose and Unified Framework for Reinforcement Fine-Tuning of Large Language Models</h2>
81
+
82
+
83
+ <div align="center">
84
+
85
+ [![paper](https://img.shields.io/badge/cs.LG-2505.17826-B31B1B?logo=arxiv&logoColor=red)](https://arxiv.org/abs/2505.17826)
86
+ [![doc](https://img.shields.io/badge/Docs-blue?logo=markdown)](https://modelscope.github.io/Trinity-RFT/)
87
+ [![pypi](https://img.shields.io/pypi/v/trinity-rft?logo=pypi&color=026cad)](https://pypi.org/project/trinity-rft/)
88
+ ![license](https://img.shields.io/badge/license-Apache--2.0-000000.svg)
89
+
90
+ </div>
91
+
92
+
93
+ ## 🚀 News
94
+
95
+ * [2025-09] ✨ [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.3.0)] Trinity-RFT v0.3.0 released: enhanced Buffer, FSDP2 & Megatron support, multi-modal models, and new RL algorithms/examples.
96
+ * [2025-08] 🎵 Introducing [CHORD](https://github.com/modelscope/Trinity-RFT/tree/main/examples/mix_chord): dynamic SFT + RL integration for advanced LLM fine-tuning ([paper](https://arxiv.org/pdf/2508.11408)).
97
+ * [2025-08] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.2.1)] Trinity-RFT v0.2.1 released.
98
+ * [2025-07] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.2.0)] Trinity-RFT v0.2.0 released.
99
+ * [2025-07] Technical report (arXiv v2) updated with new features, examples, and experiments: [link](https://arxiv.org/abs/2505.17826).
100
+ * [2025-06] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.1.1)] Trinity-RFT v0.1.1 released.
101
+ * [2025-05] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.1.0)] Trinity-RFT v0.1.0 released, plus [technical report](https://arxiv.org/abs/2505.17826).
102
+ * [2025-04] Trinity-RFT open sourced.
103
+
104
+
105
+ ## 💡 What is Trinity-RFT?
106
+
107
+ Trinity-RFT is a flexible, general-purpose framework for reinforcement fine-tuning (RFT) of large language models (LLMs). It supports a wide range of applications and provides a unified platform for RL research in the [era of experience](https://storage.googleapis.com/deepmind-media/Era-of-Experience%20/The%20Era%20of%20Experience%20Paper.pdf).
108
+
109
+ The RFT process is modularized into three core components:
110
+
111
+ * **Explorer**: Handles agent-environment interaction
112
+ * **Trainer**: Manages model training
113
+ * **Buffer**: Manages data storage and processing
114
+
115
+
116
+ <img src="https://img.alicdn.com/imgextra/i2/O1CN01H3UbpF1yP7E1OCLbi_!!6000000006570-2-tps-1334-638.png" alt="The high-level design of Trinity-RFT" width="800" />
117
+
118
+
119
+
120
+ ## ✨ Key Features
121
+
122
+ * **Flexible RFT Modes:**
123
+ - Supports synchronous/asynchronous, on-policy/off-policy, and online/offline training. Rollout and training can run separately and scale independently across devices.
124
+
125
+ <img src="https://img.alicdn.com/imgextra/i3/O1CN01E7NskS1FFoTI9jlaQ_!!6000000000458-2-tps-1458-682.png" alt="RFT modes supported by Trinity-RFT" width="600" />
126
+
127
+ * **Agent Framework Compatible Workflows:**
128
+ - Supports both concatenated and general multi-turn agentic workflows. Automatically collects training data from model API clients (e.g., OpenAI) and is compatible with agent frameworks like AgentScope.
129
+
130
+ <img src="https://img.alicdn.com/imgextra/i1/O1CN01z1i7kk1jlMEVa8ZHV_!!6000000004588-2-tps-1262-695.png" alt="Agentic workflows" width="600" />
131
+
132
+ * **Powerful Data Pipelines:**
133
+ - Enables pipeline processing of rollout and experience data, supporting active management (prioritization, cleaning, augmentation) throughout the RFT lifecycle.
134
+
135
+ <img src="https://img.alicdn.com/imgextra/i2/O1CN01BfeHp61sXSlGjH7zQ_!!6000000005776-2-tps-1734-473.png" alt="Data pipeline design" width="600" />
136
+
137
+ * **User-Friendly Design:**
138
+ - Modular, decoupled architecture for easy adoption and development. Rich graphical user interfaces enable low-code usage.
139
+
140
+ <img src="https://img.alicdn.com/imgextra/i1/O1CN01Ti0o4320RywoAuyhN_!!6000000006847-2-tps-3840-2134.png" alt="System architecture" width="600" />
141
+
142
+
143
+
144
+
145
+ ## 🛠️ What can I use Trinity-RFT for?
146
+
147
+ * **Train agent applications with RL and minimal migration cost** [[Tutorial]](https://modelscope.github.io/Trinity-RFT/main/tutorial/trinity_programming_guide.html#workflows-for-rl-environment-developers)
148
+ - Implement agent-environment interaction logic in a single workflow class ([example1](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_multi_turn.html), [example2](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_step_wise.html)),
149
+ - Or import workflows from agent frameworks like AgentScope ([example](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_react.html)).
150
+
151
+ * **Rapid RL algorithm design and validation** [[Tutorial]](https://modelscope.github.io/Trinity-RFT/main/tutorial/trinity_programming_guide.html#algorithms-for-rl-algorithm-developers)
152
+ - Develop custom RL algorithms (loss design, sampling strategy, etc.) in compact, plug-and-play classes ([example](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_mix_algo.html)).
153
+
154
+ * **Custom datasets and data pipelines for RFT** [[Tutorial]](https://modelscope.github.io/Trinity-RFT/main/tutorial/trinity_programming_guide.html#operators-for-data-developers)
155
+ - Design task-specific datasets and build data pipelines for cleaning, augmentation, and human-in-the-loop scenarios ([example](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_data_functionalities.html)).
156
+
157
+ ---
158
+
159
+ ## Table of contents
160
+
161
+
162
+ - [Getting started](#getting-started)
163
+ - [Step 1: installation](#step-1-installation)
164
+ - [Step 2: prepare dataset and model](#step-2-prepare-dataset-and-model)
165
+ - [Step 3: configurations](#step-3-configurations)
166
+ - [Step 4: run the RFT process](#step-4-run-the-rft-process)
167
+ - [Further tutorials](#further-tutorials)
168
+ - [Upcoming features](#upcoming-features)
169
+ - [Contribution guide](#contribution-guide)
170
+ - [Acknowledgements](#acknowledgements)
171
+ - [Citation](#citation)
172
+
173
+
174
+
175
+ ## Getting started
176
+
177
+
178
+ > [!NOTE]
179
+ > This project is currently under active development. Comments and suggestions are welcome!
180
+
181
+
182
+ ### Step 1: installation
183
+
184
+ #### Prerequisites
185
+
186
+ Before installing, make sure your system meets the following requirements:
187
+
188
+ - **Python**: version 3.10 to 3.12 (inclusive)
189
+ - **CUDA**: version 12.4 to 12.8 (inclusive)
190
+ - **GPUs**: at least 2 GPUs
191
+
192
+
193
+ #### Option A: Install from Source (Recommended)
194
+
195
+ This method gives you full control and is best if you plan to customize or contribute to the project.
196
+
197
+ ##### 1. Clone the Repository
198
+
199
+ ```bash
200
+ git clone https://github.com/modelscope/Trinity-RFT
201
+ cd Trinity-RFT
202
+ ```
203
+
204
+ ##### 2. Set Up a Virtual Environment
205
+
206
+ Choose one of the following options to create an isolated environment:
207
+
208
+ ###### Using Conda
209
+ ```bash
210
+ conda create -n trinity python=3.10
211
+ conda activate trinity
212
+ ```
213
+
214
+ ###### Using venv
215
+ ```bash
216
+ python3.10 -m venv .venv
217
+ source .venv/bin/activate
218
+ ```
219
+
220
+ ##### 3. Install the Package
221
+
222
+ Install in editable mode so you can make changes without reinstalling:
223
+
224
+ ```bash
225
+ pip install -e ".[dev]"
226
+ ```
227
+
228
+ ##### 4. Install Flash Attention
229
+
230
+ Flash Attention boosts training speed. It takes a few minutes to compile — please be patient!
231
+
232
+ ```bash
233
+ pip install flash-attn==2.8.1
234
+ ```
235
+
236
+ If you encounter issues during installation, try this alternative:
237
+
238
+ ```bash
239
+ pip install flash-attn==2.8.1 --no-build-isolation
240
+ ```
241
+
242
+
243
+ ##### ⚡ Fast Alternative: Use `uv` (Optional)
244
+
245
+ If you'd like a faster installation, try [`uv`](https://github.com/astral-sh/uv), a modern Python package installer:
246
+
247
+ ```bash
248
+ uv venv
249
+ source .venv/bin/activate
250
+
251
+ uv pip install -e ".[dev]"
252
+ uv pip install flash-attn==2.8.1 --no-build-isolation
253
+ ```
254
+
255
+ #### Option B: Install via pip (Quick Start)
256
+
257
+ If you just want to use the package without modifying the code:
258
+
259
+ ```bash
260
+ pip install trinity-rft==0.3.0
261
+ pip install flash-attn==2.8.1 # Install Flash Attention separately
262
+
263
+ # Use uv to install trinity-rft
264
+ # uv pip install trinity-rft==0.3.0
265
+ # uv pip install flash-attn==2.8.1
266
+ ```
267
+
268
+ #### Option C: Use Docker
269
+
270
+ We provide a Docker setup for hassle-free environment configuration.
271
+
272
+ ```bash
273
+ git clone https://github.com/modelscope/Trinity-RFT
274
+ cd Trinity-RFT
275
+
276
+ ## Build the Docker image
277
+ ## Tip: You can modify the Dockerfile to add mirrors or set API keys
278
+ docker build -f scripts/docker/Dockerfile -t trinity-rft:latest .
279
+
280
+ ## Run the container
281
+ docker run -it \
282
+ --gpus all \
283
+ --shm-size="64g" \
284
+ --rm \
285
+ -v $PWD:/workspace \
286
+ -v <path_to_your_data_and_checkpoints>:/data \
287
+ trinity-rft:latest
288
+ ```
289
+
290
+ 💡 **Note**: Replace `<path_to_your_data_and_checkpoints>` with the actual path on your machine where datasets and model checkpoints are stored.
291
+
292
+ > If you'd like to integrate with **Megatron-LM**, check out our [example setup guide for Megatron](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_megatron.html).
293
+
294
+ ### Step 2: prepare dataset and model
295
+
296
+
297
+ Trinity-RFT supports most datasets and models from Huggingface and ModelScope.
298
+
299
+
300
+ **Prepare the model** in the local directory `$MODEL_PATH/{model_name}`:
301
+
302
+ ```bash
303
+ # Using Huggingface
304
+ huggingface-cli download {model_name} --local-dir $MODEL_PATH/{model_name}
305
+
306
+ # Using ModelScope
307
+ modelscope download {model_name} --local_dir $MODEL_PATH/{model_name}
308
+ ```
309
+
310
+ For more details about model downloading, see [Huggingface](https://huggingface.co/docs/huggingface_hub/main/en/guides/cli) or [ModelScope](https://modelscope.cn/docs/models/download).
311
+
312
+
313
+
314
+ **Prepare the dataset** in the local directory `$DATASET_PATH/{dataset_name}`:
315
+
316
+ ```bash
317
+ # Using Huggingface
318
+ huggingface-cli download {dataset_name} --repo-type dataset --local-dir $DATASET_PATH/{dataset_name}
319
+
320
+ # Using Modelscope
321
+ modelscope download --dataset {dataset_name} --local_dir $DATASET_PATH/{dataset_name}
322
+ ```
323
+
324
+ For more details about dataset downloading, see [Huggingface](https://huggingface.co/docs/huggingface_hub/main/en/guides/cli#download-a-dataset-or-a-space) or [ModelScope](https://modelscope.cn/docs/datasets/download).
325
+
326
+
327
+
328
+ ### Step 3: configurations
329
+
330
+
331
+ Trinity-RFT provides a web interface for configuring your RFT process.
332
+
333
+ > [!NOTE]
334
+ > This is an experimental feature, and we will continue to improve it.
335
+
336
+
337
+ To launch the web interface for minimal configurations, run:
338
+
339
+ ```bash
340
+ trinity studio --port 8080
341
+ ```
342
+
343
+ Then you can configure your RFT process on the web page and generate a config file. You can save the config file for later use or run it directly as described in the following section.
344
+
345
+ Advanced users can also edit the config file directly.
346
+ We provide example config files in [`examples`](examples/).
347
+
348
+ For complete GUI features, please refer to the monorepo for [Trinity-Studio](https://github.com/modelscope/Trinity-Studio).
349
+
350
+
351
+ <details>
352
+
353
+ <summary> Example: config manager GUI </summary>
354
+
355
+ ![config-manager](https://img.alicdn.com/imgextra/i1/O1CN01yhYrV01lGKchtywSH_!!6000000004791-2-tps-1480-844.png)
356
+
357
+
358
+ </details>
359
+
360
+
361
+
362
+
363
+ ### Step 4: run the RFT process
364
+
365
+
366
+ Start a Ray cluster:
367
+
368
+ ```shell
369
+ # On master node
370
+ ray start --head
371
+
372
+ # On worker nodes
373
+ ray start --address=<master_address>
374
+ ```
375
+
376
+ (Optional) Log in to [wandb](https://docs.wandb.ai/quickstart/) for better monitoring:
377
+
378
+ ```shell
379
+ export WANDB_API_KEY=<your_api_key>
380
+ wandb login
381
+ ```
382
+
383
+ For command-line users, run the RFT process:
384
+
385
+ ```shell
386
+ trinity run --config <config_path>
387
+ ```
388
+
389
+ For example, below is the command for fine-tuning Qwen2.5-1.5B-Instruct on GSM8k with GRPO:
390
+
391
+ ```shell
392
+ trinity run --config examples/grpo_gsm8k/gsm8k.yaml
393
+ ```
394
+
395
+ For studio users, click "Run" in the web interface.
396
+
397
+
398
+ ## Further tutorials
399
+
400
+ > [!NOTE]
401
+ > For more tutorials, please refer to the [Trinity-RFT Documentation](https://modelscope.github.io/Trinity-RFT/).
402
+
403
+
404
+ Tutorials for running different RFT modes:
405
+
406
+ + [Quick example: GRPO on GSM8k](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_reasoning_basic.html)
407
+ + [Off-policy RFT](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_reasoning_advanced.html)
408
+ + [Fully asynchronous RFT](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_async_mode.html)
409
+ + [Offline learning by DPO or SFT](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_dpo.html)
410
+
411
+
412
+ Tutorials for adapting Trinity-RFT to multi-step agentic scenarios:
413
+
414
+ + [Concatenated multi-turn workflow](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_multi_turn.html)
415
+ + [General multi-step workflow](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_step_wise.html)
416
+ + [ReAct workflow with an agent framework](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_react.html)
417
+
418
+
419
+ Tutorials for data-related functionalities:
420
+
421
+ + [Advanced data processing & human-in-the-loop](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_data_functionalities.html)
422
+
423
+
424
+ Tutorials for RL algorithm development/research with Trinity-RFT:
425
+
426
+ + [RL algorithm development with Trinity-RFT](https://modelscope.github.io/Trinity-RFT/main/tutorial/example_mix_algo.html)
427
+
428
+
429
+ Guidelines for full configurations:
430
+
431
+ + See [this document](https://modelscope.github.io/Trinity-RFT/main/tutorial/trinity_configs.html)
432
+
433
+
434
+ Guidelines for developers and researchers:
435
+
436
+ + [Benchmark Toolkit for quick verification and experimentation](./benchmark/README.md)
437
+ + [Understand the coordination between explorer and trainer](https://modelscope.github.io/Trinity-RFT/main/tutorial/synchronizer.html)
438
+
439
+
440
+ ## Upcoming features
441
+
442
+ A tentative roadmap: [#51](https://github.com/modelscope/Trinity-RFT/issues/51)
443
+
444
+
445
+ ## Contribution guide
446
+
447
+ This project is currently under active development, and we welcome contributions from the community!
448
+
449
+ See [CONTRIBUTING.md](./CONTRIBUTING.md) for detailed contribution guidelines.
450
+
451
+
452
+ ## Acknowledgements
453
+
454
+ This project is built upon many excellent open-source projects, including:
455
+
456
+ + [verl](https://github.com/volcengine/verl) and [PyTorch's FSDP](https://pytorch.org/docs/stable/fsdp.html) for LLM training;
457
+ + [vLLM](https://github.com/vllm-project/vllm) for LLM inference;
458
+ + [Data-Juicer](https://github.com/modelscope/data-juicer?tab=readme-ov-file) for data processing pipelines;
459
+ + [AgentScope](https://github.com/modelscope/agentscope) for agentic workflows;
460
+ + [Ray](https://github.com/ray-project/ray) for distributed systems;
461
+ + We have also drawn inspiration from RL frameworks like [OpenRLHF](https://github.com/OpenRLHF/OpenRLHF), [TRL](https://github.com/huggingface/trl) and [ChatLearn](https://github.com/alibaba/ChatLearn);
462
+ + ......
463
+
464
+
465
+ ## Citation
466
+
467
+ ```bibtex
468
+ @misc{trinity-rft,
469
+ title={Trinity-RFT: A General-Purpose and Unified Framework for Reinforcement Fine-Tuning of Large Language Models},
470
+ author={Xuchen Pan and Yanxi Chen and Yushuo Chen and Yuchang Sun and Daoyuan Chen and Wenhao Zhang and Yuexiang Xie and Yilun Huang and Yilei Zhang and Dawei Gao and Yaliang Li and Bolin Ding and Jingren Zhou},
471
+ year={2025},
472
+ eprint={2505.17826},
473
+ archivePrefix={arXiv},
474
+ primaryClass={cs.LG},
475
+ url={https://arxiv.org/abs/2505.17826},
476
+ }
477
+ ```