trinity-rft 0.2.1.dev0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trinity_rft-0.3.1/PKG-INFO +453 -0
- trinity_rft-0.3.1/README.md +380 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/pyproject.toml +23 -13
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/__init__.py +1 -1
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/__init__.py +0 -3
- trinity_rft-0.3.1/trinity/algorithm/advantage_fn/__init__.py +43 -0
- trinity_rft-0.3.1/trinity/algorithm/advantage_fn/advantage_fn.py +89 -0
- trinity_rft-0.3.1/trinity/algorithm/advantage_fn/asymre_advantage.py +122 -0
- trinity_rft-0.3.1/trinity/algorithm/advantage_fn/grpo_advantage.py +236 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/step_wise_add_strategy.py → trinity_rft-0.3.1/trinity/algorithm/advantage_fn/multi_step_grpo_advantage.py +68 -25
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/advantage_fn/opmd_advantage.py +52 -3
- trinity_rft-0.3.1/trinity/algorithm/advantage_fn/rec_advantage.py +100 -0
- trinity_rft-0.3.1/trinity/algorithm/advantage_fn/reinforce_advantage.py +36 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/algorithm.py +165 -24
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/__init__.py +8 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/chord_policy_loss.py +19 -18
- trinity_rft-0.3.1/trinity/algorithm/policy_loss_fn/cispo_policy_loss.py +91 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/gspo_policy_loss.py +9 -5
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/mix_policy_loss.py +4 -2
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/opmd_policy_loss.py +7 -4
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/ppo_policy_loss.py +7 -2
- trinity_rft-0.3.1/trinity/algorithm/policy_loss_fn/rec_policy_loss.py +132 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/sft_loss.py +6 -8
- trinity_rft-0.3.1/trinity/algorithm/policy_loss_fn/sppo_loss_fn.py +57 -0
- trinity_rft-0.3.1/trinity/algorithm/policy_loss_fn/topr_policy_loss.py +77 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/sample_strategy/mix_sample_strategy.py +18 -3
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/sample_strategy/sample_strategy.py +14 -38
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/utils.py +43 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/__init__.py +0 -2
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/buffer.py +11 -32
- trinity_rft-0.3.1/trinity/buffer/buffer_reader.py +15 -0
- trinity_rft-0.3.1/trinity/buffer/operators/__init__.py +16 -0
- trinity_rft-0.3.1/trinity/buffer/operators/data_juicer_operator.py +50 -0
- trinity_rft-0.3.1/trinity/buffer/operators/experience_operator.py +50 -0
- trinity_rft-0.3.1/trinity/buffer/operators/filters/reward_filter.py +54 -0
- trinity_rft-0.3.1/trinity/buffer/operators/mappers/reward_shaping_mapper.py +112 -0
- trinity_rft-0.3.1/trinity/buffer/pipelines/__init__.py +11 -0
- trinity_rft-0.3.1/trinity/buffer/pipelines/experience_pipeline.py +144 -0
- trinity_rft-0.3.1/trinity/buffer/pipelines/task_pipeline.py +72 -0
- trinity_rft-0.3.1/trinity/buffer/reader/file_reader.py +157 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/reader/queue_reader.py +5 -16
- trinity_rft-0.3.1/trinity/buffer/reader/sql_reader.py +34 -0
- trinity_rft-0.3.1/trinity/buffer/schema/__init__.py +4 -0
- trinity_rft-0.3.1/trinity/buffer/schema/formatter.py +395 -0
- trinity_rft-0.3.1/trinity/buffer/schema/sql_schema.py +136 -0
- trinity_rft-0.3.1/trinity/buffer/storage/file.py +84 -0
- {trinity_rft-0.2.1.dev0/trinity/buffer → trinity_rft-0.3.1/trinity/buffer/storage}/queue.py +162 -9
- trinity_rft-0.3.1/trinity/buffer/storage/sql.py +281 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/utils.py +3 -2
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/writer/file_writer.py +2 -2
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/writer/queue_writer.py +2 -5
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/writer/sql_writer.py +5 -5
- trinity_rft-0.3.1/trinity/cli/launcher.py +329 -0
- trinity_rft-0.3.1/trinity/common/__init__.py +0 -0
- trinity_rft-0.3.1/trinity/common/config.py +1147 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/constants.py +21 -44
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/experience.py +125 -40
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/models/__init__.py +57 -4
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/models/api/vllm_patch.py +3 -2
- trinity_rft-0.3.1/trinity/common/models/mm_utils.py +73 -0
- trinity_rft-0.3.1/trinity/common/models/model.py +374 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/models/utils.py +186 -14
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/models/vllm_model.py +220 -71
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/models/vllm_worker.py +24 -8
- trinity_rft-0.3.1/trinity/common/rewards/__init__.py +25 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/accuracy_reward.py +9 -9
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/countdown_reward.py +0 -3
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/dapo_reward.py +0 -3
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/format_reward.py +0 -3
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/math_reward.py +0 -3
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/reward_fn.py +0 -4
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/verl_config.py +162 -19
- trinity_rft-0.3.1/trinity/common/workflows/__init__.py +97 -0
- trinity_rft-0.3.1/trinity/common/workflows/agentscope/__init__.py +1 -0
- trinity_rft-0.3.1/trinity/common/workflows/agentscope/react/__init__.py +0 -0
- trinity_rft-0.3.1/trinity/common/workflows/agentscope/react/react_agent.py +63 -0
- trinity_rft-0.3.1/trinity/common/workflows/agentscope/react/react_workflow.py +107 -0
- trinity_rft-0.3.1/trinity/common/workflows/agentscope/react/templates.py +59 -0
- trinity_rft-0.3.1/trinity/common/workflows/agentscope_workflow.py +83 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/customized_math_workflows.py +48 -6
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/customized_toolcall_workflows.py +10 -9
- trinity_rft-0.2.1.dev0/trinity/common/workflows/envs/agentscope/agentscope_react_workflow.py → trinity_rft-0.3.1/trinity/common/workflows/envs/agentscope/agentscopev0_react_workflow.py +21 -21
- trinity_rft-0.3.1/trinity/common/workflows/envs/agentscope/agentscopev1_react_workflow.py +172 -0
- trinity_rft-0.3.1/trinity/common/workflows/envs/agentscope/agentscopev1_search_workflow.py +245 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/envs/alfworld/RAFT_alfworld_workflow.py +17 -9
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/envs/alfworld/RAFT_reflect_alfworld_workflow.py +22 -17
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/envs/alfworld/alfworld_workflow.py +13 -9
- trinity_rft-0.3.1/trinity/common/workflows/envs/email_searcher/prepare_data.py +279 -0
- trinity_rft-0.3.1/trinity/common/workflows/envs/email_searcher/react_agent.py +127 -0
- trinity_rft-0.3.1/trinity/common/workflows/envs/email_searcher/utils.py +333 -0
- trinity_rft-0.3.1/trinity/common/workflows/envs/email_searcher/workflow.py +191 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/envs/sciworld/sciworld_workflow.py +12 -8
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/envs/webshop/webshop_workflow.py +10 -8
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/eval_workflow.py +27 -3
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/math_rm_workflow.py +33 -5
- trinity_rft-0.3.1/trinity/common/workflows/math_ruler_workflow.py +204 -0
- trinity_rft-0.3.1/trinity/common/workflows/math_trainable_ruler_workflow.py +219 -0
- trinity_rft-0.3.1/trinity/common/workflows/rubric_judge_workflow.py +172 -0
- trinity_rft-0.3.1/trinity/common/workflows/simple_mm_workflow.py +112 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/step_wise_workflow.py +104 -12
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/workflow.py +62 -19
- trinity_rft-0.3.1/trinity/explorer/api/__init__.py +0 -0
- trinity_rft-0.3.1/trinity/explorer/api/api.py +65 -0
- trinity_rft-0.3.1/trinity/explorer/api/service.py +160 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/explorer/explorer.py +140 -72
- trinity_rft-0.3.1/trinity/explorer/explorer_client.py +49 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/explorer/scheduler.py +15 -10
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/explorer/workflow_runner.py +77 -29
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/__init__.py +2 -2
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_manager.py +115 -67
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/algorithm_config_manager.py +11 -4
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/buffer_config_manager.py +28 -93
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/model_config_manager.py +14 -4
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/trainer_config_manager.py +134 -45
- trinity_rft-0.3.1/trinity/manager/state_manager.py +159 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/synchronizer.py +63 -8
- trinity_rft-0.3.1/trinity/service/__init__.py +3 -0
- trinity_rft-0.3.1/trinity/service/data_juicer/__init__.py +0 -0
- trinity_rft-0.3.1/trinity/service/data_juicer/client.py +150 -0
- trinity_rft-0.3.1/trinity/service/data_juicer/server/__init__.py +0 -0
- trinity_rft-0.3.1/trinity/service/data_juicer/server/server.py +144 -0
- trinity_rft-0.3.1/trinity/service/data_juicer/server/session.py +125 -0
- trinity_rft-0.3.1/trinity/service/data_juicer/server/utils.py +196 -0
- trinity_rft-0.3.1/trinity/trainer/trainer.py +261 -0
- trinity_rft-0.3.1/trinity/trainer/verl/__init__.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/trainer/verl/dp_actor.py +27 -87
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/trainer/verl/fsdp_checkpoint_manager.py +153 -107
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/trainer/verl/fsdp_workers.py +117 -63
- trinity_rft-0.3.1/trinity/trainer/verl/megatron_actor.py +444 -0
- trinity_rft-0.3.1/trinity/trainer/verl/megatron_checkpoint_manager.py +281 -0
- trinity_rft-0.3.1/trinity/trainer/verl/megatron_workers.py +1010 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/trainer/verl/utils.py +28 -1
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/trainer/verl_trainer.py +294 -107
- trinity_rft-0.3.1/trinity/utils/__init__.py +0 -0
- trinity_rft-0.3.1/trinity/utils/annotations.py +19 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/distributed.py +18 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/dlc_utils.py +11 -4
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/eval_utils.py +36 -3
- trinity_rft-0.3.1/trinity/utils/log.py +103 -0
- trinity_rft-0.3.1/trinity/utils/lora_utils.py +26 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/math_eval_utils.py +1 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/monitor.py +8 -7
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/plugin_loader.py +2 -5
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/registry.py +0 -10
- trinity_rft-0.3.1/trinity_rft.egg-info/PKG-INFO +453 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity_rft.egg-info/SOURCES.txt +58 -22
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity_rft.egg-info/requires.txt +18 -11
- trinity_rft-0.2.1.dev0/PKG-INFO +0 -497
- trinity_rft-0.2.1.dev0/README.md +0 -430
- trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/__init__.py +0 -25
- trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/add_strategy.py +0 -230
- trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/correct_bias_add_strategy.py +0 -54
- trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/duplicate_add_strategy.py +0 -72
- trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/__init__.py +0 -20
- trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/advantage_fn.py +0 -29
- trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/grpo_advantage.py +0 -83
- trinity_rft-0.2.1.dev0/trinity/algorithm/algorithm_manager.py +0 -34
- trinity_rft-0.2.1.dev0/trinity/buffer/buffer_reader.py +0 -21
- trinity_rft-0.2.1.dev0/trinity/buffer/ray_wrapper.py +0 -308
- trinity_rft-0.2.1.dev0/trinity/buffer/reader/file_reader.py +0 -359
- trinity_rft-0.2.1.dev0/trinity/buffer/reader/sql_reader.py +0 -35
- trinity_rft-0.2.1.dev0/trinity/buffer/schema/__init__.py +0 -3
- trinity_rft-0.2.1.dev0/trinity/buffer/schema/sql_schema.py +0 -142
- trinity_rft-0.2.1.dev0/trinity/cli/launcher.py +0 -280
- trinity_rft-0.2.1.dev0/trinity/common/config.py +0 -910
- trinity_rft-0.2.1.dev0/trinity/common/models/model.py +0 -214
- trinity_rft-0.2.1.dev0/trinity/common/rewards/__init__.py +0 -25
- trinity_rft-0.2.1.dev0/trinity/common/workflows/__init__.py +0 -32
- trinity_rft-0.2.1.dev0/trinity/data/controllers/active_iterator.py +0 -394
- trinity_rft-0.2.1.dev0/trinity/data/controllers/default_ops.py +0 -77
- trinity_rft-0.2.1.dev0/trinity/data/controllers/task_parser.py +0 -282
- trinity_rft-0.2.1.dev0/trinity/data/core/comparator.py +0 -84
- trinity_rft-0.2.1.dev0/trinity/data/core/dataset.py +0 -168
- trinity_rft-0.2.1.dev0/trinity/data/core/formatter.py +0 -151
- trinity_rft-0.2.1.dev0/trinity/data/processors/base.py +0 -143
- trinity_rft-0.2.1.dev0/trinity/data/processors/cleaner.py +0 -231
- trinity_rft-0.2.1.dev0/trinity/data/processors/human_annotator.py +0 -47
- trinity_rft-0.2.1.dev0/trinity/data/processors/synthesizer.py +0 -107
- trinity_rft-0.2.1.dev0/trinity/data/server.py +0 -81
- trinity_rft-0.2.1.dev0/trinity/data/utils.py +0 -72
- trinity_rft-0.2.1.dev0/trinity/manager/manager.py +0 -81
- trinity_rft-0.2.1.dev0/trinity/trainer/trainer.py +0 -215
- trinity_rft-0.2.1.dev0/trinity/utils/log.py +0 -65
- trinity_rft-0.2.1.dev0/trinity_rft.egg-info/PKG-INFO +0 -497
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/LICENSE +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/setup.cfg +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/setup.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/advantage_fn/ppo_advantage.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/advantage_fn/reinforce_plus_plus_advantage.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/advantage_fn/remax_advantage.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/advantage_fn/rloo_advantage.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/entropy_loss_fn/__init__.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/entropy_loss_fn/entropy_loss_fn.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/key_mapper.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/kl_fn/__init__.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/kl_fn/kl_fn.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/dpo_loss.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/policy_loss_fn/policy_loss_fn.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/sample_strategy/__init__.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/algorithm/sample_strategy/utils.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/buffer/buffer_writer.py +0 -0
- {trinity_rft-0.2.1.dev0/trinity/buffer/reader → trinity_rft-0.3.1/trinity/buffer/operators/filters}/__init__.py +0 -0
- {trinity_rft-0.2.1.dev0/trinity/buffer/writer → trinity_rft-0.3.1/trinity/buffer/operators/mappers}/__init__.py +0 -0
- {trinity_rft-0.2.1.dev0/trinity/common → trinity_rft-0.3.1/trinity/buffer/reader}/__init__.py +0 -0
- {trinity_rft-0.2.1.dev0/trinity/trainer/verl → trinity_rft-0.3.1/trinity/buffer/storage}/__init__.py +0 -0
- {trinity_rft-0.2.1.dev0/trinity/utils → trinity_rft-0.3.1/trinity/buffer/writer}/__init__.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/cli/client.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/cli/server.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/agents_reward.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/human_reward.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/tool_reward.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/rewards/utils.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/common/workflows/envs/alfworld/RAFT_utils.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/explorer/__init__.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/__init__.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/config_registry.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/manager/config_registry/explorer_config_manager.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/plugins/__init__.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/trainer/__init__.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity/utils/timer.py +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity_rft.egg-info/dependency_links.txt +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity_rft.egg-info/entry_points.txt +0 -0
- {trinity_rft-0.2.1.dev0 → trinity_rft-0.3.1}/trinity_rft.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: trinity-rft
|
|
3
|
+
Version: 0.3.1
|
|
4
|
+
Summary: Trinity-RFT: A Framework for Training Large Language Models with Reinforcement Fine-Tuning
|
|
5
|
+
Author-email: Trinity-RFT Team <trinity-rft@outlook.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/modelscope/Trinity-RFT
|
|
7
|
+
Project-URL: Documentation, https://modelscope.github.io/Trinity-RFT/
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: <3.13,>=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: verl==0.5.0
|
|
19
|
+
Requires-Dist: ray[default]>=2.48.0
|
|
20
|
+
Requires-Dist: vllm<=0.10.2,>=0.9.1
|
|
21
|
+
Requires-Dist: tensordict
|
|
22
|
+
Requires-Dist: wandb
|
|
23
|
+
Requires-Dist: omegaconf
|
|
24
|
+
Requires-Dist: sqlalchemy
|
|
25
|
+
Requires-Dist: psycopg2-binary
|
|
26
|
+
Requires-Dist: networkx
|
|
27
|
+
Requires-Dist: latex2sympy2_extended
|
|
28
|
+
Requires-Dist: math_verify>=0.8.0
|
|
29
|
+
Requires-Dist: ninja
|
|
30
|
+
Requires-Dist: fire
|
|
31
|
+
Requires-Dist: streamlit
|
|
32
|
+
Requires-Dist: flask
|
|
33
|
+
Requires-Dist: requests
|
|
34
|
+
Requires-Dist: tensorboard
|
|
35
|
+
Requires-Dist: openai
|
|
36
|
+
Requires-Dist: jsonlines
|
|
37
|
+
Requires-Dist: sortedcontainers
|
|
38
|
+
Requires-Dist: word2number
|
|
39
|
+
Requires-Dist: transformers
|
|
40
|
+
Provides-Extra: data
|
|
41
|
+
Requires-Dist: py-data-juicer>=1.4.3; extra == "data"
|
|
42
|
+
Provides-Extra: agent
|
|
43
|
+
Requires-Dist: agentscope>=1.0.5; extra == "agent"
|
|
44
|
+
Provides-Extra: rm-gallery
|
|
45
|
+
Requires-Dist: rm-gallery>=0.1.5; extra == "rm-gallery"
|
|
46
|
+
Provides-Extra: dev
|
|
47
|
+
Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
48
|
+
Requires-Dist: black>=23.7.0; extra == "dev"
|
|
49
|
+
Requires-Dist: flake8>=6.1.0; extra == "dev"
|
|
50
|
+
Requires-Dist: flake8-docstrings>=1.6.0; extra == "dev"
|
|
51
|
+
Requires-Dist: isort>=5.12.0; extra == "dev"
|
|
52
|
+
Requires-Dist: mypy>=1.7.0; extra == "dev"
|
|
53
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
54
|
+
Requires-Dist: pytest-json-ctrf; extra == "dev"
|
|
55
|
+
Requires-Dist: parameterized; extra == "dev"
|
|
56
|
+
Requires-Dist: matplotlib; extra == "dev"
|
|
57
|
+
Requires-Dist: viztracer; extra == "dev"
|
|
58
|
+
Provides-Extra: megatron
|
|
59
|
+
Requires-Dist: megatron-core[mlm]==0.13.1; extra == "megatron"
|
|
60
|
+
Requires-Dist: transformer_engine[pytorch]==2.6.0.post1; extra == "megatron"
|
|
61
|
+
Requires-Dist: mbridge>=0.13.0; extra == "megatron"
|
|
62
|
+
Provides-Extra: doc
|
|
63
|
+
Requires-Dist: sphinx; extra == "doc"
|
|
64
|
+
Requires-Dist: sphinx-autobuild; extra == "doc"
|
|
65
|
+
Requires-Dist: sphinx-book-theme; extra == "doc"
|
|
66
|
+
Requires-Dist: myst-parser; extra == "doc"
|
|
67
|
+
Requires-Dist: sphinxcontrib-apidoc; extra == "doc"
|
|
68
|
+
Requires-Dist: sphinx-multiversion; extra == "doc"
|
|
69
|
+
Requires-Dist: sphinxcontrib-mermaid; extra == "doc"
|
|
70
|
+
Provides-Extra: flash-attn
|
|
71
|
+
Requires-Dist: flash-attn==2.8.1; extra == "flash-attn"
|
|
72
|
+
Dynamic: license-file
|
|
73
|
+
|
|
74
|
+
[**中文主页**](https://github.com/modelscope/Trinity-RFT/blob/main/README_zh.md) | [**Tutorial**](https://modelscope.github.io/Trinity-RFT/) | [**FAQ**](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/faq.html)
|
|
75
|
+
|
|
76
|
+
<div align="center">
|
|
77
|
+
<img src="https://img.alicdn.com/imgextra/i1/O1CN01lvLpfw25Pl4ohGZnU_!!6000000007519-2-tps-1628-490.png" alt="Trinity-RFT" style="height: 120px;">
|
|
78
|
+
</div>
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
<h2 align="center">Trinity-RFT: A General-Purpose and Unified Framework for Reinforcement Fine-Tuning of Large Language Models</h2>
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
<div align="center">
|
|
85
|
+
|
|
86
|
+
[Paper](https://arxiv.org/abs/2505.17826)
|
|
87
|
+
[Documentation](https://modelscope.github.io/Trinity-RFT/)
|
|
88
|
+
[PyPI](https://pypi.org/project/trinity-rft/)
|
|
89
|
+

|
|
90
|
+
|
|
91
|
+
</div>
|
|
92
|
+
|
|
93
|
+
## 💡 What is Trinity-RFT?
|
|
94
|
+
|
|
95
|
+
Trinity-RFT is a flexible, general-purpose framework for reinforcement fine-tuning (RFT) of large language models (LLMs). It decouples the RFT process into three key components: **Explorer**, **Trainer**, and **Buffer**, and provides functionalities for users with different backgrounds and objectives:
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
* 🤖 For agent application developers. [[tutorial]](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/develop_workflow.html)
|
|
99
|
+
- Train agent applications to improve their ability to complete tasks in specific environments.
|
|
100
|
+
- Examples: [Multi-Turn Interaction](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_multi_turn.html), [ReAct Agent](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_react.html)
|
|
101
|
+
|
|
102
|
+
* 🧠 For RL algorithm researchers. [[tutorial]](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/develop_algorithm.html)
|
|
103
|
+
- Design and validate new reinforcement learning algorithms using compact, plug-and-play modules.
|
|
104
|
+
- Example: [Mixture of SFT and GRPO](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_mix_algo.html)
|
|
105
|
+
|
|
106
|
+
* 📊 For data engineers. [[tutorial]](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/develop_operator.html)
|
|
107
|
+
- Create task-specific datasets and build data pipelines for cleaning, augmentation, and human-in-the-loop scenarios.
|
|
108
|
+
- Example: [Data Processing](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_data_functionalities.html)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
## 🌟 Key Features
|
|
112
|
+
|
|
113
|
+
* **Flexible RFT Modes:**
|
|
114
|
+
- Supports synchronous/asynchronous, on-policy/off-policy, and online/offline training. Rollout and training can run separately and scale independently across devices.
|
|
115
|
+
|
|
116
|
+
<img src="https://img.alicdn.com/imgextra/i3/O1CN01E7NskS1FFoTI9jlaQ_!!6000000000458-2-tps-1458-682.png" alt="RFT modes supported by Trinity-RFT" width="600" />
|
|
117
|
+
|
|
118
|
+
* **General Agentic-RL Support:**
|
|
119
|
+
- Supports both concatenated and general multi-turn agentic workflows, and can directly train agent applications built with agent frameworks such as AgentScope.
|
|
120
|
+
|
|
121
|
+
<img src="https://img.alicdn.com/imgextra/i1/O1CN01z1i7kk1jlMEVa8ZHV_!!6000000004588-2-tps-1262-695.png" alt="Agentic workflows" width="600" />
|
|
122
|
+
|
|
123
|
+
* **Full Lifecycle Data Pipelines:**
|
|
124
|
+
- Enables pipeline processing of rollout and experience data, supporting active management (prioritization, cleaning, augmentation) throughout the RFT lifecycle.
|
|
125
|
+
|
|
126
|
+
<img src="https://img.alicdn.com/imgextra/i2/O1CN01BfeHp61sXSlGjH7zQ_!!6000000005776-2-tps-1734-473.png" alt="Data pipeline design" width="600" />
|
|
127
|
+
|
|
128
|
+
* **User-Friendly Design:**
|
|
129
|
+
- Modular, decoupled architecture for easy adoption and development. Rich graphical user interfaces enable low-code usage.
|
|
130
|
+
|
|
131
|
+
<img src="https://img.alicdn.com/imgextra/i1/O1CN01Ti0o4320RywoAuyhN_!!6000000006847-2-tps-3840-2134.png" alt="System architecture" width="600" />
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
## 🚀 News
|
|
135
|
+
|
|
136
|
+
* [2025-10] ✨ [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.3.1)] Trinity-RFT v0.3.1 released: multi-stage training support, improved agentic RL examples, LoRA support, debug mode, and new RL algorithms.
|
|
137
|
+
* [2025-09] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.3.0)] Trinity-RFT v0.3.0 released: enhanced Buffer, FSDP2 & Megatron support, multi-modal models, and new RL algorithms/examples.
|
|
138
|
+
* [2025-08] Introducing [CHORD](https://github.com/modelscope/Trinity-RFT/tree/main/examples/mix_chord): dynamic SFT + RL integration for advanced LLM fine-tuning ([paper](https://arxiv.org/pdf/2508.11408)).
|
|
139
|
+
* [2025-08] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.2.1)] Trinity-RFT v0.2.1 released.
|
|
140
|
+
* [2025-07] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.2.0)] Trinity-RFT v0.2.0 released.
|
|
141
|
+
* [2025-07] Technical report (arXiv v2) updated with new features, examples, and experiments: [link](https://arxiv.org/abs/2505.17826).
|
|
142
|
+
* [2025-06] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.1.1)] Trinity-RFT v0.1.1 released.
|
|
143
|
+
* [2025-05] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.1.0)] Trinity-RFT v0.1.0 released, plus [technical report](https://arxiv.org/abs/2505.17826).
|
|
144
|
+
* [2025-04] Trinity-RFT open sourced.
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## Table of contents
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
- [Quick Start](#quick-start)
|
|
153
|
+
- [Step 1: installation](#step-1-installation)
|
|
154
|
+
- [Step 2: prepare dataset and model](#step-2-prepare-dataset-and-model)
|
|
155
|
+
- [Step 3: configurations](#step-3-configurations)
|
|
156
|
+
- [Step 4: run the RFT process](#step-4-run-the-rft-process)
|
|
157
|
+
- [Further tutorials](#further-tutorials)
|
|
158
|
+
- [Upcoming features](#upcoming-features)
|
|
159
|
+
- [Contribution guide](#contribution-guide)
|
|
160
|
+
- [Acknowledgements](#acknowledgements)
|
|
161
|
+
- [Citation](#citation)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
## Quick Start
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
> [!NOTE]
|
|
169
|
+
> This project is currently under active development. Comments and suggestions are welcome!
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
### Step 1: installation
|
|
173
|
+
|
|
174
|
+
Before installing, make sure your system meets the following requirements:
|
|
175
|
+
|
|
176
|
+
- **Python**: version 3.10 to 3.12 (inclusive)
|
|
177
|
+
- **CUDA**: version 12.4 to 12.8 (inclusive)
|
|
178
|
+
- **GPUs**: at least 2 GPUs
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
#### From Source (Recommended)
|
|
182
|
+
|
|
183
|
+
If you plan to customize or contribute to Trinity-RFT, this is the best option.
|
|
184
|
+
|
|
185
|
+
##### 1. Clone the Repository
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
git clone https://github.com/modelscope/Trinity-RFT
|
|
189
|
+
cd Trinity-RFT
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
##### 2. Set Up a Virtual Environment
|
|
193
|
+
|
|
194
|
+
Choose one of the following options:
|
|
195
|
+
|
|
196
|
+
###### Using Conda
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
conda create -n trinity python=3.10
|
|
200
|
+
conda activate trinity
|
|
201
|
+
|
|
202
|
+
pip install -e ".[dev]"
|
|
203
|
+
pip install -e ".[flash_attn]"
|
|
204
|
+
# if you encounter issues when installing flash-attn, try:
|
|
205
|
+
# pip install flash-attn==2.8.1 --no-build-isolation
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
###### Using venv
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
python3.10 -m venv .venv
|
|
212
|
+
source .venv/bin/activate
|
|
213
|
+
|
|
214
|
+
pip install -e ".[dev]"
|
|
215
|
+
pip install -e ".[flash_attn]"
|
|
216
|
+
# if you encounter issues when installing flash-attn, try:
|
|
217
|
+
# pip install flash-attn==2.8.1 --no-build-isolation
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
###### Using `uv`
|
|
221
|
+
|
|
222
|
+
[`uv`](https://github.com/astral-sh/uv) is a modern Python package installer.
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
uv sync --extra dev --extra flash_attn
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
#### Via PyPI
|
|
230
|
+
|
|
231
|
+
If you just want to use the package without modifying the code:
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
pip install trinity-rft==0.3.1
|
|
235
|
+
pip install flash-attn==2.8.1
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
Or with `uv`:
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
uv pip install trinity-rft==0.3.1
|
|
242
|
+
uv pip install flash-attn==2.8.1
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
#### Using Docker
|
|
247
|
+
|
|
248
|
+
We provide a Docker setup for hassle-free environment configuration.
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
git clone https://github.com/modelscope/Trinity-RFT
|
|
252
|
+
cd Trinity-RFT
|
|
253
|
+
|
|
254
|
+
# Build the Docker image
|
|
255
|
+
## Tip: You can modify the Dockerfile to add mirrors or set API keys
|
|
256
|
+
docker build -f scripts/docker/Dockerfile -t trinity-rft:latest .
|
|
257
|
+
|
|
258
|
+
# Run the container, replacing <path_to_your_data_and_checkpoints> with your actual path
|
|
259
|
+
docker run -it \
|
|
260
|
+
--gpus all \
|
|
261
|
+
--shm-size="64g" \
|
|
262
|
+
--rm \
|
|
263
|
+
-v $PWD:/workspace \
|
|
264
|
+
-v <path_to_your_data_and_checkpoints>:/data \
|
|
265
|
+
trinity-rft:latest
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
> For training with **Megatron-LM**, please refer to [Megatron-LM Backend](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_megatron.html).
|
|
269
|
+
|
|
270
|
+
### Step 2: prepare dataset and model
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
Trinity-RFT supports most datasets and models from Huggingface and ModelScope.
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
**Prepare the model** in the local directory `$MODEL_PATH/{model_name}`:
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
# Using Huggingface
|
|
280
|
+
huggingface-cli download {model_name} --local-dir $MODEL_PATH/{model_name}
|
|
281
|
+
|
|
282
|
+
# Using ModelScope
|
|
283
|
+
modelscope download {model_name} --local_dir $MODEL_PATH/{model_name}
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
For more details about model downloading, see [Huggingface](https://huggingface.co/docs/huggingface_hub/main/en/guides/cli) or [ModelScope](https://modelscope.cn/docs/models/download).
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
**Prepare the dataset** in the local directory `$DATASET_PATH/{dataset_name}`:
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
# Using Huggingface
|
|
294
|
+
huggingface-cli download {dataset_name} --repo-type dataset --local-dir $DATASET_PATH/{dataset_name}
|
|
295
|
+
|
|
296
|
+
# Using ModelScope
|
|
297
|
+
modelscope download --dataset {dataset_name} --local_dir $DATASET_PATH/{dataset_name}
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
For more details about dataset downloading, see [Huggingface](https://huggingface.co/docs/huggingface_hub/main/en/guides/cli#download-a-dataset-or-a-space) or [ModelScope](https://modelscope.cn/docs/datasets/download).
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
### Step 3: configurations
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
Trinity-RFT provides a web interface for configuring your RFT process.
|
|
308
|
+
|
|
309
|
+
> [!NOTE]
|
|
310
|
+
> This is an experimental feature, and we will continue to improve it.
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
To launch the web interface for minimal configurations, run:
|
|
314
|
+
|
|
315
|
+
```bash
|
|
316
|
+
trinity studio --port 8080
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
Then you can configure your RFT process on the web page and generate a config file. You can save the config file for later use or run it directly as described in the following section.
|
|
320
|
+
|
|
321
|
+
Advanced users can also edit the config file directly.
|
|
322
|
+
We provide example config files in [`examples`](examples/).
|
|
323
|
+
|
|
324
|
+
For complete GUI features, please refer to the monorepo for [Trinity-Studio](https://github.com/modelscope/Trinity-Studio).
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
<details>
|
|
328
|
+
|
|
329
|
+
<summary> Example: config manager GUI </summary>
|
|
330
|
+
|
|
331
|
+

|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
</details>
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
### Step 4: run the RFT process
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
Start a Ray cluster:
|
|
343
|
+
|
|
344
|
+
```shell
|
|
345
|
+
# On master node
|
|
346
|
+
ray start --head
|
|
347
|
+
|
|
348
|
+
# On worker nodes
|
|
349
|
+
ray start --address=<master_address>
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
(Optional) Log in to [wandb](https://docs.wandb.ai/quickstart/) for better monitoring:
|
|
353
|
+
|
|
354
|
+
```shell
|
|
355
|
+
export WANDB_API_KEY=<your_api_key>
|
|
356
|
+
wandb login
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
For command-line users, run the RFT process:
|
|
360
|
+
|
|
361
|
+
```shell
|
|
362
|
+
trinity run --config <config_path>
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
For example, below is the command for fine-tuning Qwen2.5-1.5B-Instruct on GSM8k with GRPO:
|
|
366
|
+
|
|
367
|
+
```shell
|
|
368
|
+
trinity run --config examples/grpo_gsm8k/gsm8k.yaml
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
For studio users, click "Run" in the web interface.
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
## Further tutorials
|
|
375
|
+
|
|
376
|
+
> [!NOTE]
|
|
377
|
+
> For more tutorials, please refer to the [Trinity-RFT Documentation](https://modelscope.github.io/Trinity-RFT/).
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
Tutorials for running different RFT modes:
|
|
381
|
+
|
|
382
|
+
+ [Quick example: GRPO on GSM8k](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_reasoning_basic.html)
|
|
383
|
+
+ [Off-policy RFT](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_reasoning_advanced.html)
|
|
384
|
+
+ [Fully asynchronous RFT](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_async_mode.html)
|
|
385
|
+
+ [Offline learning by DPO or SFT](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_dpo.html)
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
Tutorials for adapting Trinity-RFT to multi-step agentic scenarios:
|
|
389
|
+
|
|
390
|
+
+ [Concatenated multi-turn workflow](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_multi_turn.html)
|
|
391
|
+
+ [General multi-step workflow](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_step_wise.html)
|
|
392
|
+
+ [ReAct workflow with an agent framework](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_react.html)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
Tutorials for data-related functionalities:
|
|
396
|
+
|
|
397
|
+
+ [Advanced data processing & human-in-the-loop](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_data_functionalities.html)
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
Tutorials for RL algorithm development/research with Trinity-RFT:
|
|
401
|
+
|
|
402
|
+
+ [RL algorithm development with Trinity-RFT](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/example_mix_algo.html)
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
Guidelines for full configurations:
|
|
406
|
+
|
|
407
|
+
+ See [this document](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/trinity_configs.html)
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
Guidelines for developers and researchers:
|
|
411
|
+
|
|
412
|
+
+ [Benchmark Toolkit for quick verification and experimentation](./benchmark/README.md)
|
|
413
|
+
+ [Understand the coordination between explorer and trainer](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/synchronizer.html)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
## Upcoming features
|
|
417
|
+
|
|
418
|
+
A tentative roadmap: [#51](https://github.com/modelscope/Trinity-RFT/issues/51)
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
## Contribution guide
|
|
422
|
+
|
|
423
|
+
This project is currently under active development, and we welcome contributions from the community!
|
|
424
|
+
|
|
425
|
+
See [CONTRIBUTING.md](./CONTRIBUTING.md) for detailed contribution guidelines.
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
## Acknowledgements
|
|
429
|
+
|
|
430
|
+
This project is built upon many excellent open-source projects, including:
|
|
431
|
+
|
|
432
|
+
+ [verl](https://github.com/volcengine/verl) and [PyTorch's FSDP](https://pytorch.org/docs/stable/fsdp.html) for LLM training;
|
|
433
|
+
+ [vLLM](https://github.com/vllm-project/vllm) for LLM inference;
|
|
434
|
+
+ [Data-Juicer](https://github.com/modelscope/data-juicer?tab=readme-ov-file) for data processing pipelines;
|
|
435
|
+
+ [AgentScope](https://github.com/agentscope-ai/agentscope) for agentic workflows;
|
|
436
|
+
+ [Ray](https://github.com/ray-project/ray) for distributed systems;
|
|
437
|
+
+ we have also drawn inspiration from RL frameworks like [OpenRLHF](https://github.com/OpenRLHF/OpenRLHF), [TRL](https://github.com/huggingface/trl) and [ChatLearn](https://github.com/alibaba/ChatLearn);
|
|
438
|
+
+ ......
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
## Citation
|
|
442
|
+
|
|
443
|
+
```bibtex
|
|
444
|
+
@misc{trinity-rft,
|
|
445
|
+
title={Trinity-RFT: A General-Purpose and Unified Framework for Reinforcement Fine-Tuning of Large Language Models},
|
|
446
|
+
author={Xuchen Pan and Yanxi Chen and Yushuo Chen and Yuchang Sun and Daoyuan Chen and Wenhao Zhang and Yuexiang Xie and Yilun Huang and Yilei Zhang and Dawei Gao and Yaliang Li and Bolin Ding and Jingren Zhou},
|
|
447
|
+
year={2025},
|
|
448
|
+
eprint={2505.17826},
|
|
449
|
+
archivePrefix={arXiv},
|
|
450
|
+
primaryClass={cs.LG},
|
|
451
|
+
url={https://arxiv.org/abs/2505.17826},
|
|
452
|
+
}
|
|
453
|
+
```
|