trinity-rft 0.1.1__tar.gz → 0.2.1.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trinity_rft-0.2.1.dev0/PKG-INFO +497 -0
- trinity_rft-0.2.1.dev0/README.md +430 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/pyproject.toml +18 -3
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/__init__.py +1 -1
- trinity_rft-0.2.1.dev0/trinity/algorithm/__init__.py +24 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/__init__.py +25 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/add_strategy.py +230 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/correct_bias_add_strategy.py +54 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/duplicate_add_strategy.py +72 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/add_strategy/step_wise_add_strategy.py +123 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/__init__.py +20 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/advantage_fn.py +29 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/grpo_advantage.py +83 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/opmd_advantage.py +96 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/ppo_advantage.py +86 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/reinforce_plus_plus_advantage.py +66 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/remax_advantage.py +66 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/advantage_fn/rloo_advantage.py +75 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/algorithm.py +245 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/algorithm_manager.py +34 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/entropy_loss_fn/__init__.py +9 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/entropy_loss_fn/entropy_loss_fn.py +104 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/key_mapper.py +29 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/kl_fn/__init__.py +3 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/kl_fn/kl_fn.py +165 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/__init__.py +26 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/chord_policy_loss.py +257 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/dpo_loss.py +67 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/gspo_policy_loss.py +76 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/mix_policy_loss.py +126 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/opmd_policy_loss.py +31 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/policy_loss_fn.py +121 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/ppo_policy_loss.py +65 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/policy_loss_fn/sft_loss.py +33 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/sample_strategy/__init__.py +15 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/sample_strategy/mix_sample_strategy.py +98 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/sample_strategy/sample_strategy.py +92 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/sample_strategy/utils.py +42 -0
- trinity_rft-0.2.1.dev0/trinity/algorithm/utils.py +62 -0
- trinity_rft-0.2.1.dev0/trinity/buffer/__init__.py +15 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/buffer.py +5 -3
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/buffer_reader.py +6 -0
- trinity_rft-0.2.1.dev0/trinity/buffer/buffer_writer.py +31 -0
- trinity_rft-0.2.1.dev0/trinity/buffer/queue.py +195 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/ray_wrapper.py +140 -6
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/reader/file_reader.py +140 -55
- trinity_rft-0.2.1.dev0/trinity/buffer/reader/queue_reader.py +52 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/reader/sql_reader.py +8 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/schema/sql_schema.py +12 -19
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/writer/file_writer.py +16 -3
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/writer/queue_writer.py +10 -7
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/writer/sql_writer.py +18 -5
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/cli/client.py +2 -2
- trinity_rft-0.2.1.dev0/trinity/cli/launcher.py +280 -0
- trinity_rft-0.2.1.dev0/trinity/common/config.py +910 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/constants.py +42 -28
- trinity_rft-0.2.1.dev0/trinity/common/experience.py +547 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/models/__init__.py +15 -15
- trinity_rft-0.2.1.dev0/trinity/common/models/api/vllm_patch.py +374 -0
- trinity_rft-0.2.1.dev0/trinity/common/models/model.py +214 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/models/utils.py +37 -16
- trinity_rft-0.1.1/trinity/common/models/vllm_async_model.py → trinity_rft-0.2.1.dev0/trinity/common/models/vllm_model.py +62 -57
- trinity_rft-0.2.1.dev0/trinity/common/models/vllm_worker.py +74 -0
- trinity_rft-0.2.1.dev0/trinity/common/rewards/__init__.py +25 -0
- trinity_rft-0.2.1.dev0/trinity/common/rewards/accuracy_reward.py +68 -0
- trinity_rft-0.2.1.dev0/trinity/common/rewards/countdown_reward.py +58 -0
- trinity_rft-0.2.1.dev0/trinity/common/rewards/dapo_reward.py +67 -0
- trinity_rft-0.2.1.dev0/trinity/common/rewards/format_reward.py +28 -0
- trinity_rft-0.2.1.dev0/trinity/common/rewards/math_reward.py +70 -0
- trinity_rft-0.2.1.dev0/trinity/common/rewards/reward_fn.py +107 -0
- trinity_rft-0.2.1.dev0/trinity/common/rewards/utils.py +22 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/verl_config.py +74 -53
- trinity_rft-0.2.1.dev0/trinity/common/workflows/__init__.py +32 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/workflows/customized_math_workflows.py +24 -20
- trinity_rft-0.2.1.dev0/trinity/common/workflows/customized_toolcall_workflows.py +265 -0
- trinity_rft-0.2.1.dev0/trinity/common/workflows/envs/agentscope/agentscope_react_workflow.py +162 -0
- trinity_rft-0.2.1.dev0/trinity/common/workflows/envs/alfworld/RAFT_alfworld_workflow.py +225 -0
- trinity_rft-0.2.1.dev0/trinity/common/workflows/envs/alfworld/RAFT_reflect_alfworld_workflow.py +310 -0
- trinity_rft-0.2.1.dev0/trinity/common/workflows/envs/alfworld/RAFT_utils.py +196 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/workflows/envs/alfworld/alfworld_workflow.py +121 -5
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/workflows/envs/sciworld/sciworld_workflow.py +1 -1
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/workflows/envs/webshop/webshop_workflow.py +1 -1
- trinity_rft-0.2.1.dev0/trinity/common/workflows/eval_workflow.py +90 -0
- trinity_rft-0.2.1.dev0/trinity/common/workflows/math_rm_workflow.py +56 -0
- trinity_rft-0.2.1.dev0/trinity/common/workflows/step_wise_workflow.py +147 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/workflows/workflow.py +66 -28
- trinity_rft-0.2.1.dev0/trinity/data/controllers/active_iterator.py +394 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/controllers/task_parser.py +14 -35
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/core/dataset.py +58 -26
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/processors/cleaner.py +5 -3
- trinity_rft-0.2.1.dev0/trinity/data/server.py +81 -0
- trinity_rft-0.2.1.dev0/trinity/data/utils.py +72 -0
- trinity_rft-0.2.1.dev0/trinity/explorer/__init__.py +3 -0
- trinity_rft-0.2.1.dev0/trinity/explorer/explorer.py +395 -0
- trinity_rft-0.2.1.dev0/trinity/explorer/scheduler.py +448 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/explorer/workflow_runner.py +50 -22
- trinity_rft-0.2.1.dev0/trinity/manager/__init__.py +7 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_manager.py +178 -111
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_registry/__init__.py +2 -0
- trinity_rft-0.2.1.dev0/trinity/manager/config_registry/algorithm_config_manager.py +371 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_registry/buffer_config_manager.py +77 -13
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_registry/explorer_config_manager.py +31 -8
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_registry/model_config_manager.py +6 -92
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_registry/trainer_config_manager.py +34 -97
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/manager.py +16 -4
- trinity_rft-0.2.1.dev0/trinity/manager/synchronizer.py +325 -0
- trinity_rft-0.2.1.dev0/trinity/trainer/trainer.py +215 -0
- trinity_rft-0.2.1.dev0/trinity/trainer/verl/dp_actor.py +234 -0
- trinity_rft-0.2.1.dev0/trinity/trainer/verl/fsdp_checkpoint_manager.py +363 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/trainer/verl/fsdp_workers.py +430 -655
- trinity_rft-0.2.1.dev0/trinity/trainer/verl/utils.py +152 -0
- trinity_rft-0.2.1.dev0/trinity/trainer/verl_trainer.py +426 -0
- trinity_rft-0.2.1.dev0/trinity/utils/distributed.py +74 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/utils/dlc_utils.py +25 -14
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/utils/eval_utils.py +2 -130
- trinity_rft-0.2.1.dev0/trinity/utils/math_eval_utils.py +558 -0
- trinity_rft-0.2.1.dev0/trinity/utils/monitor.py +220 -0
- trinity_rft-0.2.1.dev0/trinity/utils/plugin_loader.py +87 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/utils/registry.py +19 -16
- trinity_rft-0.2.1.dev0/trinity/utils/timer.py +18 -0
- trinity_rft-0.2.1.dev0/trinity_rft.egg-info/PKG-INFO +497 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity_rft.egg-info/SOURCES.txt +56 -9
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity_rft.egg-info/requires.txt +16 -2
- trinity_rft-0.1.1/PKG-INFO +0 -412
- trinity_rft-0.1.1/README.md +0 -357
- trinity_rft-0.1.1/trinity/buffer/__init__.py +0 -7
- trinity_rft-0.1.1/trinity/buffer/buffer_writer.py +0 -15
- trinity_rft-0.1.1/trinity/buffer/queue.py +0 -75
- trinity_rft-0.1.1/trinity/buffer/reader/queue_reader.py +0 -37
- trinity_rft-0.1.1/trinity/cli/launcher.py +0 -249
- trinity_rft-0.1.1/trinity/common/config.py +0 -602
- trinity_rft-0.1.1/trinity/common/experience.py +0 -295
- trinity_rft-0.1.1/trinity/common/models/model.py +0 -136
- trinity_rft-0.1.1/trinity/common/models/openai_api.py +0 -79
- trinity_rft-0.1.1/trinity/common/models/vllm_model.py +0 -287
- trinity_rft-0.1.1/trinity/common/models/vllm_worker.py +0 -74
- trinity_rft-0.1.1/trinity/common/rewards/__init__.py +0 -11
- trinity_rft-0.1.1/trinity/common/rewards/accuracy_reward.py +0 -33
- trinity_rft-0.1.1/trinity/common/rewards/base.py +0 -24
- trinity_rft-0.1.1/trinity/common/rewards/composite_reward.py +0 -24
- trinity_rft-0.1.1/trinity/common/rewards/format_reward.py +0 -29
- trinity_rft-0.1.1/trinity/common/rewards/reward_fn.py +0 -229
- trinity_rft-0.1.1/trinity/common/schema.py +0 -148
- trinity_rft-0.1.1/trinity/common/workflows/__init__.py +0 -19
- trinity_rft-0.1.1/trinity/data/controllers/active_iterator.py +0 -290
- trinity_rft-0.1.1/trinity/data/core/dataset_db.py +0 -84
- trinity_rft-0.1.1/trinity/data/server.py +0 -27
- trinity_rft-0.1.1/trinity/explorer/__init__.py +0 -4
- trinity_rft-0.1.1/trinity/explorer/explorer.py +0 -303
- trinity_rft-0.1.1/trinity/explorer/runner_pool.py +0 -287
- trinity_rft-0.1.1/trinity/manager/__init__.py +0 -7
- trinity_rft-0.1.1/trinity/trainer/trainer.py +0 -175
- trinity_rft-0.1.1/trinity/trainer/verl/core_algos.py +0 -717
- trinity_rft-0.1.1/trinity/trainer/verl/dp_actor.py +0 -538
- trinity_rft-0.1.1/trinity/trainer/verl/ray_trainer.py +0 -1160
- trinity_rft-0.1.1/trinity/trainer/verl_trainer.py +0 -552
- trinity_rft-0.1.1/trinity/utils/distributed.py +0 -82
- trinity_rft-0.1.1/trinity/utils/monitor.py +0 -112
- trinity_rft-0.1.1/trinity/utils/plugin_loader.py +0 -65
- trinity_rft-0.1.1/trinity_rft.egg-info/PKG-INFO +0 -412
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/LICENSE +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/setup.cfg +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/setup.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/reader/__init__.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/schema/__init__.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/utils.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/buffer/writer/__init__.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/cli/server.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/__init__.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/rewards/agents_reward.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/rewards/human_reward.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/common/rewards/tool_reward.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/controllers/default_ops.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/core/comparator.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/core/formatter.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/processors/base.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/processors/human_annotator.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/data/processors/synthesizer.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/manager/config_registry/config_registry.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/plugins/__init__.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/trainer/__init__.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/trainer/verl/__init__.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/utils/__init__.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity/utils/log.py +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity_rft.egg-info/dependency_links.txt +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity_rft.egg-info/entry_points.txt +0 -0
- {trinity_rft-0.1.1 → trinity_rft-0.2.1.dev0}/trinity_rft.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,497 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: trinity-rft
|
|
3
|
+
Version: 0.2.1.dev0
|
|
4
|
+
Summary: Trinity-RFT: A Framework for Training Large Language Models with Reinforcement Fine-Tuning
|
|
5
|
+
Author-email: Trinity-RFT Team <trinity-rft@outlook.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/modelscope/Trinity-RFT
|
|
7
|
+
Project-URL: Documentation, https://modelscope.github.io/Trinity-RFT/
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: verl==0.4.1
|
|
19
|
+
Requires-Dist: ray[default]>=2.45.0
|
|
20
|
+
Requires-Dist: vllm<=0.10.0,>=0.9.1
|
|
21
|
+
Requires-Dist: tensordict==0.6.2
|
|
22
|
+
Requires-Dist: wandb
|
|
23
|
+
Requires-Dist: omegaconf
|
|
24
|
+
Requires-Dist: sqlalchemy
|
|
25
|
+
Requires-Dist: psycopg2-binary
|
|
26
|
+
Requires-Dist: networkx
|
|
27
|
+
Requires-Dist: latex2sympy2_extended
|
|
28
|
+
Requires-Dist: math_verify
|
|
29
|
+
Requires-Dist: ninja
|
|
30
|
+
Requires-Dist: fire
|
|
31
|
+
Requires-Dist: streamlit
|
|
32
|
+
Requires-Dist: flask
|
|
33
|
+
Requires-Dist: requests
|
|
34
|
+
Requires-Dist: tensorboard
|
|
35
|
+
Requires-Dist: openai
|
|
36
|
+
Requires-Dist: jsonlines
|
|
37
|
+
Requires-Dist: sortedcontainers
|
|
38
|
+
Requires-Dist: word2number
|
|
39
|
+
Requires-Dist: transformers<4.54.0
|
|
40
|
+
Provides-Extra: data
|
|
41
|
+
Requires-Dist: py-data-juicer; extra == "data"
|
|
42
|
+
Provides-Extra: agent
|
|
43
|
+
Requires-Dist: agentscope; extra == "agent"
|
|
44
|
+
Provides-Extra: rm-gallery
|
|
45
|
+
Requires-Dist: rm-gallery>=0.1.1; extra == "rm-gallery"
|
|
46
|
+
Provides-Extra: dev
|
|
47
|
+
Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
48
|
+
Requires-Dist: black>=23.7.0; extra == "dev"
|
|
49
|
+
Requires-Dist: flake8>=6.1.0; extra == "dev"
|
|
50
|
+
Requires-Dist: flake8-docstrings>=1.6.0; extra == "dev"
|
|
51
|
+
Requires-Dist: isort>=5.12.0; extra == "dev"
|
|
52
|
+
Requires-Dist: mypy>=1.7.0; extra == "dev"
|
|
53
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
54
|
+
Requires-Dist: pytest-json-ctrf; extra == "dev"
|
|
55
|
+
Requires-Dist: parameterized; extra == "dev"
|
|
56
|
+
Requires-Dist: matplotlib; extra == "dev"
|
|
57
|
+
Provides-Extra: doc
|
|
58
|
+
Requires-Dist: sphinx; extra == "doc"
|
|
59
|
+
Requires-Dist: sphinx-autobuild; extra == "doc"
|
|
60
|
+
Requires-Dist: sphinx_rtd_theme; extra == "doc"
|
|
61
|
+
Requires-Dist: myst-parser; extra == "doc"
|
|
62
|
+
Requires-Dist: sphinxcontrib-apidoc; extra == "doc"
|
|
63
|
+
Requires-Dist: sphinx-multiversion; extra == "doc"
|
|
64
|
+
Provides-Extra: flash-attn
|
|
65
|
+
Requires-Dist: flash-attn==2.8.0.post2; extra == "flash-attn"
|
|
66
|
+
Dynamic: license-file
|
|
67
|
+
|
|
68
|
+
[**中文主页**](https://github.com/modelscope/Trinity-RFT/blob/main/README_zh.md) | [**Tutorial**](https://modelscope.github.io/Trinity-RFT/) | [**FAQ**](./docs/sphinx_doc/source/tutorial/faq.md)
|
|
69
|
+
|
|
70
|
+
<div align="center">
|
|
71
|
+
<img src="https://img.alicdn.com/imgextra/i1/O1CN01lvLpfw25Pl4ohGZnU_!!6000000007519-2-tps-1628-490.png" alt="Trinity-RFT" style="height: 120px;">
|
|
72
|
+
</div>
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
<h2 align="center">Trinity-RFT: A General-Purpose and Unified Framework for Reinforcement Fine-Tuning of Large Language Models</h2>
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
<div align="center">
|
|
80
|
+
|
|
81
|
+
[Paper](https://arxiv.org/abs/2505.17826)
|
|
82
|
+
[Documentation](https://modelscope.github.io/Trinity-RFT/)
|
|
83
|
+
[PyPI](https://pypi.org/project/trinity-rft/)
|
|
84
|
+

|
|
85
|
+
|
|
86
|
+
</div>
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
## 🚀 News
|
|
91
|
+
|
|
92
|
+
* [2025-08] ✨ Trinity-RFT v0.2.1 is released with enhanced features for Agentic RL and Async RL.
|
|
93
|
+
* [2025-08] 🎵 We introduce [CHORD](https://github.com/modelscope/Trinity-RFT/tree/main/examples/mix_chord), a dynamic integration of SFT and RL for enhanced LLM fine-tuning ([paper](https://arxiv.org/pdf/2508.11408)).
|
|
94
|
+
* [2025-08] We now support training on general multi-step workflows! Please check out examples for [ALFWorld](./docs/sphinx_doc/source/tutorial/example_step_wise.md) and [ReAct](./docs/sphinx_doc/source/tutorial/example_react.md).
|
|
95
|
+
* [2025-07] Trinity-RFT v0.2.0 is released.
|
|
96
|
+
* [2025-07] We update the [technical report](https://arxiv.org/abs/2505.17826) (arXiv v2) with new features, examples, and experiments.
|
|
97
|
+
* [2025-06] Trinity-RFT v0.1.1 is released.
|
|
98
|
+
* [2025-05] We release Trinity-RFT v0.1.0 and a technical report.
|
|
99
|
+
* [2025-04] The initial codebase of Trinity-RFT is open-sourced.
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
## 💡 What is Trinity-RFT?
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
Trinity-RFT is a general-purpose, flexible and easy-to-use framework for reinforcement fine-tuning (RFT) of large language models (LLMs).
|
|
107
|
+
It is designed to support diverse application scenarios and serve as a unified platform for exploring advanced RL paradigms in the [era of experience](https://storage.googleapis.com/deepmind-media/Era-of-Experience%20/The%20Era%20of%20Experience%20Paper.pdf).
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
## ✨ Key Features
|
|
112
|
+
|
|
113
|
+
* **Unified RFT Core:**
|
|
114
|
+
|
|
115
|
+
Supports *synchronous/asynchronous*, *on-policy/off-policy*, and *online/offline* training. Rollout and training can run separately and scale independently on different devices.
|
|
116
|
+
|
|
117
|
+
* **First-Class Agent-Environment Interaction:**
|
|
118
|
+
|
|
119
|
+
Handles lagged feedback, long-tailed latencies, and agent/env failures gracefully. Supports multi-turn agent-env interaction.
|
|
120
|
+
|
|
121
|
+
* **Optimized Data Pipelines:**
|
|
122
|
+
|
|
123
|
+
Treats rollout tasks and experiences as dynamic assets, enabling active management (prioritization, cleaning, augmentation) throughout the RFT lifecycle.
|
|
124
|
+
|
|
125
|
+
* **User-Friendly Design:**
|
|
126
|
+
|
|
127
|
+
Modular and decoupled architecture for easy adoption and development, plus rich graphical user interfaces for low-code usage.
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
<p align="center">
|
|
131
|
+
<img src="https://img.alicdn.com/imgextra/i2/O1CN01H3UbpF1yP7E1OCLbi_!!6000000006570-2-tps-1334-638.png" alt="Trinity-RFT">
|
|
132
|
+
<em>Figure: The high-level design of Trinity-RFT</em>
|
|
133
|
+
</p>
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
<details>
|
|
137
|
+
<summary>Figure: The architecture of RFT-core</summary>
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
<p align="center">
|
|
141
|
+
<img src="https://img.alicdn.com/imgextra/i1/O1CN01BFCZRV1zS9T1PoH49_!!6000000006712-2-tps-922-544.png" alt="Trinity-RFT-core-architecture">
|
|
142
|
+
</p>
|
|
143
|
+
|
|
144
|
+
</details>
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
<details>
|
|
148
|
+
<summary>Figure: Some RFT modes supported by Trinity-RFT</summary>
|
|
149
|
+
|
|
150
|
+
<p align="center">
|
|
151
|
+
<img src="https://img.alicdn.com/imgextra/i3/O1CN01E7NskS1FFoTI9jlaQ_!!6000000000458-2-tps-1458-682.png" alt="Trinity-RFT-modes">
|
|
152
|
+
</p>
|
|
153
|
+
|
|
154
|
+
</details>
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
<details>
|
|
158
|
+
<summary>Figure: Concatenated and general multi-step workflows</summary>
|
|
159
|
+
|
|
160
|
+
<p align="center">
|
|
161
|
+
<img src="https://img.alicdn.com/imgextra/i1/O1CN01z1i7kk1jlMEVa8ZHV_!!6000000004588-2-tps-1262-695.png" alt="Trinity-RFT-multi-step">
|
|
162
|
+
</p>
|
|
163
|
+
|
|
164
|
+
</details>
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
<details>
|
|
168
|
+
<summary>Figure: The architecture of data processors</summary>
|
|
169
|
+
|
|
170
|
+
<p align="center">
|
|
171
|
+
<img src="https://img.alicdn.com/imgextra/i3/O1CN01hR1LCh25kpJMKmYR4_!!6000000007565-2-tps-1474-740.png" alt="Trinity-RFT-data-pipeline-buffer">
|
|
172
|
+
</p>
|
|
173
|
+
|
|
174
|
+
</details>
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
<details>
|
|
178
|
+
<summary>Figure: The high-level design of data pipelines in Trinity-RFT</summary>
|
|
179
|
+
|
|
180
|
+
<p align="center">
|
|
181
|
+
<img src="https://img.alicdn.com/imgextra/i4/O1CN01UvyfcZ1WoTv5t3pCp_!!6000000002835-2-tps-1166-274.png" alt="Trinity-RFT-data-pipelines">
|
|
182
|
+
</p>
|
|
183
|
+
|
|
184
|
+
</details>
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
## 🛠️ What can I use Trinity-RFT for?
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
* **Adaptation to New Scenarios:**
|
|
192
|
+
|
|
193
|
+
Implement agent-environment interaction logic in a single `Workflow` or `MultiTurnWorkflow` class. ([Example](./docs/sphinx_doc/source/tutorial/example_multi_turn.md))
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
* **RL Algorithm Development:**
|
|
197
|
+
|
|
198
|
+
Develop custom RL algorithms (loss design, sampling, data processing) in compact, plug-and-play classes. ([Example](./docs/sphinx_doc/source/tutorial/example_mix_algo.md))
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
* **Low-Code Usage:**
|
|
202
|
+
|
|
203
|
+
Use graphical interfaces for easy monitoring and tracking of the learning process.
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## Table of contents
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
- [Getting started](#getting-started)
|
|
212
|
+
- [Step 1: installation](#step-1-installation)
|
|
213
|
+
- [Step 2: prepare dataset and model](#step-2-prepare-dataset-and-model)
|
|
214
|
+
- [Step 3: configurations](#step-3-configurations)
|
|
215
|
+
- [Step 4: run the RFT process](#step-4-run-the-rft-process)
|
|
216
|
+
- [Further tutorials](#further-tutorials)
|
|
217
|
+
- [Upcoming features](#upcoming-features)
|
|
218
|
+
- [Contribution guide](#contribution-guide)
|
|
219
|
+
- [Acknowledgements](#acknowledgements)
|
|
220
|
+
- [Citation](#citation)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
## Getting started
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
> [!NOTE]
|
|
228
|
+
> This project is currently under active development. Comments and suggestions are welcome!
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
### Step 1: installation
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
Requirements:
|
|
235
|
+
- Python version >= 3.10, <= 3.12
|
|
236
|
+
- CUDA version >= 12.4, <= 12.8
|
|
237
|
+
- At least 2 GPUs
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
Installation from source **(recommended)**:
|
|
241
|
+
|
|
242
|
+
```shell
|
|
243
|
+
# Pull the source code from GitHub
|
|
244
|
+
git clone https://github.com/modelscope/Trinity-RFT
|
|
245
|
+
cd Trinity-RFT
|
|
246
|
+
|
|
247
|
+
# Create a new environment using Conda or venv
|
|
248
|
+
# Option 1: Conda
|
|
249
|
+
conda create -n trinity python=3.10
|
|
250
|
+
conda activate trinity
|
|
251
|
+
|
|
252
|
+
# Option 2: venv
|
|
253
|
+
python3.10 -m venv .venv
|
|
254
|
+
source .venv/bin/activate
|
|
255
|
+
|
|
256
|
+
# Install the package in editable mode
|
|
257
|
+
# for bash
|
|
258
|
+
pip install -e .[dev]
|
|
259
|
+
# for zsh
|
|
260
|
+
pip install -e .\[dev\]
|
|
261
|
+
|
|
262
|
+
# Install flash-attn after all dependencies are installed
|
|
263
|
+
# Note: flash-attn will take a long time to compile, please be patient.
|
|
264
|
+
# for bash
|
|
265
|
+
pip install -e .[flash_attn]
|
|
266
|
+
# for zsh
|
|
267
|
+
pip install -e .\[flash_attn\]
|
|
268
|
+
# Try the following command if you encounter errors during flash-attn installation
|
|
269
|
+
# pip install flash-attn==2.8.0.post2 -v --no-build-isolation
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
Installation using pip:
|
|
273
|
+
|
|
274
|
+
```shell
|
|
275
|
+
pip install trinity-rft==0.2.0
|
|
276
|
+
# install flash-attn separately
|
|
277
|
+
pip install flash-attn==2.8.0.post2
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
Installation from docker:
|
|
281
|
+
We provide a Dockerfile for Trinity-RFT (trinity):
|
|
282
|
+
|
|
283
|
+
```shell
|
|
284
|
+
git clone https://github.com/modelscope/Trinity-RFT
|
|
285
|
+
cd Trinity-RFT
|
|
286
|
+
|
|
287
|
+
# build the docker image
|
|
288
|
+
# Note: you can edit the dockerfile to customize the environment
|
|
289
|
+
# e.g., use pip mirrors or set api key
|
|
290
|
+
docker build -f scripts/docker/Dockerfile -t trinity-rft:latest .
|
|
291
|
+
|
|
292
|
+
# run the docker image
|
|
293
|
+
docker run -it --gpus all --shm-size="64g" --rm -v $PWD:/workspace -v <root_path_of_data_and_checkpoints>:/data trinity-rft:latest
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
### Step 2: prepare dataset and model
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
Trinity-RFT supports most datasets and models from Huggingface and ModelScope.
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
**Prepare the model** in the local directory `$MODEL_PATH/{model_name}`:
|
|
303
|
+
|
|
304
|
+
```bash
|
|
305
|
+
# Using Huggingface
|
|
306
|
+
huggingface-cli download {model_name} --local-dir $MODEL_PATH/{model_name}
|
|
307
|
+
|
|
308
|
+
# Using Modelscope
|
|
309
|
+
modelscope download {model_name} --local_dir $MODEL_PATH/{model_name}
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
For more details about model downloading, see [Huggingface](https://huggingface.co/docs/huggingface_hub/main/en/guides/cli) or [ModelScope](https://modelscope.cn/docs/models/download).
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
**Prepare the dataset** in the local directory `$DATASET_PATH/{dataset_name}`:
|
|
317
|
+
|
|
318
|
+
```bash
|
|
319
|
+
# Using Huggingface
|
|
320
|
+
huggingface-cli download {dataset_name} --repo-type dataset --local-dir $DATASET_PATH/{dataset_name}
|
|
321
|
+
|
|
322
|
+
# Using Modelscope
|
|
323
|
+
modelscope download --dataset {dataset_name} --local_dir $DATASET_PATH/{dataset_name}
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
For more details about dataset downloading, see [Huggingface](https://huggingface.co/docs/huggingface_hub/main/en/guides/cli#download-a-dataset-or-a-space) or [ModelScope](https://modelscope.cn/docs/datasets/download).
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
### Step 3: configurations
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
Trinity-RFT provides a web interface for configuring your RFT process.
|
|
334
|
+
|
|
335
|
+
> [!NOTE]
|
|
336
|
+
> This is an experimental feature, and we will continue to improve it.
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
To launch the web interface for minimal configurations, run:
|
|
340
|
+
|
|
341
|
+
```bash
|
|
342
|
+
trinity studio --port 8080
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
Then you can configure your RFT process in the web page and generate a config file. You can save the config file for later use or run it directly as described in the following section.
|
|
346
|
+
|
|
347
|
+
Advanced users can also edit the config file directly.
|
|
348
|
+
We provide example config files in [`examples`](examples/).
|
|
349
|
+
|
|
350
|
+
For complete GUI features, please refer to the monorepo for [Trinity-Studio](https://github.com/modelscope/Trinity-Studio).
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
<details>
|
|
354
|
+
|
|
355
|
+
<summary> Example: config manager GUI </summary>
|
|
356
|
+
|
|
357
|
+

|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
</details>
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
### Step 4: run the RFT process
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
Start a ray cluster:
|
|
369
|
+
|
|
370
|
+
```shell
|
|
371
|
+
# On master node
|
|
372
|
+
ray start --head
|
|
373
|
+
|
|
374
|
+
# On worker nodes
|
|
375
|
+
ray start --address=<master_address>
|
|
376
|
+
```
|
|
377
|
+
|
|
378
|
+
(Optional) Log in to [wandb](https://docs.wandb.ai/quickstart/) for better monitoring:
|
|
379
|
+
|
|
380
|
+
```shell
|
|
381
|
+
export WANDB_API_KEY=<your_api_key>
|
|
382
|
+
wandb login
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
For command-line users, run the RFT process:
|
|
386
|
+
|
|
387
|
+
```shell
|
|
388
|
+
trinity run --config <config_path>
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
For example, below is the command for fine-tuning Qwen2.5-1.5B-Instruct on GSM8k with GRPO:
|
|
392
|
+
|
|
393
|
+
```shell
|
|
394
|
+
trinity run --config examples/grpo_gsm8k/gsm8k.yaml
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
For studio users, click "Run" in the web interface.
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
## Further tutorials
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
Tutorials for running different RFT modes:
|
|
404
|
+
|
|
405
|
+
+ [Quick example: GRPO on GSM8k](./docs/sphinx_doc/source/tutorial/example_reasoning_basic.md)
|
|
406
|
+
+ [Off-policy RFT](./docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md)
|
|
407
|
+
+ [Fully asynchronous RFT](./docs/sphinx_doc/source/tutorial/example_async_mode.md)
|
|
408
|
+
+ [Offline learning by DPO or SFT](./docs/sphinx_doc/source/tutorial/example_dpo.md)
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
Tutorials for adapting Trinity-RFT to a new multi-turn agentic scenario:
|
|
412
|
+
|
|
413
|
+
+ [Concatenated Multi-turn tasks](./docs/sphinx_doc/source/tutorial/example_multi_turn.md)
|
|
414
|
+
|
|
415
|
+
Tutorials for adapting Trinity-RFT to a general multi-step agentic scenario:
|
|
416
|
+
|
|
417
|
+
+ [General Multi-Step tasks](./docs/sphinx_doc/source/tutorial/example_step_wise.md)
|
|
418
|
+
+ [ReAct agent tasks](./docs/sphinx_doc/source/tutorial/example_react.md)
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
Tutorials for data-related functionalities:
|
|
422
|
+
|
|
423
|
+
+ [Advanced data processing & human-in-the-loop](./docs/sphinx_doc/source/tutorial/example_data_functionalities.md)
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
Tutorials for RL algorithm development/research with Trinity-RFT:
|
|
427
|
+
|
|
428
|
+
+ [RL algorithm development with Trinity-RFT](./docs/sphinx_doc/source/tutorial/example_mix_algo.md)
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
Guidelines for full configurations: see [this document](./docs/sphinx_doc/source/tutorial/trinity_configs.md)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
Guidelines for developers and researchers:
|
|
435
|
+
|
|
436
|
+
+ [Build new RL scenarios](./docs/sphinx_doc/source/tutorial/trinity_programming_guide.md#workflows-for-rl-environment-developers)
|
|
437
|
+
+ [Implement new RL algorithms](./docs/sphinx_doc/source/tutorial/trinity_programming_guide.md#algorithms-for-rl-algorithm-developers)
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
## Upcoming features
|
|
444
|
+
|
|
445
|
+
A tentative roadmap: [#51](https://github.com/modelscope/Trinity-RFT/issues/51)
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
## Contribution guide
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
This project is currently under active development, and we welcome contributions from the community!
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
Code style check:
|
|
456
|
+
|
|
457
|
+
```shell
|
|
458
|
+
pre-commit run --all-files
|
|
459
|
+
```
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
Unit tests:
|
|
464
|
+
|
|
465
|
+
```shell
|
|
466
|
+
python -m pytest tests
|
|
467
|
+
```
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
## Acknowledgements
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
This project is built upon many excellent open-source projects, including:
|
|
475
|
+
|
|
476
|
+
+ [verl](https://github.com/volcengine/verl) and [PyTorch's FSDP](https://pytorch.org/docs/stable/fsdp.html) for LLM training;
|
|
477
|
+
+ [vLLM](https://github.com/vllm-project/vllm) for LLM inference;
|
|
478
|
+
+ [Data-Juicer](https://github.com/modelscope/data-juicer?tab=readme-ov-file) for data processing pipelines;
|
|
479
|
+
+ [AgentScope](https://github.com/modelscope/agentscope) for agentic workflow;
|
|
480
|
+
+ [Ray](https://github.com/ray-project/ray) for distributed systems;
|
|
481
|
+
+ we have also drawn inspiration from RL frameworks like [OpenRLHF](https://github.com/OpenRLHF/OpenRLHF), [TRL](https://github.com/huggingface/trl) and [ChatLearn](https://github.com/alibaba/ChatLearn);
|
|
482
|
+
+ ......
|
|
483
|
+
|
|
484
|
+
## Citation
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
```plain
|
|
488
|
+
@misc{trinity-rft,
|
|
489
|
+
title={Trinity-RFT: A General-Purpose and Unified Framework for Reinforcement Fine-Tuning of Large Language Models},
|
|
490
|
+
author={Xuchen Pan and Yanxi Chen and Yushuo Chen and Yuchang Sun and Daoyuan Chen and Wenhao Zhang and Yuexiang Xie and Yilun Huang and Yilei Zhang and Dawei Gao and Yaliang Li and Bolin Ding and Jingren Zhou},
|
|
491
|
+
year={2025},
|
|
492
|
+
eprint={2505.17826},
|
|
493
|
+
archivePrefix={arXiv},
|
|
494
|
+
primaryClass={cs.LG},
|
|
495
|
+
url={https://arxiv.org/abs/2505.17826},
|
|
496
|
+
}
|
|
497
|
+
```
|