synth-ai 0.2.9.dev2__py3-none-any.whl → 0.2.9.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112)
  1. examples/analyze_semantic_words.sh +17 -0
  2. examples/common_old/backend.py +21 -0
  3. examples/crafter_debug_render.py +180 -0
  4. examples/evals_old/README.md +98 -0
  5. examples/evals_old/__init__.py +6 -0
  6. examples/evals_old/compare_models.py +1037 -0
  7. examples/evals_old/example_log.md +145 -0
  8. examples/evals_old/run_demo.sh +126 -0
  9. examples/evals_old/trace_analysis.py +270 -0
  10. examples/finetuning_old/_backup_synth_qwen/config.toml +29 -0
  11. examples/finetuning_old/_backup_synth_qwen/example_log.md +324 -0
  12. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +60 -0
  13. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +239 -0
  14. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +109 -0
  15. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +1924 -0
  16. examples/finetuning_old/_backup_synth_qwen/readme.md +49 -0
  17. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +114 -0
  18. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +195 -0
  19. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +118 -0
  20. examples/finetuning_old/synth_qwen_v1/README.md +68 -0
  21. examples/finetuning_old/synth_qwen_v1/filter_traces.py +60 -0
  22. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +239 -0
  23. examples/finetuning_old/synth_qwen_v1/finetune.py +46 -0
  24. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +71 -0
  25. examples/finetuning_old/synth_qwen_v1/infer.py +37 -0
  26. examples/finetuning_old/synth_qwen_v1/poll.py +44 -0
  27. examples/finetuning_old/synth_qwen_v1/prepare_data.py +35 -0
  28. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +109 -0
  29. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +1932 -0
  30. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +207 -0
  31. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +232 -0
  32. examples/finetuning_old/synth_qwen_v1/upload_data.py +34 -0
  33. examples/finetuning_old/synth_qwen_v1/util.py +147 -0
  34. examples/rl/README.md +169 -0
  35. examples/rl/configs/eval_base_qwen.toml +15 -0
  36. examples/rl/configs/eval_rl_qwen.toml +11 -0
  37. examples/rl/configs/rl_from_base_qwen.toml +35 -0
  38. examples/rl/configs/rl_from_base_qwen17.toml +74 -0
  39. examples/rl/configs/rl_from_ft_qwen.toml +35 -0
  40. examples/rl/download_dataset.py +64 -0
  41. examples/rl/run_eval.py +435 -0
  42. examples/rl/run_rl_and_save.py +94 -0
  43. examples/rl/task_app/README.md +22 -0
  44. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +8 -8
  45. examples/rl/task_app/math_task_app.py +107 -0
  46. examples/rl_old/task_app.py +962 -0
  47. examples/run_crafter_demo.sh +10 -0
  48. examples/warming_up_to_rl/analyze_trace_db.py +420 -0
  49. examples/warming_up_to_rl/configs/crafter_fft.toml +48 -0
  50. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  51. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +20 -0
  52. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +13 -0
  53. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +23 -0
  54. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +73 -0
  55. examples/warming_up_to_rl/configs/rl_from_ft.toml +56 -0
  56. examples/warming_up_to_rl/export_trace_sft.py +541 -0
  57. examples/warming_up_to_rl/groq_test.py +88 -0
  58. examples/warming_up_to_rl/manage_secrets.py +127 -0
  59. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  60. examples/warming_up_to_rl/old/notes.md +73 -0
  61. examples/warming_up_to_rl/readme.md +172 -0
  62. examples/warming_up_to_rl/run_eval.py +434 -0
  63. examples/warming_up_to_rl/run_fft_and_save.py +309 -0
  64. examples/warming_up_to_rl/run_local_rollout.py +188 -0
  65. examples/warming_up_to_rl/run_local_rollout_modal.py +160 -0
  66. examples/warming_up_to_rl/run_local_rollout_parallel.py +342 -0
  67. examples/warming_up_to_rl/run_local_rollout_traced.py +372 -0
  68. examples/warming_up_to_rl/run_rl_and_save.py +101 -0
  69. examples/warming_up_to_rl/run_rollout_remote.py +129 -0
  70. examples/warming_up_to_rl/task_app/README.md +38 -0
  71. {synth_ai/task/apps → examples/warming_up_to_rl/task_app}/grpo_crafter.py +7 -7
  72. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +165 -0
  73. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  74. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  75. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +145 -0
  76. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1271 -0
  77. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  78. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +429 -0
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +442 -0
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +96 -0
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +302 -0
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +202 -0
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +512 -0
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +102 -0
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +985 -0
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +197 -0
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1749 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +217 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +160 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +146 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +58 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +61 -0
  98. synth_ai/api/train/config_finder.py +18 -18
  99. synth_ai/api/train/env_resolver.py +28 -1
  100. synth_ai/cli/task_apps.py +264 -55
  101. synth_ai/demo_registry.py +7 -7
  102. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  103. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +54 -0
  104. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  105. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +165 -0
  106. synth_ai/task/apps/__init__.py +54 -13
  107. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/METADATA +1 -1
  108. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/RECORD +112 -13
  109. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/top_level.txt +1 -0
  110. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/WHEEL +0 -0
  111. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/entry_points.txt +0 -0
  112. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/licenses/LICENSE +0 -0
examples/rl/README.md ADDED
@@ -0,0 +1,169 @@
+ # Math RL Demo (Single Step)
+
+ This example trains a reinforcement learning policy on single-step math problems from the [Hendrycks MATH benchmark](https://huggingface.co/datasets/nlile/hendrycks-MATH-benchmark). Each episode consists of a single tool call: the model must emit a `math_submit` function call whose `answer` field contains the final solution. Missing or malformed tool calls receive negative reward; correct answers earn positive reward.
+
+ ## Quick Commands
+
+ ```bash
+ # Serve locally with tracing
+ uvx synth-ai serve math-single-step --port 8101 --env-file examples/rl/.env --trace traces/math
+
+ # Modal deployment
+ uvx synth-ai deploy --name synth-math-single-step --env-file examples/rl/.env
+
+ # Evaluate base Qwen policy (validation split)
+ uv run python examples/rl/run_eval.py --toml examples/rl/configs/eval_base_qwen.toml
+
+ # Launch RL job from base model
+ uvx synth-ai train --type rl --config examples/rl/configs/rl_from_base_qwen.toml
+
+ # Evaluate RL checkpoint on held-out test split
+ uv run python examples/rl/run_eval.py --toml examples/rl/configs/eval_rl_qwen.toml
+ ```
+
+ ## 1. Prerequisites
+
+ - Python 3.11+
+ - `uv`/`uvx`
+ - Modal CLI (`modal token new`) for deployment
+ - `.env` at `examples/rl/.env` containing at least:
+   - `SYNTH_API_KEY`
+   - `ENVIRONMENT_API_KEY`
+ - Optional: `TASK_APP_URL` (Modal URL), `GROQ_API_KEY`, `OPENAI_API_KEY`
+
+ Run `uvx synth-ai setup` to populate the `.env` if you have not paired the SDK before.
+
+ ## 2. Task App
+
+ The task app is defined in `examples/rl/task_app/math_single_step.py` and registered as `math-single-step`. It loads problems from the Hugging Face dataset (configurable via `MATH_DATASET_*` env vars) and manages per-episode state with an in-memory environment manager.
+
+ - **Observation**: a single math problem (string) plus dataset metadata.
+ - **Actions**: exactly one `math_submit` tool call with an `answer` string.
+ - **Rewards** (sketched below):
+   - `+1.0` for a correct answer
+   - `0.0` for an incorrect answer
+   - `-0.5` if the tool call omits an answer or uses the wrong tool
+   - `-1.0` when no tool call is provided
+
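+ A minimal sketch of that scoring rule (illustrative only; the actual grading lives in the task app and may normalize answers differently):
+
+ ```python
+ # `tool_call` is assumed to be the parsed function call, e.g.
+ # {"name": "math_submit", "arguments": {"answer": "42"}}, or None if absent.
+ def score_episode(tool_call: dict | None, expected: str) -> float:
+     if tool_call is None:
+         return -1.0  # no tool call at all
+     if tool_call.get("name") != "math_submit":
+         return -0.5  # wrong tool
+     answer = str((tool_call.get("arguments") or {}).get("answer") or "").strip()
+     if not answer:
+         return -0.5  # tool call omitted the answer
+     return 1.0 if answer == expected.strip() else 0.0
+ ```
+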
+ Serve locally with tracing to capture trajectories:
+
+ ```bash
+ uvx synth-ai serve math-single-step \
+   --port 8101 \
+   --env-file examples/rl/.env \
+   --trace traces/math \
+   --trace-db traces/math/synth_ai.db
+ ```
+
+ Deploy or serve on Modal using the same env file; the registration includes a `ModalDeploymentConfig` that installs the `datasets` package automatically.
+
+ ## 3. Evaluation
+
+ `examples/rl/run_eval.py` evaluates a policy by sampling deterministic seeds from the dataset splits. TOML configuration controls the model, split, and number of episodes. Example config (`eval_base_qwen.toml`):
+
+ ```toml
+ provider = "synth"
+ task_app_url = "http://localhost:8101"
+ model = "Qwen/Qwen3-4B"
+ split = "validation"
+ num_episodes = 50
+ seed_start = 0
+
+ [policy]
+ inference_url = "http://localhost:8000/api/inference"
+ max_tokens = 128
+ temperature = 0.0
+ # Optional: override headers for inference requests
+ # [policy.headers]
+ # Authorization = "Bearer ..."
+ ```
+
+ The `[policy]` table maps directly to the inference payload; add `[policy.headers]` if you need to forward custom HTTP headers (e.g., `Authorization`). If `SYNTH_API_KEY` is present, the evaluator automatically sends `Authorization: Bearer <key>`.
+
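+ As a rough sketch, the request construction amounts to something like the following (the payload shape is simplified; `run_eval.py` is the actual implementation):
+
+ ```python
+ import os
+ import tomllib  # Python 3.11+
+
+ import httpx
+
+ with open("examples/rl/configs/eval_base_qwen.toml", "rb") as fh:
+     cfg = tomllib.load(fh)
+
+ policy = dict(cfg["policy"])
+ url = policy.pop("inference_url")
+ headers = dict(policy.pop("headers", {}))  # optional [policy.headers]
+ if os.environ.get("SYNTH_API_KEY"):        # automatic bearer auth
+     headers.setdefault("Authorization", f"Bearer {os.environ['SYNTH_API_KEY']}")
+
+ payload = {"model": cfg["model"], **policy}  # max_tokens, temperature, ...
+ resp = httpx.post(url, json=payload, headers=headers, timeout=60.0)
+ resp.raise_for_status()
+ ```
+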
+ Pass `--use-rollout` to exercise the server-side rollout endpoint instead of the per-step API.
+
+ The script reports accuracy and a breakdown of failure modes (`missing_tool_call`, `blank_answer`, etc.).
+
+ ## 4. RL Training
+
+ Example RL config (`configs/rl_from_base_qwen.toml`):
+
+ ```toml
+ [services]
+ task_url = "https://your-app.modal.run"
+
+ [model]
+ base = "Qwen/Qwen3-4B"
+
+ [data]
+ split = "train"
+ seed_start = 0
+ episodes_per_iteration = 2048
+
+ [training]
+ max_turns = 1
+ ops = ["agent", "env"]
+ batch_size = 128
+ group_size = 1024
+ reward_positive = 1.0
+ reward_negative_no_tool = -1.0
+ reward_negative_no_answer = -0.5
+
+ [policy]
+ model = "Qwen/Qwen3-4B"
+ inference_url = "https://your-inference-host"
+ max_tokens = 128
+ temperature = 0.0
+
+ [tags]
+ experiment = "math_single_step"
+ ```
+
+ Submit jobs interactively with:
+
+ ```bash
+ uvx synth-ai train --type rl --config examples/rl/configs/rl_from_base_qwen.toml
+ ```
+
+ The CLI ensures the task app is reachable (`/health`, `/task_info`), prompts for missing secrets, and polls job status until completion. For scripted automation, use `run_rl_and_save.py`:
+
+ ```bash
+ uv run python examples/rl/run_rl_and_save.py \
+   --config examples/rl/configs/rl_from_base_qwen.toml \
+   --backend https://backend.synth.ai/api
+ ```
+
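+ The polling pattern looks roughly like this (a sketch: the exact endpoint path and status names are assumptions; `run_rl_and_save.py` is the canonical client for the `/rl/jobs` API):
+
+ ```python
+ import os
+ import time
+
+ import httpx
+
+ BACKEND = "https://backend.synth.ai/api"
+ HEADERS = {"Authorization": f"Bearer {os.environ['SYNTH_API_KEY']}"}
+
+ def wait_for_job(job_id: str, interval_s: float = 10.0) -> dict:
+     """Poll a hypothetical /rl/jobs/<id> endpoint until it reaches a terminal state."""
+     while True:
+         job = httpx.get(f"{BACKEND}/rl/jobs/{job_id}", headers=HEADERS).json()
+         if job.get("status") in {"succeeded", "failed", "cancelled"}:
+             return job
+         time.sleep(interval_s)
+ ```
+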
+ ## 5. Evaluating RL Outputs
+
+ After training completes, set `model = "rl:<job_or_model_id>"` in `configs/eval_rl_qwen.toml` (and update `split = "test"` for a held-out set). Re-run `run_eval.py` to compare:
+
+ ```bash
+ uv run python examples/rl/run_eval.py --toml examples/rl/configs/eval_rl_qwen.toml
+ ```
+
+ Record both validation (pre-RL) and test (post-RL) accuracy to quantify improvements.
+
+ ## 6. Dataset Notes
+
+ - By default the task app loads the [Hendrycks MATH benchmark](https://huggingface.co/datasets/nlile/hendrycks-MATH-benchmark). Override via the `MATH_DATASET_NAME` / `MATH_DATASET_CONFIG` env vars if you want a different variant. The dataset is public and automatically downloaded when the task app starts; the server will fail fast with a clear error if it cannot be fetched.
+ - For offline use, run `uv run python examples/rl/download_dataset.py --output-dir examples/rl/data --dataset nlile/hendrycks-MATH-benchmark --config algebra --limit 2000`, then start the task app with `MATH_DATASET_LOCAL_DIR=examples/rl/data` (or set `MATH_DATASET_LOCAL_<SPLIT>_FILE`).
+ - Hugging Face downloads occur at runtime; pre-fetch locally or mount a Modal volume if you need offline access.
+ - Seeds map directly to dataset indices. Use `seed_start` to control determinism in configs and evaluations (see the sketch below).
+
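+ A rough illustration of that seed-to-index mapping (the wrap-around for seeds beyond the split length is an assumption):
+
+ ```python
+ split = [f"problem {i}" for i in range(1000)]  # stand-in for a loaded dataset split
+ seed_start, num_episodes = 0, 50
+ seeds = range(seed_start, seed_start + num_episodes)
+ episodes = [split[seed % len(split)] for seed in seeds]  # seed n -> dataset index n
+ ```
+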
+ ## 7. Additional Utilities
+
+ - `examples/rl/task_app/math_task_app.py` – legacy runner (`python .../math_task_app.py --reload`).
+ - `examples/rl/run_eval.py` – CLI evaluation helper (supports proxying Groq or hitting arbitrary inference URLs).
+ - `examples/rl/run_rl_and_save.py` – thin wrapper around the Synth `/rl/jobs` API.
+
+ For broader background on Synth task apps, CLI commands, and tracing, see the new documentation under `docs/`.
+
examples/rl/configs/eval_base_qwen.toml ADDED
@@ -0,0 +1,15 @@
+ provider = "synth"
+ task_app_url = "http://localhost:8101"
+ model = "Qwen/Qwen3-1.7B"
+ split = "validation"
+ num_episodes = 50
+ seed_start = 0
+
+ [policy]
+ inference_url = "http://localhost:8000/api/inference"
+ max_tokens = 128
+ temperature = 0.0
+
+ # Optionally supply custom headers
+ # [policy.headers]
+ # Authorization = "Bearer ..."
examples/rl/configs/eval_rl_qwen.toml ADDED
@@ -0,0 +1,11 @@
+ provider = "synth"
+ task_app_url = "https://your-math-task.modal.run"
+ model = "rl:REPLACE_WITH_JOB_ID"
+ split = "test"
+ num_episodes = 200
+ seed_start = 100000
+
+ [policy]
+ inference_url = "https://your-inference-host"
+ max_tokens = 128
+ temperature = 0.0
examples/rl/configs/rl_from_base_qwen.toml ADDED
@@ -0,0 +1,35 @@
+ [services]
+ task_url = "https://your-math-task.modal.run"
+
+ [model]
+ base = "Qwen/Qwen3-4B"
+
+ [policy]
+ model = "Qwen/Qwen3-4B"
+ inference_url = "https://your-inference-host"
+ max_tokens = 128
+ temperature = 0.0
+
+ [data]
+ split = "train"
+ seed_start = 0
+ episodes_per_iteration = 2048
+ evaluation_split = "validation"
+ evaluation_episodes = 256
+
+ [training]
+ max_turns = 1
+ ops = ["agent", "env"]
+ batch_size = 128
+ group_size = 1024
+ reward_positive = 1.0
+ reward_negative_no_tool = -1.0
+ reward_negative_no_answer = -0.5
+ learning_rate = 5e-6
+
+ [compute]
+ gpu_type = "A10G"
+ gpu_count = 4
+
+ [tags]
+ experiment = "math_single_step"
examples/rl/configs/rl_from_base_qwen17.toml ADDED
@@ -0,0 +1,74 @@
+ [algorithm]
+ type = "online"
+ method = "policy_gradient"
+ variety = "gspo"
+
+ [services]
+ task_url = "http://localhost:8101"
+
+ [model]
+ base = "Qwen/Qwen3-1.7B"
+
+ [policy]
+ model = "Qwen/Qwen3-1.7B"
+ inference_url = "http://localhost:8000/api/inference"
+ max_tokens = 1028
+ temperature = 0.2
+
+ [data]
+ split = "train"
+ seed_start = 0
+ episodes_per_iteration = 1280  # group_size (16) * batch_size (2) * 40 batches
+ evaluation_split = "validation"
+ evaluation_episodes = 50
+
+ [training]
+ max_turns = 1
+ ops = ["agent", "env"]
+ batch_size = 2
+ group_size = 16
+ reward_positive = 1.0
+ reward_negative_no_tool = -1.0
+ reward_negative_no_answer = -0.5
+ learning_rate = 5e-6
+ log_interval = 1
+ weight_sync_interval = 1
+
+ [training.weight_sync]
+ enable = true
+ targets = ["policy"]
+
+ [compute]
+ gpu_type = "H100"
+ gpu_count = 4
+
+ [topology]
+ type = "single_node_split"
+ gpus_for_vllm = 2
+ gpus_for_training = 1
+ gpus_for_ref = 1
+ tensor_parallel = 1
+
+ [vllm]
+ tensor_parallel_size = 1
+ max_model_len = 4096
+
+ [reference]
+ placement = "dedicated"
+ port = 8002
+ tp = 1
+ health_max_wait_s = 180
+ health_interval_ms = 300
+
+ [rollout]
+ policy_name = "math-single-step"
+ max_turns = 1
+ episodes_per_batch = 32  # group_size * batch_size
+
+ [evaluation]
+ instances = 32
+ every_n_iters = 10
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+
+ [tags]
+ experiment = "math_single_step_qwen17"
examples/rl/configs/rl_from_ft_qwen.toml ADDED
@@ -0,0 +1,35 @@
+ [services]
+ task_url = "https://your-math-task.modal.run"
+
+ [model]
+ source = "ft:REPLACE_WITH_MODEL_ID"
+
+ [policy]
+ model = "ft:REPLACE_WITH_MODEL_ID"
+ inference_url = "https://your-inference-host"
+ max_tokens = 128
+ temperature = 0.0
+
+ [data]
+ split = "train"
+ seed_start = 0
+ episodes_per_iteration = 2048
+ evaluation_split = "validation"
+ evaluation_episodes = 256
+
+ [training]
+ max_turns = 1
+ ops = ["agent", "env"]
+ batch_size = 128
+ group_size = 1024
+ reward_positive = 1.0
+ reward_negative_no_tool = -1.0
+ reward_negative_no_answer = -0.5
+ learning_rate = 5e-6
+
+ [compute]
+ gpu_type = "A10G"
+ gpu_count = 4
+
+ [tags]
+ experiment = "math_single_step_from_fft"
examples/rl/download_dataset.py ADDED
@@ -0,0 +1,64 @@
+ #!/usr/bin/env python3
+ """Download subsets of the MATH dataset to local JSONL files."""
+
+ from __future__ import annotations
+
+ import argparse
+ import json
+ from pathlib import Path
+ from typing import Any
+
+ from datasets import load_dataset
+
+
+ def extract_examples(dataset: Any, *, limit: int | None) -> list[dict[str, str]]:
+     if limit is not None:
+         dataset = dataset.select(range(min(limit, len(dataset))))
+     examples: list[dict[str, str]] = []
+     for item in dataset:
+         problem = (item.get("problem") or "").strip()
+         solution = item.get("solution") or ""
+         if isinstance(solution, list):
+             solution = "\n".join(str(part) for part in solution)
+         examples.append({
+             "problem": problem,
+             "solution": solution,
+         })
+     return examples
+
+
+ def write_jsonl(path: Path, rows: list[dict[str, str]]) -> None:
+     path.parent.mkdir(parents=True, exist_ok=True)
+     with path.open("w", encoding="utf-8") as fh:
+         for row in rows:
+             fh.write(json.dumps(row, ensure_ascii=False) + "\n")
+
+
+ def main() -> None:
+     parser = argparse.ArgumentParser(description="Download MATH dataset splits to JSONL for offline use")
+     parser.add_argument("--output-dir", default="examples/rl/data", help="Directory to write <split>.jsonl files")
+     parser.add_argument("--dataset", default="nlile/hendrycks-MATH-benchmark", help="Hugging Face dataset identifier")
+     parser.add_argument("--config", default="algebra", help="Hugging Face dataset config (if required)")
+     parser.add_argument("--splits", nargs="*", default=["train", "validation", "test"], help="Splits to download")
+     parser.add_argument("--limit", type=int, default=None, help="Optional cap on examples per split")
+     args = parser.parse_args()
+
+     output_dir = Path(args.output_dir).expanduser()
+     output_dir.mkdir(parents=True, exist_ok=True)
+
+     for split in args.splits:
+         print(f"[INFO] Downloading {args.dataset} ({args.config}) split={split}")
+         if args.config:
+             dataset = load_dataset(args.dataset, args.config, split=split)
+         else:
+             dataset = load_dataset(args.dataset, split=split)
+         rows = extract_examples(dataset, limit=args.limit)
+         out_path = output_dir / f"{split}.jsonl"
+         write_jsonl(out_path, rows)
+         print(f"[INFO] Wrote {len(rows)} examples to {out_path}")
+
+     print("Done. Set MATH_DATASET_LOCAL_DIR to the output directory when serving the task app.")
+
+
+ if __name__ == "__main__":
+     main()
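+ # Example usage (a sketch; the flags match the argparse definitions above):
+ #   uv run python examples/rl/download_dataset.py --output-dir examples/rl/data --limit 500
+ #   MATH_DATASET_LOCAL_DIR=examples/rl/data uvx synth-ai serve math-single-step
+ # Each line of <split>.jsonl is one JSON object: {"problem": "...", "solution": "..."}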