synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai has been flagged as potentially problematic; consult the registry's advisory page for more details.

Files changed (353):
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +23 -17
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +53 -52
  22. examples/rl/run_rl_and_save.py +29 -12
  23. examples/rl/task_app/math_single_step.py +180 -41
  24. examples/rl/task_app/math_task_app.py +14 -6
  25. examples/sft/README.md +139 -0
  26. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  27. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  28. examples/sft/evaluate.py +117 -0
  29. examples/sft/export_dataset.py +117 -0
  30. examples/sft/generate_traces.py +162 -0
  31. examples/swe/__init__.py +12 -0
  32. examples/swe/task_app/README.md +105 -0
  33. examples/swe/task_app/__init__.py +2 -0
  34. examples/swe/task_app/grpo_swe_mini.py +571 -0
  35. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  36. examples/swe/task_app/hosted/README.md +173 -0
  37. examples/swe/task_app/hosted/__init__.py +5 -0
  38. examples/swe/task_app/hosted/branching.py +143 -0
  39. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  40. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  41. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  42. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  43. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  44. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  45. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  46. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  47. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  50. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  53. examples/swe/task_app/hosted/hosted_app.py +204 -0
  54. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  55. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  56. examples/swe/task_app/hosted/main.py +100 -0
  57. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  58. examples/swe/task_app/hosted/registry.py +195 -0
  59. examples/swe/task_app/hosted/rollout.py +1869 -0
  60. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  61. examples/swe/task_app/hosted/storage/volume.py +211 -0
  62. examples/swe/task_app/hosted/test_agents.py +161 -0
  63. examples/swe/task_app/hosted/test_service.py +137 -0
  64. examples/swe/task_app/hosted/utils.py +62 -0
  65. examples/vlm/README.md +68 -0
  66. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  67. examples/vlm/crafter_image_only_agent.py +207 -0
  68. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  69. examples/vlm/filter_image_rows.py +63 -0
  70. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  71. examples/warming_up_to_rl/analyze_trace_db.py +12 -10
  72. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  73. examples/warming_up_to_rl/export_trace_sft.py +218 -36
  74. examples/warming_up_to_rl/groq_test.py +15 -8
  75. examples/warming_up_to_rl/manage_secrets.py +29 -25
  76. examples/warming_up_to_rl/readme.md +9 -2
  77. examples/warming_up_to_rl/run_eval.py +137 -61
  78. examples/warming_up_to_rl/run_fft_and_save.py +131 -60
  79. examples/warming_up_to_rl/run_local_rollout.py +88 -39
  80. examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
  81. examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
  82. examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
  83. examples/warming_up_to_rl/run_rl_and_save.py +35 -12
  84. examples/warming_up_to_rl/run_rollout_remote.py +44 -19
  85. examples/warming_up_to_rl/task_app/README.md +6 -2
  86. examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
  87. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  111. synth/__init__.py +14 -0
  112. synth_ai/__init__.py +20 -4
  113. synth_ai/api/models/supported.py +376 -0
  114. synth_ai/api/train/builders.py +157 -26
  115. synth_ai/api/train/cli.py +213 -57
  116. synth_ai/api/train/config_finder.py +65 -5
  117. synth_ai/api/train/env_resolver.py +33 -15
  118. synth_ai/api/train/pollers.py +13 -4
  119. synth_ai/api/train/supported_algos.py +139 -0
  120. synth_ai/api/train/task_app.py +5 -3
  121. synth_ai/api/train/utils.py +33 -48
  122. synth_ai/cli/__init__.py +19 -4
  123. synth_ai/cli/_modal_wrapper.py +28 -0
  124. synth_ai/cli/_typer_patch.py +49 -0
  125. synth_ai/cli/balance.py +2 -3
  126. synth_ai/cli/calc.py +1 -1
  127. synth_ai/cli/demo.py +21 -6
  128. synth_ai/cli/recent.py +2 -2
  129. synth_ai/cli/rl_demo.py +77 -17
  130. synth_ai/cli/root.py +116 -39
  131. synth_ai/cli/status.py +2 -2
  132. synth_ai/cli/task_apps.py +1709 -243
  133. synth_ai/cli/traces.py +7 -4
  134. synth_ai/cli/turso.py +73 -0
  135. synth_ai/cli/watch.py +12 -18
  136. synth_ai/core/experiment.py +0 -2
  137. synth_ai/demo_registry.py +68 -31
  138. synth_ai/demos/core/cli.py +516 -194
  139. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  140. synth_ai/demos/demo_task_apps/core.py +64 -28
  141. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  142. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
  143. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  144. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  145. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  146. synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
  147. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  148. synth_ai/environments/examples/bandit/engine.py +12 -4
  149. synth_ai/environments/examples/bandit/taskset.py +4 -4
  150. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  151. synth_ai/environments/reproducibility/tree.py +5 -6
  152. synth_ai/environments/service/app.py +11 -12
  153. synth_ai/environments/service/core_routes.py +10 -9
  154. synth_ai/environments/stateful/engine.py +1 -1
  155. synth_ai/environments/tasks/core.py +1 -0
  156. synth_ai/environments/tasks/filters.py +5 -6
  157. synth_ai/environments/tasks/utils.py +4 -5
  158. synth_ai/evals/base.py +0 -2
  159. synth_ai/handshake.py +11 -9
  160. synth_ai/http.py +1 -1
  161. synth_ai/http_client.py +43 -11
  162. synth_ai/inference/__init__.py +0 -2
  163. synth_ai/inference/client.py +20 -6
  164. synth_ai/jobs/client.py +103 -78
  165. synth_ai/learning/__init__.py +41 -6
  166. synth_ai/learning/algorithms.py +14 -0
  167. synth_ai/learning/client.py +121 -29
  168. synth_ai/learning/config.py +2 -40
  169. synth_ai/learning/constants.py +0 -2
  170. synth_ai/learning/ft_client.py +4 -56
  171. synth_ai/learning/health.py +13 -7
  172. synth_ai/learning/jobs.py +43 -47
  173. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  174. synth_ai/learning/rl/client.py +267 -0
  175. synth_ai/learning/rl/config.py +31 -0
  176. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  177. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  178. synth_ai/learning/rl/secrets.py +13 -0
  179. synth_ai/learning/rl_client.py +2 -253
  180. synth_ai/learning/sft/__init__.py +29 -0
  181. synth_ai/learning/sft/client.py +68 -0
  182. synth_ai/learning/sft/config.py +270 -0
  183. synth_ai/learning/sft/data.py +295 -0
  184. synth_ai/learning/sse.py +25 -26
  185. synth_ai/learning/validators.py +25 -24
  186. synth_ai/lm/__init__.py +21 -47
  187. synth_ai/task/__init__.py +26 -27
  188. synth_ai/task/apps/__init__.py +18 -19
  189. synth_ai/task/auth.py +35 -23
  190. synth_ai/task/client.py +15 -13
  191. synth_ai/task/contracts.py +37 -35
  192. synth_ai/task/datasets.py +9 -6
  193. synth_ai/task/errors.py +11 -10
  194. synth_ai/task/health.py +17 -11
  195. synth_ai/task/json.py +58 -24
  196. synth_ai/task/proxy.py +15 -14
  197. synth_ai/task/rubrics.py +22 -15
  198. synth_ai/task/server.py +43 -17
  199. synth_ai/task/tracing_utils.py +12 -7
  200. synth_ai/task/validators.py +0 -1
  201. synth_ai/task/vendors.py +5 -7
  202. synth_ai/tracing_v3/__init__.py +2 -0
  203. synth_ai/tracing_v3/abstractions.py +21 -4
  204. synth_ai/tracing_v3/db_config.py +26 -1
  205. synth_ai/tracing_v3/decorators.py +18 -15
  206. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  207. synth_ai/tracing_v3/hooks.py +6 -4
  208. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  209. synth_ai/tracing_v3/replica_sync.py +1 -0
  210. synth_ai/tracing_v3/session_tracer.py +63 -16
  211. synth_ai/tracing_v3/storage/base.py +89 -1
  212. synth_ai/tracing_v3/storage/config.py +21 -8
  213. synth_ai/tracing_v3/storage/factory.py +10 -8
  214. synth_ai/tracing_v3/storage/utils.py +4 -2
  215. synth_ai/tracing_v3/turso/daemon.py +7 -2
  216. synth_ai/tracing_v3/turso/models.py +5 -2
  217. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  218. synth_ai/tracing_v3/utils.py +4 -3
  219. synth_ai/v0/api/__init__.py +8 -0
  220. synth_ai/v0/api/models/__init__.py +8 -0
  221. synth_ai/v0/api/models/supported.py +8 -0
  222. synth_ai/v0/config/__init__.py +15 -0
  223. synth_ai/v0/config/base_url.py +12 -0
  224. synth_ai/v0/lm/__init__.py +51 -0
  225. synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
  226. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  227. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  228. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  229. synth_ai/{lm → v0/lm}/config.py +6 -1
  230. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  231. synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
  232. synth_ai/{lm → v0/lm}/core/main.py +19 -7
  233. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  234. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
  235. synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
  236. synth_ai/{lm → v0/lm}/overrides.py +4 -4
  237. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  238. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  239. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  240. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  241. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
  242. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  243. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  244. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
  245. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
  246. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
  247. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
  248. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  249. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  250. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  251. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  252. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  253. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  254. synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
  255. synth_ai/v0/tracing/upload.py +32 -135
  256. synth_ai/v0/tracing_v3/__init__.py +10 -0
  257. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  258. synth_ai/v0/tracing_v3/decorators.py +3 -0
  259. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  260. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  261. synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
  262. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -264
  263. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
  264. examples/common_old/backend.py +0 -21
  265. examples/evals_old/README.md +0 -98
  266. examples/evals_old/__init__.py +0 -6
  267. examples/evals_old/compare_models.py +0 -1037
  268. examples/evals_old/example_log.md +0 -145
  269. examples/evals_old/run_demo.sh +0 -126
  270. examples/evals_old/trace_analysis.py +0 -270
  271. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  272. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  273. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  274. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
  275. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  276. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  277. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  278. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  279. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  280. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
  281. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  282. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  283. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
  284. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  285. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  286. examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
  287. examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
  288. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  289. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  290. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
  291. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
  292. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
  293. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  294. examples/finetuning_old/synth_qwen_v1/util.py +0 -147
  295. examples/rl_old/task_app.py +0 -962
  296. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  297. examples/warming_up_to_rl/old/notes.md +0 -73
  298. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
  299. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  300. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  301. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  302. synth_ai/experimental/synth_oss.py +0 -446
  303. synth_ai/install_sqld.sh +0 -40
  304. synth_ai/learning/filtering.py +0 -0
  305. synth_ai/learning/offline/dpo.py +0 -0
  306. synth_ai/learning/offline/providers.py +0 -7
  307. synth_ai/learning/offline/sft.py +0 -0
  308. synth_ai/learning/offline/shared.py +0 -0
  309. synth_ai/learning/online/grpo.py +0 -0
  310. synth_ai/learning/online/irft.py +0 -0
  311. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  312. synth_ai/learning/prompts/gepa.py +0 -0
  313. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  314. synth_ai/learning/prompts/mipro.py +0 -289
  315. synth_ai/learning/prompts/random_search.py +0 -246
  316. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  317. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  318. synth_ai/rl/secrets.py +0 -19
  319. synth_ai/scripts/verify_rewards.py +0 -100
  320. synth_ai/tracing/__init__.py +0 -30
  321. synth_ai/tracing_v1/__init__.py +0 -33
  322. synth_ai/tracing_v3/turso/__init__.py +0 -25
  323. synth_ai/tracing_v3/turso/manager.py +0 -774
  324. synth_ai/zyk/__init__.py +0 -30
  325. synth_ai-0.2.9.dev4.dist-info/METADATA +0 -131
  326. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  327. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  328. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  329. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  330. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  331. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  332. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  333. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  334. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  335. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  336. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  337. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  338. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  339. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  340. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  341. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  342. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  343. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  344. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  345. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  346. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  347. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  348. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  349. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  350. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  351. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env python3
2
+ """Smoke test: Qwen3 Coder inference via the Synth prod proxy endpoint.
3
+
4
+ No CLI args. Reads SYNTH_API_KEY from env. Optional overrides via env:
5
+ - BACKEND_BASE_URL (defaults to https://agent-learning.onrender.com/api)
6
+ - MODEL (defaults to Qwen/Qwen3-Coder-30B-A3B-Instruct)
7
+ - PROMPT (defaults to a simple coding prompt)
8
+
9
+ Run:
10
+ SYNTH_API_KEY=sk_... uv run python examples/qwen_coder/infer_prod_proxy.py
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import asyncio
16
+ import os
17
+ from typing import Any
18
+
19
+ import httpx
20
+
21
+
22
+ def _backend_root() -> str:
23
+ raw = os.getenv("BACKEND_BASE_URL", "https://agent-learning.onrender.com/api").strip()
24
+ if raw.endswith("/api"):
25
+ raw = raw[:-4]
26
+ return raw.rstrip("/")
27
+
28
+
29
+ async def main() -> None:
30
+ api_key = os.getenv("SYNTH_API_KEY", "").strip()
31
+ if not api_key:
32
+ raise SystemExit("SYNTH_API_KEY required in environment")
33
+
34
+ model = os.getenv("MODEL", "Qwen/Qwen3-Coder-30B-A3B-Instruct")
35
+ prompt = os.getenv(
36
+ "PROMPT",
37
+ "Write a Python function to reverse a string, then show an example call.",
38
+ )
39
+
40
+ # Prod proxy endpoint
41
+ url = f"{_backend_root()}/api/inference/v1/chat/completions"
42
+
43
+ payload: dict[str, Any] = {
44
+ "model": model,
45
+ "messages": [{"role": "user", "content": prompt}],
46
+ "temperature": 0.2,
47
+ "max_tokens": 256,
48
+ "thinking_budget": 256,
49
+ }
50
+
51
+ async with httpx.AsyncClient(timeout=60.0) as http:
52
+ resp = await http.post(
53
+ url,
54
+ headers={
55
+ "Authorization": f"Bearer {api_key}",
56
+ "Content-Type": "application/json",
57
+ },
58
+ json=payload,
59
+ )
60
+ resp.raise_for_status()
61
+ data = resp.json()
62
+ # Print assistant content (compact)
63
+ try:
64
+ msg = data.get("choices", [{}])[0].get("message", {})
65
+ print(msg.get("content") or data)
66
+ except Exception:
67
+ print(data)
68
+
69
+
70
+ if __name__ == "__main__":
71
+ asyncio.run(main())
72
+
73
+
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env python3
2
+ """One-shot inference for Qwen3 (and Coder) models via the Synth backend proxy.
3
+
4
+ Usage examples:
5
+
6
+ SYNTH_API_KEY=sk_... BACKEND_BASE_URL=https://agent-learning.onrender.com/api \
7
+ uv run python examples/qwen_coder/infer_via_synth.py \
8
+ --model Qwen/Qwen3-Coder-30B-A3B-Instruct \
9
+ --prompt "Write a Python function to reverse a string." \
10
+ --max-tokens 128 --temperature 0.2
11
+
12
+ Optionally you can point to a specific inference host (e.g., your vLLM or task-app proxy):
13
+
14
+ ... infer_via_synth.py --inference-url https://your-host/api/inference
15
+
16
+ The script defaults the backend base URL to the hosted service if BACKEND_BASE_URL is not set.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import argparse
22
+ import asyncio
23
+ import os
24
+ from typing import Any
25
+
26
+ from synth_ai.inference.client import InferenceClient
27
+
28
+
29
+ def _default_backend() -> str:
30
+ raw = os.getenv("BACKEND_BASE_URL", "https://agent-learning.onrender.com/api").strip()
31
+ return raw if raw.endswith("/api") else (raw + "/api")
32
+
33
+
34
+ async def main() -> None:
35
+ p = argparse.ArgumentParser(description=__doc__)
36
+ p.add_argument(
37
+ "--model",
38
+ default=os.getenv("MODEL", "Qwen/Qwen3-Coder-30B-A3B-Instruct"),
39
+ help="Base or ft:<id> model identifier",
40
+ )
41
+ p.add_argument(
42
+ "--prompt",
43
+ default="Write a Python function to reverse a string.",
44
+ help="User prompt text",
45
+ )
46
+ p.add_argument("--max-tokens", type=int, default=256)
47
+ p.add_argument("--temperature", type=float, default=0.2)
48
+ p.add_argument(
49
+ "--inference-url",
50
+ default=os.getenv("INFERENCE_URL"),
51
+ help="Optional backend inference base (e.g., https://host/api/inference)",
52
+ )
53
+ p.add_argument(
54
+ "--timeout", type=float, default=60.0, help="HTTP timeout seconds for backend calls"
55
+ )
56
+ args = p.parse_args()
57
+
58
+ backend = _default_backend()
59
+ api_key = os.getenv("SYNTH_API_KEY", "").strip()
60
+ if not api_key:
61
+ raise SystemExit("SYNTH_API_KEY required (export it or pass via env-file to uvx)")
62
+
63
+ client = InferenceClient(base_url=backend, api_key=api_key, timeout=args.timeout)
64
+
65
+ body: dict[str, Any] = {
66
+ "model": args.model,
67
+ "messages": [{"role": "user", "content": args.prompt}],
68
+ "max_tokens": int(args.max_tokens),
69
+ "temperature": float(args.temperature),
70
+ }
71
+ if args.inference_url:
72
+ # Backend supports forwarding to a specific host when provided
73
+ body["inference_url"] = str(args.inference_url)
74
+
75
+ resp = await client.create_chat_completion(**body)
76
+ try:
77
+ msg = resp.get("choices", [{}])[0].get("message", {})
78
+ content = msg.get("content")
79
+ print(content or resp)
80
+ except Exception:
81
+ print(resp)
82
+
83
+
84
+ if __name__ == "__main__":
85
+ asyncio.run(main())
86
+
87
+
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ # Optional: pass a .env path as first arg; otherwise relies on current env
5
+ ENV_FILE=${1:-}
6
+
7
+ if [[ -n "${ENV_FILE}" ]]; then
8
+ if [[ ! -f "${ENV_FILE}" ]]; then
9
+ echo "Env file not found: ${ENV_FILE}" >&2
10
+ exit 1
11
+ fi
12
+ set -a; source "${ENV_FILE}"; set +a
13
+ fi
14
+
15
+ # Use prod proxy smoke (base or ft:... via MODEL env)
16
+ uv run python examples/qwen_coder/infer_prod_proxy.py
17
+
18
+
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ # Optional: pass a .env path as first arg; otherwise relies on current env
5
+ ENV_FILE=${1:-}
6
+
7
+ if [[ -n "${ENV_FILE}" ]]; then
8
+ if [[ ! -f "${ENV_FILE}" ]]; then
9
+ echo "Env file not found: ${ENV_FILE}" >&2
10
+ exit 1
11
+ fi
12
+ set -a; source "${ENV_FILE}"; set +a
13
+ fi
14
+
15
+ uvx synth-ai train \
16
+ --type sft \
17
+ --config examples/qwen_coder/configs/coder_lora_30b.toml \
18
+ --dataset examples/qwen_coder/ft_data/coder_sft.small.jsonl \
19
+ --env-file "${ENV_FILE:-}"
20
+
21
+
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env python3
2
+ """Submit a full-parameter SFT job for Qwen/Qwen3-1.7B via Synth API."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import asyncio
7
+ import os
8
+ from typing import Any
9
+
10
+ from examples.qwen_coder._shared import (
11
+ ensure_tiny_dataset,
12
+ optional_validation_dataset,
13
+ resolve_output_path,
14
+ )
15
+ from synth_ai.learning.client import LearningClient
16
+
17
+
18
+ def _backend() -> str:
19
+ raw = os.getenv("BACKEND_BASE_URL", "https://agent-learning.onrender.com/api").strip()
20
+ return raw if raw.endswith("/api") else (raw + "/api")
21
+
22
+
23
+ async def main() -> None:
24
+ api_key = os.getenv("SYNTH_API_KEY", "").strip()
25
+ if not api_key:
26
+ raise SystemExit("SYNTH_API_KEY required in env")
27
+
28
+ backend = _backend()
29
+ client = LearningClient(base_url=backend, api_key=api_key, timeout=60.0)
30
+
31
+ data_path = ensure_tiny_dataset()
32
+ file_id = await client.upload_training_file(str(data_path))
33
+
34
+ validation_file_id: str | None = None
35
+ val_path = optional_validation_dataset()
36
+ if val_path and val_path.exists():
37
+ validation_file_id = await client.upload_training_file(str(val_path))
38
+
39
+ hyper: dict[str, Any] = {
40
+ "n_epochs": int(os.getenv("QWEN_CODER_FULL_EPOCHS", "1")),
41
+ "per_device_batch": int(os.getenv("QWEN_CODER_FULL_PER_DEVICE", "1")),
42
+ "gradient_accumulation_steps": int(os.getenv("QWEN_CODER_FULL_ACCUM", "8")),
43
+ "sequence_length": int(os.getenv("QWEN_CODER_FULL_SEQ_LEN", "4096")),
44
+ "learning_rate": float(os.getenv("QWEN_CODER_FULL_LR", "2e-5")),
45
+ "warmup_ratio": float(os.getenv("QWEN_CODER_FULL_WARMUP", "0.05")),
46
+ "train_kind": os.getenv("QWEN_CODER_FULL_TRAIN_KIND", "full"),
47
+ }
48
+
49
+ metadata = {
50
+ "example": "qwen_coder_full_17b",
51
+ "effective_config": {
52
+ "compute": {
53
+ "gpu_type": os.getenv("SYNTH_GPU_TYPE", "H100"),
54
+ "gpu_count": int(os.getenv("SYNTH_GPU_COUNT", "4")),
55
+ "nodes": int(os.getenv("SYNTH_GPU_NODES", "1")),
56
+ }
57
+ },
58
+ }
59
+
60
+ job = await client.create_job(
61
+ training_type="sft_offline",
62
+ model=os.getenv("QWEN_CODER_FULL_MODEL", "Qwen/Qwen3-1.7B"),
63
+ training_file_id=file_id,
64
+ hyperparameters=hyper,
65
+ metadata=metadata,
66
+ validation_file=validation_file_id,
67
+ )
68
+ job_id = str(job.get("id") or job.get("job_id") or "").strip()
69
+ if not job_id:
70
+ raise SystemExit(f"Invalid create_job response: {job}")
71
+
72
+ await client.start_job(job_id)
73
+
74
+ timeout_seconds = float(os.getenv("SYNTH_TIMEOUT", "7200"))
75
+ poll_interval = float(os.getenv("QWEN_CODER_FULL_POLL_INTERVAL", "10"))
76
+
77
+ job_final = await client.poll_until_terminal(
78
+ job_id,
79
+ interval_seconds=poll_interval,
80
+ max_seconds=timeout_seconds,
81
+ )
82
+
83
+ status = str(job_final.get("status"))
84
+ print(f"Job status: {status}")
85
+ result_model = (
86
+ job_final.get("result", {}).get("model_id")
87
+ if isinstance(job_final.get("result"), dict)
88
+ else None
89
+ )
90
+ print(f"Model ID: {result_model}")
91
+ try:
92
+ out_file = resolve_output_path("ft_model_id_full.txt")
93
+ text = (result_model or "").strip()
94
+ if text:
95
+ out_file.write_text(text + "\n", encoding="utf-8")
96
+ print(f"Wrote {out_file} with ft model id")
97
+ except Exception as exc:
98
+ print(f"Warning: failed to write ft_model_id_full.txt: {exc}")
99
+
100
+
101
+ if __name__ == "__main__":
102
+ asyncio.run(main())
103
+
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env python3
2
+ """Submit a LoRA SFT job for Qwen/Qwen3-Coder-30B-A3B-Instruct via Synth API.
3
+
4
+ Steps:
5
+ - Generate a tiny coder dataset if missing
6
+ - Upload the JSONL
7
+ - Create the job with coder LoRA hyperparameters
8
+ - Start and poll until terminal, then print the resulting model id
9
+
10
+ Env:
11
+ SYNTH_API_KEY (required)
12
+ BACKEND_BASE_URL (defaults to https://agent-learning.onrender.com/api)
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import os
19
+ from typing import Any
20
+
21
+ from examples.qwen_coder._shared import (
22
+ ensure_tiny_dataset,
23
+ optional_validation_dataset,
24
+ resolve_output_path,
25
+ )
26
+ from synth_ai.learning.client import LearningClient
27
+
28
+
29
+ def _backend() -> str:
30
+ raw = os.getenv("BACKEND_BASE_URL", "https://agent-learning.onrender.com/api").strip()
31
+ return raw if raw.endswith("/api") else (raw + "/api")
32
+
33
+
34
+ async def main() -> None:
35
+ api_key = os.getenv("SYNTH_API_KEY", "").strip()
36
+ if not api_key:
37
+ raise SystemExit("SYNTH_API_KEY required in env")
38
+
39
+ backend = _backend()
40
+ client = LearningClient(base_url=backend, api_key=api_key, timeout=60.0)
41
+
42
+ # Ensure dataset exists
43
+ data_path = ensure_tiny_dataset()
44
+
45
+ # Upload training file
46
+ file_id = await client.upload_training_file(str(data_path))
47
+
48
+ # Optional validation file if present alongside training set
49
+ val_path = optional_validation_dataset()
50
+ validation_file_id: str | None = None
51
+ if val_path and val_path.exists():
52
+ validation_file_id = await client.upload_training_file(str(val_path))
53
+
54
+ # Minimal hyperparameters for LoRA SFT (aligned with coder_lora_30b.toml)
55
+ hyper: dict[str, Any] = {
56
+ "n_epochs": 1,
57
+ "per_device_batch": 1,
58
+ "gradient_accumulation_steps": 64,
59
+ "sequence_length": 4096,
60
+ "learning_rate": 5e-6,
61
+ "warmup_ratio": 0.03,
62
+ "train_kind": "peft",
63
+ }
64
+
65
+ # Create job
66
+ job = await client.create_job(
67
+ training_type="sft_offline",
68
+ model="Qwen/Qwen3-Coder-30B-A3B-Instruct",
69
+ training_file_id=file_id,
70
+ hyperparameters=hyper,
71
+ metadata={
72
+ "example": "qwen_coder_lora_30b",
73
+ # Include effective compute hints for backend routing/validation
74
+ "effective_config": {
75
+ "compute": {"gpu_type": "H100", "gpu_count": 4, "nodes": 1}
76
+ },
77
+ },
78
+ validation_file=validation_file_id,
79
+ )
80
+ job_id = str(job.get("id"))
81
+ if not job_id:
82
+ raise SystemExit(f"Invalid create_job response: {job}")
83
+
84
+ # Start
85
+ await client.start_job(job_id)
86
+
87
+ # Poll until terminal
88
+ job_final = await client.poll_until_terminal(job_id, interval_seconds=5.0, max_seconds=7200)
89
+ status = str(job_final.get("status"))
90
+ print(f"Job status: {status}")
91
+ # Print resulting model id if available and write to ft_data/ft_model_id.txt
92
+ result_model = (
93
+ job_final.get("result", {}).get("model_id")
94
+ if isinstance(job_final.get("result"), dict)
95
+ else None
96
+ )
97
+ print(f"Model ID: {result_model}")
98
+ try:
99
+ out_file = resolve_output_path("ft_model_id.txt")
100
+ text = (result_model or "").strip()
101
+ if text:
102
+ out_file.write_text(text + "\n", encoding="utf-8")
103
+ print(f"Wrote {out_file} with ft model id")
104
+ except Exception as exc:
105
+ # Best-effort write; don't crash if filesystem issues
106
+ print(f"Warning: failed to write ft_model_id.txt: {exc}")
107
+
108
+
109
+ if __name__ == "__main__":
110
+ asyncio.run(main())
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env python3
2
+ """Create a capped subset of a JSONL dataset for quick runs."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import argparse
7
+ from pathlib import Path
8
+
9
+
10
def main() -> None:
    """Stream at most --n non-empty lines from src into dst, creating parent dirs."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("src", help="Source JSONL path")
    parser.add_argument("dst", help="Destination JSONL path")
    parser.add_argument("--n", type=int, default=200, help="Max examples to keep")
    args = parser.parse_args()

    src = Path(args.src)
    if not src.exists():
        raise SystemExit(f"No such file: {src}")
    dst = Path(args.dst)
    dst.parent.mkdir(parents=True, exist_ok=True)

    kept = 0
    with src.open("r", encoding="utf-8") as fin, dst.open("w", encoding="utf-8") as fout:
        # Blank lines are skipped and never count toward the cap.
        for line in filter(str.strip, fin):
            if kept >= args.n:
                break
            fout.write(line)
            kept += 1
    print(f"Wrote {dst} with {kept} lines")


if __name__ == "__main__":
    main()
37
+
38
+
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env python3
2
+ """Validate that a JSONL file parses and contains chat-like records.
3
+
4
+ Checks first N lines (default 50) for objects with `messages` including an
5
+ assistant response (role == "assistant").
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import json
12
+ from pathlib import Path
13
+
14
+
15
+ def main() -> None:
16
+ p = argparse.ArgumentParser(description=__doc__)
17
+ p.add_argument("path", help="Path to JSONL file")
18
+ p.add_argument("--n", type=int, default=50, help="Number of lines to sample")
19
+ args = p.parse_args()
20
+
21
+ src = Path(args.path)
22
+ if not src.exists():
23
+ raise SystemExit(f"No such file: {src}")
24
+
25
+ checked = 0
26
+ ok = 0
27
+ with src.open("r", encoding="utf-8") as f:
28
+ for i, line in enumerate(f, start=1):
29
+ if i > args.n:
30
+ break
31
+ line = line.strip()
32
+ if not line:
33
+ continue
34
+ checked += 1
35
+ try:
36
+ obj = json.loads(line)
37
+ except Exception as exc:
38
+ raise SystemExit(f"Line {i} is not valid JSON: {exc}")
39
+ msgs = obj.get("messages") if isinstance(obj, dict) else None
40
+ if not isinstance(msgs, list):
41
+ raise SystemExit(f"Line {i} missing 'messages' list")
42
+ has_assistant = any(
43
+ isinstance(m, dict) and m.get("role") == "assistant" and m.get("content")
44
+ for m in msgs
45
+ )
46
+ if has_assistant:
47
+ ok += 1
48
+
49
+ if checked == 0:
50
+ raise SystemExit("No lines checked; file empty?")
51
+ if ok == 0:
52
+ raise SystemExit("No assistant messages found in sampled lines")
53
+ print(f"Validated: {ok}/{checked} sampled lines contain assistant messages")
54
+
55
+
56
+ if __name__ == "__main__":
57
+ main()
58
+
59
+
@@ -6,7 +6,7 @@ num_episodes = 50
6
6
  seed_start = 0
7
7
 
8
8
  [policy]
9
- inference_url = "http://localhost:8000/api/inference"
9
+ inference_url = "https://agent-learning.onrender.com/api/inference"
10
10
  max_tokens = 128
11
11
  temperature = 0.0
12
12
 
@@ -11,7 +11,7 @@ base = "Qwen/Qwen3-1.7B"
11
11
 
12
12
  [policy]
13
13
  model = "Qwen/Qwen3-1.7B"
14
- inference_url = "http://localhost:8000/api/inference"
14
+ inference_url = "https://agent-learning.onrender.com/api/inference"
15
15
  max_tokens = 1028
16
16
  temperature = 0.2
17
17
 
@@ -20,10 +20,12 @@ def extract_examples(dataset: Any, *, limit: int | None) -> list[dict[str, str]]
20
20
  solution = item.get("solution") or ""
21
21
  if isinstance(solution, list):
22
22
  solution = "\n".join(str(part) for part in solution)
23
- examples.append({
24
- "problem": problem,
25
- "solution": solution,
26
- })
23
+ examples.append(
24
+ {
25
+ "problem": problem,
26
+ "solution": solution,
27
+ }
28
+ )
27
29
  return examples
28
30
 
29
31
 
@@ -35,12 +37,26 @@ def write_jsonl(path: Path, rows: list[dict[str, str]]) -> None:
35
37
 
36
38
 
37
39
  def main() -> None:
38
- parser = argparse.ArgumentParser(description="Download MATH dataset splits to JSONL for offline use")
39
- parser.add_argument("--output-dir", default="examples/rl/data", help="Directory to write <split>.jsonl files")
40
- parser.add_argument("--dataset", default="nlile/hendrycks-MATH-benchmark", help="Hugging Face dataset identifier")
41
- parser.add_argument("--config", default="algebra", help="Hugging Face dataset config (if required)")
42
- parser.add_argument("--splits", nargs="*", default=["train", "validation", "test"], help="Splits to download")
43
- parser.add_argument("--limit", type=int, default=None, help="Optional cap on examples per split")
40
+ parser = argparse.ArgumentParser(
41
+ description="Download MATH dataset splits to JSONL for offline use"
42
+ )
43
+ parser.add_argument(
44
+ "--output-dir", default="examples/rl/data", help="Directory to write <split>.jsonl files"
45
+ )
46
+ parser.add_argument(
47
+ "--dataset",
48
+ default="nlile/hendrycks-MATH-benchmark",
49
+ help="Hugging Face dataset identifier",
50
+ )
51
+ parser.add_argument(
52
+ "--config", default="algebra", help="Hugging Face dataset config (if required)"
53
+ )
54
+ parser.add_argument(
55
+ "--splits", nargs="*", default=["train", "validation", "test"], help="Splits to download"
56
+ )
57
+ parser.add_argument(
58
+ "--limit", type=int, default=None, help="Optional cap on examples per split"
59
+ )
44
60
  args = parser.parse_args()
45
61
 
46
62
  output_dir = Path(args.output_dir).expanduser()