synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (349)
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +23 -17
  3. examples/dev/qwen3_32b_qlora_4xh100.toml +40 -0
  4. examples/multi_step/crafter_rl_lora.md +29 -0
  5. examples/qwen_coder/README.md +102 -0
  6. examples/qwen_coder/_shared.py +113 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  8. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  9. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  10. examples/qwen_coder/generate_dataset.py +98 -0
  11. examples/qwen_coder/infer_ft_smoke.py +65 -0
  12. examples/qwen_coder/infer_prod_proxy.py +73 -0
  13. examples/qwen_coder/infer_via_synth.py +87 -0
  14. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  15. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  16. examples/qwen_coder/sft_full_17b.py +103 -0
  17. examples/qwen_coder/sft_lora_30b.py +110 -0
  18. examples/qwen_coder/subset_jsonl.py +39 -0
  19. examples/qwen_coder/todos.md +38 -0
  20. examples/qwen_coder/validate_jsonl.py +60 -0
  21. examples/rl/configs/eval_base_qwen.toml +1 -1
  22. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  23. examples/rl/download_dataset.py +26 -10
  24. examples/rl/run_eval.py +53 -52
  25. examples/rl/run_rl_and_save.py +29 -12
  26. examples/rl/task_app/math_single_step.py +180 -41
  27. examples/rl/task_app/math_task_app.py +14 -6
  28. examples/sft/README.md +139 -0
  29. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  30. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  31. examples/sft/evaluate.py +117 -0
  32. examples/sft/export_dataset.py +117 -0
  33. examples/sft/generate_traces.py +162 -0
  34. examples/swe/__init__.py +12 -0
  35. examples/swe/task_app/README.md +105 -0
  36. examples/swe/task_app/__init__.py +2 -0
  37. examples/swe/task_app/grpo_swe_mini.py +571 -0
  38. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  39. examples/swe/task_app/hosted/README.md +173 -0
  40. examples/swe/task_app/hosted/__init__.py +5 -0
  41. examples/swe/task_app/hosted/branching.py +143 -0
  42. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  43. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  44. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  45. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  46. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  47. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  48. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  49. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  50. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  53. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  54. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  55. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  56. examples/swe/task_app/hosted/hosted_app.py +204 -0
  57. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  58. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  59. examples/swe/task_app/hosted/main.py +100 -0
  60. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  61. examples/swe/task_app/hosted/registry.py +195 -0
  62. examples/swe/task_app/hosted/rollout.py +1869 -0
  63. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  64. examples/swe/task_app/hosted/storage/volume.py +211 -0
  65. examples/swe/task_app/hosted/test_agents.py +161 -0
  66. examples/swe/task_app/hosted/test_service.py +137 -0
  67. examples/swe/task_app/hosted/utils.py +62 -0
  68. examples/vlm/PROPOSAL.md +53 -0
  69. examples/vlm/README.md +68 -0
  70. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  71. examples/vlm/crafter_image_only_agent.py +207 -0
  72. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  73. examples/vlm/filter_image_rows.py +63 -0
  74. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  75. examples/warming_up_to_rl/analyze_trace_db.py +12 -10
  76. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  77. examples/warming_up_to_rl/export_trace_sft.py +218 -36
  78. examples/warming_up_to_rl/groq_test.py +15 -8
  79. examples/warming_up_to_rl/manage_secrets.py +29 -25
  80. examples/warming_up_to_rl/readme.md +9 -2
  81. examples/warming_up_to_rl/run_eval.py +137 -61
  82. examples/warming_up_to_rl/run_fft_and_save.py +131 -60
  83. examples/warming_up_to_rl/run_local_rollout.py +88 -39
  84. examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
  85. examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
  86. examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
  87. examples/warming_up_to_rl/run_rl_and_save.py +35 -12
  88. examples/warming_up_to_rl/run_rollout_remote.py +44 -19
  89. examples/warming_up_to_rl/task_app/README.md +6 -2
  90. examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
  91. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  111. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
  112. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  113. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
  114. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  115. synth_ai/__init__.py +1 -0
  116. synth_ai/api/models/supported.py +376 -0
  117. synth_ai/api/train/builders.py +157 -26
  118. synth_ai/api/train/cli.py +213 -57
  119. synth_ai/api/train/config_finder.py +65 -5
  120. synth_ai/api/train/env_resolver.py +33 -15
  121. synth_ai/api/train/pollers.py +13 -4
  122. synth_ai/api/train/supported_algos.py +139 -0
  123. synth_ai/api/train/task_app.py +5 -3
  124. synth_ai/api/train/utils.py +33 -48
  125. synth_ai/cli/__init__.py +19 -4
  126. synth_ai/cli/_modal_wrapper.py +28 -0
  127. synth_ai/cli/_typer_patch.py +49 -0
  128. synth_ai/cli/balance.py +2 -3
  129. synth_ai/cli/calc.py +1 -1
  130. synth_ai/cli/demo.py +21 -6
  131. synth_ai/cli/recent.py +2 -2
  132. synth_ai/cli/rl_demo.py +77 -17
  133. synth_ai/cli/root.py +116 -39
  134. synth_ai/cli/status.py +2 -2
  135. synth_ai/cli/task_apps.py +1699 -259
  136. synth_ai/cli/traces.py +7 -4
  137. synth_ai/cli/turso.py +73 -0
  138. synth_ai/cli/watch.py +12 -18
  139. synth_ai/core/experiment.py +0 -2
  140. synth_ai/demo_registry.py +68 -31
  141. synth_ai/demos/core/cli.py +516 -194
  142. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  143. synth_ai/demos/demo_task_apps/core.py +64 -28
  144. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  145. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
  146. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  147. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  148. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  149. synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
  150. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  151. synth_ai/environments/examples/bandit/engine.py +12 -4
  152. synth_ai/environments/examples/bandit/taskset.py +4 -4
  153. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  154. synth_ai/environments/reproducibility/tree.py +5 -6
  155. synth_ai/environments/service/app.py +11 -12
  156. synth_ai/environments/service/core_routes.py +10 -9
  157. synth_ai/environments/stateful/engine.py +1 -1
  158. synth_ai/environments/tasks/core.py +1 -0
  159. synth_ai/environments/tasks/filters.py +5 -6
  160. synth_ai/environments/tasks/utils.py +4 -5
  161. synth_ai/evals/base.py +0 -2
  162. synth_ai/handshake.py +11 -9
  163. synth_ai/http.py +1 -1
  164. synth_ai/http_client.py +43 -11
  165. synth_ai/inference/__init__.py +0 -2
  166. synth_ai/inference/client.py +20 -6
  167. synth_ai/jobs/client.py +103 -78
  168. synth_ai/learning/__init__.py +41 -6
  169. synth_ai/learning/algorithms.py +14 -0
  170. synth_ai/learning/client.py +121 -29
  171. synth_ai/learning/config.py +2 -40
  172. synth_ai/learning/constants.py +0 -2
  173. synth_ai/learning/ft_client.py +4 -56
  174. synth_ai/learning/health.py +13 -7
  175. synth_ai/learning/jobs.py +43 -47
  176. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  177. synth_ai/learning/rl/client.py +267 -0
  178. synth_ai/learning/rl/config.py +31 -0
  179. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  180. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  181. synth_ai/learning/rl/secrets.py +13 -0
  182. synth_ai/learning/rl_client.py +2 -253
  183. synth_ai/learning/sft/__init__.py +29 -0
  184. synth_ai/learning/sft/client.py +68 -0
  185. synth_ai/learning/sft/config.py +270 -0
  186. synth_ai/learning/sft/data.py +295 -0
  187. synth_ai/learning/sse.py +25 -26
  188. synth_ai/learning/validators.py +25 -24
  189. synth_ai/lm/__init__.py +21 -47
  190. synth_ai/task/__init__.py +26 -27
  191. synth_ai/task/apps/__init__.py +18 -19
  192. synth_ai/task/auth.py +35 -23
  193. synth_ai/task/client.py +15 -13
  194. synth_ai/task/contracts.py +37 -35
  195. synth_ai/task/datasets.py +9 -6
  196. synth_ai/task/errors.py +11 -10
  197. synth_ai/task/health.py +17 -11
  198. synth_ai/task/json.py +58 -24
  199. synth_ai/task/proxy.py +15 -14
  200. synth_ai/task/rubrics.py +22 -15
  201. synth_ai/task/server.py +43 -17
  202. synth_ai/task/tracing_utils.py +12 -7
  203. synth_ai/task/validators.py +0 -1
  204. synth_ai/task/vendors.py +5 -7
  205. synth_ai/tracing_v3/__init__.py +2 -0
  206. synth_ai/tracing_v3/abstractions.py +21 -4
  207. synth_ai/tracing_v3/db_config.py +26 -1
  208. synth_ai/tracing_v3/decorators.py +18 -15
  209. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  210. synth_ai/tracing_v3/hooks.py +6 -4
  211. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  212. synth_ai/tracing_v3/replica_sync.py +1 -0
  213. synth_ai/tracing_v3/session_tracer.py +63 -16
  214. synth_ai/tracing_v3/storage/base.py +89 -1
  215. synth_ai/tracing_v3/storage/config.py +21 -8
  216. synth_ai/tracing_v3/storage/factory.py +10 -8
  217. synth_ai/tracing_v3/storage/utils.py +4 -2
  218. synth_ai/tracing_v3/turso/daemon.py +7 -2
  219. synth_ai/tracing_v3/turso/models.py +5 -2
  220. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  221. synth_ai/tracing_v3/utils.py +4 -3
  222. synth_ai/v0/api/__init__.py +8 -0
  223. synth_ai/v0/api/models/__init__.py +8 -0
  224. synth_ai/v0/api/models/supported.py +8 -0
  225. synth_ai/v0/config/__init__.py +15 -0
  226. synth_ai/v0/config/base_url.py +12 -0
  227. synth_ai/v0/lm/__init__.py +51 -0
  228. synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
  229. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  230. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  231. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  232. synth_ai/{lm → v0/lm}/config.py +6 -1
  233. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  234. synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
  235. synth_ai/{lm → v0/lm}/core/main.py +19 -7
  236. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  237. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
  238. synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
  239. synth_ai/{lm → v0/lm}/overrides.py +4 -4
  240. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  241. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  242. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  243. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  244. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
  245. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  246. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  247. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
  248. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
  249. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
  250. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
  251. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  252. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  253. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  254. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  255. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  256. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  257. synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
  258. synth_ai/v0/tracing/upload.py +32 -135
  259. synth_ai/v0/tracing_v3/__init__.py +10 -0
  260. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  261. synth_ai/v0/tracing_v3/decorators.py +3 -0
  262. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  263. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  264. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/METADATA +10 -7
  265. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/RECORD +294 -258
  266. examples/common_old/backend.py +0 -21
  267. examples/evals_old/README.md +0 -98
  268. examples/evals_old/__init__.py +0 -6
  269. examples/evals_old/compare_models.py +0 -1037
  270. examples/evals_old/example_log.md +0 -145
  271. examples/evals_old/run_demo.sh +0 -126
  272. examples/evals_old/trace_analysis.py +0 -270
  273. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  274. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  275. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  276. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
  277. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  278. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  279. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  280. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  281. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  282. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
  283. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  284. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  285. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
  286. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  287. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  288. examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
  289. examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
  290. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  291. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  292. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
  293. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
  294. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
  295. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  296. examples/finetuning_old/synth_qwen_v1/util.py +0 -147
  297. examples/rl_old/task_app.py +0 -962
  298. synth_ai/experimental/synth_oss.py +0 -446
  299. synth_ai/install_sqld.sh +0 -40
  300. synth_ai/learning/filtering.py +0 -0
  301. synth_ai/learning/offline/dpo.py +0 -0
  302. synth_ai/learning/offline/providers.py +0 -7
  303. synth_ai/learning/offline/sft.py +0 -0
  304. synth_ai/learning/offline/shared.py +0 -0
  305. synth_ai/learning/online/grpo.py +0 -0
  306. synth_ai/learning/online/irft.py +0 -0
  307. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  308. synth_ai/learning/prompts/gepa.py +0 -0
  309. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  310. synth_ai/learning/prompts/mipro.py +0 -289
  311. synth_ai/learning/prompts/random_search.py +0 -246
  312. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  313. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  314. synth_ai/rl/secrets.py +0 -19
  315. synth_ai/scripts/verify_rewards.py +0 -100
  316. synth_ai/tracing/__init__.py +0 -30
  317. synth_ai/tracing_v1/__init__.py +0 -33
  318. synth_ai/tracing_v3/turso/__init__.py +0 -25
  319. synth_ai/tracing_v3/turso/manager.py +0 -774
  320. synth_ai/zyk/__init__.py +0 -30
  321. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  322. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  323. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  324. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  325. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  326. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  327. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  328. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  329. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  330. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  331. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  332. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  333. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  334. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  335. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  336. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  337. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  338. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  339. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  340. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  341. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  342. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  343. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  344. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  345. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  346. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/WHEEL +0 -0
  347. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/entry_points.txt +0 -0
  348. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/licenses/LICENSE +0 -0
  349. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/top_level.txt +0 -0
examples/rl/task_app/math_single_step.py CHANGED
@@ -1,21 +1,22 @@
- from __future__ import annotations
-
  """Task app configuration for a single-step math reasoning environment."""

+ from __future__ import annotations
+
  import contextlib
  import os
  import random
  import re
  import uuid
+ from collections.abc import Iterable, Mapping, MutableMapping, Sequence
  from dataclasses import dataclass
  from pathlib import Path
- from typing import Any, Dict, Iterable, Mapping, MutableMapping, Optional, Sequence, cast
+ from typing import Any, cast

  import httpx
  from datasets import load_dataset
  from fastapi import APIRouter, HTTPException, Request
  from pydantic import BaseModel, Field
-
+ from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
  from synth_ai.task.contracts import (
      RolloutMetrics,
      RolloutRequest,
@@ -25,9 +26,9 @@ from synth_ai.task.contracts import (
      TaskInfo,
  )
  from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
+ from synth_ai.task.errors import http_exception
  from synth_ai.task.rubrics import Rubric, load_rubric
  from synth_ai.task.server import ProxyConfig, RubricBundle, TaskAppConfig
- from synth_ai.task.errors import http_exception
  from synth_ai.task.tracing_utils import (
      build_tracer_factory,
      resolve_sft_output_dir,
@@ -35,13 +36,14 @@ from synth_ai.task.tracing_utils import (
      tracing_env_enabled,
  )
  from synth_ai.task.vendors import normalize_vendor_keys
- from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
  from synth_ai.tracing_v3.session_tracer import SessionTracer

  REPO_ROOT = Path(__file__).resolve().parents[3]

- _modal_volume_candidate = Path(os.getenv("MATH_MODAL_DATASET_DIR", "/modal_volumes/math_dataset")).expanduser()
- _modal_volume_root: Optional[Path] = None
+ _modal_volume_candidate = Path(
+     os.getenv("MATH_MODAL_DATASET_DIR", "/modal_volumes/math_dataset")
+ ).expanduser()
+ _modal_volume_root: Path | None = None
  try:
      _modal_volume_candidate.mkdir(parents=True, exist_ok=True)
      _modal_volume_root = _modal_volume_candidate
@@ -55,7 +57,9 @@ if _modal_volume_root is not None:
      local_dataset_dir.mkdir(parents=True, exist_ok=True)
      os.environ.setdefault("MATH_DATASET_LOCAL_DIR", str(local_dataset_dir))
  else:
-     hf_cache_path = Path(os.getenv("MATH_DATASET_CACHE_DIR", str(REPO_ROOT / ".cache" / "hf-datasets")) ).expanduser()
+     hf_cache_path = Path(
+         os.getenv("MATH_DATASET_CACHE_DIR", str(REPO_ROOT / ".cache" / "hf-datasets"))
+     ).expanduser()

      hf_cache_path.mkdir(parents=True, exist_ok=True)
      os.environ.setdefault("MATH_DATASET_CACHE_DIR", str(hf_cache_path))
@@ -101,7 +105,7 @@ MATH_DATASET_SPEC = TaskDatasetSpec(
  _BOXED_MARKERS: tuple[str, ...] = ("\\boxed", "boxed")


- def _extract_boxed(text: str) -> Optional[str]:
+ def _extract_boxed(text: str) -> str | None:
      if not text:
          return None
      for marker in _BOXED_MARKERS:
@@ -170,9 +174,9 @@ class MathDataset:
          self.name = name
          self.config = config
          self.splits = [split for split in splits if split]
-         self._cache: Dict[str, Any] = {}
+         self._cache: dict[str, Any] = {}
          self._local_dir = os.getenv("MATH_DATASET_LOCAL_DIR")
-         self._hf_token: Optional[str] = None
+         self._hf_token: str | None = None
          for key in HF_TOKEN_ENV_KEYS:
              value = os.getenv(key)
              if value:
@@ -182,7 +186,7 @@ class MathDataset:
                  break
          # No multi-candidate fallback: enforce explicit dataset id

-     def _local_file_for_split(self, split: str) -> Optional[Path]:
+     def _local_file_for_split(self, split: str) -> Path | None:
          specific = os.getenv(f"MATH_DATASET_LOCAL_{split.upper()}_FILE")
          if specific:
              path = Path(specific).expanduser()
@@ -203,11 +207,13 @@ class MathDataset:
          if split not in self._cache:
              local_file = self._local_file_for_split(split)
              if local_file is not None:
-                 dataset = load_dataset("json", data_files=str(local_file), cache_dir=str(HF_DATASETS_CACHE))
+                 dataset = load_dataset(
+                     "json", data_files=str(local_file), cache_dir=str(HF_DATASETS_CACHE)
+                 )
                  self._cache[split] = dataset["train"]
              else:
                  try:
-                     load_kwargs: Dict[str, Any] = {"split": split}
+                     load_kwargs: dict[str, Any] = {"split": split}
                      if self.config:
                          load_kwargs["name"] = self.config
                      if self._hf_token:
@@ -221,7 +227,7 @@ class MathDataset:
                      tmp_path = target.with_name(target.name + ".tmp")
                      try:
                          local_dir.mkdir(parents=True, exist_ok=True)
-                         getattr(ds, "to_json")(str(tmp_path))
+                         ds.to_json(str(tmp_path))
                          tmp_path.replace(target)
                      except Exception:
                          with contextlib.suppress(FileNotFoundError):
@@ -235,7 +241,7 @@ class MathDataset:
                  raise RuntimeError(" ".join(hints)) from exc
          return self._cache[split]

-     def sample(self, *, split: str, index: Optional[int] = None) -> Dict[str, Any]:
+     def sample(self, *, split: str, index: int | None = None) -> dict[str, Any]:
          dataset = self._load_split(split)
          if len(dataset) == 0:
              raise RuntimeError(f"Dataset split '{split}' is empty")
@@ -301,9 +307,7 @@ class MathDataset:
          except Exception as exc:
              errors.append(f"{split}: {exc}")
          if errors:
-             raise RuntimeError(
-                 "Dataset preparation failed:\n" + "\n".join(errors)
-             )
+             raise RuntimeError("Dataset preparation failed:\n" + "\n".join(errors))


  @dataclass
@@ -322,9 +326,9 @@ class MathEnvironmentManager:

      def __init__(self, dataset: MathDataset) -> None:
          self.dataset = dataset
-         self._states: Dict[str, MathEnvState] = {}
+         self._states: dict[str, MathEnvState] = {}

-     def create(self, *, split: str, index: Optional[int], seed: Optional[int]) -> MathEnvState:
+     def create(self, *, split: str, index: int | None, seed: int | None) -> MathEnvState:
          if index is None and seed is not None:
              index = seed
          sample = self.dataset.sample(split=split, index=index)
@@ -350,11 +354,11 @@ class MathEnvironmentManager:


  class InitializePayload(BaseModel):
-     seed: Optional[int] = None
-     config: Dict[str, Any] = Field(default_factory=dict)
+     seed: int | None = None
+     config: dict[str, Any] = Field(default_factory=dict)


- def _observation_from_state(state: MathEnvState) -> Dict[str, Any]:
+ def _observation_from_state(state: MathEnvState) -> dict[str, Any]:
      return {
          "problem": state.problem,
          "split": state.split,
@@ -362,7 +366,9 @@ def _observation_from_state(state: MathEnvState) -> Dict[str, Any]:
      }


- def _score_submission(state: MathEnvState, tool_calls: Sequence[Mapping[str, Any]]) -> tuple[float, str, bool]:
+ def _score_submission(
+     state: MathEnvState, tool_calls: Sequence[Mapping[str, Any]]
+ ) -> tuple[float, str, bool]:
      if not tool_calls:
          return REWARD_NEGATIVE_NO_TOOL, "missing_tool_call", False
      call = tool_calls[0]
@@ -374,14 +380,61 @@ def _score_submission(state: MathEnvState, tool_calls: Sequence[Mapping[str, Any
      if not answer:
          return REWARD_NEGATIVE_NO_ANSWER, "blank_answer", False
      is_correct = answer == state.answer
-     return (REWARD_POSITIVE if is_correct else 0.0), ("correct" if is_correct else "incorrect"), is_correct
+     return (
+         (REWARD_POSITIVE if is_correct else 0.0),
+         ("correct" if is_correct else "incorrect"),
+         is_correct,
+     )


  math_router = APIRouter()


+ def _preview_tool_calls(tool_calls: Sequence[Mapping[str, Any]]) -> list[dict[str, Any]]:
+     """Return a compact, log-friendly preview of tool calls.
+
+     Truncates long fields to avoid noisy logs and leaking excessive content.
+     """
+     preview: list[dict[str, Any]] = []
+     for call in list(tool_calls or [])[:3]:
+         args = dict(call.get("args") or {})
+         answer = str(args.get("answer") or "")
+         # Hard truncate to keep logs compact
+         answer_short = answer[:120] + ("…" if len(answer) > 120 else "")
+         preview.append(
+             {
+                 "tool": call.get("tool"),
+                 "answer": answer_short,
+             }
+         )
+     return preview
+
+
+ def _event_and_outcome_components(
+     tool_calls: Sequence[Mapping[str, Any]], *, correct: bool, reward: float
+ ) -> dict[str, float]:
+     """Approximate component-wise scores for RL-style logs.
+
+     - env: task-level scalar reward (our single-step outcome)
+     - rubric_event: 1.0 if a valid tool call with non-empty answer was made else 0.0
+     - rubric_outcome: 1.0 if final answer was correct else 0.0
+     """
+     has_valid_tool = False
+     if tool_calls:
+         first = tool_calls[0] or {}
+         if str(first.get("tool") or "") == TOOL_NAME:
+             args = first.get("args") or {}
+             ans = str(args.get("answer") or "").strip()
+             has_valid_tool = bool(ans)
+     return {
+         "env": float(reward),
+         "rubric_event": 1.0 if has_valid_tool else 0.0,
+         "rubric_outcome": 1.0 if bool(correct) else 0.0,
+     }
+
+
  @math_router.post("/env/math/initialize")
- async def initialize_env(request: Request, payload: InitializePayload) -> Dict[str, Any]:
+ async def initialize_env(request: Request, payload: InitializePayload) -> dict[str, Any]:
      manager: MathEnvironmentManager = request.app.state.math_env_manager
      split = str(payload.config.get("split") or DEFAULT_SPLIT)
      seed = payload.seed
@@ -397,7 +450,7 @@ async def initialize_env(request: Request, payload: InitializePayload) -> Dict[s


  @math_router.post("/env/math/step")
- async def step_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:
+ async def step_env(request: Request, payload: dict[str, Any]) -> dict[str, Any]:
      manager: MathEnvironmentManager = request.app.state.math_env_manager
      env_id = str(payload.get("env_id") or "")
      if not env_id:
@@ -410,6 +463,26 @@ async def step_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:
      action = payload.get("action") or {}
      tool_calls = action.get("tool_calls") or payload.get("tool_calls") or []
      reward, status, correct = _score_submission(state, tool_calls)
+     with contextlib.suppress(Exception):
+         print(
+             "[MATH_STEP] env_id=",
+             state.env_id,
+             " split=",
+             state.split,
+             " index=",
+             state.index,
+             " calls=",
+             _preview_tool_calls(tool_calls),
+             " reward=",
+             reward,
+             " status=",
+             status,
+             " correct=",
+             correct,
+             " components=",
+             _event_and_outcome_components(tool_calls, correct=correct, reward=reward),
+             flush=True,
+         )
      state.done = True

      observation = _observation_from_state(state)
@@ -427,7 +500,7 @@ async def step_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:


  @math_router.post("/env/math/terminate")
- async def terminate_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:
+ async def terminate_env(request: Request, payload: dict[str, Any]) -> dict[str, Any]:
      manager: MathEnvironmentManager = request.app.state.math_env_manager
      env_id = str(payload.get("env_id") or "")
      if env_id:
@@ -448,7 +521,9 @@ def _resolve_inference_url(base_url: str) -> str:
      return f"{normalized}/v1/chat/completions"


- async def _call_inference(policy_config: Mapping[str, Any], observation: Mapping[str, Any]) -> tuple[list[Dict[str, Any]], Dict[str, Any]]:
+ async def _call_inference(
+     policy_config: Mapping[str, Any], observation: Mapping[str, Any]
+ ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
      inference_url = str(policy_config.get("inference_url") or "").rstrip("/")
      if not inference_url:
          raise RuntimeError("policy.config.inference_url required for rollout")
@@ -480,7 +555,7 @@ async def _call_inference(policy_config: Mapping[str, Any], observation: Mapping
          },
      ]

-     payload: Dict[str, Any] = {
+     payload: dict[str, Any] = {
          "model": model,
          "messages": messages,
          "tools": [
@@ -549,7 +624,7 @@ async def _call_inference(policy_config: Mapping[str, Any], observation: Mapping
          function = call.get("function") or {}
          name = function.get("name")
          arguments = function.get("arguments")
-         parsed_args: Dict[str, Any]
+         parsed_args: dict[str, Any]
          if isinstance(arguments, str):
              try:
                  import json
@@ -562,6 +637,15 @@ async def _call_inference(policy_config: Mapping[str, Any], observation: Mapping
          else:
              parsed_args = {}
          tool_calls.append({"tool": name, "args": parsed_args})
+     # Lightweight provider-side logging
+     with contextlib.suppress(Exception):
+         print(
+             "[MATH_INFER] model=",
+             model,
+             " calls=",
+             _preview_tool_calls(tool_calls),
+             flush=True,
+         )
      return tool_calls, data


@@ -576,11 +660,13 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
          "index": sample["index"],
      }

-     tool_calls: list[Dict[str, Any]] = []
-     inference_payload: Dict[str, Any] | None = None
-     error_info: Dict[str, Any] = {}
+     tool_calls: list[dict[str, Any]] = []
+     inference_payload: dict[str, Any] | None = None
+     error_info: dict[str, Any] = {}
      try:
-         tool_calls, inference_payload = await _call_inference(request.policy.config or {}, observation)
+         tool_calls, inference_payload = await _call_inference(
+             request.policy.config or {}, observation
+         )
      except HTTPException as http_err:
          tool_calls = []
          error_info = {"error": http_err.detail, "code": http_err.status_code}
@@ -600,6 +686,28 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
          tool_calls,
      )

+     # Log a concise summary so we can debug reward=0 issues in production
+     with contextlib.suppress(Exception):
+         print(
+             "[MATH_ROLLOUT] run=",
+             request.run_id,
+             " split=",
+             sample["split"],
+             " index=",
+             sample["index"],
+             " calls=",
+             _preview_tool_calls(tool_calls),
+             " reward=",
+             reward,
+             " status=",
+             status,
+             " correct=",
+             correct,
+             " components=",
+             _event_and_outcome_components(tool_calls, correct=correct, reward=reward),
+             flush=True,
+         )
+
      step = RolloutStep(
          obs=observation,
          tool_calls=tool_calls,
@@ -610,6 +718,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
              "status": status,
              "correct": correct,
              "raw_solution": sample["raw_solution"],
+             "tool_call_preview": _preview_tool_calls(tool_calls),
              **error_info,
          },
      )
@@ -634,6 +743,34 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
          details={"status": status, "correct": correct},
      )

+     # Include a minimal trace when requested or tracing is enabled via env
+     include_trace = bool(
+         (request.record and getattr(request.record, "return_trace", False))
+         or os.getenv("TASKAPP_TRACING_ENABLED")
+     )
+     trace_payload = None
+     if include_trace:
+         try:
+             # Minimal structured trace for assertions
+             trace_payload = {
+                 "session_id": str(uuid.uuid4()),
+                 "events_count": 1,
+                 "decision_rewards": [reward],
+                 "lm_calls": (
+                     [{"prompt": str(observation.get("problem", "")), "response": str(tool_calls)}]
+                     if tool_calls
+                     else []
+                 ),
+                 "metadata": {
+                     "env": "math_single_step",
+                     "split": sample["split"],
+                     "index": sample["index"],
+                     "status": status,
+                 },
+             }
+         except Exception:
+             trace_payload = None
+
      return RolloutResponse(
          run_id=request.run_id,
          trajectories=[trajectory],
@@ -641,7 +778,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
          metrics=metrics,
          aborted=False,
          ops_executed=2,
-         trace=None,
+         trace=trace_payload,
      )


@@ -739,7 +876,7 @@ EVENTS_RUBRIC: Rubric = cast(
  )


- def describe_taskset(dataset: MathDataset) -> Dict[str, Any]:
+ def describe_taskset(dataset: MathDataset) -> dict[str, Any]:
      return {
          **MATH_DATASET_SPEC.model_dump(),
          "hf_dataset": DATASET_NAME,
@@ -775,10 +912,12 @@ def build_config() -> TaskAppConfig:

      tracing_enabled = tracing_env_enabled()
      tracing_db_url = resolve_tracing_db_url()
-     tracer_factory = build_tracer_factory(SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url)
+     tracer_factory = build_tracer_factory(
+         SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url
+     )
      sft_output_dir = resolve_sft_output_dir()

-     app_state: Dict[str, Any] = {
+     app_state: dict[str, Any] = {
          "math_dataset": dataset,
          "math_env_manager": MathEnvironmentManager(dataset),
          "tracing_enabled": tracing_enabled,
examples/rl/task_app/math_task_app.py CHANGED
@@ -8,10 +8,10 @@ from pathlib import Path
  from fastapi.exceptions import RequestValidationError
  from fastapi.responses import JSONResponse
  from starlette.requests import Request
-
+ from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
  from synth_ai.task.server import create_task_app, run_task_app
+
  from .math_single_step import build_config
- from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key


  def fastapi_app():
@@ -40,7 +40,10 @@ def fastapi_app():
      async def health(request: Request):
          env_key = normalize_environment_api_key()
          if not env_key:
-             return JSONResponse(status_code=503, content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"})
+             return JSONResponse(
+                 status_code=503,
+                 content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
+             )
          if not is_api_key_header_authorized(request):
              prefix = _log_env_key_prefix("health", env_key)
              content = {"status": "healthy", "authorized": False}
@@ -53,7 +56,10 @@ def fastapi_app():
      async def health_rollout(request: Request):
          env_key = normalize_environment_api_key()
          if not env_key:
-             return JSONResponse(status_code=503, content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"})
+             return JSONResponse(
+                 status_code=503,
+                 content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
+             )
          if not is_api_key_header_authorized(request):
              prefix = _log_env_key_prefix("health/rollout", env_key)
              content = {"status": "healthy", "authorized": False}
@@ -67,7 +73,7 @@ def fastapi_app():
          try:
              hdr = request.headers
              snapshot = {
-                 "path": str(getattr(request, "url").path),
+                 "path": str(request.url.path),
                  "have_x_api_key": bool(hdr.get("x-api-key")),
                  "have_x_api_keys": bool(hdr.get("x-api-keys")),
                  "have_authorization": bool(hdr.get("authorization")),
@@ -76,7 +82,9 @@ def fastapi_app():
              print("[422] validation", snapshot, flush=True)
          except Exception:
              pass
-         return JSONResponse(status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]})
+         return JSONResponse(
+             status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
+         )

      return app

examples/sft/README.md ADDED
@@ -0,0 +1,139 @@
+ ### Supervised Fine-Tuning for Crafter
+
+ This folder provides a minimal, reusable SFT workflow that pulls out the SFT step from `examples/warming_up_to_rl/` and focuses it on LoRA/QLoRA. We've also added guidance for running full finetuning (FFT) so you can compare adapters against end-to-end weight updates.
+
+ It supports distilling Groq (or other vendor) rollouts into JSONL using tracing and then training a small base model like `Qwen/Qwen3-0.6B`.
+
+ ---
+
+ ### 0) Load environment from .env.dev (recommended)
+
+ Use your dev env file so keys/URLs are sourced consistently:
+
+ ```bash
+ # Example path; update to your actual dev env
+ set -a && source /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev && set +a
+ ```
+
+ This ensures `ENVIRONMENT_API_KEY`, `GROQ_API_KEY`, and (optionally) `BACKEND_BASE_URL` are available to the steps below.
+
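A quick sanity check that the keys actually loaded after sourcing (a minimal sketch; the variable names are the ones listed above, and the loop only reports presence, never values):

```bash
# Report which expected variables are set without echoing the secrets themselves
for var in ENVIRONMENT_API_KEY GROQ_API_KEY BACKEND_BASE_URL; do
  if [ -n "${!var}" ]; then echo "$var: set"; else echo "$var: missing"; fi
done
```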
+ ---
+
+ ### 1) Collect traces and export SFT JSONL
+
+ You can generate traces with the Crafter task app and then export them to SFT JSONL using the existing exporter:
+
+ ```bash
+ # Serve the task app locally with tracing enabled (example)
+ uvx synth-ai serve grpo-crafter \
+   --trace traces/v3 \
+   --trace-db traces/v3/synth_ai.db \
+   --port 8001
+
+ # Or run traced local rollouts to accumulate data
+ uv run python examples/warming_up_to_rl/run_local_rollout_traced.py \
+   --episodes 50 --max-turns 10
+
+ # Export SFT dataset from the trace DB
+ uv run python examples/warming_up_to_rl/export_trace_sft.py \
+   --db traces/v3/synth_ai.db \
+   --min-unique 0 \
+   --output examples/sft/ft_data/crafter_traces.jsonl
+ ```
+
+ Notes:
+ - The exporter uses achievements and event rewards to filter high-signal steps. Combine `--min-unique`, `--min-outcome-reward`, `--event-reward`, and `--require-achievement` to control data quality.
+ - You can restrict to sessions from certain providers/models with `--provider`/`--model`.
+ - Use `--limit` while debugging to reduce dataset size quickly.
+
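To illustrate combining those filters, a stricter export might look like the sketch below; the flags are the ones named in the notes above, while the threshold values are placeholders to tune per run:

```bash
# Stricter export: require more unique achievements and a minimum outcome reward,
# and cap the number of examples while iterating. Values are illustrative.
uv run python examples/warming_up_to_rl/export_trace_sft.py \
  --db traces/v3/synth_ai.db \
  --min-unique 3 \
  --min-outcome-reward 1.0 \
  --limit 500 \
  --output examples/sft/ft_data/crafter_traces_high_signal.jsonl
```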
+ ---
+
+ ### 2a) Train LoRA (QLoRA) on Qwen/Qwen3-0.6B
+
+ Use the standard CLI. Do not use a custom Python finetuning script. Point the CLI at your `.env.dev` so it picks up keys automatically:
+
+ ```bash
+ uvx synth-ai train \
+   --type sft \
+   --config examples/sft/configs/crafter_lora_qwen0p6b.toml \
+   --dataset examples/sft/ft_data/crafter_traces.jsonl \
+   --env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
+ ```
+
+ The config sets `training.use_qlora = true` and `hyperparameters.train_kind = "peft"` to request LoRA adapters.
+
+ Experiment tips:
+ - The backend currently defaults to a LoRA rank of 16. If you need other ranks, generate the payload with `--dry-run`, add `"lora_rank": <value>` (and optional `"lora_alpha"`, `"lora_dropout"`) under `hyperparameters`, and submit it via the API until the CLI exposes these knobs directly.
+ - Duplicate the TOML and adjust `hyperparameters.warmup_ratio`, `learning_rate`, or `gradient_accumulation_steps` to keep the global batch size comparable across datasets.
+
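A rough sketch of that rank-override workflow, assuming `--dry-run` emits the job payload JSON on stdout and that the backend accepts `lora_rank`/`lora_alpha`/`lora_dropout` under `hyperparameters` (both assumptions; inspect the actual dry-run output before submitting via the API):

```bash
# Capture the payload the CLI would submit (assumes --dry-run prints it to stdout)
uvx synth-ai train \
  --type sft \
  --config examples/sft/configs/crafter_lora_qwen0p6b.toml \
  --dataset examples/sft/ft_data/crafter_traces.jsonl \
  --env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev \
  --dry-run > payload.json

# Inject the LoRA knobs (illustrative values) under "hyperparameters"; requires jq.
# Submit the edited payload via the API as described in the tip above.
jq '.hyperparameters += {"lora_rank": 32, "lora_alpha": 64, "lora_dropout": 0.05}' \
  payload.json > payload_lora32.json
```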
+ ---
+
+ ### 2b) Train Full Finetune (FFT) on Qwen/Qwen3-0.6B
+
+ Full finetuning updates all weights and uses a near-identical CLI flow with the LoRA toggle disabled. The helper config lives alongside the LoRA sample:
+
+ ```bash
+ uvx synth-ai train \
+   --type sft \
+   --config examples/sft/configs/crafter_fft_qwen0p6b.toml \
+   --dataset examples/sft/ft_data/crafter_traces.jsonl \
+   --env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
+ ```
+
+ Key differences vs LoRA:
+ - `training.use_qlora = false` and `hyperparameters.train_kind = "fft"` request a full-weight update.
+ - `per_device_batch` defaults to 1 to keep memory use comfortable on a single H100; raise gradually as you confirm headroom.
+ - FFT runs slower per step. Consider trimming the dataset with `--examples` or the exporter filters for quick baselines.
+
+ If you want the 4B Crafter FFT baseline from the RL examples, reuse `examples/warming_up_to_rl/configs/crafter_fft_4b.toml` with the same CLI command.
+
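Spelled out, that reuse is the same command with only the config path swapped:

```bash
# 4B Crafter FFT baseline, reusing the RL example's config
uvx synth-ai train \
  --type sft \
  --config examples/warming_up_to_rl/configs/crafter_fft_4b.toml \
  --dataset examples/sft/ft_data/crafter_traces.jsonl \
  --env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
```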
+ ---
+
+ ### 3) Evaluate the fine-tuned models
+
+ After the job completes, list your fine-tuned models and evaluate them in the Crafter loop:
+
+ ```bash
+ # List models
+ uv run python - <<'PY'
+ import asyncio
+ import os
+ from synth_ai.learning.client import LearningClient
+
+ backend = os.getenv("BACKEND_BASE_URL", "https://agent-learning.onrender.com/api")
+ api_key = os.getenv("SYNTH_API_KEY", "")
+ async def main():
+     client = LearningClient(backend, api_key)
+     models = await client.list_fine_tuned_models()
+     for m in models:
+         print(m)
+ asyncio.run(main())
+ PY
+
+ # Evaluate in the Crafter eval loop (example via warming_up_to_rl)
+ TASK_APP_URL=http://localhost:8001 \
+ uv run python examples/warming_up_to_rl/run_eval.py \
+   --toml examples/warming_up_to_rl/configs/eval_local_vllm.toml \
+   --model ft:YOUR_FT_MODEL_ID \
+   --use-rollout
+ ```
+
+ ---
+
+ ### 4) Plan comparison runs
+
+ Keep runs comparable by adjusting one axis at a time and logging the settings in your experiment tracker (spreadsheet, weights & biases, etc.).
+
+ - **LoRA rank sweeps:** start from `crafter_lora_qwen0p6b.toml`, clone it per rank (e.g., `r=4,8,16,64`). For now add the desired `lora_rank` in the job payload manually (see note above) and include it in the run name.
+ - **Dataset size:** duplicate the exported JSONL and slice with `head -n`, or pass `--examples N` to the CLI for quick subsamples. Track the effective token count using the exporter logs.
+ - **Data quality:** increase `--min-unique`, require specific achievements, or exclude low-reward sessions with `export_trace_sft.py`. Capture the filter tuple in your run metadata so evaluations stay reproducible.
+ - **FFT vs LoRA:** run both configs on the same dataset/cardinality so differences reflect the training method rather than the data.
+
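For the dataset-size axis, one simple pattern is to cut nested subsets with `head -n` or rely on the CLI's `--examples` subsampling mentioned above (a sketch; the subset sizes are arbitrary):

```bash
# Nested subsets of the exported JSONL for a dataset-size sweep
FULL=examples/sft/ft_data/crafter_traces.jsonl
for n in 500 1000 2000; do
  head -n "$n" "$FULL" > "examples/sft/ft_data/crafter_traces.${n}.jsonl"
done

# Or subsample on the fly for a quick baseline run
uvx synth-ai train \
  --type sft \
  --config examples/sft/configs/crafter_lora_qwen0p6b.toml \
  --dataset "$FULL" \
  --examples 1000 \
  --env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
```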
+ For each sweep, use consistent evaluation seeds and write down throughput (tokens/sec) so you can weigh quality vs cost.
+
+ ---
+
+ ### Files
+ - `configs/crafter_lora_qwen0p6b.toml`: LoRA/QLoRA SFT config for `Qwen/Qwen3-0.6B`.
+ - `configs/crafter_fft_qwen0p6b.toml`: Full-finetune SFT config for `Qwen/Qwen3-0.6B`.
+ - `ft_data/`: place your exported JSONL here (ignored by VCS).
examples/sft/configs/crafter_fft_qwen0p6b.toml ADDED
@@ -0,0 +1,44 @@
+ [job]
+ model = "Qwen/Qwen3-0.6B"
+ # Prefer passing --dataset at runtime for repeatability
+ # data = "examples/sft/ft_data/crafter_traces.jsonl"
+
+ [compute]
+ gpu_type = "H100"
+ gpu_count = 1
+ nodes = 1
+
+ [data]
+ topology = {}
+ # Optional validation set if you have one locally
+ # validation_path = "examples/sft/ft_data/crafter_traces.val.jsonl"
+
+ [training]
+ mode = "sft_offline"
+ use_qlora = false
+
+ [training.validation]
+ enabled = true
+ evaluation_strategy = "steps"
+ eval_steps = 50
+ save_best_model_at_end = true
+ metric_for_best_model = "val.loss"
+ greater_is_better = false
+
+ [hyperparameters]
+ n_epochs = 1
+ train_kind = "fft"
+ per_device_batch = 1
+ gradient_accumulation_steps = 32
+ sequence_length = 4096
+ learning_rate = 1e-5
+ warmup_ratio = 0.03
+ weight_decay = 0.01
+
+ [hyperparameters.parallelism]
+ use_deepspeed = true
+ deepspeed_stage = 2
+ fsdp = false
+ bf16 = true
+ fp16 = false
+ activation_checkpointing = true