synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (293)
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/README_verilog_rl.md +77 -0
  4. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  5. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  6. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  7. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  8. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
  9. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  10. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  11. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  12. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  13. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  14. examples/multi_step/convert_traces_to_sft.py +84 -0
  15. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  16. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  17. examples/multi_step/readme.md +48 -0
  18. examples/multi_step/run_sft_qwen30b.sh +45 -0
  19. examples/multi_step/verilog_rl_lora.md +218 -0
  20. examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
  21. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  22. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  23. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  24. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  25. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  26. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  27. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  28. examples/qwen_vl/QUICKSTART.md +327 -0
  29. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  30. examples/qwen_vl/README.md +154 -0
  31. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  32. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  33. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  34. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  35. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  36. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  37. examples/qwen_vl/__init__.py +2 -0
  38. examples/qwen_vl/collect_data_via_cli.md +423 -0
  39. examples/qwen_vl/collect_vision_traces.py +368 -0
  40. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  41. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  42. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  43. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  44. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  45. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  46. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  47. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  48. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  49. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  50. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  51. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  52. examples/qwen_vl/run_vision_comparison.sh +62 -0
  53. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  54. examples/qwen_vl/test_image_validation.py +201 -0
  55. examples/qwen_vl/test_sft_vision_data.py +110 -0
  56. examples/rl/README.md +1 -1
  57. examples/rl/configs/eval_base_qwen.toml +17 -0
  58. examples/rl/configs/eval_rl_qwen.toml +13 -0
  59. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  60. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  61. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  62. examples/rl/run_eval.py +436 -0
  63. examples/rl/run_rl_and_save.py +111 -0
  64. examples/rl/task_app/README.md +22 -0
  65. examples/rl/task_app/math_single_step.py +990 -0
  66. examples/rl/task_app/math_task_app.py +111 -0
  67. examples/sft/README.md +5 -5
  68. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  69. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  70. examples/sft/evaluate.py +4 -4
  71. examples/sft/export_dataset.py +7 -4
  72. examples/sft/generate_traces.py +2 -0
  73. examples/swe/task_app/README.md +1 -1
  74. examples/swe/task_app/grpo_swe_mini.py +1 -1
  75. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  76. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  77. examples/swe/task_app/hosted/policy_routes.py +0 -2
  78. examples/swe/task_app/hosted/rollout.py +2 -8
  79. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  80. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  81. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  82. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  83. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  84. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  85. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  86. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  87. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  88. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  89. examples/task_apps/crafter/task_app/__init__.py +3 -0
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
  91. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
  97. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
  98. examples/task_apps/enron/__init__.py +1 -0
  99. examples/task_apps/enron/filter_sft.toml +5 -0
  100. examples/task_apps/enron/tests/__init__.py +2 -0
  101. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  102. examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
  103. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  104. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  105. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  106. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  107. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  108. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
  109. examples/task_apps/pokemon_red/task_app.py +199 -6
  110. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
  111. examples/task_apps/sokoban/filter_sft.toml +5 -0
  112. examples/task_apps/sokoban/tests/__init__.py +2 -0
  113. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  115. examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
  116. examples/task_apps/verilog/filter_sft.toml +5 -0
  117. examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
  118. examples/task_apps/verilog/tests/__init__.py +2 -0
  119. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  120. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
  121. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  122. examples/vlm/README.md +3 -3
  123. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  124. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  125. examples/vlm/filter_image_rows.py +1 -1
  126. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  127. examples/warming_up_to_rl/_utils.py +92 -0
  128. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  129. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  130. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  131. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  132. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  133. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  134. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  135. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  136. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  137. examples/warming_up_to_rl/groq_test.py +2 -0
  138. examples/warming_up_to_rl/readme.md +63 -132
  139. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  140. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  141. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  142. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  143. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  144. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  145. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  146. examples/warming_up_to_rl/task_app/README.md +42 -0
  147. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  148. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  152. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  153. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  154. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  155. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  156. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  157. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  158. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  159. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  160. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  161. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  162. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  163. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  164. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  165. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  166. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  167. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  168. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  169. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  170. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  171. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  172. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  173. synth_ai/__init__.py +44 -30
  174. synth_ai/_utils/__init__.py +47 -0
  175. synth_ai/_utils/base_url.py +10 -0
  176. synth_ai/_utils/http.py +10 -0
  177. synth_ai/_utils/prompts.py +10 -0
  178. synth_ai/_utils/task_app_state.py +12 -0
  179. synth_ai/_utils/user_config.py +10 -0
  180. synth_ai/api/models/supported.py +145 -7
  181. synth_ai/api/train/__init__.py +13 -1
  182. synth_ai/api/train/cli.py +30 -7
  183. synth_ai/api/train/config_finder.py +18 -11
  184. synth_ai/api/train/env_resolver.py +13 -10
  185. synth_ai/cli/__init__.py +66 -49
  186. synth_ai/cli/_modal_wrapper.py +9 -6
  187. synth_ai/cli/_typer_patch.py +0 -2
  188. synth_ai/cli/_validate_task_app.py +22 -4
  189. synth_ai/cli/legacy_root_backup.py +3 -1
  190. synth_ai/cli/lib/__init__.py +10 -0
  191. synth_ai/cli/lib/task_app_discovery.py +7 -0
  192. synth_ai/cli/lib/task_app_env.py +518 -0
  193. synth_ai/cli/recent.py +1 -0
  194. synth_ai/cli/setup.py +266 -0
  195. synth_ai/cli/task_app_deploy.py +16 -0
  196. synth_ai/cli/task_app_list.py +25 -0
  197. synth_ai/cli/task_app_modal_serve.py +16 -0
  198. synth_ai/cli/task_app_serve.py +18 -0
  199. synth_ai/cli/task_apps.py +392 -141
  200. synth_ai/cli/train.py +18 -0
  201. synth_ai/cli/tui.py +62 -0
  202. synth_ai/demos/__init__.py +10 -0
  203. synth_ai/demos/core/__init__.py +28 -1
  204. synth_ai/demos/crafter/__init__.py +1 -0
  205. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  206. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  207. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  208. synth_ai/demos/demo_registry.py +176 -0
  209. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  210. synth_ai/demos/math/__init__.py +1 -0
  211. synth_ai/demos/math/_common.py +16 -0
  212. synth_ai/demos/math/app.py +38 -0
  213. synth_ai/demos/math/config.toml +76 -0
  214. synth_ai/demos/math/deploy_modal.py +54 -0
  215. synth_ai/demos/math/modal_task_app.py +702 -0
  216. synth_ai/demos/math/task_app_entry.py +51 -0
  217. synth_ai/environments/environment/core.py +7 -1
  218. synth_ai/environments/examples/bandit/engine.py +0 -1
  219. synth_ai/environments/examples/bandit/environment.py +0 -1
  220. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  221. synth_ai/environments/examples/verilog/engine.py +76 -10
  222. synth_ai/environments/examples/wordle/environment.py +0 -1
  223. synth_ai/evals/base.py +16 -5
  224. synth_ai/evals/client.py +1 -1
  225. synth_ai/inference/client.py +1 -1
  226. synth_ai/learning/client.py +1 -1
  227. synth_ai/learning/health.py +1 -1
  228. synth_ai/learning/jobs.py +1 -1
  229. synth_ai/learning/rl/client.py +1 -1
  230. synth_ai/learning/rl/env_keys.py +1 -1
  231. synth_ai/learning/rl/secrets.py +1 -1
  232. synth_ai/learning/sft/client.py +1 -1
  233. synth_ai/learning/sft/data.py +407 -4
  234. synth_ai/learning/validators.py +4 -1
  235. synth_ai/task/__init__.py +11 -1
  236. synth_ai/task/apps/__init__.py +5 -2
  237. synth_ai/task/config.py +259 -0
  238. synth_ai/task/contracts.py +15 -2
  239. synth_ai/task/rubrics/__init__.py +4 -2
  240. synth_ai/task/rubrics/loaders.py +27 -4
  241. synth_ai/task/rubrics/scoring.py +3 -0
  242. synth_ai/task/rubrics.py +219 -0
  243. synth_ai/task/trace_correlation_helpers.py +328 -0
  244. synth_ai/task/tracing_utils.py +14 -3
  245. synth_ai/task/validators.py +145 -2
  246. synth_ai/tracing_v3/config.py +15 -13
  247. synth_ai/tracing_v3/constants.py +21 -0
  248. synth_ai/tracing_v3/db_config.py +3 -1
  249. synth_ai/tracing_v3/decorators.py +10 -7
  250. synth_ai/tracing_v3/session_tracer.py +10 -0
  251. synth_ai/tracing_v3/turso/daemon.py +2 -2
  252. synth_ai/tracing_v3/turso/native_manager.py +108 -77
  253. synth_ai/tracing_v3/utils.py +1 -1
  254. synth_ai/tui/__init__.py +5 -0
  255. synth_ai/tui/__main__.py +13 -0
  256. synth_ai/tui/cli/__init__.py +1 -0
  257. synth_ai/tui/cli/query_experiments.py +164 -0
  258. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  259. synth_ai/tui/dashboard.py +911 -0
  260. synth_ai/utils/__init__.py +101 -0
  261. synth_ai/utils/base_url.py +94 -0
  262. synth_ai/utils/cli.py +131 -0
  263. synth_ai/utils/env.py +287 -0
  264. synth_ai/utils/http.py +169 -0
  265. synth_ai/utils/modal.py +308 -0
  266. synth_ai/utils/process.py +212 -0
  267. synth_ai/utils/prompts.py +39 -0
  268. synth_ai/utils/sqld.py +122 -0
  269. synth_ai/utils/task_app_discovery.py +882 -0
  270. synth_ai/utils/task_app_env.py +186 -0
  271. synth_ai/utils/task_app_state.py +318 -0
  272. synth_ai/utils/user_config.py +137 -0
  273. synth_ai/v0/config/__init__.py +1 -5
  274. synth_ai/v0/config/base_url.py +1 -7
  275. synth_ai/v0/tracing/config.py +1 -1
  276. synth_ai/v0/tracing/decorators.py +1 -1
  277. synth_ai/v0/tracing/upload.py +1 -1
  278. synth_ai/v0/tracing_v1/config.py +1 -1
  279. synth_ai/v0/tracing_v1/decorators.py +1 -1
  280. synth_ai/v0/tracing_v1/upload.py +1 -1
  281. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  282. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
  283. synth_ai/cli/man.py +0 -106
  284. synth_ai/compound/cais.py +0 -0
  285. synth_ai/core/experiment.py +0 -13
  286. synth_ai/core/system.py +0 -15
  287. synth_ai/demo_registry.py +0 -295
  288. synth_ai/handshake.py +0 -109
  289. synth_ai/http.py +0 -26
  290. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  291. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  292. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  293. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,111 @@
1
+ """Legacy entrypoint for the math single-step task app."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ from pathlib import Path
7
+
8
+ from fastapi.exceptions import RequestValidationError
9
+ from fastapi.responses import JSONResponse
10
+ from starlette.requests import Request
11
+ from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
12
+ from synth_ai.task.server import create_task_app, run_task_app
13
+
14
+ from .math_single_step import build_config
15
+
16
+
17
def fastapi_app():
    """Return a FastAPI application for hosting the math task app.

    The app is built from ``build_config()`` via ``create_task_app`` and then
    adjusted in three ways:

    * The default ``GET /health`` and ``GET /health/rollout`` routes are
      removed and replaced with auth-tolerant handlers: a request without a
      valid API key header still gets HTTP 200, with ``"authorized": False``
      and a short hint of the expected key prefix.
    * Both handlers answer HTTP 503 when no ``ENVIRONMENT_API_KEY`` is
      configured.
    * ``RequestValidationError`` is answered with HTTP 422 and at most the
      first five validation errors, after logging which auth headers were
      present on the request.

    Returns:
        The configured application object produced by ``create_task_app``.
    """
    # Function-scope import so the module's import block stays untouched.
    # jsonable_encoder makes validation errors JSON-safe (they may carry
    # exception objects or bytes that json.dumps cannot serialize).
    from fastapi.encoders import jsonable_encoder

    app = create_task_app(build_config())

    # Replace default health endpoints with auth-tolerant handlers.
    health_paths = {"/health", "/health/rollout"}
    app.router.routes = [
        route
        for route in app.router.routes
        if not (
            getattr(route, "path", None) in health_paths
            and "GET" in (getattr(route, "methods", set()) or set())
        )
    ]

    # Upper bound on how much of the secret is ever echoed back or logged.
    # The hint is returned to *unauthorized* callers, so it must stay too
    # short to meaningfully weaken the key while still helping to spot a
    # mismatched key during debugging.
    max_prefix_chars = 4

    def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
        """Log and return a short prefix of ``env_key`` (``None`` if unset)."""
        if not env_key:
            return None
        # Never reveal more than half the key, and never more than the cap.
        prefix = env_key[: min(max_prefix_chars, max(1, len(env_key) // 2))]
        print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
        return prefix

    def _health_response(source: str, request: Request, authorized_payload: dict):
        """Shared body of both health endpoints.

        ``source`` labels the log line; ``authorized_payload`` is returned
        as-is when the request carries an accepted API key header.
        """
        env_key = normalize_environment_api_key()
        if not env_key:
            return JSONResponse(
                status_code=503,
                content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
            )
        if not is_api_key_header_authorized(request):
            prefix = _log_env_key_prefix(source, env_key)
            content = {"status": "healthy", "authorized": False}
            if prefix:
                content["expected_api_key_prefix"] = prefix
            return JSONResponse(status_code=200, content=content)
        return authorized_payload

    @app.get("/health")
    async def health(request: Request):
        return _health_response(
            "health", request, {"status": "healthy", "authorized": True}
        )

    @app.get("/health/rollout")
    async def health_rollout(request: Request):
        return _health_response(
            "health/rollout", request, {"ok": True, "authorized": True}
        )

    @app.exception_handler(RequestValidationError)
    async def _on_validation_error(request: Request, exc: RequestValidationError):
        # Encode once so both the log line and the response body are JSON-safe.
        errors = jsonable_encoder(exc.errors()[:5])
        try:
            hdr = request.headers
            snapshot = {
                "path": str(request.url.path),
                "have_x_api_key": bool(hdr.get("x-api-key")),
                "have_x_api_keys": bool(hdr.get("x-api-keys")),
                "have_authorization": bool(hdr.get("authorization")),
                "errors": errors,
            }
            print("[422] validation", snapshot, flush=True)
        except Exception:
            # Diagnostics are best-effort: a logging failure must never mask
            # the 422 response the client is waiting for.
            pass
        return JSONResponse(
            status_code=422, content={"status": "invalid", "detail": errors}
        )

    return app
90
+
91
+
92
if __name__ == "__main__":
    # Command-line entry point: collect the server options, then hand the
    # config factory (not a built config) to run_task_app.
    cli = argparse.ArgumentParser(description="Run the math single-step task app locally")
    cli.add_argument("--host", default="0.0.0.0")
    cli.add_argument("--port", type=int, default=8101)
    cli.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
    cli.add_argument(
        "--env-file",
        action="append",
        default=[],
        help="Path to .env file to load (can be specified multiple times)",
    )
    opts = cli.parse_args()

    # Normalise a falsy --env-file result to an empty list before passing it on.
    dotenv_paths = opts.env_file or []
    run_task_app(
        build_config,
        host=opts.host,
        port=opts.port,
        reload=opts.reload,
        env_files=dotenv_paths,
    )
examples/sft/README.md CHANGED
@@ -27,7 +27,7 @@ You can generate traces with the Crafter task app and then export them to SFT JS
27
27
  # Serve the task app locally with tracing enabled (example)
28
28
  uvx synth-ai serve grpo-crafter \
29
29
  --trace traces/v3 \
30
- --trace-db traces/v3/synth_ai.db \
30
+ --trace-db traces/v3/task_app_traces_<timestamp>.db \
31
31
  --port 8001
32
32
 
33
33
  # Or run traced local rollouts to accumulate data
@@ -36,9 +36,9 @@ uv run python examples/warming_up_to_rl/run_local_rollout_traced.py \
36
36
 
37
37
  # Export SFT dataset from the trace DB
38
38
  uv run python examples/warming_up_to_rl/export_trace_sft.py \
39
- --db traces/v3/synth_ai.db \
39
+ --db traces/v3/task_app_traces_<timestamp>.db \
40
40
  --min-unique 0 \
41
- --output examples/sft/ft_data/crafter_traces.jsonl
41
+ --output examples/sft/ft_data/crafter_sft.jsonl
42
42
  ```
43
43
 
44
44
  Notes:
@@ -56,7 +56,7 @@ Use the standard CLI. Do not use a custom Python finetuning script. Point the CL
56
56
  uvx synth-ai train \
57
57
  --type sft \
58
58
  --config examples/sft/configs/crafter_lora_qwen0p6b.toml \
59
- --dataset examples/sft/ft_data/crafter_traces.jsonl \
59
+ --dataset examples/sft/ft_data/crafter_sft.jsonl \
60
60
  --env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
61
61
  ```
62
62
 
@@ -76,7 +76,7 @@ Full finetuning updates all weights and uses a near-identical CLI flow with the
76
76
  uvx synth-ai train \
77
77
  --type sft \
78
78
  --config examples/sft/configs/crafter_fft_qwen0p6b.toml \
79
- --dataset examples/sft/ft_data/crafter_traces.jsonl \
79
+ --dataset examples/sft/ft_data/crafter_sft.jsonl \
80
80
  --env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
81
81
  ```
82
82
 
@@ -1,7 +1,9 @@
1
+ type = "sft"
2
+
1
3
  [job]
2
4
  model = "Qwen/Qwen3-0.6B"
3
5
  # Prefer passing --dataset at runtime for repeatability
4
- # data = "examples/sft/ft_data/crafter_traces.jsonl"
6
+ # data = "examples/sft/ft_data/crafter_sft.jsonl"
5
7
 
6
8
  [compute]
7
9
  gpu_type = "H100"
@@ -11,7 +13,7 @@ nodes = 1
11
13
  [data]
12
14
  topology = {}
13
15
  # Optional validation set if you have one locally
14
- # validation_path = "examples/sft/ft_data/crafter_traces.val.jsonl"
16
+ # validation_path = "examples/sft/ft_data/crafter_sft.val.jsonl"
15
17
 
16
18
  [training]
17
19
  mode = "sft_offline"
@@ -1,7 +1,9 @@
1
+ type = "sft"
2
+
1
3
  [job]
2
4
  model = "Qwen/Qwen3-0.6B"
3
5
  # Optionally set here, but prefer passing --dataset at runtime
4
- # data = "examples/sft/ft_data/crafter_traces.jsonl"
6
+ # data = "examples/sft/ft_data/crafter_sft.jsonl"
5
7
 
6
8
  [compute]
7
9
  gpu_type = "H100"
@@ -12,7 +14,7 @@ nodes = 1
12
14
  # Forwarded into metadata.effective_config
13
15
  topology = {}
14
16
  # Optional validation set if you have one locally
15
- # validation_path = "examples/sft/ft_data/crafter_traces.val.jsonl"
17
+ # validation_path = "examples/sft/ft_data/crafter_sft.val.jsonl"
16
18
 
17
19
  [training]
18
20
  mode = "lora"
@@ -42,4 +44,3 @@ fsdp = false
42
44
  bf16 = true
43
45
  fp16 = false
44
46
  activation_checkpointing = true
45
-
examples/sft/evaluate.py CHANGED
@@ -11,6 +11,7 @@ from __future__ import annotations
11
11
  import argparse
12
12
  import asyncio
13
13
  import os
14
+ from contextlib import suppress
14
15
  from dataclasses import dataclass
15
16
  from typing import Any
16
17
 
@@ -44,6 +45,7 @@ def _ops(n: int) -> list[str]:
44
45
 
45
46
 
46
47
  def _request(seed: int, a: EvalArgs) -> RolloutRequest:
48
+ from synth_ai.task.contracts import RolloutMode
47
49
  return RolloutRequest(
48
50
  run_id=f"eval-{seed}",
49
51
  env=RolloutEnvSpec(env_name="crafter", seed=seed, config={}),
@@ -53,6 +55,7 @@ def _request(seed: int, a: EvalArgs) -> RolloutRequest:
53
55
  ),
54
56
  ops=_ops(a.max_llm_calls),
55
57
  record=RolloutRecordConfig(trajectories=True, return_trace=False, trace_format="compact"),
58
+ mode=RolloutMode.EVAL,
56
59
  )
57
60
 
58
61
 
@@ -102,10 +105,8 @@ async def main() -> None:
102
105
  for r in results:
103
106
  ers = r.get("episode_returns") or []
104
107
  if isinstance(ers, list) and ers:
105
- try:
108
+ with suppress(Exception):
106
109
  flat_returns.append(float(ers[0]))
107
- except Exception:
108
- pass
109
110
  if flat_returns:
110
111
  mean_ret = sum(flat_returns) / len(flat_returns)
111
112
  print(f"mean_return={mean_ret:.3f} over {len(flat_returns)} episodes")
@@ -114,4 +115,3 @@ async def main() -> None:
114
115
  if __name__ == "__main__":
115
116
  asyncio.run(main())
116
117
 
117
-
@@ -20,12 +20,17 @@ from examples.warming_up_to_rl.export_trace_sft import (
20
20
  parse_event_filters,
21
21
  write_jsonl,
22
22
  )
23
+ from synth_ai.tracing_v3.constants import TRACE_DB_DIR, canonical_trace_db_name
23
24
 
24
25
 
25
26
  def main() -> None:
26
27
  p = argparse.ArgumentParser(description=__doc__)
27
- p.add_argument("--db", type=Path, default=Path("traces/v3/synth_ai.db"))
28
- p.add_argument("--output", type=Path, default=Path("examples/sft/ft_data/crafter_traces.jsonl"))
28
+ p.add_argument(
29
+ "--db",
30
+ type=Path,
31
+ default=TRACE_DB_DIR / canonical_trace_db_name(),
32
+ )
33
+ p.add_argument("--output", type=Path, default=Path("examples/sft/ft_data/crafter_sft.jsonl"))
29
34
  p.add_argument("--model", action="append", dest="models")
30
35
  p.add_argument("--provider", action="append", dest="providers")
31
36
  p.add_argument("--min-unique", type=int, default=0)
@@ -113,5 +118,3 @@ def main() -> None:
113
118
 
114
119
  if __name__ == "__main__":
115
120
  main()
116
-
117
-
@@ -42,6 +42,7 @@ def _build_ops(max_llm_calls: int) -> list[str]:
42
42
 
43
43
 
44
44
  def _build_request(seed: int, run_id: str, model: str, inference_url: str, api_key: str, *, max_llm_calls: int, return_trace: bool) -> RolloutRequest:
45
+ from synth_ai.task.contracts import RolloutMode
45
46
  policy_cfg: dict[str, Any] = {
46
47
  "model": model,
47
48
  "inference_url": inference_url,
@@ -54,6 +55,7 @@ def _build_request(seed: int, run_id: str, model: str, inference_url: str, api_k
54
55
  policy=RolloutPolicySpec(policy_name="crafter-react", config=policy_cfg),
55
56
  ops=_build_ops(max_llm_calls),
56
57
  record=record,
58
+ mode=RolloutMode.EVAL,
57
59
  )
58
60
 
59
61
 
@@ -38,7 +38,7 @@ uvx synth-ai serve swe-mini \
38
38
  --port 8020 \
39
39
  --env-file .env \
40
40
  --trace traces/v3 \
41
- --trace-db traces/v3/synth_ai.db
41
+ --trace-db traces/v3/task_app_traces_<timestamp>.db
42
42
  ```
43
43
 
44
44
  This avoids interactive prompts (useful for CI) and loads `ENVIRONMENT_API_KEY`, `OPENAI_API_KEY`, etc. from `.env`.
@@ -484,6 +484,7 @@ def build_config() -> TaskAppConfig:
484
484
 
485
485
  legacy_request = LegacyRolloutRequest(
486
486
  run_id=request.run_id,
487
+ mode=request.mode, # Preserve mode for nested requests
487
488
  env=LegacyRolloutEnvSpec(
488
489
  env_id=request.env.env_id,
489
490
  env_name=env_spec.env_name or "swe-mini",
@@ -555,7 +556,6 @@ register_task_app(
555
556
  description="mini-swe-agent task app with rollout + proxy endpoints",
556
557
  config_factory=build_config,
557
558
  aliases=("mini-swe", "swe-mini-task"),
558
- env_files=(str(REPO_ROOT / "backend" / ".env.dev"),),
559
559
  modal=ModalDeploymentConfig(
560
560
  app_name="swe-mini-task-app",
561
561
  python_version="3.11",
@@ -114,23 +114,11 @@ if __name__ == "__main__":
114
114
  parser.add_argument("--host", default="0.0.0.0")
115
115
  parser.add_argument("--port", type=int, default=8020)
116
116
  parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
117
- parser.add_argument(
118
- "--env-file",
119
- action="append",
120
- default=[],
121
- help="Additional .env files to load before startup",
122
- )
123
117
  args = parser.parse_args()
124
118
 
125
- default_env = Path(__file__).resolve().parents[4] / "backend" / ".env.dev"
126
- env_files = [str(default_env)] if default_env.exists() else []
127
- env_files.extend(args.env_file or [])
128
-
129
119
  run_task_app(
130
120
  build_task_app_config,
131
121
  host=args.host,
132
122
  port=args.port,
133
123
  reload=args.reload,
134
- env_files=env_files,
135
124
  )
136
-
@@ -776,7 +776,7 @@ class MiniSweEnvironmentWrapper:
776
776
  or os.getenv("SWE_REX_MODAL_SANDBOX_KWARGS")
777
777
  )
778
778
  modal_kwargs: dict[str, Any] = {}
779
- if isinstance(modal_kwargs_raw, (dict, list)):
779
+ if isinstance(modal_kwargs_raw, dict | list):
780
780
  modal_kwargs = dict(modal_kwargs_raw or {})
781
781
  elif isinstance(modal_kwargs_raw, str) and modal_kwargs_raw.strip():
782
782
  try:
@@ -841,9 +841,9 @@ class MiniSweEnvironmentWrapper:
841
841
  instance_image_tag=instance_image_tag,
842
842
  env_image_tag=env_image_tag,
843
843
  model_name=model_name,
844
- Command=Command,
845
- WriteFileRequest=WriteFileRequest,
846
- ReadFileRequest=ReadFileRequest,
844
+ command_cls=Command,
845
+ write_file_request_cls=WriteFileRequest,
846
+ read_file_request_cls=ReadFileRequest,
847
847
  )
848
848
  try:
849
849
  return self._run_coroutine_blocking(coro)
@@ -867,9 +867,9 @@ class MiniSweEnvironmentWrapper:
867
867
  instance_image_tag: str,
868
868
  env_image_tag: str,
869
869
  model_name: str,
870
- Command,
871
- WriteFileRequest,
872
- ReadFileRequest,
870
+ command_cls,
871
+ write_file_request_cls,
872
+ read_file_request_cls,
873
873
  ) -> dict[str, Any]:
874
874
  deployment = deployment_config.get_deployment()
875
875
  await deployment.start()
@@ -880,7 +880,7 @@ class MiniSweEnvironmentWrapper:
880
880
 
881
881
  # Ensure working directory exists.
882
882
  mkdir_resp = await runtime.execute(
883
- Command(command=["mkdir", "-p", remote_root], timeout=60, shell=False)
883
+ command_cls(command=["mkdir", "-p", remote_root], timeout=60, shell=False)
884
884
  )
885
885
  if mkdir_resp.exit_code not in (0, None):
886
886
  logger.warning("Failed to ensure remote directory %s (exit=%s)", remote_root, mkdir_resp.exit_code)
@@ -888,8 +888,8 @@ class MiniSweEnvironmentWrapper:
888
888
  # Upload dataset & predictions.
889
889
  dataset_blob = json.dumps([instance], ensure_ascii=False)
890
890
  predictions_blob = json.dumps({instance_id: prediction}, ensure_ascii=False)
891
- await runtime.write_file(WriteFileRequest(path=dataset_remote_path, content=dataset_blob))
892
- await runtime.write_file(WriteFileRequest(path=predictions_remote_path, content=predictions_blob))
891
+ await runtime.write_file(write_file_request_cls(path=dataset_remote_path, content=dataset_blob))
892
+ await runtime.write_file(write_file_request_cls(path=predictions_remote_path, content=predictions_blob))
893
893
 
894
894
  eval_cmd = [
895
895
  "python",
@@ -921,7 +921,7 @@ class MiniSweEnvironmentWrapper:
921
921
 
922
922
  command_timeout = max(eval_timeout + 900, 1200)
923
923
  response = await runtime.execute(
924
- Command(
924
+ command_cls(
925
925
  command=eval_cmd,
926
926
  timeout=command_timeout,
927
927
  cwd=remote_root,
@@ -945,7 +945,7 @@ class MiniSweEnvironmentWrapper:
945
945
  for filename in ("report.json", "test_output.txt", "run_instance.log", "patch.diff"):
946
946
  remote_path = f"{remote_log_dir}/{filename}"
947
947
  try:
948
- content = await runtime.read_file(ReadFileRequest(path=remote_path))
948
+ content = await runtime.read_file(read_file_request_cls(path=remote_path))
949
949
  except Exception:
950
950
  continue
951
951
  if getattr(content, "content", None):
@@ -1073,7 +1073,7 @@ class MiniSweEnvironmentWrapper:
1073
1073
  return value
1074
1074
  if isinstance(value, str):
1075
1075
  return value.strip().lower() in {"1", "true", "yes", "on"}
1076
- if isinstance(value, (int, float)):
1076
+ if isinstance(value, int | float):
1077
1077
  return bool(value)
1078
1078
  return False # pragma: no cover - defensive default
1079
1079
 
@@ -343,8 +343,6 @@ async def step_policy(
343
343
  inf_req = meta["inference_request"]
344
344
  msgs = inf_req["messages"]
345
345
  model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
346
- system_messages: list[str] = []
347
- user_messages: list[str] = []
348
346
  if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
349
347
  sys_text = msgs[0]["content"]
350
348
  policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
@@ -12,6 +12,7 @@ from fastapi import APIRouter, HTTPException, Request, status
12
12
  from pydantic import BaseModel
13
13
  from synth_ai.lm.vendors.base import BaseLMResponse
14
14
  from synth_ai.task.tracing_utils import unique_sft_path
15
+ from synth_ai.task.contracts import RolloutMode
15
16
  from synth_ai.tracing_v3.abstractions import EnvironmentEvent, LMCAISEvent, TimeRecord
16
17
  from synth_ai.tracing_v3.llm_call_record_helpers import create_llm_call_record_from_response
17
18
  from synth_ai.tracing_v3.session_tracer import SessionTracer
@@ -120,6 +121,7 @@ class RolloutRequest(BaseModel):
120
121
  # Optional run/session context
121
122
  training_session_id: str | None = None
122
123
  synth_base_url: str | None = None
124
+ mode: RolloutMode # Required: explicit RL vs EVAL mode
123
125
 
124
126
 
125
127
  class RolloutStep(BaseModel):
@@ -886,14 +888,6 @@ async def execute_rollout(
886
888
  logger.debug(f"TRACER_FACTORY_FAIL: {exc}")
887
889
  tracing_context = RolloutTracingContext(tracer_instance, request, req)
888
890
  await tracing_context.start_session()
889
- # Print whether tracing is active for this rollout
890
- try:
891
- print(
892
- f"[rollout] tracing enabled={bool(tracing_context.enabled)} run_id={request.run_id}",
893
- flush=True,
894
- )
895
- except Exception:
896
- pass
897
891
 
898
892
  # Register run
899
893
  registry.register_run(request.run_id)