synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. More details are available on the original release page (the link was not preserved in this text export).

Files changed (354)
  1. examples/README.md +1 -0
  2. examples/analyze_semantic_words.sh +2 -2
  3. examples/blog_posts/pokemon_vl/README.md +98 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  5. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  6. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  7. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  8. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  9. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  12. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  13. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  15. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  16. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  17. examples/multi_step/SFT_README.md +147 -0
  18. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
  20. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  21. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  22. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  23. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  24. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  25. examples/multi_step/convert_traces_to_sft.py +84 -0
  26. examples/multi_step/run_sft_qwen30b.sh +45 -0
  27. examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
  28. examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
  29. examples/qwen_coder/configs/coder_lora_small.toml +1 -2
  30. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  31. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  32. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  33. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  34. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  35. examples/qwen_vl/QUICKSTART.md +327 -0
  36. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  37. examples/qwen_vl/README.md +152 -0
  38. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  39. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  40. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  41. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  42. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  43. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  44. examples/qwen_vl/__init__.py +2 -0
  45. examples/qwen_vl/collect_data_via_cli.md +415 -0
  46. examples/qwen_vl/collect_vision_traces.py +368 -0
  47. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  48. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  49. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  50. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  51. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  52. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  53. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  54. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  55. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  56. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  57. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  58. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  59. examples/qwen_vl/run_vision_comparison.sh +61 -0
  60. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  61. examples/qwen_vl/test_image_validation.py +201 -0
  62. examples/qwen_vl/test_sft_vision_data.py +110 -0
  63. examples/rl/README.md +6 -6
  64. examples/rl/configs/eval_base_qwen.toml +17 -0
  65. examples/rl/configs/eval_rl_qwen.toml +13 -0
  66. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  67. examples/rl/configs/rl_from_base_qwen17.toml +79 -0
  68. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  69. examples/rl/run_eval.py +436 -0
  70. examples/rl/run_rl_and_save.py +111 -0
  71. examples/rl/task_app/README.md +21 -0
  72. examples/rl/task_app/math_single_step.py +990 -0
  73. examples/rl/task_app/math_task_app.py +111 -0
  74. examples/run_crafter_demo.sh +2 -2
  75. examples/sft/README.md +6 -6
  76. examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
  77. examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
  78. examples/sft/evaluate.py +2 -4
  79. examples/sft/export_dataset.py +7 -4
  80. examples/swe/task_app/README.md +33 -3
  81. examples/swe/task_app/grpo_swe_mini.py +4 -1
  82. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  83. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  84. examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
  85. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  86. examples/swe/task_app/hosted/policy_routes.py +0 -2
  87. examples/swe/task_app/hosted/rollout.py +0 -8
  88. examples/swe/task_app/morph_backend.py +178 -0
  89. examples/task_apps/crafter/task_app/README.md +1 -1
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
  91. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
  97. examples/task_apps/enron/__init__.py +1 -0
  98. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  99. examples/task_apps/math/README.md +1 -2
  100. examples/task_apps/pokemon_red/README.md +3 -4
  101. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  102. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  103. examples/task_apps/pokemon_red/task_app.py +36 -5
  104. examples/task_apps/sokoban/README.md +2 -3
  105. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  106. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  107. examples/vlm/README.md +3 -3
  108. examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
  109. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  110. examples/vlm/filter_image_rows.py +1 -1
  111. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  112. examples/warming_up_to_rl/_utils.py +92 -0
  113. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  114. examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
  115. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  116. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  117. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  118. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  119. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  120. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  121. examples/warming_up_to_rl/readme.md +63 -132
  122. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  123. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  124. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  125. examples/warming_up_to_rl/task_app/README.md +42 -0
  126. examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
  127. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  128. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  129. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  130. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  131. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  132. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  133. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  134. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  135. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  136. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  137. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  138. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  139. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  147. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  148. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  152. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  153. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  154. synth_ai/__init__.py +44 -30
  155. synth_ai/_utils/__init__.py +47 -0
  156. synth_ai/_utils/base_url.py +10 -0
  157. synth_ai/_utils/http.py +10 -0
  158. synth_ai/_utils/prompts.py +10 -0
  159. synth_ai/_utils/task_app_state.py +12 -0
  160. synth_ai/_utils/user_config.py +10 -0
  161. synth_ai/api/models/supported.py +144 -7
  162. synth_ai/api/train/__init__.py +13 -1
  163. synth_ai/api/train/builders.py +9 -3
  164. synth_ai/api/train/cli.py +155 -17
  165. synth_ai/api/train/config_finder.py +18 -11
  166. synth_ai/api/train/configs/__init__.py +8 -1
  167. synth_ai/api/train/configs/rl.py +32 -7
  168. synth_ai/api/train/configs/sft.py +6 -2
  169. synth_ai/api/train/configs/shared.py +59 -2
  170. synth_ai/api/train/env_resolver.py +13 -10
  171. synth_ai/auth/credentials.py +119 -0
  172. synth_ai/cli/__init__.py +61 -69
  173. synth_ai/cli/_modal_wrapper.py +7 -5
  174. synth_ai/cli/_typer_patch.py +0 -2
  175. synth_ai/cli/_validate_task_app.py +22 -4
  176. synth_ai/cli/commands/__init__.py +17 -0
  177. synth_ai/cli/commands/demo/__init__.py +6 -0
  178. synth_ai/cli/commands/demo/core.py +163 -0
  179. synth_ai/cli/commands/deploy/__init__.py +23 -0
  180. synth_ai/cli/commands/deploy/core.py +614 -0
  181. synth_ai/cli/commands/deploy/errors.py +72 -0
  182. synth_ai/cli/commands/deploy/validation.py +11 -0
  183. synth_ai/cli/commands/eval/__init__.py +19 -0
  184. synth_ai/cli/commands/eval/core.py +1109 -0
  185. synth_ai/cli/commands/eval/errors.py +81 -0
  186. synth_ai/cli/commands/eval/validation.py +133 -0
  187. synth_ai/cli/commands/filter/__init__.py +12 -0
  188. synth_ai/cli/commands/filter/core.py +388 -0
  189. synth_ai/cli/commands/filter/errors.py +55 -0
  190. synth_ai/cli/commands/filter/validation.py +77 -0
  191. synth_ai/cli/commands/help/__init__.py +177 -0
  192. synth_ai/cli/commands/help/core.py +73 -0
  193. synth_ai/cli/commands/status/__init__.py +64 -0
  194. synth_ai/cli/commands/status/client.py +192 -0
  195. synth_ai/cli/commands/status/config.py +92 -0
  196. synth_ai/cli/commands/status/errors.py +20 -0
  197. synth_ai/cli/commands/status/formatters.py +164 -0
  198. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  199. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  200. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  201. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  202. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  203. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  204. synth_ai/cli/commands/status/utils.py +114 -0
  205. synth_ai/cli/commands/train/__init__.py +53 -0
  206. synth_ai/cli/commands/train/core.py +21 -0
  207. synth_ai/cli/commands/train/errors.py +117 -0
  208. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  209. synth_ai/cli/commands/train/judge_validation.py +304 -0
  210. synth_ai/cli/commands/train/validation.py +443 -0
  211. synth_ai/cli/demo.py +2 -162
  212. synth_ai/cli/deploy/__init__.py +28 -0
  213. synth_ai/cli/deploy/core.py +5 -0
  214. synth_ai/cli/deploy/errors.py +23 -0
  215. synth_ai/cli/deploy/validation.py +5 -0
  216. synth_ai/cli/eval/__init__.py +36 -0
  217. synth_ai/cli/eval/core.py +5 -0
  218. synth_ai/cli/eval/errors.py +31 -0
  219. synth_ai/cli/eval/validation.py +5 -0
  220. synth_ai/cli/filter/__init__.py +28 -0
  221. synth_ai/cli/filter/core.py +5 -0
  222. synth_ai/cli/filter/errors.py +23 -0
  223. synth_ai/cli/filter/validation.py +5 -0
  224. synth_ai/cli/legacy_root_backup.py +3 -1
  225. synth_ai/cli/lib/__init__.py +10 -0
  226. synth_ai/cli/lib/task_app_discovery.py +7 -0
  227. synth_ai/cli/lib/task_app_env.py +518 -0
  228. synth_ai/cli/modal_serve/__init__.py +12 -0
  229. synth_ai/cli/modal_serve/core.py +14 -0
  230. synth_ai/cli/modal_serve/errors.py +8 -0
  231. synth_ai/cli/modal_serve/validation.py +11 -0
  232. synth_ai/cli/recent.py +2 -1
  233. synth_ai/cli/serve/__init__.py +12 -0
  234. synth_ai/cli/serve/core.py +14 -0
  235. synth_ai/cli/serve/errors.py +8 -0
  236. synth_ai/cli/serve/validation.py +11 -0
  237. synth_ai/cli/setup.py +21 -0
  238. synth_ai/cli/status.py +7 -126
  239. synth_ai/cli/task_app_deploy.py +7 -0
  240. synth_ai/cli/task_app_list.py +25 -0
  241. synth_ai/cli/task_app_modal_serve.py +11 -0
  242. synth_ai/cli/task_app_serve.py +11 -0
  243. synth_ai/cli/task_apps.py +110 -1499
  244. synth_ai/cli/traces.py +1 -1
  245. synth_ai/cli/train/__init__.py +12 -0
  246. synth_ai/cli/train/core.py +21 -0
  247. synth_ai/cli/train/errors.py +8 -0
  248. synth_ai/cli/train/validation.py +24 -0
  249. synth_ai/cli/train.py +5 -0
  250. synth_ai/cli/turso.py +1 -1
  251. synth_ai/cli/watch.py +1 -1
  252. synth_ai/demos/__init__.py +10 -0
  253. synth_ai/demos/core/__init__.py +28 -1
  254. synth_ai/demos/crafter/__init__.py +1 -0
  255. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  256. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  257. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  258. synth_ai/demos/demo_registry.py +176 -0
  259. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  260. synth_ai/demos/math/__init__.py +1 -0
  261. synth_ai/demos/math/_common.py +16 -0
  262. synth_ai/demos/math/app.py +38 -0
  263. synth_ai/demos/math/config.toml +76 -0
  264. synth_ai/demos/math/deploy_modal.py +54 -0
  265. synth_ai/demos/math/modal_task_app.py +702 -0
  266. synth_ai/demos/math/task_app_entry.py +51 -0
  267. synth_ai/environments/environment/core.py +7 -1
  268. synth_ai/environments/examples/bandit/engine.py +0 -1
  269. synth_ai/environments/examples/bandit/environment.py +0 -1
  270. synth_ai/environments/examples/red/engine.py +33 -12
  271. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  272. synth_ai/environments/examples/red/environment.py +26 -0
  273. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  274. synth_ai/environments/examples/wordle/environment.py +0 -1
  275. synth_ai/evals/base.py +16 -5
  276. synth_ai/evals/client.py +1 -1
  277. synth_ai/http.py +8 -22
  278. synth_ai/inference/client.py +1 -1
  279. synth_ai/judge_schemas.py +4 -5
  280. synth_ai/learning/client.py +1 -1
  281. synth_ai/learning/health.py +1 -1
  282. synth_ai/learning/jobs.py +1 -1
  283. synth_ai/learning/rl/client.py +4 -2
  284. synth_ai/learning/rl/env_keys.py +1 -1
  285. synth_ai/learning/rl/secrets.py +1 -1
  286. synth_ai/learning/sft/client.py +1 -1
  287. synth_ai/learning/sft/data.py +407 -4
  288. synth_ai/learning/validators.py +4 -1
  289. synth_ai/streaming/__init__.py +29 -0
  290. synth_ai/streaming/config.py +94 -0
  291. synth_ai/streaming/handlers.py +469 -0
  292. synth_ai/streaming/streamer.py +301 -0
  293. synth_ai/streaming/types.py +95 -0
  294. synth_ai/task/apps/__init__.py +4 -2
  295. synth_ai/task/config.py +6 -4
  296. synth_ai/task/rubrics/__init__.py +1 -2
  297. synth_ai/task/rubrics/loaders.py +14 -10
  298. synth_ai/task/rubrics.py +219 -0
  299. synth_ai/task/trace_correlation_helpers.py +24 -11
  300. synth_ai/task/tracing_utils.py +14 -3
  301. synth_ai/task/validators.py +0 -1
  302. synth_ai/tracing_v3/abstractions.py +3 -3
  303. synth_ai/tracing_v3/config.py +15 -13
  304. synth_ai/tracing_v3/constants.py +21 -0
  305. synth_ai/tracing_v3/db_config.py +3 -1
  306. synth_ai/tracing_v3/decorators.py +10 -7
  307. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  308. synth_ai/tracing_v3/migration_helper.py +1 -2
  309. synth_ai/tracing_v3/session_tracer.py +7 -7
  310. synth_ai/tracing_v3/storage/base.py +29 -29
  311. synth_ai/tracing_v3/storage/config.py +3 -3
  312. synth_ai/tracing_v3/turso/daemon.py +8 -9
  313. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  314. synth_ai/tracing_v3/utils.py +2 -2
  315. synth_ai/utils/__init__.py +101 -0
  316. synth_ai/utils/base_url.py +94 -0
  317. synth_ai/utils/cli.py +131 -0
  318. synth_ai/utils/env.py +294 -0
  319. synth_ai/utils/http.py +172 -0
  320. synth_ai/utils/modal.py +308 -0
  321. synth_ai/utils/process.py +212 -0
  322. synth_ai/utils/prompts.py +39 -0
  323. synth_ai/utils/sqld.py +122 -0
  324. synth_ai/utils/task_app_discovery.py +882 -0
  325. synth_ai/utils/task_app_env.py +186 -0
  326. synth_ai/utils/task_app_state.py +318 -0
  327. synth_ai/utils/user_config.py +137 -0
  328. synth_ai/v0/config/__init__.py +1 -5
  329. synth_ai/v0/config/base_url.py +1 -7
  330. synth_ai/v0/tracing/config.py +1 -1
  331. synth_ai/v0/tracing/decorators.py +1 -1
  332. synth_ai/v0/tracing/upload.py +1 -1
  333. synth_ai/v0/tracing_v1/config.py +1 -1
  334. synth_ai/v0/tracing_v1/decorators.py +1 -1
  335. synth_ai/v0/tracing_v1/upload.py +1 -1
  336. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
  337. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
  338. synth_ai/cli/man.py +0 -106
  339. synth_ai/cli/tui.py +0 -57
  340. synth_ai/compound/cais.py +0 -0
  341. synth_ai/core/experiment.py +0 -13
  342. synth_ai/core/system.py +0 -15
  343. synth_ai/demo_registry.py +0 -295
  344. synth_ai/handshake.py +0 -109
  345. synth_ai/tui/__init__.py +0 -5
  346. synth_ai/tui/__main__.py +0 -13
  347. synth_ai/tui/cli/__init__.py +0 -1
  348. synth_ai/tui/cli/query_experiments.py +0 -164
  349. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  350. synth_ai/tui/dashboard.py +0 -906
  351. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  354. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,111 @@
1
+ """Legacy entrypoint for the math single-step task app."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ from pathlib import Path
7
+
8
+ from fastapi.exceptions import RequestValidationError
9
+ from fastapi.responses import JSONResponse
10
+ from starlette.requests import Request
11
+ from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
12
+ from synth_ai.task.server import create_task_app, run_task_app
13
+
14
+ from .math_single_step import build_config
15
+
16
+
17
+ def fastapi_app():
18
+ """Return a FastAPI application for hosting the math task app."""
19
+
20
+ app = create_task_app(build_config())
21
+
22
+ # Replace default health endpoints with auth-tolerant handlers.
23
+ filtered_routes = []
24
+ for route in app.router.routes:
25
+ path = getattr(route, "path", None)
26
+ methods = getattr(route, "methods", set()) or set()
27
+ if path in {"/health", "/health/rollout"} and "GET" in methods:
28
+ continue
29
+ filtered_routes.append(route)
30
+ app.router.routes = filtered_routes
31
+
32
+ def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
33
+ if not env_key:
34
+ return None
35
+ prefix = env_key[: max(1, len(env_key) // 2)]
36
+ print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
37
+ return prefix
38
+
39
+ @app.get("/health")
40
+ async def health(request: Request):
41
+ env_key = normalize_environment_api_key()
42
+ if not env_key:
43
+ return JSONResponse(
44
+ status_code=503,
45
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
46
+ )
47
+ if not is_api_key_header_authorized(request):
48
+ prefix = _log_env_key_prefix("health", env_key)
49
+ content = {"status": "healthy", "authorized": False}
50
+ if prefix:
51
+ content["expected_api_key_prefix"] = prefix
52
+ return JSONResponse(status_code=200, content=content)
53
+ return {"status": "healthy", "authorized": True}
54
+
55
+ @app.get("/health/rollout")
56
+ async def health_rollout(request: Request):
57
+ env_key = normalize_environment_api_key()
58
+ if not env_key:
59
+ return JSONResponse(
60
+ status_code=503,
61
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
62
+ )
63
+ if not is_api_key_header_authorized(request):
64
+ prefix = _log_env_key_prefix("health/rollout", env_key)
65
+ content = {"status": "healthy", "authorized": False}
66
+ if prefix:
67
+ content["expected_api_key_prefix"] = prefix
68
+ return JSONResponse(status_code=200, content=content)
69
+ return {"ok": True, "authorized": True}
70
+
71
+ @app.exception_handler(RequestValidationError)
72
+ async def _on_validation_error(request: Request, exc: RequestValidationError):
73
+ try:
74
+ hdr = request.headers
75
+ snapshot = {
76
+ "path": str(request.url.path),
77
+ "have_x_api_key": bool(hdr.get("x-api-key")),
78
+ "have_x_api_keys": bool(hdr.get("x-api-keys")),
79
+ "have_authorization": bool(hdr.get("authorization")),
80
+ "errors": exc.errors()[:5],
81
+ }
82
+ print("[422] validation", snapshot, flush=True)
83
+ except Exception:
84
+ pass
85
+ return JSONResponse(
86
+ status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
87
+ )
88
+
89
+ return app
90
+
91
+
92
+ if __name__ == "__main__":
93
+ parser = argparse.ArgumentParser(description="Run the math single-step task app locally")
94
+ parser.add_argument("--host", default="0.0.0.0")
95
+ parser.add_argument("--port", type=int, default=8101)
96
+ parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
97
+ parser.add_argument(
98
+ "--env-file",
99
+ action="append",
100
+ default=[],
101
+ help="Path to .env file to load (can be specified multiple times)",
102
+ )
103
+ args = parser.parse_args()
104
+
105
+ run_task_app(
106
+ build_config,
107
+ host=args.host,
108
+ port=args.port,
109
+ reload=args.reload,
110
+ env_files=args.env_file or [],
111
+ )
@@ -4,7 +4,7 @@
4
4
  # This script demonstrates a reactive agent in the Crafter environment
5
5
 
6
6
  echo "🚀 Starting Crafter agent demo with Gemini 1.5 Flash..."
7
- echo "Make sure the synth-ai service is running: uvx synth-ai serve"
7
+ echo "Make sure the synth-ai service is running: uvx synth-ai deploy --runtime uvicorn"
8
8
  echo ""
9
9
 
10
- uv run python -m synth_ai.environments.examples.crafter_classic.agent_demos.test_crafter_react_agent --model gemini-1.5-flash
10
+ uv run python -m synth_ai.environments.examples.crafter_classic.agent_demos.test_crafter_react_agent --model gemini-1.5-flash
examples/sft/README.md CHANGED
@@ -25,9 +25,9 @@ You can generate traces with the Crafter task app and then export them to SFT JS
25
25
 
26
26
  ```bash
27
27
  # Serve the task app locally with tracing enabled (example)
28
- uvx synth-ai serve grpo-crafter \
28
+ uvx synth-ai deploy --runtime uvicorn grpo-crafter \
29
29
  --trace traces/v3 \
30
- --trace-db traces/v3/synth_ai.db \
30
+ --trace-db traces/v3/task_app_traces_<timestamp>.db \
31
31
  --port 8001
32
32
 
33
33
  # Or run traced local rollouts to accumulate data
@@ -36,9 +36,9 @@ uv run python examples/warming_up_to_rl/run_local_rollout_traced.py \
36
36
 
37
37
  # Export SFT dataset from the trace DB
38
38
  uv run python examples/warming_up_to_rl/export_trace_sft.py \
39
- --db traces/v3/synth_ai.db \
39
+ --db traces/v3/task_app_traces_<timestamp>.db \
40
40
  --min-unique 0 \
41
- --output examples/sft/ft_data/crafter_traces.jsonl
41
+ --output examples/sft/ft_data/crafter_sft.jsonl
42
42
  ```
43
43
 
44
44
  Notes:
@@ -56,7 +56,7 @@ Use the standard CLI. Do not use a custom Python finetuning script. Point the CL
56
56
  uvx synth-ai train \
57
57
  --type sft \
58
58
  --config examples/sft/configs/crafter_lora_qwen0p6b.toml \
59
- --dataset examples/sft/ft_data/crafter_traces.jsonl \
59
+ --dataset examples/sft/ft_data/crafter_sft.jsonl \
60
60
  --env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
61
61
  ```
62
62
 
@@ -76,7 +76,7 @@ Full finetuning updates all weights and uses a near-identical CLI flow with the
76
76
  uvx synth-ai train \
77
77
  --type sft \
78
78
  --config examples/sft/configs/crafter_fft_qwen0p6b.toml \
79
- --dataset examples/sft/ft_data/crafter_traces.jsonl \
79
+ --dataset examples/sft/ft_data/crafter_sft.jsonl \
80
80
  --env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
81
81
  ```
82
82
 
@@ -1,7 +1,12 @@
1
+ [algorithm]
2
+ type = "offline"
3
+ method = "sft"
4
+ variety = "fft"
5
+
1
6
  [job]
2
7
  model = "Qwen/Qwen3-0.6B"
3
8
  # Prefer passing --dataset at runtime for repeatability
4
- # data = "examples/sft/ft_data/crafter_traces.jsonl"
9
+ # data = "examples/sft/ft_data/crafter_sft.jsonl"
5
10
 
6
11
  [compute]
7
12
  gpu_type = "H100"
@@ -11,7 +16,7 @@ nodes = 1
11
16
  [data]
12
17
  topology = {}
13
18
  # Optional validation set if you have one locally
14
- # validation_path = "examples/sft/ft_data/crafter_traces.val.jsonl"
19
+ # validation_path = "examples/sft/ft_data/crafter_sft.val.jsonl"
15
20
 
16
21
  [training]
17
22
  mode = "sft_offline"
@@ -1,7 +1,12 @@
1
+ [algorithm]
2
+ type = "offline"
3
+ method = "sft"
4
+ variety = "qlora"
5
+
1
6
  [job]
2
7
  model = "Qwen/Qwen3-0.6B"
3
8
  # Optionally set here, but prefer passing --dataset at runtime
4
- # data = "examples/sft/ft_data/crafter_traces.jsonl"
9
+ # data = "examples/sft/ft_data/crafter_sft.jsonl"
5
10
 
6
11
  [compute]
7
12
  gpu_type = "H100"
@@ -12,7 +17,7 @@ nodes = 1
12
17
  # Forwarded into metadata.effective_config
13
18
  topology = {}
14
19
  # Optional validation set if you have one locally
15
- # validation_path = "examples/sft/ft_data/crafter_traces.val.jsonl"
20
+ # validation_path = "examples/sft/ft_data/crafter_sft.val.jsonl"
16
21
 
17
22
  [training]
18
23
  mode = "lora"
@@ -42,4 +47,3 @@ fsdp = false
42
47
  bf16 = true
43
48
  fp16 = false
44
49
  activation_checkpointing = true
45
-
examples/sft/evaluate.py CHANGED
@@ -11,6 +11,7 @@ from __future__ import annotations
11
11
  import argparse
12
12
  import asyncio
13
13
  import os
14
+ from contextlib import suppress
14
15
  from dataclasses import dataclass
15
16
  from typing import Any
16
17
 
@@ -104,10 +105,8 @@ async def main() -> None:
104
105
  for r in results:
105
106
  ers = r.get("episode_returns") or []
106
107
  if isinstance(ers, list) and ers:
107
- try:
108
+ with suppress(Exception):
108
109
  flat_returns.append(float(ers[0]))
109
- except Exception:
110
- pass
111
110
  if flat_returns:
112
111
  mean_ret = sum(flat_returns) / len(flat_returns)
113
112
  print(f"mean_return={mean_ret:.3f} over {len(flat_returns)} episodes")
@@ -116,4 +115,3 @@ async def main() -> None:
116
115
  if __name__ == "__main__":
117
116
  asyncio.run(main())
118
117
 
119
-
@@ -20,12 +20,17 @@ from examples.warming_up_to_rl.export_trace_sft import (
20
20
  parse_event_filters,
21
21
  write_jsonl,
22
22
  )
23
+ from synth_ai.tracing_v3.constants import TRACE_DB_DIR, canonical_trace_db_name
23
24
 
24
25
 
25
26
  def main() -> None:
26
27
  p = argparse.ArgumentParser(description=__doc__)
27
- p.add_argument("--db", type=Path, default=Path("traces/v3/synth_ai.db"))
28
- p.add_argument("--output", type=Path, default=Path("examples/sft/ft_data/crafter_traces.jsonl"))
28
+ p.add_argument(
29
+ "--db",
30
+ type=Path,
31
+ default=TRACE_DB_DIR / canonical_trace_db_name(),
32
+ )
33
+ p.add_argument("--output", type=Path, default=Path("examples/sft/ft_data/crafter_sft.jsonl"))
29
34
  p.add_argument("--model", action="append", dest="models")
30
35
  p.add_argument("--provider", action="append", dest="providers")
31
36
  p.add_argument("--min-unique", type=int, default=0)
@@ -113,5 +118,3 @@ def main() -> None:
113
118
 
114
119
  if __name__ == "__main__":
115
120
  main()
116
-
117
-
@@ -28,17 +28,17 @@ endpoints.
28
28
  ## Using the task app
29
29
 
30
30
  ```
31
- uvx synth-ai serve swe-mini --port 8020
31
+ uvx synth-ai deploy --runtime uvicorn swe-mini --port 8020
32
32
  ```
33
33
 
34
34
  ### Recommended: non-interactive serve + .env
35
35
 
36
36
  ```bash
37
- uvx synth-ai serve swe-mini \
37
+ uvx synth-ai deploy --runtime uvicorn swe-mini \
38
38
  --port 8020 \
39
39
  --env-file .env \
40
40
  --trace traces/v3 \
41
- --trace-db traces/v3/synth_ai.db
41
+ --trace-db traces/v3/task_app_traces_<timestamp>.db
42
42
  ```
43
43
 
44
44
  This avoids interactive prompts (useful for CI) and loads `ENVIRONMENT_API_KEY`, `OPENAI_API_KEY`, etc. from `.env`.
@@ -60,6 +60,36 @@ Execution is handled by mini-swe's environment classes. Configure execution via
60
60
  `SWE_MINI_ENVIRONMENT_CLASS` (`local`, `docker`, `singularity`, …) and pass
61
61
  additional keyword arguments with `SWE_MINI_ENVIRONMENT_KWARGS` (JSON).
62
62
 
63
+ ### Morph Cloud backend
64
+
65
+ The task app now ships with a Morph-powered environment class so you can run
66
+ mini-SWE rollouts in managed sandboxes. When `MORPH_API_KEY` is present the app
67
+ defaults to this backend automatically unless you override
68
+ `SWE_MINI_ENVIRONMENT_CLASS`.
69
+
70
+ 1. Install the optional dependencies: `pip install "synth-ai[swe]"`.
71
+ 2. Export your API key: `export MORPH_API_KEY=...`.
72
+ 3. Point the task app at Morph by setting:
73
+
74
+ ```bash
75
+ export SWE_MINI_ENVIRONMENT_CLASS=morph
76
+ export SWE_MINI_ENVIRONMENT_KWARGS='{
77
+ "snapshot_id": "snap_your_pre_baked_swebench_image",
78
+ "cwd": "/workspace/swebench",
79
+ "env": {"PIP_PROGRESS_BAR": "off"},
80
+ "metadata": {"project": "synth-ai", "task": "swe-mini"}
81
+ }'
82
+ ```
83
+
84
+ If you do not have a pre-built snapshot, provide `"image_id"` (defaults to
85
+ `morphvm-minimal`) along with resource hints (`"vcpus"`, `"memory_mb"`,
86
+ `"disk_mb"`). You can also set `SWE_MINI_MORPH_SNAPSHOT_ID` globally.
87
+
88
+ During cleanup the backend deletes the remote workspace and stops the Morph
89
+ instance automatically. All shell commands (including submissions) now execute
90
+ inside the Morph sandbox, enabling RL workflows that require persistent remote
91
+ compute.
92
+
63
93
  ### Tracing & SFT
64
94
 
65
95
  Tracing works the same as Crafter; pass `--trace` / `--trace-db` to the CLI or
@@ -404,6 +404,10 @@ def _ensure_env_has_task(
404
404
  if not instance_id:
405
405
  raise ValueError("mini-swe rollout request requires env.config.instance_id")
406
406
  config["task"] = dataset.get(instance_id)
407
+ env_cfg = dict(config.get("environment") or {})
408
+ if "environment_class" not in env_cfg and os.getenv("MORPH_API_KEY"):
409
+ env_cfg["environment_class"] = "morph"
410
+ config["environment"] = env_cfg
407
411
  return env_spec.model_copy(update={"config": config})
408
412
 
409
413
 
@@ -556,7 +560,6 @@ register_task_app(
556
560
  description="mini-swe-agent task app with rollout + proxy endpoints",
557
561
  config_factory=build_config,
558
562
  aliases=("mini-swe", "swe-mini-task"),
559
- env_files=(str(REPO_ROOT / "backend" / ".env.dev"),),
560
563
  modal=ModalDeploymentConfig(
561
564
  app_name="swe-mini-task-app",
562
565
  python_version="3.11",
@@ -114,23 +114,11 @@ if __name__ == "__main__":
114
114
  parser.add_argument("--host", default="0.0.0.0")
115
115
  parser.add_argument("--port", type=int, default=8020)
116
116
  parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
117
- parser.add_argument(
118
- "--env-file",
119
- action="append",
120
- default=[],
121
- help="Additional .env files to load before startup",
122
- )
123
117
  args = parser.parse_args()
124
118
 
125
- default_env = Path(__file__).resolve().parents[4] / "backend" / ".env.dev"
126
- env_files = [str(default_env)] if default_env.exists() else []
127
- env_files.extend(args.env_file or [])
128
-
129
119
  run_task_app(
130
120
  build_task_app_config,
131
121
  host=args.host,
132
122
  port=args.port,
133
123
  reload=args.reload,
134
- env_files=env_files,
135
124
  )
136
-
@@ -46,7 +46,7 @@ class CrafterReActAgent:
46
46
  "- Always return a single tool call: interact_many({actions: [...]})\n"
47
47
  "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
48
48
  "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
49
- "- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
49
+ "\n"
50
50
  "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
51
51
  "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
52
52
  "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
@@ -18,6 +18,7 @@ from typing import Any
18
18
  from minisweagent.environments import get_environment
19
19
  from synth_ai.environments.environment.tools import EnvToolCall
20
20
 
21
+ from examples.swe.task_app.morph_backend import MorphSandboxBackend
21
22
  from .shared import summarise_history
22
23
  from .tools import TOOLS_SCHEMA
23
24
 
@@ -25,8 +26,9 @@ logger = logging.getLogger(__name__)
25
26
 
26
27
 
27
28
  def _environment_type_from_config(config: dict[str, Any]) -> str:
29
+ default = "morph" if os.getenv("MORPH_API_KEY") else "local"
28
30
  value = (config or {}).get("environment_class") or os.getenv(
29
- "SWE_MINI_ENVIRONMENT_CLASS", "local"
31
+ "SWE_MINI_ENVIRONMENT_CLASS", default
30
32
  )
31
33
  return str(value).strip() or "local"
32
34
 
@@ -91,6 +93,7 @@ class MiniSweEnvironmentWrapper:
91
93
  self._local_workspace_dir: Path | None = None
92
94
  self._remote_workspace: str | None = None
93
95
  self._cleanup_workspace = False
96
+ self._using_morph_backend = False
94
97
 
95
98
  if self.environment_type == "local":
96
99
  workspace = self._prepare_local_workspace(kwargs)
@@ -117,11 +120,11 @@ class MiniSweEnvironmentWrapper:
117
120
  timeout = self.env_config.get("timeout")
118
121
  if timeout and "timeout" not in kwargs:
119
122
  kwargs["timeout"] = int(timeout)
120
- if self.repo_url and "image" not in kwargs:
123
+ if self.environment_type in {"docker", "bubblewrap"} and self.repo_url and "image" not in kwargs:
121
124
  image = self.metadata.get("image_name") or os.getenv("SWE_MINI_DOCKER_IMAGE")
122
125
  if image:
123
126
  kwargs["image"] = image
124
- if self.environment_type in {"docker", "bubblewrap"}:
127
+ if self.environment_type in {"docker", "bubblewrap", "morph"}:
125
128
  remote_env = dict(kwargs.get("env") or {})
126
129
  remote_env.setdefault("GIT_TERMINAL_PROMPT", "0")
127
130
  kwargs["env"] = remote_env
@@ -131,13 +134,34 @@ class MiniSweEnvironmentWrapper:
131
134
  self.environment_type,
132
135
  kwargs,
133
136
  )
134
- self.env = get_environment(
135
- {
136
- "environment_class": self.environment_type,
137
- **kwargs,
138
- },
139
- default_type="local",
140
- )
137
+ if self.environment_type == "morph":
138
+ morph_kwargs = dict(kwargs)
139
+ image_value = morph_kwargs.pop("image", None)
140
+ if image_value and "image_id" not in morph_kwargs:
141
+ morph_kwargs["image_id"] = image_value
142
+ timeout_value = morph_kwargs.pop("timeout", None)
143
+ if timeout_value is not None and "startup_timeout" not in morph_kwargs:
144
+ try:
145
+ morph_kwargs["startup_timeout"] = int(timeout_value)
146
+ except Exception:
147
+ logger.warning("Invalid timeout value for morph backend: %r", timeout_value)
148
+ metadata_override = morph_kwargs.pop("metadata", {}) or {}
149
+ metadata_payload = {
150
+ "app": "swe-mini",
151
+ "instance_id": self.instance_id,
152
+ }
153
+ metadata_payload.update({str(k): str(v) for k, v in dict(metadata_override).items()})
154
+ morph_kwargs["metadata"] = metadata_payload
155
+ self.env = MorphSandboxBackend(**morph_kwargs)
156
+ self._using_morph_backend = True
157
+ else:
158
+ self.env = get_environment(
159
+ {
160
+ "environment_class": self.environment_type,
161
+ **kwargs,
162
+ },
163
+ default_type="local",
164
+ )
141
165
 
142
166
  if self.environment_type != "local":
143
167
  self._bootstrap_remote_workspace()
@@ -181,6 +205,9 @@ class MiniSweEnvironmentWrapper:
181
205
  with contextlib.suppress(Exception):
182
206
  self.env.execute(f"rm -rf {shlex.quote(self._remote_workspace)}")
183
207
  self._remote_workspace = None
208
+ if self._using_morph_backend and hasattr(self.env, "close"):
209
+ with contextlib.suppress(Exception):
210
+ self.env.close()
184
211
 
185
212
  def _resolve_repo_url(self, metadata: dict[str, Any]) -> str | None:
186
213
  candidates = [
@@ -776,7 +803,7 @@ class MiniSweEnvironmentWrapper:
776
803
  or os.getenv("SWE_REX_MODAL_SANDBOX_KWARGS")
777
804
  )
778
805
  modal_kwargs: dict[str, Any] = {}
779
- if isinstance(modal_kwargs_raw, (dict, list)):
806
+ if isinstance(modal_kwargs_raw, dict | list):
780
807
  modal_kwargs = dict(modal_kwargs_raw or {})
781
808
  elif isinstance(modal_kwargs_raw, str) and modal_kwargs_raw.strip():
782
809
  try:
@@ -841,9 +868,9 @@ class MiniSweEnvironmentWrapper:
841
868
  instance_image_tag=instance_image_tag,
842
869
  env_image_tag=env_image_tag,
843
870
  model_name=model_name,
844
- Command=Command,
845
- WriteFileRequest=WriteFileRequest,
846
- ReadFileRequest=ReadFileRequest,
871
+ command_cls=Command,
872
+ write_file_request_cls=WriteFileRequest,
873
+ read_file_request_cls=ReadFileRequest,
847
874
  )
848
875
  try:
849
876
  return self._run_coroutine_blocking(coro)
@@ -867,9 +894,9 @@ class MiniSweEnvironmentWrapper:
867
894
  instance_image_tag: str,
868
895
  env_image_tag: str,
869
896
  model_name: str,
870
- Command,
871
- WriteFileRequest,
872
- ReadFileRequest,
897
+ command_cls,
898
+ write_file_request_cls,
899
+ read_file_request_cls,
873
900
  ) -> dict[str, Any]:
874
901
  deployment = deployment_config.get_deployment()
875
902
  await deployment.start()
@@ -880,7 +907,7 @@ class MiniSweEnvironmentWrapper:
880
907
 
881
908
  # Ensure working directory exists.
882
909
  mkdir_resp = await runtime.execute(
883
- Command(command=["mkdir", "-p", remote_root], timeout=60, shell=False)
910
+ command_cls(command=["mkdir", "-p", remote_root], timeout=60, shell=False)
884
911
  )
885
912
  if mkdir_resp.exit_code not in (0, None):
886
913
  logger.warning("Failed to ensure remote directory %s (exit=%s)", remote_root, mkdir_resp.exit_code)
@@ -888,8 +915,8 @@ class MiniSweEnvironmentWrapper:
888
915
  # Upload dataset & predictions.
889
916
  dataset_blob = json.dumps([instance], ensure_ascii=False)
890
917
  predictions_blob = json.dumps({instance_id: prediction}, ensure_ascii=False)
891
- await runtime.write_file(WriteFileRequest(path=dataset_remote_path, content=dataset_blob))
892
- await runtime.write_file(WriteFileRequest(path=predictions_remote_path, content=predictions_blob))
918
+ await runtime.write_file(write_file_request_cls(path=dataset_remote_path, content=dataset_blob))
919
+ await runtime.write_file(write_file_request_cls(path=predictions_remote_path, content=predictions_blob))
893
920
 
894
921
  eval_cmd = [
895
922
  "python",
@@ -921,7 +948,7 @@ class MiniSweEnvironmentWrapper:
921
948
 
922
949
  command_timeout = max(eval_timeout + 900, 1200)
923
950
  response = await runtime.execute(
924
- Command(
951
+ command_cls(
925
952
  command=eval_cmd,
926
953
  timeout=command_timeout,
927
954
  cwd=remote_root,
@@ -945,7 +972,7 @@ class MiniSweEnvironmentWrapper:
945
972
  for filename in ("report.json", "test_output.txt", "run_instance.log", "patch.diff"):
946
973
  remote_path = f"{remote_log_dir}/{filename}"
947
974
  try:
948
- content = await runtime.read_file(ReadFileRequest(path=remote_path))
975
+ content = await runtime.read_file(read_file_request_cls(path=remote_path))
949
976
  except Exception:
950
977
  continue
951
978
  if getattr(content, "content", None):
@@ -1073,7 +1100,7 @@ class MiniSweEnvironmentWrapper:
1073
1100
  return value
1074
1101
  if isinstance(value, str):
1075
1102
  return value.strip().lower() in {"1", "true", "yes", "on"}
1076
- if isinstance(value, (int, float)):
1103
+ if isinstance(value, int | float):
1077
1104
  return bool(value)
1078
1105
  return False # pragma: no cover - defensive default
1079
1106
 
@@ -156,13 +156,13 @@ class OpenAIClient:
156
156
  keys_preview = sorted(processed_request.keys())
157
157
  logger.info(f"Request keys: {keys_preview}")
158
158
 
159
- # Final hard-guard for OpenAI: ensure unsupported field is not present
159
+ # Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
160
160
  try:
161
- if "openai" in url.lower() and "stop_after_tool_calls" in processed_request:
161
+ low_url = url.lower()
162
+ if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
162
163
  processed_request.pop("stop_after_tool_calls", None)
163
- logger.info("Removed stop_after_tool_calls for OpenAI request")
164
+ logger.info("Removed stop_after_tool_calls for Groq/OpenAI request")
164
165
  # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
165
- low_url = url.lower()
166
166
  if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
167
167
  processed_request, dict
168
168
  ):
@@ -343,8 +343,6 @@ async def step_policy(
343
343
  inf_req = meta["inference_request"]
344
344
  msgs = inf_req["messages"]
345
345
  model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
346
- system_messages: list[str] = []
347
- user_messages: list[str] = []
348
346
  if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
349
347
  sys_text = msgs[0]["content"]
350
348
  policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
@@ -888,14 +888,6 @@ async def execute_rollout(
888
888
  logger.debug(f"TRACER_FACTORY_FAIL: {exc}")
889
889
  tracing_context = RolloutTracingContext(tracer_instance, request, req)
890
890
  await tracing_context.start_session()
891
- # Print whether tracing is active for this rollout
892
- try:
893
- print(
894
- f"[rollout] tracing enabled={bool(tracing_context.enabled)} run_id={request.run_id}",
895
- flush=True,
896
- )
897
- except Exception:
898
- pass
899
891
 
900
892
  # Register run
901
893
  registry.register_run(request.run_id)