synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -156,13 +156,13 @@ class OpenAIClient:
156
156
  keys_preview = sorted(processed_request.keys())
157
157
  logger.info(f"Request keys: {keys_preview}")
158
158
 
159
- # Final hard-guard for OpenAI: ensure unsupported field is not present
159
+ # Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
160
160
  try:
161
- if "openai" in url.lower() and "stop_after_tool_calls" in processed_request:
161
+ low_url = url.lower()
162
+ if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
162
163
  processed_request.pop("stop_after_tool_calls", None)
163
- logger.info("Removed stop_after_tool_calls for OpenAI request")
164
+ logger.info("Removed stop_after_tool_calls for Groq/OpenAI request")
164
165
  # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
165
- low_url = url.lower()
166
166
  if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
167
167
  processed_request, dict
168
168
  ):
@@ -340,40 +340,6 @@ class OpenAIClient:
340
340
  pass
341
341
  except Exception:
342
342
  pass
343
- # Gracefully degrade on 422 so rollouts can still produce a trajectory
344
- if status == 422:
345
- try:
346
- # Best-effort parse of error for diagnostics
347
- err = None
348
- try:
349
- err = e.response.json()
350
- except Exception:
351
- err = {"error": "unprocessable", "detail": (text or "")[:200]}
352
- logger.warning(
353
- {
354
- "inference_422_recovered": True,
355
- "detail": err,
356
- }
357
- )
358
- except Exception:
359
- pass
360
- # Return a minimal OpenAI-compatible response with no tool_calls/content
361
- import time as _t
362
-
363
- return {
364
- "id": f"cmpl-{int(_t.time())}",
365
- "object": "chat.completion",
366
- "created": int(_t.time()),
367
- "model": processed_request.get("model") or "unknown",
368
- "choices": [
369
- {
370
- "index": 0,
371
- "message": {"role": "assistant", "content": "", "tool_calls": []},
372
- "finish_reason": "stop",
373
- }
374
- ],
375
- "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
376
- }
377
343
  raise
378
344
  except Exception as e:
379
345
  logger.error(f"Unexpected error calling {url}: {e}")
@@ -945,6 +945,23 @@ async def step_policy(
945
945
  except Exception as exc:
946
946
  logger.debug(f"TRACING_LLM_FAIL: {exc}")
947
947
 
948
+ if not tool_calls:
949
+ preview = ""
950
+ try:
951
+ preview = str(meta.get("raw_response") or "")[:400]
952
+ except Exception:
953
+ preview = "<unavailable>"
954
+ logger.error(
955
+ {
956
+ "rollout.policy_step": True,
957
+ "policy_id": request.policy_id,
958
+ "error": "no_tool_calls",
959
+ "inference_url": meta.get("inference_url"),
960
+ "raw_preview": preview,
961
+ }
962
+ )
963
+ raise RuntimeError("Policy step produced no tool calls; inference response unusable.")
964
+
948
965
  return PolicyStepResponse(
949
966
  tool_calls=tool_calls,
950
967
  meta=meta,
@@ -251,14 +251,16 @@ class RolloutTracingContext:
251
251
  await self.tracer.initialize()
252
252
  except Exception as exc:
253
253
  logger.debug("TRACING_INIT_FAIL: %s", exc)
254
+ # Hard fail: tracing requested but cannot initialize
255
+ raise
254
256
  try:
255
257
  await self.tracer.start_session(
256
258
  session_id=self.run_id, metadata=dict(self.metadata_base)
257
259
  )
258
260
  except Exception as exc:
259
261
  logger.warning("TRACING_START_FAIL: %s", exc)
260
- self.enabled = False
261
- self.tracer = None
262
+ # Hard fail: tracing requested but cannot start session
263
+ raise
262
264
 
263
265
  async def start_decision(self, turn_number: int) -> None:
264
266
  self.current_turn = turn_number
@@ -0,0 +1,178 @@
1
+ """Utility classes for running swe-mini environments on Morph Cloud."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import os
7
+ import shlex
8
+ import time
9
+ from dataclasses import dataclass, field
10
+ from typing import Any, Dict
11
+
12
+ _IMPORT_ERROR: Exception | None = None
13
+
14
+ try: # pragma: no cover - optional dependency
15
+ from morphcloud.api import MorphCloudClient
16
+ except Exception as exc: # pragma: no cover - optional dependency
17
+ MorphCloudClient = None # type: ignore[assignment]
18
+ _IMPORT_ERROR = exc
19
+
20
+
21
+ def _quote_env_var(key: str, value: str) -> str:
22
+ """Return a safe shell export statement."""
23
+ return f"export {key}={shlex.quote(value)}"
24
+
25
+
26
+ def _now() -> float:
27
+ return time.time()
28
+
29
+
30
@dataclass
class MorphSandboxBackend:
    """Thin wrapper around Morph Cloud instances for command execution.

    The API mirrors the subset consumed by :class:`MiniSweEnvironmentWrapper`:
    we expose an ``execute`` method that matches the mini-swe environment shape.

    The remote instance is started lazily on the first :meth:`execute` call,
    either from an existing snapshot or from a snapshot created on demand
    from a base image, and is stopped by :meth:`close`.
    """

    # Snapshot to boot from; falls back to the SWE_MINI_MORPH_SNAPSHOT_ID /
    # MORPH_SNAPSHOT_ID environment variables when unset.
    snapshot_id: str | None = None
    # Base image used to create a snapshot when no snapshot id is available.
    image_id: str | None = None
    # Remote working directory; created on startup and entered before each command.
    cwd: str = "/workspace"
    # Environment variables exported before each command.
    env: Dict[str, str] | None = None
    # Metadata passed to ``instances.start``.
    metadata: Dict[str, str] | None = None
    # Resources requested when a snapshot has to be created.
    vcpus: int = 4
    memory_mb: int = 8192
    disk_mb: int = 65536
    # Maximum number of seconds to wait for the instance to become ready.
    startup_timeout: int = 600

    _client: MorphCloudClient = field(init=False)
    _instance: Any = field(init=False, default=None)
    _last_exec: Dict[str, Any] = field(init=False, default_factory=dict)
    _started_at: float | None = field(init=False, default=None)

    def __post_init__(self) -> None:
        """Validate prerequisites, normalise settings and create the client.

        Raises:
            RuntimeError: If the ``morphcloud`` package could not be imported
                or ``MORPH_API_KEY`` is not set.
        """
        if MorphCloudClient is None:  # pragma: no cover - optional dependency
            raise RuntimeError(
                "morphcloud package is required for Morph environments. "
                "Install with `pip install morphcloud`."
            ) from _IMPORT_ERROR

        api_key = os.getenv("MORPH_API_KEY", "")
        if not api_key:
            raise RuntimeError("Set MORPH_API_KEY before using the Morph backend.")

        # Normalise metadata/env early to avoid shared references.
        self.metadata = {str(k): str(v) for k, v in (self.metadata or {}).items()}
        self.env = {str(k): str(v) for k, v in (self.env or {}).items()}
        self.cwd = self.cwd or "/workspace"

        # The key is not passed explicitly; presumably the SDK reads
        # MORPH_API_KEY from the environment itself - TODO confirm.
        self._client = MorphCloudClient()

    # Public API -----------------------------------------------------------------

    def execute(self, command: str, timeout: int | None = None) -> Dict[str, Any]:
        """Execute ``command`` inside the Morph instance.

        Args:
            command: Shell command to run; blank input is replaced by ``true``.
            timeout: Optional limit in seconds, applied remotely by wrapping
                the script in the ``timeout`` utility. ``None`` or ``0``
                disables it.

        Returns:
            A dict with ``output`` (stdout), ``stderr``, ``returncode`` and
            ``duration`` (seconds, measured locally around the remote call).
        """
        if not command.strip():
            command = "true"

        instance = self._ensure_instance()

        # Exports, then ``cd``, then the command, chained with ``&&`` so the
        # command only runs when the setup steps succeeded.
        script_parts = [_quote_env_var(key, value) for key, value in self.env.items()]
        if self.cwd:
            script_parts.append(f"cd {shlex.quote(self.cwd)}")
        script_parts.append(command)

        script = " && ".join(script_parts)
        if timeout:
            wrapped = f"timeout {int(timeout)}s bash -lc {shlex.quote(script)}"
        else:
            wrapped = script

        shell_cmd = f"bash -lc {shlex.quote(wrapped)}"
        started = _now()
        result = instance.exec(shell_cmd)
        duration = _now() - started

        payload = {
            "output": (result.stdout or ""),
            "stderr": (result.stderr or ""),
            "returncode": getattr(result, "exit_code", None),
            "duration": duration,
        }
        self._last_exec = payload
        return payload

    def close(self) -> None:
        """Stop the Morph instance if one is running.

        Errors raised while stopping are swallowed (best-effort shutdown);
        the cached instance is cleared in every case, so calling this
        repeatedly is harmless.
        """
        instance = getattr(self, "_instance", None)
        if not instance:
            return
        try:
            instance.stop()
        except Exception:  # pragma: no cover - best-effort shutdown
            pass
        finally:
            self._instance = None

    # Internal helpers -----------------------------------------------------------

    def _ensure_instance(self) -> Any:
        """Return the running instance, starting one on first use.

        A snapshot id is taken from the ``snapshot_id`` field or the
        ``SWE_MINI_MORPH_SNAPSHOT_ID`` / ``MORPH_SNAPSHOT_ID`` environment
        variables. When none is available a snapshot is created from
        ``image_id`` (or ``SWE_MINI_MORPH_IMAGE_ID`` / ``MORPH_IMAGE_ID``,
        defaulting to ``morphvm-minimal``) and remembered in ``snapshot_id``.

        Raises:
            TimeoutError: If the instance does not become ready in time.
            RuntimeError: If the remote working directory cannot be created.
        """
        instance = getattr(self, "_instance", None)
        if instance is not None:
            return instance

        snapshot_id = (
            self.snapshot_id
            or os.getenv("SWE_MINI_MORPH_SNAPSHOT_ID")
            or os.getenv("MORPH_SNAPSHOT_ID")
        )
        metadata = dict(self.metadata)

        if snapshot_id:
            instance = self._client.instances.start(
                snapshot_id=snapshot_id, metadata=metadata or None
            )
        else:
            image_id = (
                self.image_id
                or os.getenv("SWE_MINI_MORPH_IMAGE_ID")
                or os.getenv("MORPH_IMAGE_ID")
                or "morphvm-minimal"
            )
            snapshot = self._client.snapshots.create(
                image_id=image_id,
                vcpus=self.vcpus,
                memory=self.memory_mb,
                disk_size=self.disk_mb,
            )
            instance = self._client.instances.start(
                snapshot_id=snapshot.id, metadata=metadata or None
            )
            self.snapshot_id = snapshot.id

        self._instance = instance
        self._started_at = _now()
        try:
            self._wait_until_ready(instance)
            self._ensure_cwd(instance)
        except Exception:
            # Do not cache a half-initialised instance: a later call would
            # reuse it without the readiness/cwd setup, and it would keep
            # running remotely. Stop it (best effort) and clear the cache.
            self.close()
            raise
        return instance

    def _wait_until_ready(self, instance: Any) -> None:
        """Block until ``instance`` reports ready, retrying every 5 seconds.

        The deadline is only checked after ``instance.wait_until_ready()``
        raises, so a single blocking call is never interrupted by it.

        Raises:
            TimeoutError: If a failure occurs after ``startup_timeout``
                seconds have elapsed.
        """
        deadline = _now() + float(self.startup_timeout)
        while True:
            try:
                instance.wait_until_ready()
                break
            except Exception as exc:  # pragma: no cover - SDK may raise while polling
                if _now() > deadline:
                    raise TimeoutError(
                        f"Morph instance did not become ready within {self.startup_timeout}s"
                    ) from exc
                time.sleep(5.0)

    def _ensure_cwd(self, instance: Any) -> None:
        """Create the remote working directory on ``instance`` if configured.

        Raises:
            RuntimeError: If the remote ``mkdir`` call raises.
        """
        if not self.cwd:
            return
        # Quote the path itself (not just the whole script) so directories
        # containing spaces or shell metacharacters are created verbatim,
        # consistent with the ``cd`` issued by ``execute``.
        mkdir_script = f"mkdir -p {shlex.quote(self.cwd)}"
        try:
            instance.exec(f"bash -lc {shlex.quote(mkdir_script)}")
        except Exception as exc:  # pragma: no cover - surface friendly error
            raise RuntimeError(f"Failed to create remote workspace {self.cwd!r}: {exc}") from exc

    def __del__(self) -> None:  # pragma: no cover - defensive cleanup
        with contextlib.suppress(Exception):
            self.close()
@@ -0,0 +1,6 @@
1
+ """Banking77 task app package."""
2
+
3
+ from .banking77_task_app import build_config
4
+
5
+ __all__ = ["build_config"]
6
+