synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (890)
  1. examples/README.md +1 -0
  2. examples/__init__.py +16 -0
  3. examples/analyze_semantic_words.sh +17 -0
  4. examples/baseline/banking77_baseline.py +243 -0
  5. examples/baseline/banking77_pipeline_baseline.py +294 -0
  6. examples/baseline/crafter_baseline.py +407 -0
  7. examples/baseline/pokemon_red_baseline.py +326 -0
  8. examples/baseline/simple_baseline.py +56 -0
  9. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  10. examples/blog_posts/gepa/README.md +355 -0
  11. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  12. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
  13. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
  14. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
  15. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
  16. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
  17. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
  18. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
  19. examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
  20. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
  21. examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
  22. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
  23. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
  24. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
  25. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
  26. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
  27. examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
  28. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  29. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  30. examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
  31. examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
  32. examples/blog_posts/gepa/task_apps.py +105 -0
  33. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  34. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  35. examples/blog_posts/mipro/README.md +415 -0
  36. examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
  37. examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
  38. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
  39. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
  40. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
  41. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
  42. examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
  43. examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
  44. examples/blog_posts/mipro/multi_step.md +79 -0
  45. examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
  46. examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
  47. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
  48. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
  49. examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
  50. examples/blog_posts/pokemon_vl/README.md +98 -0
  51. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  52. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  53. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  54. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  55. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  56. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  57. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  58. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  59. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  60. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  61. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  62. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  63. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  64. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  65. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  66. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  67. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  68. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  69. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  70. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  71. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  72. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  73. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  74. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  75. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  76. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  77. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  78. examples/crafter_debug_render.py +186 -0
  79. examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
  80. examples/gepa/banking77_pipeline_gepa.toml +96 -0
  81. examples/gepa/multi_stage_gepa_example.toml +84 -0
  82. examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
  83. examples/multi_step/SFT_README.md +147 -0
  84. examples/multi_step/configs/README_verilog_rl.md +77 -0
  85. examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
  86. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
  87. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  88. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  89. examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
  90. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
  91. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
  92. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
  93. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  94. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  95. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  96. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  97. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  98. examples/multi_step/configs/verilog_rl_lora.toml +147 -0
  99. examples/multi_step/convert_traces_to_sft.py +84 -0
  100. examples/multi_step/crafter_rl_lora.md +70 -0
  101. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  102. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  103. examples/multi_step/readme.md +48 -0
  104. examples/multi_step/run_sft_qwen30b.sh +45 -0
  105. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  106. examples/multi_step/task_app_config_notes.md +494 -0
  107. examples/multi_step/verilog_rl_lora.md +218 -0
  108. examples/qwen_coder/README.md +102 -0
  109. examples/qwen_coder/_shared.py +113 -0
  110. examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
  111. examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
  112. examples/qwen_coder/configs/coder_lora_small.toml +57 -0
  113. examples/qwen_coder/generate_dataset.py +98 -0
  114. examples/qwen_coder/infer_ft_smoke.py +65 -0
  115. examples/qwen_coder/infer_prod_proxy.py +73 -0
  116. examples/qwen_coder/infer_via_synth.py +87 -0
  117. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  118. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  119. examples/qwen_coder/sft_full_17b.py +103 -0
  120. examples/qwen_coder/sft_lora_30b.py +110 -0
  121. examples/qwen_coder/subset_jsonl.py +39 -0
  122. examples/qwen_coder/todos.md +38 -0
  123. examples/qwen_coder/validate_jsonl.py +60 -0
  124. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  125. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  126. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  127. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  128. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  129. examples/qwen_vl/QUICKSTART.md +327 -0
  130. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  131. examples/qwen_vl/README.md +152 -0
  132. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  133. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  134. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  135. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  136. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  137. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  138. examples/qwen_vl/__init__.py +2 -0
  139. examples/qwen_vl/collect_data_via_cli.md +415 -0
  140. examples/qwen_vl/collect_vision_traces.py +368 -0
  141. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  142. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  143. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  144. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  145. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  146. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  147. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  148. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  149. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  150. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  151. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  152. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  153. examples/qwen_vl/run_vision_comparison.sh +61 -0
  154. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  155. examples/qwen_vl/test_image_validation.py +201 -0
  156. examples/qwen_vl/test_sft_vision_data.py +110 -0
  157. examples/rl/README.md +169 -0
  158. examples/rl/configs/eval_base_qwen.toml +17 -0
  159. examples/rl/configs/eval_rl_qwen.toml +13 -0
  160. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  161. examples/rl/configs/rl_from_base_qwen17.toml +80 -0
  162. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  163. examples/rl/download_dataset.py +80 -0
  164. examples/rl/run_eval.py +436 -0
  165. examples/rl/run_rl_and_save.py +111 -0
  166. examples/rl/task_app/README.md +21 -0
  167. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
  168. examples/rl/task_app/math_task_app.py +111 -0
  169. examples/run_crafter_demo.sh +10 -0
  170. examples/sdk_prompt_learning_example.py +55 -0
  171. examples/sft/README.md +139 -0
  172. examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
  173. examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
  174. examples/sft/evaluate.py +117 -0
  175. examples/sft/export_dataset.py +120 -0
  176. examples/sft/generate_traces.py +164 -0
  177. examples/swe/__init__.py +12 -0
  178. examples/swe/task_app/README.md +135 -0
  179. examples/swe/task_app/__init__.py +2 -0
  180. examples/swe/task_app/grpo_swe_mini.py +604 -0
  181. examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
  182. examples/swe/task_app/hosted/README.md +173 -0
  183. examples/swe/task_app/hosted/__init__.py +5 -0
  184. examples/swe/task_app/hosted/branching.py +143 -0
  185. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  186. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  187. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  188. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  189. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  190. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  191. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  192. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  193. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  194. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  195. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
  196. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  197. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  198. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  199. examples/swe/task_app/hosted/hosted_app.py +204 -0
  200. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  201. examples/swe/task_app/hosted/inference/openai_client.py +584 -0
  202. examples/swe/task_app/hosted/main.py +100 -0
  203. examples/swe/task_app/hosted/policy_routes.py +1094 -0
  204. examples/swe/task_app/hosted/registry.py +195 -0
  205. examples/swe/task_app/hosted/rollout.py +1905 -0
  206. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  207. examples/swe/task_app/hosted/storage/volume.py +211 -0
  208. examples/swe/task_app/hosted/test_agents.py +161 -0
  209. examples/swe/task_app/hosted/test_service.py +136 -0
  210. examples/swe/task_app/hosted/utils.py +62 -0
  211. examples/swe/task_app/morph_backend.py +178 -0
  212. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  213. examples/task_apps/TESTING.md +275 -0
  214. examples/task_apps/banking77/__init__.py +6 -0
  215. examples/task_apps/banking77/banking77_task_app.py +912 -0
  216. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  217. examples/task_apps/banking77_pipeline/__init__.py +6 -0
  218. examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
  219. examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
  220. examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
  221. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  222. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
  223. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
  224. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  225. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  226. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  227. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  228. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  229. examples/task_apps/crafter/task_app/README.md +42 -0
  230. examples/task_apps/crafter/task_app/__init__.py +5 -0
  231. examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
  232. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
  233. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
  234. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
  235. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
  236. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  237. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  238. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  239. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  240. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
  241. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
  242. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
  243. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  244. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  245. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
  246. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  247. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
  248. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
  249. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
  250. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
  251. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
  252. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  253. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
  254. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
  255. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
  256. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
  257. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  258. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  259. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  260. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  261. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  262. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  263. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  264. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  265. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  266. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  267. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  268. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  269. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  270. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  271. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  272. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  273. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  274. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  275. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  276. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  277. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  278. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  279. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  280. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  281. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  282. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  283. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  284. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  285. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  286. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  287. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  288. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  289. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  290. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  291. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  292. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  293. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  294. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  295. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  296. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  297. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  298. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  299. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  300. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  301. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  302. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  303. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  304. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  305. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  306. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  307. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  308. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  309. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  310. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  311. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  312. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  313. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  314. examples/task_apps/enron/__init__.py +2 -0
  315. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  316. examples/task_apps/enron/filter_sft.toml +5 -0
  317. examples/task_apps/enron/task_app/README.md +14 -0
  318. examples/task_apps/enron/task_app/__init__.py +1 -0
  319. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  320. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  321. examples/task_apps/enron/tests/__init__.py +4 -0
  322. examples/task_apps/enron/tests/conftest.py +115 -0
  323. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  324. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  325. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  326. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  327. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  328. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  329. examples/task_apps/gepa_benchmarks/common.py +260 -0
  330. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  331. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  332. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  333. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  334. examples/task_apps/math/README.md +21 -0
  335. examples/task_apps/math/math_single_step.py +1000 -0
  336. examples/task_apps/math/math_task_app.py +115 -0
  337. examples/task_apps/pokemon_battle/__init__.py +2 -0
  338. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  339. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  340. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  341. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  342. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  343. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  344. examples/task_apps/pokemon_red/README.md +356 -0
  345. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
  346. examples/task_apps/pokemon_red/__init__.py +3 -0
  347. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
  348. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
  349. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  350. examples/task_apps/pokemon_red/task_app.py +1048 -0
  351. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  352. examples/task_apps/sokoban/README.md +306 -0
  353. examples/task_apps/sokoban/__init__.py +3 -0
  354. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  355. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  356. examples/task_apps/sokoban/filter_sft.toml +5 -0
  357. examples/task_apps/sokoban/task_app.py +1058 -0
  358. examples/task_apps/sokoban/tests/__init__.py +4 -0
  359. examples/task_apps/sokoban/tests/conftest.py +113 -0
  360. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  361. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  362. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  363. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  364. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  365. examples/task_apps/verilog/__init__.py +1 -0
  366. examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
  367. examples/task_apps/verilog/filter_sft.toml +5 -0
  368. examples/task_apps/verilog/task_app/README.md +12 -0
  369. examples/task_apps/verilog/task_app/__init__.py +1 -0
  370. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  371. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  372. examples/task_apps/verilog/tests/__init__.py +4 -0
  373. examples/task_apps/verilog/tests/conftest.py +115 -0
  374. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  375. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  376. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  377. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  378. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  379. examples/tunnel_gepa_banking77/README.md +106 -0
  380. examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
  381. examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
  382. examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
  383. examples/vlm/PROPOSAL.md +53 -0
  384. examples/vlm/README.md +68 -0
  385. examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
  386. examples/vlm/crafter_image_only_agent.py +207 -0
  387. examples/vlm/crafter_openai_vlm_agent.py +275 -0
  388. examples/vlm/filter_image_rows.py +63 -0
  389. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  390. examples/warming_up_to_rl/_utils.py +92 -0
  391. examples/warming_up_to_rl/analyze_trace_db.py +422 -0
  392. examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
  393. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  394. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
  395. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
  396. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
  397. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  398. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  399. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  400. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  401. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
  402. examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
  403. examples/warming_up_to_rl/export_trace_sft.py +837 -0
  404. examples/warming_up_to_rl/groq_test.py +97 -0
  405. examples/warming_up_to_rl/manage_secrets.py +131 -0
  406. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  407. examples/warming_up_to_rl/old/notes.md +73 -0
  408. examples/warming_up_to_rl/readme.md +110 -0
  409. examples/warming_up_to_rl/run_eval.py +736 -0
  410. examples/warming_up_to_rl/run_fft_and_save.py +380 -0
  411. examples/warming_up_to_rl/run_local_rollout.py +239 -0
  412. examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
  413. examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
  414. examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
  415. examples/warming_up_to_rl/run_rl_and_save.py +124 -0
  416. examples/warming_up_to_rl/run_rollout_remote.py +156 -0
  417. examples/warming_up_to_rl/task_app/README.md +42 -0
  418. examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
  419. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  420. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  421. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  422. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  423. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  424. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  425. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  426. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  427. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  428. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  429. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  430. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  431. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  432. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
  433. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  434. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
  435. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  436. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
  437. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  438. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
  439. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  440. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  441. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  442. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  443. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
  444. examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
  445. examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
  446. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
  447. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
  448. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
  449. examples/workflows/math_rl/download_dataset.py +80 -0
  450. examples/workflows/math_rl/run_eval.py +436 -0
  451. examples/workflows/math_rl/run_rl_and_save.py +111 -0
  452. synth_ai/__init__.py +47 -23
  453. synth_ai/_utils/__init__.py +47 -0
  454. synth_ai/_utils/base_url.py +10 -0
  455. synth_ai/_utils/http.py +10 -0
  456. synth_ai/_utils/prompts.py +10 -0
  457. synth_ai/_utils/task_app_state.py +12 -0
  458. synth_ai/_utils/user_config.py +10 -0
  459. synth_ai/api/models/supported.py +514 -0
  460. synth_ai/api/train/__init__.py +60 -2
  461. synth_ai/api/train/builders.py +347 -39
  462. synth_ai/api/train/cli.py +895 -160
  463. synth_ai/api/train/config_finder.py +103 -25
  464. synth_ai/api/train/configs/__init__.py +65 -0
  465. synth_ai/api/train/configs/prompt_learning.py +496 -0
  466. synth_ai/api/train/configs/rl.py +188 -0
  467. synth_ai/api/train/configs/sft.py +99 -0
  468. synth_ai/api/train/configs/shared.py +81 -0
  469. synth_ai/api/train/env_resolver.py +70 -20
  470. synth_ai/api/train/pollers.py +29 -4
  471. synth_ai/api/train/prompt_learning.py +425 -0
  472. synth_ai/api/train/sft.py +390 -0
  473. synth_ai/api/train/supported_algos.py +147 -0
  474. synth_ai/api/train/task_app.py +6 -4
  475. synth_ai/api/train/utils.py +64 -52
  476. synth_ai/api/train/validators.py +1117 -0
  477. synth_ai/api/tunnel.py +49 -0
  478. synth_ai/auth/credentials.py +94 -0
  479. synth_ai/baseline/__init__.py +25 -0
  480. synth_ai/baseline/config.py +209 -0
  481. synth_ai/baseline/discovery.py +214 -0
  482. synth_ai/baseline/execution.py +146 -0
  483. synth_ai/cfgs.py +227 -0
  484. synth_ai/cli/__init__.py +85 -63
  485. synth_ai/cli/_modal_wrapper.py +31 -0
  486. synth_ai/cli/_storage.py +20 -0
  487. synth_ai/cli/_typer_patch.py +47 -0
  488. synth_ai/cli/_validate_task_app.py +29 -0
  489. synth_ai/cli/balance.py +16 -4
  490. synth_ai/cli/calc.py +36 -21
  491. synth_ai/cli/claude.py +70 -0
  492. synth_ai/cli/codex.py +267 -0
  493. synth_ai/cli/commands/__init__.py +18 -0
  494. synth_ai/cli/commands/baseline/__init__.py +12 -0
  495. synth_ai/cli/commands/baseline/core.py +637 -0
  496. synth_ai/cli/commands/baseline/list.py +93 -0
  497. synth_ai/cli/commands/demo/__init__.py +6 -0
  498. synth_ai/cli/commands/demo/core.py +163 -0
  499. synth_ai/cli/commands/eval/__init__.py +19 -0
  500. synth_ai/cli/commands/eval/core.py +1112 -0
  501. synth_ai/cli/commands/eval/errors.py +81 -0
  502. synth_ai/cli/commands/eval/validation.py +133 -0
  503. synth_ai/cli/commands/filter/__init__.py +12 -0
  504. synth_ai/cli/commands/filter/core.py +424 -0
  505. synth_ai/cli/commands/filter/errors.py +55 -0
  506. synth_ai/cli/commands/filter/validation.py +77 -0
  507. synth_ai/cli/commands/help/__init__.py +185 -0
  508. synth_ai/cli/commands/help/core.py +72 -0
  509. synth_ai/cli/commands/smoke/__init__.py +7 -0
  510. synth_ai/cli/commands/smoke/core.py +1437 -0
  511. synth_ai/cli/commands/status/__init__.py +66 -0
  512. synth_ai/cli/commands/status/client.py +192 -0
  513. synth_ai/cli/commands/status/config.py +92 -0
  514. synth_ai/cli/commands/status/errors.py +20 -0
  515. synth_ai/cli/commands/status/formatters.py +164 -0
  516. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  517. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  518. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  519. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  520. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  521. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  522. synth_ai/cli/commands/status/subcommands/session.py +183 -0
  523. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  524. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  525. synth_ai/cli/commands/status/utils.py +114 -0
  526. synth_ai/cli/commands/train/__init__.py +53 -0
  527. synth_ai/cli/commands/train/core.py +21 -0
  528. synth_ai/cli/commands/train/errors.py +117 -0
  529. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  530. synth_ai/cli/commands/train/judge_validation.py +305 -0
  531. synth_ai/cli/commands/train/validation.py +386 -0
  532. synth_ai/cli/demo.py +32 -140
  533. synth_ai/cli/deploy.py +233 -0
  534. synth_ai/cli/eval/__init__.py +36 -0
  535. synth_ai/cli/eval/core.py +5 -0
  536. synth_ai/cli/eval/errors.py +31 -0
  537. synth_ai/cli/eval/validation.py +5 -0
  538. synth_ai/cli/filter/__init__.py +28 -0
  539. synth_ai/cli/filter/core.py +5 -0
  540. synth_ai/cli/filter/errors.py +23 -0
  541. synth_ai/cli/filter/validation.py +5 -0
  542. synth_ai/cli/legacy_root_backup.py +28 -22
  543. synth_ai/cli/lib/__init__.py +10 -0
  544. synth_ai/cli/lib/task_app_discovery.py +7 -0
  545. synth_ai/cli/lib/task_app_env.py +518 -0
  546. synth_ai/cli/mcp.py +34 -0
  547. synth_ai/cli/modal_serve/__init__.py +12 -0
  548. synth_ai/cli/modal_serve/core.py +14 -0
  549. synth_ai/cli/modal_serve/errors.py +8 -0
  550. synth_ai/cli/modal_serve/validation.py +11 -0
  551. synth_ai/cli/opencode.py +256 -0
  552. synth_ai/cli/recent.py +13 -7
  553. synth_ai/cli/rl_demo.py +156 -116
  554. synth_ai/cli/root.py +131 -132
  555. synth_ai/cli/serve/__init__.py +12 -0
  556. synth_ai/cli/serve/core.py +14 -0
  557. synth_ai/cli/serve/errors.py +8 -0
  558. synth_ai/cli/serve/validation.py +11 -0
  559. synth_ai/cli/setup.py +49 -0
  560. synth_ai/cli/status.py +7 -125
  561. synth_ai/cli/task_app_deploy.py +7 -0
  562. synth_ai/cli/task_app_list.py +25 -0
  563. synth_ai/cli/task_app_modal_serve.py +11 -0
  564. synth_ai/cli/task_app_serve.py +11 -0
  565. synth_ai/cli/task_apps.py +2284 -257
  566. synth_ai/cli/traces.py +9 -5
  567. synth_ai/cli/train/__init__.py +12 -0
  568. synth_ai/cli/train/core.py +21 -0
  569. synth_ai/cli/train/errors.py +8 -0
  570. synth_ai/cli/train/validation.py +24 -0
  571. synth_ai/cli/train.py +5 -0
  572. synth_ai/cli/turso.py +73 -0
  573. synth_ai/cli/watch.py +13 -18
  574. synth_ai/demos/__init__.py +10 -0
  575. synth_ai/demos/core/__init__.py +28 -1
  576. synth_ai/demos/core/cli.py +579 -291
  577. synth_ai/demos/crafter/__init__.py +1 -0
  578. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  579. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  580. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  581. synth_ai/demos/demo_registry.py +176 -0
  582. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  583. synth_ai/demos/demo_task_apps/core.py +64 -28
  584. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  585. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  586. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  587. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
  588. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  589. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  590. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  591. synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
  592. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  593. synth_ai/demos/math/__init__.py +1 -0
  594. synth_ai/demos/math/_common.py +16 -0
  595. synth_ai/demos/math/app.py +38 -0
  596. synth_ai/demos/math/config.toml +76 -0
  597. synth_ai/demos/math/deploy_modal.py +54 -0
  598. synth_ai/demos/math/modal_task_app.py +703 -0
  599. synth_ai/demos/math/task_app_entry.py +51 -0
  600. synth_ai/environments/environment/core.py +7 -1
  601. synth_ai/environments/examples/bandit/engine.py +12 -5
  602. synth_ai/environments/examples/bandit/environment.py +0 -1
  603. synth_ai/environments/examples/bandit/taskset.py +4 -4
  604. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  605. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  606. synth_ai/environments/examples/crafter_classic/environment.py +93 -2
  607. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  608. synth_ai/environments/examples/enron/engine.py +7 -2
  609. synth_ai/environments/examples/enron/environment.py +68 -0
  610. synth_ai/environments/examples/red/engine.py +60 -12
  611. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  612. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  613. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  614. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  615. synth_ai/environments/examples/red/environment.py +86 -0
  616. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  617. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  618. synth_ai/environments/examples/verilog/engine.py +104 -12
  619. synth_ai/environments/examples/wordle/environment.py +0 -1
  620. synth_ai/environments/reproducibility/tree.py +5 -6
  621. synth_ai/environments/service/app.py +11 -12
  622. synth_ai/environments/service/core_routes.py +10 -9
  623. synth_ai/environments/stateful/engine.py +1 -1
  624. synth_ai/environments/tasks/core.py +1 -0
  625. synth_ai/environments/tasks/filters.py +5 -6
  626. synth_ai/environments/tasks/utils.py +4 -5
  627. synth_ai/evals/__init__.py +15 -0
  628. synth_ai/evals/base.py +14 -5
  629. synth_ai/evals/client.py +82 -0
  630. synth_ai/evals/types.py +42 -0
  631. synth_ai/http.py +8 -22
  632. synth_ai/http_client.py +45 -12
  633. synth_ai/inference/__init__.py +0 -2
  634. synth_ai/inference/client.py +21 -7
  635. synth_ai/jobs/client.py +129 -80
  636. synth_ai/judge_schemas.py +127 -0
  637. synth_ai/learning/__init__.py +51 -6
  638. synth_ai/learning/algorithms.py +14 -0
  639. synth_ai/learning/client.py +122 -30
  640. synth_ai/learning/config.py +2 -40
  641. synth_ai/learning/constants.py +0 -2
  642. synth_ai/learning/ft_client.py +4 -56
  643. synth_ai/learning/health.py +14 -8
  644. synth_ai/learning/jobs.py +43 -47
  645. synth_ai/learning/prompt_learning_client.py +276 -0
  646. synth_ai/learning/prompt_learning_types.py +185 -0
  647. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  648. synth_ai/learning/rl/client.py +269 -0
  649. synth_ai/learning/rl/config.py +31 -0
  650. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  651. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  652. synth_ai/learning/rl/secrets.py +13 -0
  653. synth_ai/learning/rl_client.py +2 -253
  654. synth_ai/learning/sft/__init__.py +29 -0
  655. synth_ai/learning/sft/client.py +68 -0
  656. synth_ai/learning/sft/config.py +270 -0
  657. synth_ai/learning/sft/data.py +698 -0
  658. synth_ai/learning/sse.py +25 -26
  659. synth_ai/learning/validators.py +29 -25
  660. synth_ai/mcp/__init__.py +5 -0
  661. synth_ai/mcp/__main__.py +8 -0
  662. synth_ai/mcp/main.py +254 -0
  663. synth_ai/mcp/setup.py +100 -0
  664. synth_ai/modal.py +257 -0
  665. synth_ai/pricing/__init__.py +3 -0
  666. synth_ai/pricing/model_pricing.py +64 -0
  667. synth_ai/session/__init__.py +75 -0
  668. synth_ai/session/client.py +383 -0
  669. synth_ai/session/constants.py +63 -0
  670. synth_ai/session/exceptions.py +105 -0
  671. synth_ai/session/manager.py +139 -0
  672. synth_ai/session/models.py +89 -0
  673. synth_ai/session/query.py +110 -0
  674. synth_ai/spec/__init__.py +46 -0
  675. synth_ai/spec/dataclasses.py +149 -0
  676. synth_ai/spec/loader.py +144 -0
  677. synth_ai/spec/serializer.py +199 -0
  678. synth_ai/spec/validation.py +250 -0
  679. synth_ai/streaming/__init__.py +29 -0
  680. synth_ai/streaming/config.py +94 -0
  681. synth_ai/streaming/handlers.py +589 -0
  682. synth_ai/streaming/streamer.py +320 -0
  683. synth_ai/streaming/types.py +95 -0
  684. synth_ai/task/__init__.py +50 -30
  685. synth_ai/task/apps/__init__.py +63 -19
  686. synth_ai/task/auth.py +35 -23
  687. synth_ai/task/client.py +15 -13
  688. synth_ai/task/config.py +261 -0
  689. synth_ai/task/contracts.py +165 -64
  690. synth_ai/task/datasets.py +9 -6
  691. synth_ai/task/errors.py +11 -10
  692. synth_ai/task/health.py +17 -11
  693. synth_ai/task/inference_api.py +101 -0
  694. synth_ai/task/json.py +58 -24
  695. synth_ai/task/proxy.py +59 -66
  696. synth_ai/task/rubrics/__init__.py +55 -0
  697. synth_ai/task/rubrics/loaders.py +156 -0
  698. synth_ai/task/rubrics/models.py +57 -0
  699. synth_ai/task/rubrics/scoring.py +116 -0
  700. synth_ai/task/rubrics/strict.py +149 -0
  701. synth_ai/task/rubrics.py +22 -15
  702. synth_ai/task/server.py +65 -31
  703. synth_ai/task/trace_correlation_helpers.py +328 -0
  704. synth_ai/task/tracing_utils.py +44 -28
  705. synth_ai/task/validators.py +449 -6
  706. synth_ai/task/vendors.py +5 -7
  707. synth_ai/tracing_v3/__init__.py +4 -0
  708. synth_ai/tracing_v3/abstractions.py +21 -4
  709. synth_ai/tracing_v3/config.py +167 -22
  710. synth_ai/tracing_v3/constants.py +21 -0
  711. synth_ai/tracing_v3/db_config.py +42 -29
  712. synth_ai/tracing_v3/decorators.py +80 -45
  713. synth_ai/tracing_v3/examples/basic_usage.py +15 -9
  714. synth_ai/tracing_v3/hooks.py +6 -4
  715. synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
  716. synth_ai/tracing_v3/migration_helper.py +1 -2
  717. synth_ai/tracing_v3/replica_sync.py +12 -7
  718. synth_ai/tracing_v3/serialization.py +130 -0
  719. synth_ai/tracing_v3/session_tracer.py +73 -16
  720. synth_ai/tracing_v3/storage/base.py +89 -1
  721. synth_ai/tracing_v3/storage/config.py +63 -16
  722. synth_ai/tracing_v3/storage/factory.py +11 -9
  723. synth_ai/tracing_v3/storage/utils.py +15 -11
  724. synth_ai/tracing_v3/trace_utils.py +317 -0
  725. synth_ai/tracing_v3/turso/__init__.py +8 -21
  726. synth_ai/tracing_v3/turso/daemon.py +123 -15
  727. synth_ai/tracing_v3/turso/models.py +5 -2
  728. synth_ai/tracing_v3/turso/native_manager.py +1293 -0
  729. synth_ai/tracing_v3/utils.py +5 -4
  730. synth_ai/tunnel.py +143 -0
  731. synth_ai/tunnel_deploy.py +278 -0
  732. synth_ai/types.py +8 -0
  733. synth_ai/urls.py +11 -0
  734. synth_ai/utils/__init__.py +166 -0
  735. synth_ai/utils/agents.py +74 -0
  736. synth_ai/utils/apps.py +152 -0
  737. synth_ai/utils/base_url.py +94 -0
  738. synth_ai/utils/bin.py +39 -0
  739. synth_ai/utils/claude.py +36 -0
  740. synth_ai/utils/cli.py +284 -0
  741. synth_ai/utils/config.py +81 -0
  742. synth_ai/utils/env.py +346 -0
  743. synth_ai/utils/errors.py +85 -0
  744. synth_ai/utils/http.py +172 -0
  745. synth_ai/utils/json.py +72 -0
  746. synth_ai/utils/log_filter.py +99 -0
  747. synth_ai/utils/logging.py +198 -0
  748. synth_ai/utils/modal.py +299 -0
  749. synth_ai/utils/paths.py +95 -0
  750. synth_ai/utils/process.py +233 -0
  751. synth_ai/utils/prompts.py +39 -0
  752. synth_ai/utils/sqld.py +122 -0
  753. synth_ai/utils/ssl.py +25 -0
  754. synth_ai/utils/task_app_discovery.py +882 -0
  755. synth_ai/utils/task_app_env.py +186 -0
  756. synth_ai/utils/task_app_state.py +318 -0
  757. synth_ai/utils/tunnel/__init__.py +12 -0
  758. synth_ai/utils/tunnel/config.py +55 -0
  759. synth_ai/utils/user_config.py +137 -0
  760. synth_ai/uvicorn.py +77 -0
  761. synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
  762. synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
  763. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
  764. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
  765. synth_ai/cli/man.py +0 -106
  766. synth_ai/core/experiment.py +0 -15
  767. synth_ai/core/system.py +0 -15
  768. synth_ai/demo_registry.py +0 -258
  769. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  770. synth_ai/experimental/synth_oss.py +0 -446
  771. synth_ai/handshake.py +0 -107
  772. synth_ai/install_sqld.sh +0 -40
  773. synth_ai/learning/offline/dpo.py +0 -0
  774. synth_ai/learning/offline/providers.py +0 -7
  775. synth_ai/learning/offline/sft.py +0 -0
  776. synth_ai/learning/offline/shared.py +0 -0
  777. synth_ai/learning/online/grpo.py +0 -0
  778. synth_ai/learning/online/irft.py +0 -0
  779. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  780. synth_ai/learning/prompts/gepa.py +0 -0
  781. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  782. synth_ai/learning/prompts/mipro.py +0 -289
  783. synth_ai/learning/prompts/random_search.py +0 -246
  784. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  785. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  786. synth_ai/lm/__init__.py +0 -51
  787. synth_ai/lm/caching/constants.py +0 -6
  788. synth_ai/lm/caching/dbs.py +0 -0
  789. synth_ai/lm/caching/ephemeral.py +0 -102
  790. synth_ai/lm/caching/handler.py +0 -137
  791. synth_ai/lm/caching/initialize.py +0 -11
  792. synth_ai/lm/caching/persistent.py +0 -114
  793. synth_ai/lm/config.py +0 -110
  794. synth_ai/lm/constants.py +0 -32
  795. synth_ai/lm/core/__init__.py +0 -8
  796. synth_ai/lm/core/all.py +0 -73
  797. synth_ai/lm/core/exceptions.py +0 -7
  798. synth_ai/lm/core/main.py +0 -319
  799. synth_ai/lm/core/main_v3.py +0 -594
  800. synth_ai/lm/core/synth_models.py +0 -48
  801. synth_ai/lm/core/vendor_clients.py +0 -188
  802. synth_ai/lm/cost/monitor.py +0 -1
  803. synth_ai/lm/cost/statefulness.py +0 -1
  804. synth_ai/lm/injection.py +0 -80
  805. synth_ai/lm/overrides.py +0 -206
  806. synth_ai/lm/provider_support/__init__.py +0 -8
  807. synth_ai/lm/provider_support/anthropic.py +0 -972
  808. synth_ai/lm/provider_support/openai.py +0 -1139
  809. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  810. synth_ai/lm/structured_outputs/handler.py +0 -440
  811. synth_ai/lm/structured_outputs/inject.py +0 -297
  812. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  813. synth_ai/lm/tools/__init__.py +0 -3
  814. synth_ai/lm/tools/base.py +0 -172
  815. synth_ai/lm/unified_interface.py +0 -202
  816. synth_ai/lm/vendors/base.py +0 -81
  817. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  818. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  819. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  820. synth_ai/lm/vendors/core/openai_api.py +0 -225
  821. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  822. synth_ai/lm/vendors/local/ollama.py +0 -0
  823. synth_ai/lm/vendors/openai_standard.py +0 -780
  824. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  825. synth_ai/lm/vendors/retries.py +0 -22
  826. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  827. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  828. synth_ai/lm/vendors/supported/grok.py +0 -75
  829. synth_ai/lm/vendors/supported/groq.py +0 -16
  830. synth_ai/lm/vendors/supported/ollama.py +0 -15
  831. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  832. synth_ai/lm/vendors/supported/together.py +0 -11
  833. synth_ai/lm/vendors/synth_client.py +0 -808
  834. synth_ai/lm/warmup.py +0 -186
  835. synth_ai/rl/secrets.py +0 -19
  836. synth_ai/scripts/verify_rewards.py +0 -100
  837. synth_ai/task/apps/grpo_crafter.py +0 -438
  838. synth_ai/tracing/__init__.py +0 -30
  839. synth_ai/tracing_v1/__init__.py +0 -33
  840. synth_ai/tracing_v3/turso/manager.py +0 -774
  841. synth_ai/v0/tracing/abstractions.py +0 -224
  842. synth_ai/v0/tracing/base_client.py +0 -91
  843. synth_ai/v0/tracing/client_manager.py +0 -131
  844. synth_ai/v0/tracing/config.py +0 -142
  845. synth_ai/v0/tracing/context.py +0 -146
  846. synth_ai/v0/tracing/decorators.py +0 -682
  847. synth_ai/v0/tracing/events/__init__.py +0 -0
  848. synth_ai/v0/tracing/events/manage.py +0 -147
  849. synth_ai/v0/tracing/events/scope.py +0 -86
  850. synth_ai/v0/tracing/events/store.py +0 -228
  851. synth_ai/v0/tracing/immediate_client.py +0 -151
  852. synth_ai/v0/tracing/local.py +0 -18
  853. synth_ai/v0/tracing/log_client_base.py +0 -73
  854. synth_ai/v0/tracing/retry_queue.py +0 -186
  855. synth_ai/v0/tracing/trackers.py +0 -515
  856. synth_ai/v0/tracing/upload.py +0 -512
  857. synth_ai/v0/tracing/utils.py +0 -9
  858. synth_ai/v0/tracing_v1/__init__.py +0 -16
  859. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  860. synth_ai/v0/tracing_v1/base_client.py +0 -91
  861. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  862. synth_ai/v0/tracing_v1/config.py +0 -142
  863. synth_ai/v0/tracing_v1/context.py +0 -146
  864. synth_ai/v0/tracing_v1/decorators.py +0 -703
  865. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  866. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  867. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  868. synth_ai/v0/tracing_v1/events/store.py +0 -228
  869. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  870. synth_ai/v0/tracing_v1/local.py +0 -18
  871. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  872. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  873. synth_ai/v0/tracing_v1/trackers.py +0 -515
  874. synth_ai/v0/tracing_v1/upload.py +0 -527
  875. synth_ai/v0/tracing_v1/utils.py +0 -9
  876. synth_ai/zyk/__init__.py +0 -30
  877. synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
  878. synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
  879. {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
  880. {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
  881. {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
  882. {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
  883. {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
  884. {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
  885. {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
  886. {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
  887. /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
  888. /synth_ai/{learning/filtering.py → py.typed} +0 -0
  889. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
  890. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
synth_ai/cli/task_apps.py CHANGED
@@ -1,6 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import argparse
3
4
  import ast
5
+ import asyncio
4
6
  import contextlib
5
7
  import functools
6
8
  import hashlib
@@ -8,18 +10,112 @@ import importlib
8
10
  import importlib.util
9
11
  import inspect
10
12
  import os
11
- import signal
13
+ import shlex
12
14
  import shutil
15
+ import signal
13
16
  import subprocess
14
17
  import sys
15
18
  import tempfile
19
+ import textwrap
20
+ import time
21
+ import types
22
+ from collections.abc import Callable, Iterable, Iterator, Sequence
16
23
  from dataclasses import dataclass
24
+ from datetime import UTC, datetime
17
25
  from pathlib import Path
18
- from typing import Callable, Iterable, Sequence
26
+ from typing import Any, cast
27
+
28
+ try: # Python 3.11+
29
+ import tomllib as _toml
30
+ except Exception: # pragma: no cover - fallback
31
+ _toml = None # type: ignore
19
32
 
20
33
  import click
21
- from synth_ai.task.apps import ModalDeploymentConfig, TaskAppConfig, TaskAppEntry, registry
22
- from synth_ai.task.server import run_task_app
34
+ from click.exceptions import Abort
35
+ from synth_ai.cli.commands.eval import core as eval_core
36
+ from synth_ai.cli.commands.filter import core as filter_core
37
+
38
+ # Tracing imports - make conditional for optional dependencies
39
+ try:
40
+ from synth_ai.tracing_v3 import ( # type: ignore[import-untyped]
41
+ BaseEvent,
42
+ EnvironmentEvent,
43
+ RuntimeEvent,
44
+ SessionEventMarkovBlanketMessage,
45
+ SessionMessageContent,
46
+ SessionTimeStep,
47
+ SessionTracer,
48
+ TimeRecord,
49
+ )
50
+ from synth_ai.tracing_v3 import ( # type: ignore[import-untyped]
51
+ SessionTrace as V3SessionTrace,
52
+ )
53
+ _TRACING_AVAILABLE = True
54
+ except (ImportError, ModuleNotFoundError, TypeError):
55
+ # Tracing system not available (missing optional dependencies)
56
+ BaseEvent = EnvironmentEvent = RuntimeEvent = None # type: ignore
57
+ SessionEventMarkovBlanketMessage = SessionMessageContent = None # type: ignore
58
+ SessionTimeStep = SessionTracer = TimeRecord = None # type: ignore
59
+ V3SessionTrace = None # type: ignore
60
+ _TRACING_AVAILABLE = False
61
+
62
+ # ---------------------------------------------------------------------------
63
+ # Dynamic imports to avoid hard dependencies during type checking.
64
+ # ---------------------------------------------------------------------------
65
+ ModalDeploymentConfigType = TaskAppConfigType = TaskAppEntryType = Any
66
+
67
+ try: # Resolve base URL defaults lazily
68
+ _config_module = cast(
69
+ Any, importlib.import_module("synth_ai.config.base_url")
70
+ )
71
+ PROD_BASE_URL_DEFAULT = cast(str, _config_module.PROD_BASE_URL_DEFAULT)
72
+ except Exception: # pragma: no cover - fallback
73
+ PROD_BASE_URL_DEFAULT = "https://agent-learning.onrender.com"
74
+
75
+ try:
76
+ _task_apps_module = cast(Any, importlib.import_module("synth_ai.task.apps"))
77
+ ModalDeploymentConfig = cast(
78
+ type[ModalDeploymentConfigType], _task_apps_module.ModalDeploymentConfig
79
+ )
80
+ TaskAppConfig = cast(type[TaskAppConfigType], _task_apps_module.TaskAppConfig)
81
+ TaskAppEntry = cast(type[TaskAppEntryType], _task_apps_module.TaskAppEntry)
82
+ registry = _task_apps_module.registry
83
+ except Exception as exc: # pragma: no cover - critical dependency
84
+ raise RuntimeError("Unable to load task app registry") from exc
85
+
86
+ try:
87
+ _task_server_module = cast(Any, importlib.import_module("synth_ai.task.server"))
88
+ create_task_app = cast(Callable[..., Any], _task_server_module.create_task_app)
89
+ run_task_app = cast(Callable[..., Any], _task_server_module.run_task_app)
90
+ except Exception as exc: # pragma: no cover - critical dependency
91
+ raise RuntimeError("Unable to load task app server utilities") from exc
92
+
93
+
94
+ def _load_demo_directory() -> Path | None:
95
+ """Return the demo task apps directory if available."""
96
+
97
+ try:
98
+ module = cast(
99
+ Any, importlib.import_module("synth_ai.demos.demo_task_apps.core")
100
+ )
101
+ loader = cast(Callable[[], str | Path | None], module.load_demo_dir)
102
+ demo_dir = loader()
103
+ if isinstance(demo_dir, str | Path):
104
+ demo_path = Path(demo_dir)
105
+ if demo_path.exists():
106
+ return demo_path.resolve()
107
+ except Exception:
108
+ return None
109
+ return None
110
+
111
+
112
+ def _maybe_import(name: str) -> Any:
113
+ """Safely import a module by name and return it, or None on failure."""
114
+
115
+ try:
116
+ return importlib.import_module(name)
117
+ except Exception:
118
+ return None
23
119
 
24
120
  REPO_ROOT = Path(__file__).resolve().parents[2]
25
121
 
@@ -42,6 +138,25 @@ DEFAULT_SEARCH_RELATIVE = (
42
138
  )
43
139
 
44
140
 
141
+ def _pearson(xs: Sequence[float], ys: Sequence[float]) -> float | None:
142
+ if len(xs) != len(ys) or len(xs) < 2:
143
+ return None
144
+ mean_x = sum(xs) / len(xs)
145
+ mean_y = sum(ys) / len(ys)
146
+ num = 0.0
147
+ denom_x = 0.0
148
+ denom_y = 0.0
149
+ for x, y in zip(xs, ys, strict=False):
150
+ dx = x - mean_x
151
+ dy = y - mean_y
152
+ num += dx * dy
153
+ denom_x += dx * dx
154
+ denom_y += dy * dy
155
+ if denom_x <= 0 or denom_y <= 0:
156
+ return None
157
+ return num / (denom_x ** 0.5 * denom_y ** 0.5)
158
+
159
+
45
160
  @dataclass
46
161
  class AppChoice:
47
162
  app_id: str
@@ -50,12 +165,12 @@ class AppChoice:
50
165
  source: str
51
166
  description: str | None = None
52
167
  aliases: tuple[str, ...] = ()
53
- entry: TaskAppEntry | None = None
54
- entry_loader: Callable[[], TaskAppEntry] | None = None
168
+ entry: TaskAppEntryType | None = None
169
+ entry_loader: Callable[[], TaskAppEntryType] | None = None
55
170
  modal_script: Path | None = None
56
171
  lineno: int | None = None
57
172
 
58
- def ensure_entry(self) -> TaskAppEntry:
173
+ def ensure_entry(self) -> TaskAppEntryType:
59
174
  if self.entry is not None:
60
175
  return self.entry
61
176
  if self.entry_loader is None:
@@ -65,18 +180,311 @@ class AppChoice:
65
180
  return entry
66
181
 
67
182
 
183
+ @dataclass
184
+ class JudgeSpec:
185
+ name: str
186
+ fn: Callable[..., Any]
187
+ kwargs: dict[str, Any]
188
+
189
+
190
+ def _parse_datetime_for_trace(value: Any) -> datetime | None:
191
+ if isinstance(value, datetime):
192
+ return value if value.tzinfo else value.replace(tzinfo=UTC)
193
+ if isinstance(value, str):
194
+ value = value.replace("Z", "+00:00")
195
+ try:
196
+ dt = datetime.fromisoformat(value)
197
+ except ValueError:
198
+ try:
199
+ dt = datetime.fromtimestamp(float(value), tz=UTC)
200
+ except Exception:
201
+ return None
202
+ return dt if dt.tzinfo else dt.replace(tzinfo=UTC)
203
+ if isinstance(value, int | float):
204
+ return datetime.fromtimestamp(float(value), tz=UTC)
205
+ return None
206
+
207
+
208
+ def _time_record_from_dict(payload: dict[str, Any] | None) -> TimeRecord:
209
+ payload = payload or {}
210
+ event_time = payload.get("event_time")
211
+ if not isinstance(event_time, int | float):
212
+ try:
213
+ event_time = float(event_time)
214
+ except Exception:
215
+ event_time = float(time.time())
216
+ message_time = payload.get("message_time")
217
+ if message_time is not None:
218
+ try:
219
+ message_time = int(message_time)
220
+ except Exception:
221
+ message_time = None
222
+ return TimeRecord(event_time=event_time, message_time=message_time)
223
+
224
+
225
+ def _event_from_dict(payload: dict[str, Any]) -> BaseEvent:
226
+ base_kwargs = {
227
+ "system_instance_id": payload.get("system_instance_id", ""),
228
+ "time_record": _time_record_from_dict(payload.get("time_record")),
229
+ "metadata": payload.get("metadata") or {},
230
+ "event_metadata": payload.get("event_metadata"),
231
+ }
232
+ if "actions" in payload:
233
+ return RuntimeEvent(actions=payload.get("actions") or [], **base_kwargs)
234
+ if any(key in payload for key in ("reward", "terminated", "truncated")):
235
+ return EnvironmentEvent(
236
+ reward=float(payload.get("reward", 0.0) or 0.0),
237
+ terminated=bool(payload.get("terminated", False)),
238
+ truncated=bool(payload.get("truncated", False)),
239
+ system_state_before=payload.get("system_state_before"),
240
+ system_state_after=payload.get("system_state_after"),
241
+ **base_kwargs,
242
+ )
243
+ # Check for LM CAIS event fields
244
+ if any(key in payload for key in ("model_name", "provider", "call_records")):
245
+ from synth_ai.tracing_v3.abstractions import LMCAISEvent
246
+ # Note: call_records are left as dicts - the storage layer will handle serialization
247
+ call_records = payload.get("call_records") or []
248
+ return LMCAISEvent(
249
+ model_name=payload.get("model_name", ""),
250
+ provider=payload.get("provider", ""),
251
+ input_tokens=payload.get("input_tokens"),
252
+ output_tokens=payload.get("output_tokens"),
253
+ total_tokens=payload.get("total_tokens"),
254
+ cost_usd=payload.get("cost_usd"),
255
+ latency_ms=payload.get("latency_ms"),
256
+ span_id=payload.get("span_id"),
257
+ trace_id=payload.get("trace_id"),
258
+ call_records=call_records,
259
+ **base_kwargs,
260
+ )
261
+ return BaseEvent(**base_kwargs)
262
+
263
+
264
+ def _markov_message_from_dict(payload: dict[str, Any]) -> SessionEventMarkovBlanketMessage:
265
+ content_payload = payload.get("content") or {}
266
+ content = SessionMessageContent(
267
+ text=content_payload.get("text"),
268
+ json_payload=content_payload.get("json_payload"),
269
+ )
270
+ raw_type = (payload.get("message_type") or "").lower()
271
+ original_type = payload.get("message_type") or raw_type
272
+
273
+ if raw_type in ("observation", "policy_system_prompt"):
274
+ normalized_type = "system"
275
+ elif raw_type in ("action", "policy_tool_call"):
276
+ normalized_type = "assistant"
277
+ elif raw_type in {"user", "assistant", "system", "tool_use", "tool_result"}:
278
+ normalized_type = raw_type
279
+ else:
280
+ normalized_type = "system"
281
+
282
+ metadata = dict(payload.get("metadata") or {})
283
+ metadata["original_message_type"] = original_type
284
+
285
+ return SessionEventMarkovBlanketMessage(
286
+ content=content,
287
+ message_type=normalized_type,
288
+ time_record=_time_record_from_dict(payload.get("time_record")),
289
+ metadata=metadata,
290
+ )
291
+
292
+
293
+ def _step_from_dict(payload: dict[str, Any]) -> SessionTimeStep:
294
+ events = [
295
+ _event_from_dict(event)
296
+ for event in payload.get("events", [])
297
+ if isinstance(event, dict)
298
+ ]
299
+ messages = [
300
+ _markov_message_from_dict(msg)
301
+ for msg in payload.get("markov_blanket_messages", [])
302
+ if isinstance(msg, dict)
303
+ ]
304
+ timestamp = _parse_datetime_for_trace(payload.get("timestamp")) or datetime.now(UTC)
305
+ completed_at = _parse_datetime_for_trace(payload.get("completed_at"))
306
+ return SessionTimeStep(
307
+ step_id=payload.get("step_id", ""),
308
+ step_index=int(payload.get("step_index", 0) or 0),
309
+ timestamp=timestamp,
310
+ turn_number=payload.get("turn_number"),
311
+ events=events,
312
+ markov_blanket_messages=messages,
313
+ step_metadata=payload.get("step_metadata") or {},
314
+ completed_at=completed_at,
315
+ )
316
+
317
+
318
+ def _session_trace_from_dict(payload: dict[str, Any]) -> V3SessionTrace | None:
319
+ if not isinstance(payload, dict):
320
+ return None
321
+ steps = [
322
+ _step_from_dict(step)
323
+ for step in payload.get("session_time_steps", [])
324
+ if isinstance(step, dict)
325
+ ]
326
+ events = [
327
+ _event_from_dict(event)
328
+ for event in payload.get("event_history", [])
329
+ if isinstance(event, dict)
330
+ ]
331
+ markov_history = [
332
+ _markov_message_from_dict(msg)
333
+ for msg in payload.get("markov_blanket_message_history", [])
334
+ if isinstance(msg, dict)
335
+ ]
336
+ created_at = _parse_datetime_for_trace(payload.get("created_at")) or datetime.now(UTC)
337
+ metadata = payload.get("metadata") or {}
338
+ session_metadata = payload.get("session_metadata")
339
+ return V3SessionTrace(
340
+ session_id=payload.get("session_id", ""),
341
+ created_at=created_at,
342
+ session_time_steps=steps,
343
+ event_history=events,
344
+ markov_blanket_message_history=markov_history,
345
+ metadata=metadata,
346
+ session_metadata=session_metadata,
347
+ )
348
+
349
+
350
+ async def _store_trace(
351
+ tracer: SessionTracer | None,
352
+ trace_namespace: dict[str, Any] | None,
353
+ extra_metadata: dict[str, Any] | None = None,
354
+ ):
355
+ import logging
356
+ _logger = logging.getLogger(__name__)
357
+
358
+ _logger.info(f"[STORE_TRACE_DEBUG] Called with tracer={tracer is not None}, trace_namespace={trace_namespace is not None}")
359
+
360
+ if tracer is None or not isinstance(trace_namespace, dict):
361
+ _logger.warning(f"[STORE_TRACE_DEBUG] Early return: tracer={tracer is not None}, trace_namespace type={type(trace_namespace)}")
362
+ return
363
+
364
+ _logger.info(f"[STORE_TRACE_DEBUG] trace_namespace keys: {list(trace_namespace.keys())}")
365
+
366
+ # Handle both formats:
367
+ # - With session_trace key: {"session_trace": {...}}
368
+ # - Without session_trace key (trace itself is the session): {"session_id": ..., "markov_blanket_message_history": ...}
369
+ session_payload = trace_namespace.get("session_trace")
370
+ if not isinstance(session_payload, dict):
371
+ # If no session_trace key, assume "full" format where trace itself is the session_trace
372
+ if "session_id" in trace_namespace:
373
+ session_payload = trace_namespace
374
+ _logger.info("[STORE_TRACE_DEBUG] Using trace_namespace directly as session_payload (no session_trace key)")
375
+ else:
376
+ _logger.warning(f"[STORE_TRACE_DEBUG] No session_trace found or wrong type: {type(session_payload)}")
377
+ return
378
+
379
+ _logger.info(f"[STORE_TRACE_DEBUG] session_payload keys: {list(session_payload.keys())}")
380
+ msg_count = len(session_payload.get("markov_blanket_message_history", []))
381
+ _logger.info(f"[STORE_TRACE_DEBUG] Found {msg_count} messages in session_payload")
382
+
383
+ trace_obj = _session_trace_from_dict(session_payload)
384
+ if trace_obj is None:
385
+ _logger.warning("[STORE_TRACE_DEBUG] _session_trace_from_dict returned None")
386
+ return
387
+
388
+ _logger.info(f"[STORE_TRACE_DEBUG] Created SessionTrace object with {len(trace_obj.markov_blanket_message_history)} messages")
389
+
390
+ if tracer.db is None:
391
+ await tracer.initialize()
392
+ meta = dict(trace_obj.metadata or {})
393
+ if extra_metadata:
394
+ meta.update(extra_metadata)
395
+ trace_obj.metadata = meta
396
+
397
+ _logger.info(f"[STORE_TRACE_DEBUG] Calling insert_session_trace for session_id={trace_obj.session_id}")
398
+ await tracer.db.insert_session_trace(trace_obj)
399
+ _logger.info("[STORE_TRACE_DEBUG] Successfully inserted trace")
400
+
401
+ def _temporary_sys_path(paths: Sequence[Path]):
402
+ """Context manager to prepend entries to sys.path temporarily."""
403
+
404
+ @contextlib.contextmanager
405
+ def _manager() -> Iterator[None]:
406
+ added: list[str] = []
407
+ for p in paths:
408
+ try:
409
+ resolved = str(p.resolve())
410
+ except Exception:
411
+ continue
412
+ if resolved in sys.path:
413
+ continue
414
+ sys.path.insert(0, resolved)
415
+ added.append(resolved)
416
+ try:
417
+ yield None
418
+ finally:
419
+ for entry in added:
420
+ with contextlib.suppress(ValueError):
421
+ sys.path.remove(entry)
422
+
423
+ return _manager()
424
+
425
+
426
+ def _possible_module_names(
427
+ path: Path, module_search_roots: Sequence[Path]
428
+ ) -> list[tuple[str, Path]]:
429
+ """Return potential module names based on candidate roots."""
430
+
431
+ candidates: list[tuple[str, Path]] = []
432
+ for root in module_search_roots:
433
+ try:
434
+ resolved_root = root.resolve()
435
+ except Exception:
436
+ continue
437
+ if not resolved_root.exists() or not path.is_relative_to(resolved_root):
438
+ continue
439
+ relative = path.relative_to(resolved_root)
440
+ stem = relative.with_suffix("")
441
+ parts = list(stem.parts)
442
+ if not parts:
443
+ continue
444
+ module_name = ".".join(parts)
445
+ if module_name:
446
+ candidates.append((module_name, resolved_root))
447
+ return candidates
448
+
449
+
450
+ def _ensure_parent_namespace(module_name: str, search_root: Path) -> None:
451
+ """Ensure namespace packages exist for dotted module names."""
452
+
453
+ parts = module_name.split(".")
454
+ for depth in range(1, len(parts)):
455
+ parent_name = ".".join(parts[:depth])
456
+ if parent_name in sys.modules:
457
+ continue
458
+ parent_module = types.ModuleType(parent_name)
459
+ candidate_dir = search_root.joinpath(*parts[:depth])
460
+ try:
461
+ resolved = candidate_dir.resolve()
462
+ except Exception:
463
+ resolved = search_root.resolve()
464
+ parent_module.__path__ = [str(resolved)] # type: ignore[attr-defined]
465
+ sys.modules[parent_name] = parent_module
466
+
467
+
68
468
  def _should_ignore_path(path: Path) -> bool:
69
469
  return any(part in DEFAULT_IGNORE_DIRS for part in path.parts)
70
470
 
71
471
 
72
472
  def _candidate_search_roots() -> list[Path]:
473
+ """Only search for task apps in the current working directory and subdirectories."""
73
474
  roots: list[Path] = []
475
+
476
+ demo_path = _load_demo_directory()
477
+ if demo_path is not None and demo_path.is_dir():
478
+ roots.append(demo_path)
479
+
480
+ # Allow explicit search paths via environment variable
74
481
  env_paths = os.environ.get("SYNTH_TASK_APP_SEARCH_PATH")
75
482
  if env_paths:
76
483
  for chunk in env_paths.split(os.pathsep):
77
484
  if chunk:
78
485
  roots.append(Path(chunk).expanduser())
79
486
 
487
+ # Always include current working directory
80
488
  cwd = Path.cwd().resolve()
81
489
  roots.append(cwd)
82
490
 
@@ -86,16 +494,8 @@ def _candidate_search_roots() -> list[Path]:
86
494
  except Exception:
87
495
  continue
88
496
  roots.append(candidate)
89
- if REPO_ROOT not in (None, candidate):
90
- try:
91
- repo_candidate = (REPO_ROOT / rel).resolve()
92
- except Exception:
93
- repo_candidate = None
94
- if repo_candidate:
95
- roots.append(repo_candidate)
96
-
97
- roots.append(REPO_ROOT)
98
497
 
498
+ # Remove duplicates while preserving order
99
499
  seen: set[Path] = set()
100
500
  ordered: list[Path] = []
101
501
  for root in roots:
@@ -119,21 +519,27 @@ class _TaskAppConfigVisitor(ast.NodeVisitor):
119
519
  app_id = _extract_app_id(node)
120
520
  if app_id:
121
521
  self.matches.append((app_id, getattr(node, "lineno", 0)))
522
+ elif _is_register_task_app_call(node):
523
+ app_id = _extract_register_app_id(node)
524
+ if app_id:
525
+ self.matches.append((app_id, getattr(node, "lineno", 0)))
122
526
  self.generic_visit(node)
123
527
 
124
528
 
125
529
  def _is_task_app_config_call(node: ast.Call) -> bool:
126
530
  func = node.func
127
- if isinstance(func, ast.Name) and func.id == "TaskAppConfig":
128
- return True
129
- if isinstance(func, ast.Attribute) and func.attr == "TaskAppConfig":
130
- return True
131
- return False
531
+ return (isinstance(func, ast.Name) and func.id == "TaskAppConfig") or (
532
+ isinstance(func, ast.Attribute) and func.attr == "TaskAppConfig"
533
+ )
132
534
 
133
535
 
134
536
  def _extract_app_id(node: ast.Call) -> str | None:
135
537
  for kw in node.keywords:
136
- if kw.arg == "app_id" and isinstance(kw.value, ast.Constant) and isinstance(kw.value.value, str):
538
+ if (
539
+ kw.arg == "app_id"
540
+ and isinstance(kw.value, ast.Constant)
541
+ and isinstance(kw.value.value, str)
542
+ ):
137
543
  return kw.value.value
138
544
  if node.args:
139
545
  first = node.args[0]
@@ -142,6 +548,29 @@ def _extract_app_id(node: ast.Call) -> str | None:
142
548
  return None
143
549
 
144
550
 
551
+ def _is_register_task_app_call(node: ast.Call) -> bool:
552
+ func = node.func
553
+ return (isinstance(func, ast.Name) and func.id == "register_task_app") or (
554
+ isinstance(func, ast.Attribute) and func.attr == "register_task_app"
555
+ )
556
+
557
+
558
+ def _extract_register_app_id(node: ast.Call) -> str | None:
559
+ # Look for entry=TaskAppEntry(app_id="...", ...)
560
+ for kw in node.keywords:
561
+ if kw.arg == "entry" and isinstance(kw.value, ast.Call):
562
+ entry_call = kw.value
563
+ if isinstance(entry_call.func, ast.Name) and entry_call.func.id == "TaskAppEntry":
564
+ for entry_kw in entry_call.keywords:
565
+ if (
566
+ entry_kw.arg == "app_id"
567
+ and isinstance(entry_kw.value, ast.Constant)
568
+ and isinstance(entry_kw.value.value, str)
569
+ ):
570
+ return entry_kw.value.value
571
+ return None
572
+
573
+
145
574
  class _ModalAppVisitor(ast.NodeVisitor):
146
575
  def __init__(self) -> None:
147
576
  self.app_aliases: set[str] = set()
@@ -168,7 +597,11 @@ class _ModalAppVisitor(ast.NodeVisitor):
168
597
  if name:
169
598
  self.matches.append((name, getattr(node, "lineno", 0)))
170
599
  elif isinstance(func, ast.Attribute):
171
- if isinstance(func.value, ast.Name) and func.value.id in self.modal_aliases and func.attr == "App":
600
+ if (
601
+ isinstance(func.value, ast.Name)
602
+ and func.value.id in self.modal_aliases
603
+ and func.attr == "App"
604
+ ):
172
605
  name = _extract_modal_app_name(node)
173
606
  if name:
174
607
  self.matches.append((name, getattr(node, "lineno", 0)))
@@ -177,7 +610,11 @@ class _ModalAppVisitor(ast.NodeVisitor):
177
610
 
178
611
  def _extract_modal_app_name(node: ast.Call) -> str | None:
179
612
  for kw in node.keywords:
180
- if kw.arg in {"name", "app_name"} and isinstance(kw.value, ast.Constant) and isinstance(kw.value.value, str):
613
+ if (
614
+ kw.arg in {"name", "app_name"}
615
+ and isinstance(kw.value, ast.Constant)
616
+ and isinstance(kw.value.value, str)
617
+ ):
181
618
  return kw.value.value
182
619
  if node.args:
183
620
  first = node.args[0]
@@ -186,12 +623,14 @@ def _extract_modal_app_name(node: ast.Call) -> str | None:
186
623
  return None
187
624
 
188
625
 
189
- @functools.lru_cache(maxsize=1)
190
626
  def _collect_task_app_choices() -> list[AppChoice]:
627
+ # Clear registry to avoid duplicate registration errors
628
+ registry.clear()
629
+
191
630
  choices: list[AppChoice] = []
192
631
  with contextlib.suppress(Exception):
193
- import synth_ai.demos.demo_task_apps # noqa: F401
194
- choices.extend(_collect_registered_choices())
632
+ _maybe_import("synth_ai.demos.demo_task_apps")
633
+ # Only use discovered task apps, not registered ones (since we moved them to examples)
195
634
  choices.extend(_collect_scanned_task_configs())
196
635
  choices.extend(_collect_modal_scripts())
197
636
 
@@ -210,6 +649,7 @@ def _collect_task_app_choices() -> list[AppChoice]:
210
649
  continue
211
650
  unique[key] = choice
212
651
  ordered.append(choice)
652
+ ordered.sort(key=_app_choice_sort_key)
213
653
  return ordered
214
654
 
215
655
 
@@ -240,6 +680,10 @@ def _collect_scanned_task_configs() -> list[AppChoice]:
240
680
  results: list[AppChoice] = []
241
681
  seen: set[tuple[str, Path]] = set()
242
682
  for root in _candidate_search_roots():
683
+ try:
684
+ root_resolved = root.resolve()
685
+ except Exception:
686
+ continue
243
687
  if not root.exists() or not root.is_dir():
244
688
  continue
245
689
  for path in root.rglob("*.py"):
@@ -269,7 +713,11 @@ def _collect_scanned_task_configs() -> list[AppChoice]:
269
713
  path=path.resolve(),
270
714
  source="discovered",
271
715
  description=f"TaskAppConfig in {path.name} (line {lineno})",
272
- entry_loader=lambda p=path.resolve(), a=app_id: _load_entry_from_path(p, a),
716
+ entry_loader=lambda p=path.resolve(),
717
+ a=app_id,
718
+ roots=(root_resolved,): _load_entry_from_path(
719
+ p, a, module_search_roots=roots
720
+ ),
273
721
  lineno=lineno,
274
722
  )
275
723
  )
@@ -316,15 +764,60 @@ def _collect_modal_scripts() -> list[AppChoice]:
316
764
  return results
317
765
 
318
766
 
767
+ def _app_choice_sort_key(choice: AppChoice) -> tuple[int, int, int, int, int, str, str]:
768
+ """Ranking heuristic so wrapper-style task apps surface first."""
769
+
770
+ # Prioritize apps in the current working directory (demo or otherwise)
771
+ cwd_rank = 1
772
+ try:
773
+ cwd = Path.cwd().resolve()
774
+ if choice.path.is_relative_to(cwd):
775
+ # Check if this is directly in CWD (not in subdirectories like examples/)
776
+ try:
777
+ rel_path = choice.path.relative_to(cwd)
778
+ # If it's in the immediate directory or one level deep, prioritize it
779
+ if len(rel_path.parts) <= 2:
780
+ cwd_rank = 0
781
+ except Exception:
782
+ pass
783
+ except Exception:
784
+ pass
785
+
786
+ # Further prioritize apps in the demo directory if one is set
787
+ demo_rank = 1
788
+ demo_dir = _load_demo_directory()
789
+ if demo_dir and choice.path.is_relative_to(demo_dir):
790
+ demo_rank = 0
791
+
792
+ modal_rank = 1 if choice.modal_script else 0
793
+
794
+ name = choice.path.name.lower()
795
+ file_rank = 3
796
+ if name.endswith("_task_app.py") or name.endswith("task_app.py"):
797
+ file_rank = 0
798
+ elif name.endswith("_app.py") or "task_app" in name:
799
+ file_rank = 1
800
+ elif name.endswith(".py"):
801
+ file_rank = 2
802
+
803
+ directory_rank = 0 if choice.path.parent.name.lower() in {"task_app", "task_apps"} else 1
804
+
805
+ return (
806
+ demo_rank,
807
+ cwd_rank,
808
+ modal_rank,
809
+ file_rank,
810
+ directory_rank,
811
+ choice.app_id,
812
+ str(choice.path),
813
+ )
814
+
815
+
319
816
  def _choice_matches_identifier(choice: AppChoice, identifier: str) -> bool:
320
817
  ident = identifier.strip()
321
818
  if not ident:
322
819
  return False
323
- if ident == choice.app_id or ident == choice.label:
324
- return True
325
- if ident in choice.aliases:
326
- return True
327
- return False
820
+ return ident == choice.app_id or ident == choice.label or ident in choice.aliases
328
821
 
329
822
 
330
823
  def _choice_has_modal_support(choice: AppChoice) -> bool:
@@ -333,10 +826,128 @@ def _choice_has_modal_support(choice: AppChoice) -> bool:
333
826
  try:
334
827
  entry = choice.ensure_entry()
335
828
  except click.ClickException:
336
- return False
829
+ # If we can't load the entry, try to detect Modal support via AST parsing
830
+ return _has_modal_support_in_file(choice.path)
337
831
  return entry.modal is not None
338
832
 
339
833
 
834
+ def _has_modal_support_in_file(path: Path) -> bool:
835
+ """Detect if a file has Modal deployment support by parsing the AST."""
836
+ try:
837
+ source = path.read_text(encoding="utf-8")
838
+ tree = ast.parse(source, filename=str(path))
839
+
840
+ # Look for ModalDeploymentConfig in register_task_app calls
841
+ for node in ast.walk(tree):
842
+ if isinstance(node, ast.Call) and _is_register_task_app_call(node):
843
+ # Check if the entry has modal=ModalDeploymentConfig(...)
844
+ for kw in node.keywords:
845
+ if kw.arg == "entry" and isinstance(kw.value, ast.Call):
846
+ entry_call = kw.value
847
+ if (
848
+ isinstance(entry_call.func, ast.Name)
849
+ and entry_call.func.id == "TaskAppEntry"
850
+ ):
851
+ for entry_kw in entry_call.keywords:
852
+ if entry_kw.arg == "modal" and isinstance(entry_kw.value, ast.Call):
853
+ modal_call = entry_kw.value
854
+ if (
855
+ isinstance(modal_call.func, ast.Name)
856
+ and modal_call.func.id == "ModalDeploymentConfig"
857
+ ):
858
+ return True
859
+ except Exception:
860
+ pass
861
+ return False
862
+
863
+
864
+ def _extract_modal_config_from_file(path: Path) -> ModalDeploymentConfigType | None:
865
+ """Extract ModalDeploymentConfig from a file by parsing the AST."""
866
+ try:
867
+ source = path.read_text(encoding="utf-8")
868
+ tree = ast.parse(source, filename=str(path))
869
+
870
+ # Look for ModalDeploymentConfig in register_task_app calls
871
+ for node in ast.walk(tree):
872
+ if isinstance(node, ast.Call) and _is_register_task_app_call(node):
873
+ # Check if the entry has modal=ModalDeploymentConfig(...)
874
+ for kw in node.keywords:
875
+ if kw.arg == "entry" and isinstance(kw.value, ast.Call):
876
+ entry_call = kw.value
877
+ if (
878
+ isinstance(entry_call.func, ast.Name)
879
+ and entry_call.func.id == "TaskAppEntry"
880
+ ):
881
+ for entry_kw in entry_call.keywords:
882
+ if entry_kw.arg == "modal" and isinstance(entry_kw.value, ast.Call):
883
+ modal_call = entry_kw.value
884
+ if (
885
+ isinstance(modal_call.func, ast.Name)
886
+ and modal_call.func.id == "ModalDeploymentConfig"
887
+ ):
888
+ # Extract the arguments to ModalDeploymentConfig
889
+ return _build_modal_config_from_ast(modal_call)
890
+ except Exception:
891
+ pass
892
+ return None
893
+
894
+
895
+ def _build_modal_config_from_ast(modal_call: ast.Call) -> ModalDeploymentConfigType | None:
896
+ """Build a ModalDeploymentConfig from an AST Call node."""
897
+ try:
898
+ # Extract keyword arguments
899
+ kwargs = {}
900
+ for kw in modal_call.keywords:
901
+ if kw.arg and isinstance(kw.value, ast.Constant):
902
+ kwargs[kw.arg] = kw.value.value
903
+ elif kw.arg == "pip_packages" and isinstance(kw.value, ast.List | ast.Tuple):
904
+ # Handle pip_packages list/tuple
905
+ packages: list[str] = []
906
+ value_node = kw.value
907
+ if isinstance(value_node, ast.List | ast.Tuple):
908
+ for elt in value_node.elts:
909
+ if isinstance(elt, ast.Constant):
910
+ packages.append(elt.value)
911
+ kwargs[kw.arg] = tuple(packages)
912
+ elif kw.arg == "extra_local_dirs" and isinstance(kw.value, ast.List | ast.Tuple):
913
+ # Handle extra_local_dirs list/tuple of tuples
914
+ dirs = []
915
+ value_node = kw.value
916
+ if isinstance(value_node, ast.List | ast.Tuple):
917
+ for elt in value_node.elts:
918
+ if isinstance(elt, ast.List | ast.Tuple) and len(elt.elts) == 2:
919
+ src = elt.elts[0].value if isinstance(elt.elts[0], ast.Constant) else None
920
+ dst = elt.elts[1].value if isinstance(elt.elts[1], ast.Constant) else None
921
+ if src and dst:
922
+ dirs.append((src, dst))
923
+ kwargs[kw.arg] = tuple(dirs)
924
+ elif kw.arg == "secret_names" and isinstance(kw.value, ast.List | ast.Tuple):
925
+ # Handle secret_names list/tuple
926
+ secrets = []
927
+ value_node = kw.value
928
+ if isinstance(value_node, ast.List | ast.Tuple):
929
+ for elt in value_node.elts:
930
+ if isinstance(elt, ast.Constant):
931
+ secrets.append(elt.value)
932
+ kwargs[kw.arg] = tuple(secrets)
933
+ elif kw.arg == "volume_mounts" and isinstance(kw.value, ast.List | ast.Tuple):
934
+ # Handle volume_mounts list/tuple of tuples
935
+ mounts = []
936
+ value_node = kw.value
937
+ if isinstance(value_node, ast.List | ast.Tuple):
938
+ for elt in value_node.elts:
939
+ if isinstance(elt, ast.List | ast.Tuple) and len(elt.elts) == 2:
940
+ name = elt.elts[0].value if isinstance(elt.elts[0], ast.Constant) else None
941
+ mount = elt.elts[1].value if isinstance(elt.elts[1], ast.Constant) else None
942
+ if name and mount:
943
+ mounts.append((name, mount))
944
+ kwargs[kw.arg] = tuple(mounts)
945
+
946
+ return ModalDeploymentConfig(**kwargs)
947
+ except Exception:
948
+ return None
949
+
950
+
340
951
  def _choice_has_local_support(choice: AppChoice) -> bool:
341
952
  if choice.modal_script:
342
953
  return False
@@ -349,20 +960,29 @@ def _choice_has_local_support(choice: AppChoice) -> bool:
349
960
 
350
961
  def _format_choice(choice: AppChoice, index: int | None = None) -> str:
351
962
  prefix = f"[{index}] " if index is not None else ""
352
- rel_path: str
963
+ # Get file modification timestamp
353
964
  try:
354
- rel_path = str(choice.path.relative_to(REPO_ROOT))
965
+ from datetime import datetime
966
+
967
+ mtime = choice.path.stat().st_mtime
968
+ modified_str = datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M:%S")
969
+ details = f"Modified: {modified_str}"
355
970
  except Exception:
356
- rel_path = str(choice.path)
357
- details = choice.description or f"Located at {rel_path}"
358
- return f"{prefix}{choice.app_id} ({choice.source}) {details}"
971
+ # Fallback if timestamp unavailable
972
+ details = choice.description or "No timestamp available"
973
+ # Format: single line with timestamp
974
+ main_line = f"{prefix}{choice.app_id} ({choice.source}) – {details}"
975
+ return main_line
359
976
 
360
977
 
361
978
  def _prompt_user_for_choice(choices: list[AppChoice]) -> AppChoice:
362
979
  click.echo("Select a task app:")
363
980
  for idx, choice in enumerate(choices, start=1):
364
981
  click.echo(_format_choice(choice, idx))
365
- response = click.prompt("Enter choice", default="1", type=str).strip() or "1"
982
+ try:
983
+ response = click.prompt("Enter choice", default="1", type=str).strip() or "1"
984
+ except (Abort, EOFError, KeyboardInterrupt) as exc:
985
+ raise click.ClickException("Task app selection cancelled by user") from exc
366
986
  if not response.isdigit():
367
987
  raise click.ClickException("Selection must be a number")
368
988
  index = int(response)
@@ -373,7 +993,7 @@ def _prompt_user_for_choice(choices: list[AppChoice]) -> AppChoice:
373
993
 
374
994
  def _select_app_choice(app_id: str | None, purpose: str) -> AppChoice:
375
995
  choices = _collect_task_app_choices()
376
- if purpose == "serve":
996
+ if purpose in {"serve", "eval"}:
377
997
  filtered = [c for c in choices if not c.modal_script]
378
998
  elif purpose in {"deploy", "modal-serve"}:
379
999
  filtered = []
@@ -383,6 +1003,8 @@ def _select_app_choice(app_id: str | None, purpose: str) -> AppChoice:
383
1003
  else:
384
1004
  filtered = choices
385
1005
 
1006
+ filtered.sort(key=_app_choice_sort_key)
1007
+
386
1008
  if not filtered:
387
1009
  raise click.ClickException("No task apps discovered for this command.")
388
1010
 
@@ -391,6 +1013,9 @@ def _select_app_choice(app_id: str | None, purpose: str) -> AppChoice:
391
1013
  if not matches:
392
1014
  available = ", ".join(sorted({c.app_id for c in filtered}))
393
1015
  raise click.ClickException(f"Task app '{app_id}' not found. Available: {available}")
1016
+ exact_matches = [c for c in matches if c.app_id == app_id]
1017
+ if len(exact_matches) == 1:
1018
+ return exact_matches[0]
394
1019
  if len(matches) == 1:
395
1020
  return matches[0]
396
1021
  # Prefer entries with modal support when required
@@ -410,21 +1035,160 @@ def _select_app_choice(app_id: str | None, purpose: str) -> AppChoice:
410
1035
  return _prompt_user_for_choice(filtered)
411
1036
 
412
1037
 
413
- def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
414
- resolved = path.resolve()
415
- module_name = f"_synth_task_app_{hashlib.md5(str(resolved).encode(), usedforsecurity=False).hexdigest()}"
1038
+ def _import_task_app_module(
1039
+ resolved: Path,
1040
+ module_name: str,
1041
+ *,
1042
+ namespace_root: Path | None,
1043
+ sys_path_roots: Sequence[Path],
1044
+ ensure_namespace: bool = True,
1045
+ ) -> types.ModuleType:
416
1046
  spec = importlib.util.spec_from_file_location(module_name, str(resolved))
417
1047
  if spec is None or spec.loader is None:
418
1048
  raise click.ClickException(f"Unable to load Python module from {resolved}")
1049
+
419
1050
  module = importlib.util.module_from_spec(spec)
420
1051
  sys.modules[module_name] = module
1052
+
1053
+ with _temporary_sys_path(sys_path_roots):
1054
+ if ensure_namespace and namespace_root is not None and "." in module_name:
1055
+ _ensure_parent_namespace(module_name, namespace_root)
1056
+
1057
+ # Clear registry before importing to avoid duplicate registration errors
1058
+ registry.clear()
1059
+
1060
+ try:
1061
+ spec.loader.exec_module(module)
1062
+ except Exception:
1063
+ # Remove partially-imported module to avoid reuse
1064
+ sys.modules.pop(module_name, None)
1065
+ raise
1066
+
1067
+ return module
1068
+
1069
+
1070
+ @contextlib.contextmanager
1071
+ def _safe_import_context() -> Iterator[None]:
1072
+ """Guard module imports against argparse/uvicorn side effects."""
1073
+
1074
+ original_argv = sys.argv[:]
1075
+ sys.argv = [original_argv[0]] if original_argv else ["python"]
1076
+
1077
+ parser_cls = argparse.ArgumentParser
1078
+ old_parse_args = parser_cls.parse_args
1079
+
1080
+ def _parse_noargs(self, args=None, namespace=None): # type: ignore[override]
1081
+ if args is None:
1082
+ args = []
1083
+ if namespace is None:
1084
+ namespace = argparse.Namespace()
1085
+ try:
1086
+ return old_parse_args(self, args, namespace)
1087
+ except SystemExit:
1088
+ return namespace
1089
+
1090
+ parser_cls.parse_args = _parse_noargs # type: ignore[assignment]
1091
+
1092
+ uvicorn_run = None
1093
+ run_task_app_orig = None
421
1094
  try:
422
- spec.loader.exec_module(module)
423
- except Exception as exc:
424
- raise click.ClickException(f"Failed to import {resolved}: {exc}") from exc
1095
+ import uvicorn # type: ignore
1096
+
1097
+ uvicorn_run = uvicorn.run
1098
+ uvicorn.run = lambda *args, **kwargs: None # type: ignore[assignment]
1099
+ except Exception:
1100
+ uvicorn_run = None
1101
+
1102
+ try:
1103
+ _task_server_patch = cast(
1104
+ Any, importlib.import_module("synth_ai.task.server")
1105
+ )
1106
+ run_task_app_orig = cast(Callable[..., Any], _task_server_patch.run_task_app)
1107
+ _task_server_patch.run_task_app = ( # type: ignore[assignment]
1108
+ lambda *args, **kwargs: None
1109
+ )
1110
+ except Exception:
1111
+ run_task_app_orig = None
1112
+
1113
+ try:
1114
+ yield
1115
+ finally:
1116
+ sys.argv = original_argv
1117
+ parser_cls.parse_args = old_parse_args # type: ignore[assignment]
1118
+ if uvicorn_run is not None:
1119
+ try:
1120
+ import uvicorn # type: ignore
1121
+
1122
+ uvicorn.run = uvicorn_run # type: ignore[assignment]
1123
+ except Exception:
1124
+ pass
1125
+ if run_task_app_orig is not None:
1126
+ try:
1127
+ _task_server_patch = cast(
1128
+ Any, importlib.import_module("synth_ai.task.server")
1129
+ )
1130
+ _task_server_patch.run_task_app = run_task_app_orig # type: ignore[assignment]
1131
+ except Exception:
1132
+ pass
1133
+
1134
+
1135
+ def _load_entry_from_path(
1136
+ path: Path, app_id: str, module_search_roots: Sequence[Path] | None = None
1137
+ ) -> TaskAppEntryType:
1138
+ resolved = path.resolve()
1139
+ search_roots: list[Path] = []
1140
+ seen_roots: set[Path] = set()
1141
+
1142
+ def _append_root(candidate: Path) -> None:
1143
+ try:
1144
+ resolved_root = candidate.resolve()
1145
+ except Exception:
1146
+ return
1147
+ if resolved_root in seen_roots:
1148
+ return
1149
+ seen_roots.add(resolved_root)
1150
+ search_roots.append(resolved_root)
1151
+
1152
+ for root in module_search_roots or []:
1153
+ _append_root(root)
1154
+ _append_root(resolved.parent)
1155
+ _append_root(REPO_ROOT)
1156
+
1157
+ last_error: Exception | None = None
1158
+ module: types.ModuleType | None = None
1159
+
1160
+ for module_name, namespace_root in _possible_module_names(resolved, search_roots):
1161
+ try:
1162
+ with _safe_import_context():
1163
+ module = _import_task_app_module(
1164
+ resolved,
1165
+ module_name,
1166
+ namespace_root=namespace_root,
1167
+ sys_path_roots=search_roots,
1168
+ ensure_namespace=True,
1169
+ )
1170
+ break
1171
+ except Exception as exc: # pragma: no cover - best-effort fallbacks
1172
+ last_error = exc
1173
+ continue
1174
+
1175
+ if module is None:
1176
+ hashed_name = f"_synth_task_app_{hashlib.md5(str(resolved).encode(), usedforsecurity=False).hexdigest()}"
1177
+ try:
1178
+ with _safe_import_context():
1179
+ module = _import_task_app_module(
1180
+ resolved,
1181
+ hashed_name,
1182
+ namespace_root=None,
1183
+ sys_path_roots=search_roots,
1184
+ ensure_namespace=False,
1185
+ )
1186
+ except Exception as exc: # pragma: no cover - propagate meaningful error
1187
+ detail = last_error or exc
1188
+ raise click.ClickException(f"Failed to import {resolved}: {detail}") from detail
425
1189
 
426
- config_obj: TaskAppConfig | None = None
427
- factory_callable: Callable[[], TaskAppConfig] | None = None
1190
+ config_obj: TaskAppConfigType | None = None
1191
+ factory_callable: Callable[[], TaskAppConfigType] | None = None
428
1192
 
429
1193
  for attr_name in dir(module):
430
1194
  try:
@@ -433,7 +1197,11 @@ def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
433
1197
  continue
434
1198
  if isinstance(attr, TaskAppConfig) and attr.app_id == app_id:
435
1199
  config_obj = attr
436
- factory_callable = lambda cfg=attr: cfg
1200
+
1201
+ def _return_config(cfg: TaskAppConfigType = attr) -> TaskAppConfigType:
1202
+ return cfg
1203
+
1204
+ factory_callable = _return_config
437
1205
  break
438
1206
 
439
1207
  if factory_callable is None:
@@ -452,28 +1220,47 @@ def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
452
1220
  continue
453
1221
  has_required = False
454
1222
  for param in sig.parameters.values():
455
- if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD) and param.default is inspect._empty:
1223
+ if (
1224
+ param.kind
1225
+ in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
1226
+ and param.default is inspect._empty
1227
+ ):
456
1228
  has_required = True
457
1229
  break
458
1230
  if has_required:
459
1231
  continue
460
1232
  try:
461
- result = attr()
1233
+ with _safe_import_context():
1234
+ result = attr()
1235
+ except SystemExit:
1236
+ continue
462
1237
  except Exception:
463
1238
  continue
464
1239
  if isinstance(result, TaskAppConfig) and result.app_id == app_id:
465
- def _factory() -> TaskAppConfig:
466
- return attr() # type: ignore[call-arg]
467
- factory_callable = _factory
1240
+ # Bind attr to a local and close over it without exposing parameters
1241
+ bound_func: Callable[[], TaskAppConfig] = cast(Callable[[], TaskAppConfig], attr) # type: ignore[assignment]
1242
+
1243
+ def _factory_noargs(
1244
+ func: Callable[[], TaskAppConfigType] = bound_func,
1245
+ ) -> TaskAppConfigType:
1246
+ return func()
1247
+
1248
+ factory_callable = _factory_noargs
468
1249
  config_obj = result
469
1250
  break
470
1251
 
1252
+ # If no TaskAppConfig found directly, check if it was registered via register_task_app
471
1253
  if factory_callable is None or config_obj is None:
472
- raise click.ClickException(
473
- f"Could not locate TaskAppConfig for '{app_id}' in {resolved}."
474
- )
475
-
476
- modal_cfg: ModalDeploymentConfig | None = None
1254
+ try:
1255
+ # Check if the app was registered in the registry
1256
+ entry = registry.get(app_id)
1257
+ return entry
1258
+ except KeyError as exc:
1259
+ raise click.ClickException(
1260
+ f"Could not locate TaskAppConfig for '{app_id}' in {resolved}."
1261
+ ) from exc
1262
+
1263
+ modal_cfg: ModalDeploymentConfigType | None = None
477
1264
  for attr_name in dir(module):
478
1265
  try:
479
1266
  attr = getattr(module, attr_name)
@@ -483,6 +1270,10 @@ def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
483
1270
  modal_cfg = attr
484
1271
  break
485
1272
 
1273
+ # If no ModalDeploymentConfig found, try to detect it via AST parsing
1274
+ if modal_cfg is None:
1275
+ modal_cfg = _extract_modal_config_from_file(resolved)
1276
+
486
1277
  description = inspect.getdoc(module) or f"Discovered task app in {resolved.name}"
487
1278
  env_files: Iterable[str] = getattr(module, "ENV_FILES", ()) # type: ignore[arg-type]
488
1279
 
@@ -507,20 +1298,283 @@ def _resolve_env_paths_for_script(script_path: Path, explicit: Sequence[str]) ->
507
1298
  resolved.append(p)
508
1299
  return resolved
509
1300
 
1301
+ # Always prompt for env file selection instead of auto-loading defaults
510
1302
  script_dir = script_path.parent.resolve()
511
- fallback_order = [
512
- script_dir / ".env",
513
- REPO_ROOT / "examples" / "rl" / ".env",
514
- REPO_ROOT / "examples" / "warming_up_to_rl" / ".env",
515
- REPO_ROOT / ".env",
516
- ]
517
- resolved = [p for p in fallback_order if p.exists()]
518
- if resolved:
519
- return resolved
520
- created = _interactive_create_env(script_dir)
521
- if created is None:
522
- raise click.ClickException("Env file required (--env-file) for this task app")
523
- return [created]
1303
+ cwd = Path.cwd()
1304
+
1305
+ # Look for env files in current working directory first, then repo root
1306
+ env_candidates = []
1307
+
1308
+ # Add CWD env files first (prioritized)
1309
+ cwd_env_files = sorted(cwd.glob("**/*.env"))
1310
+ env_candidates.extend(cwd_env_files)
1311
+
1312
+ # Add repo root env files
1313
+ repo_env_files = sorted(REPO_ROOT.glob("**/*.env"))
1314
+ # Avoid duplicates
1315
+ for repo_file in repo_env_files:
1316
+ if repo_file not in env_candidates:
1317
+ env_candidates.append(repo_file)
1318
+
1319
+ if not env_candidates:
1320
+ created = _interactive_create_env(script_dir)
1321
+ if created is None:
1322
+ raise click.ClickException("Env file required (--env-file) for this task app")
1323
+ return [created]
1324
+
1325
+ click.echo("Select env file to load:")
1326
+ for idx, path in enumerate(env_candidates, start=1):
1327
+ click.echo(f" {idx}) {path.resolve()}")
1328
+ choice = click.prompt("Enter choice", type=click.IntRange(1, len(env_candidates)), default=1)
1329
+ return [env_candidates[choice - 1]]
1330
+
1331
+
1332
+ def _path_is_within(child: Path, parent: Path) -> bool:
1333
+ try:
1334
+ child.resolve().relative_to(parent.resolve())
1335
+ return True
1336
+ except Exception:
1337
+ return False
1338
+
1339
+
1340
+ @functools.lru_cache(maxsize=16)
1341
+ def _is_modal_shim(path_str: str) -> bool:
1342
+ """Return True if the candidate CLI path refers to the synth-ai shim."""
1343
+
1344
+ path = Path(path_str)
1345
+ try:
1346
+ resolved = path.resolve(strict=True)
1347
+ except Exception:
1348
+ resolved = path
1349
+
1350
+ if not resolved.exists() or resolved.is_dir():
1351
+ return False
1352
+
1353
+ snippet = ""
1354
+ try:
1355
+ snippet = resolved.read_bytes()[:4096].decode("utf-8", errors="ignore")
1356
+ except Exception:
1357
+ snippet = ""
1358
+
1359
+ shim_markers = (
1360
+ "synth_ai.cli._modal_wrapper",
1361
+ "from modal.__main__ import main",
1362
+ "import modal.__main__",
1363
+ "run_module('modal.__main__'",
1364
+ )
1365
+ if snippet and any(marker in snippet for marker in shim_markers):
1366
+ return True
1367
+
1368
+ try:
1369
+ size = resolved.stat().st_size
1370
+ except Exception:
1371
+ size = None
1372
+
1373
+ if (
1374
+ size is not None
1375
+ and size < 2048
1376
+ and "python" in (snippet.splitlines() or [""])[0]
1377
+ and (
1378
+ "modal.__main__" in snippet
1379
+ or "modal.__main__" in snippet.replace(" ", "")
1380
+ )
1381
+ ):
1382
+ return True
1383
+
1384
+ virtual_env = os.environ.get("VIRTUAL_ENV")
1385
+ if virtual_env and _path_is_within(resolved, Path(virtual_env)):
1386
+ return True
1387
+
1388
+ if _path_is_within(resolved, REPO_ROOT):
1389
+ return True
1390
+
1391
+ uv_tools_dir = Path.home() / ".local" / "share" / "uv" / "tools"
1392
+ return uv_tools_dir.exists() and _path_is_within(resolved, uv_tools_dir)
1393
+
1394
+
1395
+ def _find_modal_executable(modal_cli: str) -> tuple[str | None, str | None]:
1396
+ """Return the first non-shim executable and the first shim discovered on PATH."""
1397
+
1398
+ if not modal_cli:
1399
+ modal_cli = "modal"
1400
+
1401
+ candidate_path = Path(modal_cli).expanduser()
1402
+ if candidate_path.is_absolute() or len(candidate_path.parts) > 1:
1403
+ resolved_candidate = candidate_path
1404
+ if not resolved_candidate.is_absolute():
1405
+ resolved_candidate = (Path.cwd() / resolved_candidate).resolve()
1406
+ else:
1407
+ resolved_candidate = resolved_candidate.resolve()
1408
+ if not resolved_candidate.exists():
1409
+ raise click.ClickException(f"--modal-cli path does not exist: {resolved_candidate}")
1410
+ if not os.access(resolved_candidate, os.X_OK):
1411
+ raise click.ClickException(f"--modal-cli is not executable: {resolved_candidate}")
1412
+ return str(resolved_candidate), None
1413
+
1414
+ path_env = os.environ.get("PATH", "")
1415
+ if not path_env:
1416
+ return None, None
1417
+
1418
+ seen_dirs: set[str] = set()
1419
+ seen_candidates: set[str] = set()
1420
+ shim_path: str | None = None
1421
+
1422
+ for raw_entry in path_env.split(os.pathsep):
1423
+ if not raw_entry:
1424
+ continue
1425
+ try:
1426
+ resolved_entry = str(Path(raw_entry).resolve())
1427
+ except Exception:
1428
+ resolved_entry = os.path.normpath(raw_entry)
1429
+ if resolved_entry in seen_dirs:
1430
+ continue
1431
+ seen_dirs.add(resolved_entry)
1432
+
1433
+ candidate = shutil.which(modal_cli, path=raw_entry)
1434
+ if candidate is None:
1435
+ continue
1436
+ if candidate in seen_candidates:
1437
+ continue
1438
+ seen_candidates.add(candidate)
1439
+
1440
+ if _is_modal_shim(candidate):
1441
+ if shim_path is None:
1442
+ shim_path = candidate
1443
+ continue
1444
+ return candidate, shim_path
1445
+
1446
+ return None, shim_path
1447
+
1448
+
1449
def _modal_command_prefix(modal_cli: str) -> list[str]:
    """Resolve a command prefix for invoking the Modal CLI within the active environment.

    Returns either ``[<path to modal executable>]`` or the in-process wrapper
    invocation ``[sys.executable, "-m", "synth_ai.cli._modal_wrapper"]``.
    The wrapper is used when ``SYNTH_FORCE_MODAL_WRAPPER`` is ``1``/``true``/
    ``yes``, or when no non-shim executable is found for the default
    ``modal`` lookup but the ``modal`` package is importable.

    Raises:
        click.ClickException: If no usable Modal CLI can be resolved.
    """
    wrapper_command = [sys.executable, "-m", "synth_ai.cli._modal_wrapper"]

    forced = os.environ.get("SYNTH_FORCE_MODAL_WRAPPER", "").strip().lower()
    if forced in {"1", "true", "yes"}:
        click.secho(
            "[modal-prefix] SYNTH_FORCE_MODAL_WRAPPER=1 -> using in-process wrapper",
            fg="yellow",
        )
        return wrapper_command

    lookup = modal_cli or "modal"
    using_default = lookup == "modal"
    # The package spec is only consulted for the default CLI name.
    spec = importlib.util.find_spec("modal") if using_default else None

    preferred, shim_candidate = _find_modal_executable(lookup)
    if preferred is not None:
        detail = f"[modal-prefix] modal_cli={lookup} selected={preferred}"
        if using_default:
            detail += f" spec={'yes' if spec else 'no'}"
        click.secho(detail, fg="cyan")
        return [preferred]

    if not using_default:
        raise click.ClickException(f"Modal CLI not found (looked for '{lookup}')")

    if spec is None:
        if shim_candidate is not None:
            raise click.ClickException(
                "Modal CLI resolution found the synth-ai shim but the 'modal' package "
                "is not importable in this environment. Install the official Modal CLI "
                "or pass --modal-cli with its path."
            )
        raise click.ClickException(
            "Modal CLI not found. Install the 'modal' package in this environment or pass "
            "--modal-cli with an explicit path."
        )

    # No standalone executable, but the package is importable: run it via the wrapper.
    if shim_candidate is None:
        warning = "[modal-prefix] Using synth-ai modal shim; pass --modal-cli /path/to/modal to override."
    else:
        warning = (
            f"[modal-prefix] Using synth-ai modal shim at {shim_candidate}; "
            "pass --modal-cli /path/to/modal to override."
        )
    click.secho(warning, fg="yellow")
    click.secho(
        "[modal-prefix] modal_cli=modal selected=module-wrapper spec=yes",
        fg="yellow",
    )
    return wrapper_command
1499
+
1500
+
1501
def _build_modal_app_wrapper(original_script: Path) -> tuple[Path, Path]:
    """Generate a temporary wrapper module that loads *original_script* for Modal.

    The generated source puts the script's directory, its parent and
    ``REPO_ROOT`` on ``sys.path``, executes the script as
    ``_synth_modal_target``, tries to add the script directory and the
    ``synth_ai`` directory to the module's ``image`` as local dirs, and
    re-exports the module's globals including ``app``.

    Returns:
        ``(wrapper_path, temp_root)``: the wrapper file and the temporary
        directory that contains it. The caller is responsible for removing
        both.
    """
    source_dir = original_script.parent.resolve()
    repo_root = REPO_ROOT
    temp_root = Path(tempfile.mkdtemp(prefix="synth_modal_app_"))

    # Embedded with repr() below so that backslashes or quotes in the script
    # path cannot break the string literal in the generated source.
    load_error = f"Unable to load modal task app from {original_script}"

    wrapper_source = textwrap.dedent(
        f"""
        from importlib import util as _util
        from pathlib import Path as _Path
        import sys as _sys

        _source_dir = _Path({str(source_dir)!r}).resolve()
        _module_path = _source_dir / {original_script.name!r}
        _package_name = _source_dir.name
        _repo_root = _Path({str(repo_root)!r}).resolve()
        _synth_dir = _repo_root / "synth_ai"

        for _path in (str(_source_dir), str(_source_dir.parent), str(_repo_root)):
            if _path not in _sys.path:
                _sys.path.insert(0, _path)

        _spec = _util.spec_from_file_location("_synth_modal_target", str(_module_path))
        if _spec is None or _spec.loader is None:
            raise SystemExit({load_error!r})
        _module = _util.module_from_spec(_spec)
        _sys.modules.setdefault("_synth_modal_target", _module)
        _spec.loader.exec_module(_module)

        try:
            from modal import App as _ModalApp
            from modal import Image as _ModalImage
        except Exception:
            _ModalApp = None  # type: ignore[assignment]
            _ModalImage = None  # type: ignore[assignment]

        def _apply_local_mounts(image):
            if _ModalImage is None or not isinstance(image, _ModalImage):
                return image
            mounts = [
                (str(_source_dir), f"/root/{{_package_name}}"),
                (str(_synth_dir), "/root/synth_ai"),
            ]
            for local_path, remote_path in mounts:
                try:
                    image = image.add_local_dir(local_path, remote_path=remote_path)
                except Exception:
                    pass
            return image

        if hasattr(_module, "image"):
            _module.image = _apply_local_mounts(getattr(_module, "image"))

        _candidate = getattr(_module, "app", None)
        if _ModalApp is None or not isinstance(_candidate, _ModalApp):
            candidate_modal_app = getattr(_module, "modal_app", None)
            if _ModalApp is not None and isinstance(candidate_modal_app, _ModalApp):
                _candidate = candidate_modal_app
                setattr(_module, "app", _candidate)

        if _ModalApp is not None and not isinstance(_candidate, _ModalApp):
            raise SystemExit(
                "Modal task app must expose an 'app = modal.App(...)' (or modal_app) attribute."
            )

        for remote_path in ("/root/synth_ai", f"/root/{{_package_name}}"):
            if remote_path not in _sys.path:
                _sys.path.insert(0, remote_path)

        globals().update({{k: v for k, v in vars(_module).items() if not k.startswith("__")}})
        app = getattr(_module, "app")
        """
    ).strip()

    wrapper_path = temp_root / "__modal_wrapper__.py"
    try:
        wrapper_path.write_text(wrapper_source + "\n", encoding="utf-8")
    except OSError:
        # Do not leave an orphaned temp directory behind when the write fails.
        shutil.rmtree(temp_root, ignore_errors=True)
        raise
    return wrapper_path, temp_root
1577
+
524
1578
 
525
1579
 
526
1580
  def _run_modal_script(
@@ -532,43 +1586,150 @@ def _run_modal_script(
532
1586
  modal_name: str | None = None,
533
1587
  dry_run: bool = False,
534
1588
  ) -> None:
535
- modal_path = shutil.which(modal_cli)
536
- if modal_path is None:
537
- raise click.ClickException(f"Modal CLI not found (looked for '{modal_cli}')")
538
-
539
1589
  env_paths_list = [Path(p).resolve() for p in env_paths]
540
1590
  path_strings = [str(p) for p in env_paths_list]
541
1591
  _load_env_files_into_process(path_strings)
542
1592
  _ensure_env_values(env_paths_list, script_path.parent)
543
1593
  _load_env_values(env_paths_list)
1594
+ # Ensure ENVIRONMENT_API_KEY is uploaded to backend for this org (matches registry path behavior)
1595
+ try:
1596
+ _preflight_env_key(env_paths_list, crash_on_failure=True)
1597
+ except Exception as _pf_err:
1598
+ raise click.ClickException(str(_pf_err)) from _pf_err
1599
+
1600
+ proc_env = os.environ.copy()
1601
+ pythonpath_entries: list[str] = []
1602
+ script_dir = script_path.parent.resolve()
1603
+ pythonpath_entries.append(str(script_dir))
1604
+ if (script_dir / "__init__.py").exists():
1605
+ # Script lives inside a package; ensure the parent package directory is importable.
1606
+ pythonpath_entries.append(str(script_dir.parent.resolve()))
1607
+ pythonpath_entries.append(str(REPO_ROOT))
1608
+ existing_pp = proc_env.get("PYTHONPATH")
1609
+ if existing_pp:
1610
+ pythonpath_entries.append(existing_pp)
1611
+ unique_paths = list(dict.fromkeys(pythonpath_entries))
1612
+ proc_env["PYTHONPATH"] = os.pathsep.join(unique_paths)
1613
+
1614
+ wrapper_info: tuple[Path, Path] | None = None
1615
+ target_script = script_path
1616
+ if command in {"serve", "deploy"}:
1617
+ wrapper_path, temp_root = _build_modal_app_wrapper(script_path)
1618
+ wrapper_info = (wrapper_path, temp_root)
1619
+ target_script = wrapper_path
1620
+
1621
+ # Ensure the wrapper has access to the Synth AI source for intra-repo imports
1622
+ if "PYTHONPATH" in proc_env:
1623
+ proc_env["PYTHONPATH"] = os.pathsep.join(
1624
+ [str(REPO_ROOT)] + proc_env["PYTHONPATH"].split(os.pathsep)
1625
+ )
1626
+ else:
1627
+ proc_env["PYTHONPATH"] = str(REPO_ROOT)
544
1628
 
545
- cmd = [modal_path, command, str(script_path)]
546
- if modal_name:
1629
+ cmd = [*_modal_command_prefix(modal_cli), command, str(target_script)]
1630
+ if modal_name and command == "deploy":
547
1631
  cmd.extend(["--name", modal_name])
548
1632
  if dry_run:
549
- click.echo("Dry run: " + " ".join(cmd))
1633
+ click.echo(
1634
+ "Dry run: " + " ".join(shlex.quote(component) for component in cmd),
1635
+ err=False,
1636
+ )
550
1637
  return
1638
+ click.secho(
1639
+ "[modal-exec] " + " ".join(shlex.quote(component) for component in cmd),
1640
+ fg="cyan",
1641
+ )
551
1642
  try:
552
- subprocess.run(cmd, check=True)
1643
+ # Stream output live for better diagnostics
1644
+ proc = subprocess.Popen(
1645
+ cmd,
1646
+ stdout=subprocess.PIPE,
1647
+ stderr=subprocess.STDOUT,
1648
+ text=True,
1649
+ bufsize=1,
1650
+ env=proc_env,
1651
+ )
1652
+ task_app_url = None
1653
+ assert proc.stdout is not None
1654
+ for line in proc.stdout:
1655
+ click.echo(line, nl=False)
1656
+ if task_app_url is None and ("modal.run" in line and "=>" in line):
1657
+ parts = line.split("=>")
1658
+ if len(parts) >= 2:
1659
+ task_app_url = parts[-1].strip()
1660
+ if task_app_url and env_paths_list:
1661
+ env_file = env_paths_list[0]
1662
+ _save_to_env_file(env_file, "TASK_APP_BASE_URL", task_app_url)
1663
+ click.echo(f"\n✓ Task app URL: {task_app_url}\n")
1664
+ rc = proc.wait()
1665
+ if rc != 0:
1666
+ raise subprocess.CalledProcessError(rc, cmd)
553
1667
  except subprocess.CalledProcessError as exc:
554
- raise click.ClickException(f"modal {command} failed with exit code {exc.returncode}") from exc
1668
+ raise click.ClickException(
1669
+ f"modal {command} failed with exit code {exc.returncode}"
1670
+ ) from exc
1671
+ finally:
1672
+ if wrapper_info is not None:
1673
+ wrapper_path, temp_root = wrapper_info
1674
+ with contextlib.suppress(Exception):
1675
+ wrapper_path.unlink(missing_ok=True)
1676
+ shutil.rmtree(temp_root, ignore_errors=True)
555
1677
 
556
1678
 
557
- def _preflight_env_key() -> None:
1679
+ def _preflight_env_key(env_paths: Sequence[Path] | None = None, *, crash_on_failure: bool = False) -> None:
558
1680
  try:
559
- raw_backend = os.environ.get("BACKEND_BASE_URL") or os.environ.get("SYNTH_BASE_URL") or "http://localhost:8000/api"
560
- backend_base = raw_backend.rstrip('/')
561
- if not backend_base.endswith('/api'):
562
- backend_base = backend_base + '/api'
1681
+ raw_backend = (
1682
+ os.environ.get("BACKEND_BASE_URL")
1683
+ or os.environ.get("SYNTH_BASE_URL")
1684
+ or f"{PROD_BASE_URL_DEFAULT}/api"
1685
+ )
1686
+ backend_base = raw_backend.rstrip("/")
1687
+ if not backend_base.endswith("/api"):
1688
+ backend_base = backend_base + "/api"
563
1689
  synth_key = os.environ.get("SYNTH_API_KEY") or ""
564
1690
  env_api_key = (
565
- os.environ.get("ENVIRONMENT_API_KEY")
566
- or os.environ.get("dev_environment_api_key")
567
- or os.environ.get("DEV_ENVIRONMENT_API_KEY")
568
- or ""
569
- )
1691
+ os.environ.get("ENVIRONMENT_API_KEY") or os.environ.get("DEV_ENVIRONMENT_API_KEY") or ""
1692
+ ).strip()
1693
+
1694
+ def _preview(value: str) -> str:
1695
+ if len(value) <= 10:
1696
+ return value
1697
+ return f"{value[:6]}...{value[-4:]}"
1698
+
1699
+ minted = False
1700
+ if not env_api_key:
1701
+ secrets_module = _maybe_import("synth_ai.learning.rl.secrets")
1702
+ try:
1703
+ if secrets_module is None:
1704
+ raise RuntimeError("secrets module unavailable")
1705
+ mint_env_key = secrets_module.mint_environment_api_key
1706
+ env_api_key = mint_env_key()
1707
+ os.environ["ENVIRONMENT_API_KEY"] = env_api_key
1708
+ os.environ.setdefault("DEV_ENVIRONMENT_API_KEY", env_api_key)
1709
+ minted = True
1710
+ click.echo(
1711
+ f"[preflight] minted ENVIRONMENT_API_KEY ({_preview(env_api_key)})"
1712
+ )
1713
+ except Exception as mint_err:
1714
+ if crash_on_failure:
1715
+ raise click.ClickException(
1716
+ f"[CRITICAL] Failed to mint ENVIRONMENT_API_KEY: {mint_err}"
1717
+ ) from mint_err
1718
+ click.echo(
1719
+ f"[WARN] Failed to mint ENVIRONMENT_API_KEY automatically ({mint_err}); proceeding without upload"
1720
+ )
1721
+
1722
+ if env_api_key and not os.environ.get("ENVIRONMENT_API_KEY"):
1723
+ os.environ["ENVIRONMENT_API_KEY"] = env_api_key
1724
+ if env_api_key and not os.environ.get("DEV_ENVIRONMENT_API_KEY"):
1725
+ os.environ["DEV_ENVIRONMENT_API_KEY"] = env_api_key
1726
+
1727
+ if minted:
1728
+ _persist_env_api_key(env_api_key, env_paths)
1729
+
570
1730
  if synth_key and env_api_key:
571
1731
  import base64
1732
+
572
1733
  import httpx
573
1734
 
574
1735
  click.echo(f"[preflight] backend={backend_base}")
@@ -580,71 +1741,216 @@ def _preflight_env_key() -> None:
580
1741
  try:
581
1742
  from nacl.public import PublicKey, SealedBox
582
1743
 
583
- pub = PublicKey(base64.b64decode(pk, validate=True))
1744
+ # Decode public key and build sealed box
1745
+ pk_bytes = base64.b64decode(pk, validate=True)
1746
+ pub = PublicKey(pk_bytes)
584
1747
  sb = SealedBox(pub)
585
- ct_b64 = base64.b64encode(sb.encrypt(env_api_key.encode('utf-8'))).decode()
1748
+
1749
+ # Encrypt plaintext key
1750
+ ct_b64 = base64.b64encode(sb.encrypt(env_api_key.encode("utf-8"))).decode()
586
1751
  payload = {"name": "ENVIRONMENT_API_KEY", "ciphertext_b64": ct_b64}
587
- with httpx.Client(timeout=15.0, headers={"Authorization": f"Bearer {synth_key}", "Content-Type": "application/json"}) as c:
1752
+
1753
+ # Emit diagnostic logging (safe previews + hashes only)
1754
+ try:
1755
+ import hashlib as _hash
1756
+
1757
+ # Backend URL context
1758
+ click.echo(f"[preflight] posting to {backend_base.rstrip('/')}/v1/env-keys")
1759
+
1760
+ # Public key diagnostics
1761
+ pk_sha256 = _hash.sha256(pk_bytes).hexdigest()
1762
+ click.echo(
1763
+ f"[preflight] public_key: b64_len={len(pk)} sha256={pk_sha256} head={pk[:16]} tail={pk[-16:]}"
1764
+ )
1765
+
1766
+ # Plaintext diagnostics (never print full secret)
1767
+ _plain = env_api_key
1768
+ _plen = len(_plain)
1769
+ _ppref = (_plain[:6] + "…") if _plen > 10 else _plain
1770
+ _psuf = ("…" + _plain[-4:]) if _plen > 10 else ""
1771
+ _has_ws = any(ch.isspace() for ch in _plain)
1772
+ click.echo(
1773
+ f"[preflight] plaintext: len={_plen} preview={_ppref}{_psuf} has_ws={bool(_has_ws)}"
1774
+ )
1775
+
1776
+ # Ciphertext diagnostics
1777
+ try:
1778
+ _ct_bytes = base64.b64decode(ct_b64, validate=True)
1779
+ _ct_sha256 = _hash.sha256(_ct_bytes).hexdigest()
1780
+ click.echo(
1781
+ f"[preflight] ciphertext: b64_len={len(ct_b64)} sha256={_ct_sha256} head={ct_b64[:16]} tail={ct_b64[-16:]}"
1782
+ )
1783
+ except Exception:
1784
+ click.echo("[preflight] ciphertext: invalid base64 (unexpected)")
1785
+ except Exception:
1786
+ # Best-effort logging only
1787
+ pass
1788
+ with httpx.Client(
1789
+ timeout=15.0,
1790
+ headers={
1791
+ "Authorization": f"Bearer {synth_key}",
1792
+ "Content-Type": "application/json",
1793
+ },
1794
+ ) as c:
588
1795
  click.echo("[preflight] upserting env key…")
589
1796
  up = c.post(f"{backend_base.rstrip('/')}/v1/env-keys", json=payload)
590
- click.echo(f"[preflight] upsert status={up.status_code}")
591
- click.echo("[preflight] verifying env key presence…")
592
- ver = c.get(f"{backend_base.rstrip('/')}/v1/env-keys/verify")
593
- if ver.status_code == 200 and (ver.json() or {}).get("present"):
594
- click.echo("✅ ENVIRONMENT_API_KEY upserted and verified in backend")
1797
+ body_snip = ""
1798
+ try:
1799
+ body_snip = up.text[:400] if up.text else ""
1800
+ except Exception:
1801
+ body_snip = ""
1802
+ click.echo(f"[preflight] upsert status={up.status_code}{(' body='+body_snip) if body_snip else ''}")
1803
+
1804
+ # If upload succeeded (2xx), consider it successful even if verification fails
1805
+ # This handles cases where verification endpoint has issues
1806
+ if 200 <= up.status_code < 300:
1807
+ key_preview = (
1808
+ _preview(env_api_key)
1809
+ )
1810
+ click.echo(
1811
+ f"✅ ENVIRONMENT_API_KEY uploaded successfully ({key_preview})"
1812
+ )
1813
+
1814
+ # Try verification, but don't fail if it doesn't work
1815
+ click.echo("[preflight] verifying env key presence…")
1816
+ try:
1817
+ ver = c.get(f"{backend_base.rstrip('/')}/v1/env-keys/verify")
1818
+ if ver.status_code == 200 and (ver.json() or {}).get("present"):
1819
+ click.echo("✅ Key verified in backend")
1820
+ else:
1821
+ click.echo(
1822
+ f"⚠️ Verification returned {ver.status_code}, but upload succeeded - proceeding"
1823
+ )
1824
+ except Exception as verify_err:
1825
+ click.echo(
1826
+ f"⚠️ Verification check failed ({verify_err}), but upload succeeded - proceeding"
1827
+ )
595
1828
  else:
596
- click.echo("[WARN] ENVIRONMENT_API_KEY verification failed; proceeding anyway")
597
- except Exception:
598
- click.echo("[WARN] Failed to encrypt/upload ENVIRONMENT_API_KEY; proceeding anyway")
599
- except Exception:
600
- click.echo("[WARN] Backend preflight for ENVIRONMENT_API_KEY failed; proceeding anyway")
1829
+ error_msg = (
1830
+ f"ENVIRONMENT_API_KEY upload failed with status {up.status_code}"
1831
+ + (f" body={body_snip}" if body_snip else "")
1832
+ )
1833
+ if crash_on_failure:
1834
+ raise click.ClickException(f"[CRITICAL] {error_msg}")
1835
+ click.echo(f"[WARN] {error_msg}; proceeding anyway")
1836
+ except Exception as e:
1837
+ error_msg = f"Failed to encrypt/upload ENVIRONMENT_API_KEY: {e}"
1838
+ if crash_on_failure:
1839
+ raise click.ClickException(f"[CRITICAL] {error_msg}") from e
1840
+ click.echo(f"[WARN] {error_msg}; proceeding anyway")
1841
+ except Exception as e:
1842
+ error_msg = f"Backend preflight for ENVIRONMENT_API_KEY failed: {e}"
1843
+ if crash_on_failure:
1844
+ raise click.ClickException(f"[CRITICAL] {error_msg}") from e
1845
+ click.echo(f"[WARN] {error_msg}; proceeding anyway")
601
1846
 
602
1847
 
603
1848
def _run_modal_with_entry(
    entry: TaskAppEntryType,
    modal_cfg: ModalDeploymentConfigType,
    modal_cli: str,
    modal_name: str | None,
    env_paths: list[Path],
    command: str,
    *,
    dry_run: bool = False,
    original_path: Path | None = None,
) -> None:
    """Run ``modal <command>`` against a generated entrypoint for *entry*.

    Loads the given env files into the process, runs the ENVIRONMENT_API_KEY
    preflight, writes a temporary Modal entrypoint script, and invokes the
    Modal CLI on it while streaming its output. The first output line that
    contains both ``modal.run`` and ``=>`` is treated as the task app URL and
    saved as ``TASK_APP_BASE_URL`` in the first env file.

    Args:
        entry: Task app entry passed to the entrypoint writer.
        modal_cfg: Modal deployment configuration passed to the entrypoint writer.
        modal_cli: Modal CLI name or path, resolved via ``_modal_command_prefix``.
        modal_name: Optional app name; forwarded as ``--name`` only for ``deploy``.
        env_paths: Env files to load; the first one receives the task app URL.
        command: Modal sub-command to run.
        dry_run: When true, print the command instead of executing it.
        original_path: Source file of the task app; its directory is put
            first on the child's ``PYTHONPATH``.

    Raises:
        click.ClickException: If the Modal CLI cannot be resolved or the
            command exits with a non-zero status.
    """
    env_paths_list = [Path(p).resolve() for p in env_paths]
    dotenv_paths = [str(p) for p in env_paths_list]
    _load_env_files_into_process(dotenv_paths)
    fallback_dir = env_paths_list[0].parent if env_paths_list else Path.cwd()
    _ensure_env_values(env_paths_list, fallback_dir)
    _load_env_values(env_paths_list)
    _preflight_env_key(env_paths_list, crash_on_failure=True)

    inline_secret_values: dict[str, str] = {}
    env_key = os.environ.get("ENVIRONMENT_API_KEY", "").strip()
    if env_key:
        inline_secret_values["ENVIRONMENT_API_KEY"] = env_key
        inline_secret_values.setdefault("DEV_ENVIRONMENT_API_KEY", env_key)
    aliases = os.environ.get("ENVIRONMENT_API_KEY_ALIASES", "").strip()
    if aliases:
        inline_secret_values["ENVIRONMENT_API_KEY_ALIASES"] = aliases
    for vendor_key in ("GROQ_API_KEY", "OPENAI_API_KEY"):
        val = os.environ.get(vendor_key, "").strip()
        if val:
            inline_secret_values[vendor_key] = val

    if inline_secret_values:
        preview = inline_secret_values.get("ENVIRONMENT_API_KEY", "")
        shown = f"{preview[:6]}...{preview[-4:]}" if preview and len(preview) > 10 else preview
        click.echo(f"[deploy] inline ENVIRONMENT_API_KEY prepared ({shown})")
    else:
        click.echo("[deploy] no inline ENVIRONMENT_API_KEY found; relying on Modal secrets/dotenv")

    script_path = _write_modal_entrypoint(
        entry,
        modal_cfg,
        modal_name,
        dotenv_paths=dotenv_paths,
        original_path=original_path,
        inline_secret_values=inline_secret_values,
    )
    # Everything after the entrypoint is written runs under the finally below,
    # so the temp script is removed even when CLI resolution fails.
    try:
        cmd = [*_modal_command_prefix(modal_cli), command, str(script_path)]
        if modal_name and command == "deploy":
            cmd.extend(["--name", modal_name])

        proc_env = os.environ.copy()
        pythonpath_entries: list[str] = [str(REPO_ROOT)]
        if original_path is not None:
            source_dir = Path(original_path).resolve().parent
            pythonpath_entries.insert(0, str(source_dir))
        existing_pp = proc_env.get("PYTHONPATH")
        if existing_pp:
            pythonpath_entries.append(existing_pp)
        # dict.fromkeys de-duplicates while keeping first-seen order.
        proc_env["PYTHONPATH"] = os.pathsep.join(list(dict.fromkeys(pythonpath_entries)))

        if dry_run:
            click.echo("Dry run: " + " ".join(shlex.quote(component) for component in cmd))
            return
        click.secho(
            "[modal-exec] " + " ".join(shlex.quote(component) for component in cmd),
            fg="cyan",
        )

        # Stream output live for better diagnostics
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
            env=proc_env,
        )
        task_app_url = None
        assert proc.stdout is not None
        for line in proc.stdout:
            # Echo lines as they arrive
            click.echo(line, nl=False)
            # Look for lines containing modal.run URLs
            if task_app_url is None and ("modal.run" in line and "=>" in line):
                parts = line.split("=>")
                if len(parts) >= 2:
                    task_app_url = parts[-1].strip()
                    # Save URL immediately for convenience
                    if task_app_url and env_paths_list:
                        env_file = env_paths_list[0]
                        _save_to_env_file(env_file, "TASK_APP_BASE_URL", task_app_url)
                        click.echo(f"\n✓ Task app URL: {task_app_url}\n")
        rc = proc.wait()
        if rc != 0:
            raise subprocess.CalledProcessError(rc, cmd)
    except subprocess.CalledProcessError as exc:
        raise click.ClickException(
            f"modal {command} failed with exit code {exc.returncode}"
        ) from exc
    finally:
        script_path.unlink(missing_ok=True)
644
1952
 
645
1953
 
646
-
647
-
648
1954
  def _load_env_values(paths: list[Path], *, allow_empty: bool = False) -> dict[str, str]:
649
1955
  values: dict[str, str] = {}
650
1956
  for p in paths:
@@ -653,15 +1959,17 @@ def _load_env_values(paths: list[Path], *, allow_empty: bool = False) -> dict[st
653
1959
  except FileNotFoundError:
654
1960
  continue
655
1961
  for line in content.splitlines():
656
- if not line or line.lstrip().startswith('#') or '=' not in line:
1962
+ if not line or line.lstrip().startswith("#") or "=" not in line:
657
1963
  continue
658
- key, value = line.split('=', 1)
1964
+ key, value = line.split("=", 1)
659
1965
  if key and key not in values:
660
1966
  values[key.strip()] = value.strip()
661
1967
  if not allow_empty and not values:
662
1968
  raise click.ClickException("No environment values found")
663
1969
  os.environ.update({k: v for k, v in values.items() if k and v})
664
1970
  return values
1971
+
1972
+
665
1973
  def _interactive_create_env(target_dir: Path) -> Path | None:
666
1974
  env_path = (target_dir / ".env").resolve()
667
1975
  if env_path.exists():
@@ -680,9 +1988,9 @@ def _parse_env_file(path: Path) -> dict[str, str]:
680
1988
  data: dict[str, str] = {}
681
1989
  try:
682
1990
  for line in path.read_text(encoding="utf-8").splitlines():
683
- if not line or line.lstrip().startswith('#') or '=' not in line:
1991
+ if not line or line.lstrip().startswith("#") or "=" not in line:
684
1992
  continue
685
- key, value = line.split('=', 1)
1993
+ key, value = line.split("=", 1)
686
1994
  data[key.strip()] = value.strip()
687
1995
  except FileNotFoundError:
688
1996
  pass
@@ -690,13 +1998,19 @@ def _parse_env_file(path: Path) -> dict[str, str]:
690
1998
 
691
1999
 
692
2000
  def _interactive_fill_env(env_path: Path) -> Path | None:
2001
+ if not sys.stdin.isatty():
2002
+ raise click.ClickException(
2003
+ "ENVIRONMENT_API_KEY missing. Provide --env-file or run `synth-ai setup` in an interactive shell to create one."
2004
+ )
693
2005
  existing = _parse_env_file(env_path) if env_path.exists() else {}
694
2006
 
695
2007
  def _prompt(label: str, *, default: str = "", required: bool) -> str | None:
696
2008
  while True:
697
2009
  try:
698
- value = click.prompt(label, default=default, show_default=bool(default) or not required).strip()
699
- except (click.exceptions.Abort, EOFError, KeyboardInterrupt):
2010
+ value = click.prompt(
2011
+ label, default=default, show_default=bool(default) or not required
2012
+ ).strip()
2013
+ except (Abort, EOFError, KeyboardInterrupt):
700
2014
  click.echo("Aborted env creation.")
701
2015
  return None
702
2016
  if value or not required:
@@ -727,6 +2041,10 @@ def _ensure_env_values(env_paths: list[Path], fallback_dir: Path) -> None:
727
2041
  if (os.environ.get("ENVIRONMENT_API_KEY") or "").strip():
728
2042
  return
729
2043
  target = env_paths[0] if env_paths else (fallback_dir / ".env").resolve()
2044
+ click.echo(
2045
+ "⚠️ ENVIRONMENT_API_KEY not set. Run `uvx synth-ai setup`, "
2046
+ "or pass --env-file pointing at a .env with ENVIRONMENT_API_KEY."
2047
+ )
730
2048
  result = _interactive_fill_env(target)
731
2049
  if result is None:
732
2050
  raise click.ClickException("ENVIRONMENT_API_KEY required to continue")
@@ -737,44 +2055,65 @@ def _ensure_env_values(env_paths: list[Path], fallback_dir: Path) -> None:
737
2055
 
738
2056
 
739
2057
  def _deploy_entry(
740
- entry: TaskAppEntry,
2058
+ entry: TaskAppEntryType,
741
2059
  modal_name: str | None,
742
2060
  dry_run: bool,
743
2061
  modal_cli: str,
744
2062
  env_file: Sequence[str],
2063
+ original_path: Path | None = None,
745
2064
  ) -> None:
746
2065
  modal_cfg = entry.modal
747
2066
  if modal_cfg is None:
748
- raise click.ClickException(f"Task app '{entry.app_id}' does not define Modal deployment settings")
2067
+ raise click.ClickException(
2068
+ f"Task app '{entry.app_id}' does not define Modal deployment settings"
2069
+ )
749
2070
 
750
- env_paths = _determine_env_files(entry, env_file)
751
- click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))
752
- _run_modal_with_entry(entry, modal_cfg, modal_cli, modal_name, env_paths, command="deploy", dry_run=dry_run)
2071
+ env_paths = _determine_env_files(entry, env_file, original_path=original_path)
2072
+ click.echo("Using env file(s): " + ", ".join(str(p.resolve()) for p in env_paths))
2073
+ _run_modal_with_entry(
2074
+ entry,
2075
+ modal_cfg,
2076
+ modal_cli,
2077
+ modal_name,
2078
+ env_paths,
2079
+ command="deploy",
2080
+ dry_run=dry_run,
2081
+ original_path=original_path,
2082
+ )
753
2083
 
754
2084
 
755
2085
  def _modal_serve_entry(
756
- entry: TaskAppEntry,
2086
+ entry: TaskAppEntryType,
757
2087
  modal_name: str | None,
758
2088
  modal_cli: str,
759
2089
  env_file: Sequence[str],
2090
+ original_path: Path | None = None,
760
2091
  ) -> None:
761
2092
  modal_cfg = entry.modal
762
2093
  if modal_cfg is None:
763
- raise click.ClickException(f"Task app '{entry.app_id}' does not define Modal deployment settings")
2094
+ raise click.ClickException(
2095
+ f"Task app '{entry.app_id}' does not define Modal deployment settings"
2096
+ )
2097
+
2098
+ env_paths = _determine_env_files(entry, env_file, original_path=original_path)
2099
+ click.echo("Using env file(s): " + ", ".join(str(p.resolve()) for p in env_paths))
2100
+ _run_modal_with_entry(
2101
+ entry,
2102
+ modal_cfg,
2103
+ modal_cli,
2104
+ modal_name,
2105
+ env_paths,
2106
+ command="serve",
2107
+ original_path=original_path,
2108
+ )
764
2109
 
765
- env_paths = _determine_env_files(entry, env_file)
766
- click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))
767
- _run_modal_with_entry(entry, modal_cfg, modal_cli, modal_name, env_paths, command="serve")
768
2110
 
769
- @click.group(
770
- name='task-app',
771
- help='Utilities for serving and deploying Synth task apps.'
772
- )
2111
+ @click.group(name="task-app", help="Utilities for serving and deploying Synth task apps.")
773
2112
  def task_app_group() -> None:
774
2113
  pass
775
2114
 
776
2115
 
777
- @task_app_group.command('list')
2116
+ @task_app_group.command("list")
778
2117
  def list_apps() -> None:
779
2118
  """List registered task apps."""
780
2119
 
@@ -785,6 +2124,256 @@ def list_apps() -> None:
785
2124
  for entry in entries:
786
2125
  aliases = f" (aliases: {', '.join(entry.aliases)})" if entry.aliases else ""
787
2126
  click.echo(f"- {entry.app_id}{aliases}: {entry.description}")
2127
+
2128
+
2129
+ @task_app_group.command("validate")
2130
+ @click.argument("app_id", type=str, required=True)
2131
+ @click.option(
2132
+ "--url",
2133
+ type=str,
2134
+ default=None,
2135
+ help="Task app URL to validate (if not provided, starts a local server)",
2136
+ )
2137
+ @click.option(
2138
+ "--port",
2139
+ type=int,
2140
+ default=8765,
2141
+ help="Port to use for temporary server (default: 8765)",
2142
+ )
2143
+ @click.option(
2144
+ "--api-key",
2145
+ type=str,
2146
+ default=None,
2147
+ envvar="ENVIRONMENT_API_KEY",
2148
+ help="API key for authentication (default: $ENVIRONMENT_API_KEY)",
2149
+ )
2150
+ @click.option(
2151
+ "--min-instances",
2152
+ type=int,
2153
+ default=10,
2154
+ help="Minimum number of task instances required (default: 10)",
2155
+ )
2156
+ @click.option(
2157
+ "--verbose",
2158
+ "-v",
2159
+ is_flag=True,
2160
+ help="Show detailed information about the task app",
2161
+ )
2162
+ @click.option(
2163
+ "--json",
2164
+ "output_json",
2165
+ is_flag=True,
2166
+ help="Output results as JSON",
2167
+ )
2168
+ def validate_task_app_cmd(
2169
+ app_id: str,
2170
+ url: str | None,
2171
+ port: int,
2172
+ api_key: str | None,
2173
+ min_instances: int,
2174
+ verbose: bool,
2175
+ output_json: bool,
2176
+ ) -> None:
2177
+ """Validate a task app deployment readiness.
2178
+
2179
+ This command verifies that a task app is properly configured and ready to run
2180
+ by checking all required HTTP endpoints, authentication, and task availability.
2181
+
2182
+ By default, it starts a temporary local server for validation. You can also
2183
+ validate a remote deployment by passing --url.
2184
+
2185
+ \b
2186
+ What gets validated:
2187
+ • Root endpoint (/) responds correctly
2188
+ • Health endpoint (/health) is accessible with proper authentication
2189
+ • Info endpoint (/info) returns valid task metadata
2190
+ • Task info endpoint (/task_info) provides task instances
2191
+ • Rollout endpoint (/rollout) is registered
2192
+ • At least N task instances are available (default: 10)
2193
+
2194
+ \b
2195
+ Examples:
2196
+
2197
+ \b
2198
+ Validate grpo-crafter (starts local server automatically):
2199
+ $ synth-ai task-app validate grpo-crafter
2200
+
2201
+ \b
2202
+ Validate sokoban with verbose output:
2203
+ $ synth-ai task-app validate sokoban --verbose
2204
+
2205
+ \b
2206
+ Validate with custom port:
2207
+ $ synth-ai task-app validate sokoban --port 9000
2208
+
2209
+ \b
2210
+ Validate a remote deployment:
2211
+ $ synth-ai task-app validate grpo-crafter --url https://my-crafter.modal.run
2212
+
2213
+ \b
2214
+ Require at least 20 task instances:
2215
+ $ synth-ai task-app validate grpo-crafter --min-instances 20
2216
+
2217
+ \b
2218
+ Get JSON output for automation:
2219
+ $ synth-ai task-app validate sokoban --json
2220
+
2221
+ \b
2222
+ Common use cases:
2223
+ • Pre-deployment verification: Check task app works before deploying to Modal
2224
+ • CI/CD integration: Use --json flag for automated validation in pipelines
2225
+ • Debug failing deployments: Use --verbose to see detailed endpoint responses
2226
+ • Test API key configuration: Verify authentication is set up correctly
2227
+ """
2228
+ import socket
2229
+ import subprocess
2230
+ import tempfile
2231
+ import time
2232
+
2233
+ # Import the validate_task_app function defined in this module
2234
+ from ._validate_task_app import validate_task_app # type: ignore[attr-defined]
2235
+
2236
+ proc = None
2237
+ task_app_url = url
2238
+
2239
+ try:
2240
+ # If no URL provided, start a temporary server
2241
+ if not task_app_url:
2242
+ # Find an available port
2243
+ def is_port_available(port: int) -> bool:
2244
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
2245
+ try:
2246
+ s.bind(("", port))
2247
+ return True
2248
+ except OSError:
2249
+ return False
2250
+
2251
+ while not is_port_available(port):
2252
+ port += 1
2253
+
2254
+ task_app_url = f"http://localhost:{port}"
2255
+
2256
+ if not output_json:
2257
+ click.echo(f"Starting temporary {app_id} server on port {port}...")
2258
+
2259
+ # Start the server in background
2260
+ env = os.environ.copy()
2261
+ if api_key:
2262
+ env["ENVIRONMENT_API_KEY"] = api_key
2263
+
2264
+ # Create a temporary trace DB and trace dir to avoid prompts
2265
+ import tempfile
2266
+ temp_dir = tempfile.mkdtemp()
2267
+ temp_trace_db = os.path.join(temp_dir, "validate_trace.db")
2268
+ temp_trace_dir = os.path.join(temp_dir, "traces")
2269
+ os.makedirs(temp_trace_dir, exist_ok=True)
2270
+
2271
+ proc = subprocess.Popen(
2272
+ [
2273
+ "uv",
2274
+ "run",
2275
+ "synth-ai",
2276
+ "task-app",
2277
+ "serve",
2278
+ app_id,
2279
+ "--port",
2280
+ str(port),
2281
+ "--no-reload",
2282
+ "--trace",
2283
+ temp_trace_dir,
2284
+ "--trace-db",
2285
+ temp_trace_db,
2286
+ ],
2287
+ env=env,
2288
+ stdin=subprocess.PIPE, # Add stdin to handle any prompts
2289
+ stdout=subprocess.DEVNULL if output_json else subprocess.PIPE,
2290
+ stderr=subprocess.DEVNULL if output_json else subprocess.PIPE,
2291
+ text=True,
2292
+ )
2293
+
2294
+ # Write empty input to stdin to skip any prompts
2295
+ if proc.stdin:
2296
+ try:
2297
+ proc.stdin.write("\n")
2298
+ proc.stdin.flush()
2299
+ proc.stdin.close()
2300
+ except Exception:
2301
+ pass
2302
+
2303
+ # Wait for server to be ready
2304
+ if not output_json:
2305
+ click.echo("Waiting for server to start...")
2306
+
2307
+ import httpx
2308
+ for _attempt in range(60): # 30 seconds timeout
2309
+ try:
2310
+ async def check_health():
2311
+ async with httpx.AsyncClient(timeout=2.0) as client:
2312
+ resp = await client.get(f"{task_app_url}/")
2313
+ return resp.status_code == 200
2314
+
2315
+ if asyncio.run(check_health()):
2316
+ break
2317
+ except Exception:
2318
+ pass
2319
+
2320
+ # Check if process died
2321
+ if proc.poll() is not None:
2322
+ stderr_output = ""
2323
+ if proc.stderr and not output_json:
2324
+ stderr_output = proc.stderr.read()
2325
+ click.echo(click.style("✗ Server process exited unexpectedly", fg="red"), err=True)
2326
+ if stderr_output and not output_json:
2327
+ click.echo(f"Error output:\n{stderr_output}", err=True)
2328
+ sys.exit(1)
2329
+
2330
+ time.sleep(0.5)
2331
+ else:
2332
+ click.echo(click.style("✗ Server failed to start within 30 seconds", fg="red"), err=True)
2333
+ sys.exit(1)
2334
+
2335
+ if not output_json:
2336
+ click.echo(click.style("✓ Server started", fg="green"))
2337
+ click.echo()
2338
+
2339
+ # Ensure URL doesn't have trailing slash
2340
+ task_app_url = task_app_url.rstrip("/")
2341
+
2342
+ async def _run() -> tuple[bool, dict[str, Any]]:
2343
+ return await validate_task_app(
2344
+ url=task_app_url,
2345
+ api_key=api_key,
2346
+ min_instances=min_instances,
2347
+ verbose=verbose,
2348
+ )
2349
+
2350
+ success, results = asyncio.run(_run())
2351
+
2352
+ if output_json:
2353
+ import json as _json
2354
+ click.echo(_json.dumps(results, indent=2))
2355
+
2356
+ sys.exit(0 if success else 1)
2357
+
2358
+ finally:
2359
+ # Cleanup: stop the temporary server
2360
+ if proc is not None:
2361
+ if not output_json:
2362
+ click.echo("\nStopping temporary server...")
2363
+ try:
2364
+ proc.terminate()
2365
+ proc.wait(timeout=5)
2366
+ except Exception:
2367
+ proc.kill()
2368
+
2369
+ # Cleanup temp trace DB
2370
+ if not url and 'temp_dir' in locals():
2371
+ import contextlib
2372
+ import shutil
2373
+ with contextlib.suppress(Exception):
2374
+ shutil.rmtree(temp_dir, ignore_errors=True)
2375
+
2376
+
788
2377
  def _load_env_files_into_process(paths: Sequence[str]) -> None:
789
2378
  for p in paths:
790
2379
  try:
@@ -792,9 +2381,9 @@ def _load_env_files_into_process(paths: Sequence[str]) -> None:
792
2381
  except Exception:
793
2382
  continue
794
2383
  for line in txt.splitlines():
795
- if not line or line.startswith('#') or '=' not in line:
2384
+ if not line or line.startswith("#") or "=" not in line:
796
2385
  continue
797
- k, v = line.split('=', 1)
2386
+ k, v = line.split("=", 1)
798
2387
  key = k.strip()
799
2388
  val = v.strip().strip('"').strip("'")
800
2389
  # Load into process, but allow overriding if the current value is empty
@@ -804,55 +2393,206 @@ def _load_env_files_into_process(paths: Sequence[str]) -> None:
804
2393
  os.environ[key] = val
805
2394
 
806
2395
 
807
-
808
- @click.command('serve')
809
- @click.argument('app_id', type=str, required=False)
810
- @click.option('--host', default='0.0.0.0', show_default=True)
811
- @click.option('--port', default=8001, show_default=True, type=int)
812
- @click.option('--env-file', multiple=True, type=click.Path(), help='Extra .env files to load')
813
- @click.option('--reload/--no-reload', 'reload_flag', default=False, help='Enable uvicorn auto-reload')
814
- @click.option('--force/--no-force', 'force', default=False, help='Kill any process already bound to the selected port before starting')
815
- @click.option('--trace', 'trace_dir', type=click.Path(), default=None, help='Enable tracing and write SFT JSONL files to this directory')
816
- @click.option('--trace-db', 'trace_db', type=click.Path(), default=None, help='Override local trace DB path (maps to SQLD_DB_PATH)')
2396
+ @click.command("serve")
2397
+ @click.argument("app_id", type=str, required=False)
2398
+ @click.option("--host", default="0.0.0.0", show_default=True)
2399
+ @click.option("--port", default=None, type=int, help="Port to serve on (default: 8001)")
2400
+ @click.option("--env-file", multiple=True, type=click.Path(), help="Extra .env files to load")
2401
+ @click.option(
2402
+ "--reload/--no-reload", "reload_flag", default=False, help="Enable uvicorn auto-reload"
2403
+ )
2404
+ @click.option(
2405
+ "--force/--no-force",
2406
+ "force",
2407
+ default=False,
2408
+ help="Kill any process already bound to the selected port before starting",
2409
+ )
2410
+ @click.option(
2411
+ "--trace",
2412
+ "trace_dir",
2413
+ type=click.Path(),
2414
+ default=None,
2415
+ help="Enable tracing and write SFT JSONL files to this directory (default: traces/v3)",
2416
+ )
2417
+ @click.option(
2418
+ "--trace-db",
2419
+ "trace_db",
2420
+ type=click.Path(),
2421
+ default=None,
2422
+ help="Override local trace DB path (default: traces/v3/synth_ai.db)",
2423
+ )
817
2424
  def serve_command(
818
2425
  app_id: str | None,
819
2426
  host: str,
820
- port: int,
2427
+ port: int | None,
821
2428
  env_file: Sequence[str],
822
2429
  reload_flag: bool,
823
2430
  force: bool,
824
2431
  trace_dir: str | None,
825
2432
  trace_db: str | None,
826
2433
  ) -> None:
827
- choice = _select_app_choice(app_id, purpose="serve")
828
- entry = choice.ensure_entry()
829
- _serve_entry(entry, host, port, env_file, reload_flag, force, trace_dir=trace_dir, trace_db=trace_db)
830
-
831
-
832
- @task_app_group.command('serve')
833
- @click.argument('app_id', type=str, required=False)
834
- @click.option('--host', default='0.0.0.0', show_default=True)
835
- @click.option('--port', default=8001, show_default=True, type=int)
836
- @click.option('--env-file', multiple=True, type=click.Path(), help='Extra .env files to load')
837
- @click.option('--reload/--no-reload', 'reload_flag', default=False, help='Enable uvicorn auto-reload')
838
- @click.option('--force/--no-force', 'force', default=False, help='Kill any process already bound to the selected port before starting')
839
- @click.option('--trace', 'trace_dir', type=click.Path(), default=None, help='Enable tracing and write SFT JSONL files to this directory')
840
- @click.option('--trace-db', 'trace_db', type=click.Path(), default=None, help='Override local trace DB path (maps to SQLD_DB_PATH)')
2434
+ return None
2435
+
2436
+
2437
+ @task_app_group.command("info")
2438
+ @click.option(
2439
+ "--base",
2440
+ "base_url",
2441
+ default=None,
2442
+ help="Task app base URL (default: TASK_APP_BASE_URL or http://127.0.0.1:8001)",
2443
+ )
2444
+ @click.option(
2445
+ "--api-key",
2446
+ default=None,
2447
+ help="Environment API key (default: ENVIRONMENT_API_KEY or dev fallbacks)",
2448
+ )
2449
+ @click.option(
2450
+ "--seed",
2451
+ "seeds",
2452
+ multiple=True,
2453
+ type=int,
2454
+ help="Optional seed(s) to request specific instances (repeatable)",
2455
+ )
2456
+ def info_command(base_url: str | None, api_key: str | None, seeds: tuple[int, ...]) -> None:
2457
+ """Fetch Task App /task_info with authentication and print JSON."""
2458
+ import json as _json
2459
+ import os as _os
2460
+
2461
+ import requests as _requests
2462
+
2463
+ base = (base_url or _os.getenv("TASK_APP_BASE_URL") or "http://127.0.0.1:8001").rstrip("/")
2464
+
2465
+ # Resolve API key, permitting dev fallbacks
2466
+ auth_module = _maybe_import("synth_ai.task.auth")
2467
+ if auth_module is not None:
2468
+ _norm_key = getattr(auth_module, "normalize_environment_api_key", lambda: _os.getenv("ENVIRONMENT_API_KEY"))
2469
+ else:
2470
+ _norm_key = lambda: _os.getenv("ENVIRONMENT_API_KEY") # noqa: E731
2471
+ key = (api_key or _norm_key() or "").strip()
2472
+ if not key:
2473
+ raise click.ClickException("Missing API key. Provide --api-key or set ENVIRONMENT_API_KEY.")
2474
+
2475
+ headers: dict[str, str] = {"X-API-Key": key, "Authorization": f"Bearer {key}"}
2476
+ aliases = (_os.getenv("ENVIRONMENT_API_KEY_ALIASES") or "").strip()
2477
+ keys_csv = (
2478
+ ",".join([key] + [p.strip() for p in aliases.split(",") if p.strip()]) if aliases else key
2479
+ )
2480
+ if keys_csv:
2481
+ headers["X-API-Keys"] = keys_csv
2482
+
2483
+ params: list[tuple[str, str]] = []
2484
+ for s in seeds:
2485
+ params.append(("seed", str(int(s))))
2486
+
2487
+ url = f"{base}/task_info"
2488
+ try:
2489
+ r = _requests.get(url, headers=headers, params=params or None, timeout=30)
2490
+ except Exception as exc:
2491
+ raise click.ClickException(f"Request failed: {exc}") from exc
2492
+ if not (200 <= r.status_code < 300):
2493
+ ct = r.headers.get("content-type", "")
2494
+ detail = r.text
2495
+ if ct.startswith("application/json"):
2496
+ with contextlib.suppress(Exception):
2497
+ detail = _json.dumps(r.json(), indent=2)
2498
+ raise click.ClickException(f"{url} returned {r.status_code}:\n{detail}")
2499
+
2500
+ data = (
2501
+ r.json()
2502
+ if r.headers.get("content-type", "").startswith("application/json")
2503
+ else {"raw": r.text}
2504
+ )
2505
+ click.echo(_json.dumps(data, indent=2, sort_keys=True))
2506
+
2507
+
2508
+ @task_app_group.command("serve")
2509
+ @click.argument("app_id", type=str, required=False)
2510
+ @click.option("--host", default="0.0.0.0", show_default=True)
2511
+ @click.option("--port", default=None, type=int, help="Port to serve on (default: 8001)")
2512
+ @click.option("--env-file", multiple=True, type=click.Path(), help="Extra .env files to load")
2513
+ @click.option(
2514
+ "--reload/--no-reload", "reload_flag", default=False, help="Enable uvicorn auto-reload"
2515
+ )
2516
+ @click.option(
2517
+ "--force/--no-force",
2518
+ "force",
2519
+ default=False,
2520
+ help="Kill any process already bound to the selected port before starting",
2521
+ )
2522
+ @click.option(
2523
+ "--trace",
2524
+ "trace_dir",
2525
+ type=click.Path(),
2526
+ default=None,
2527
+ help="Enable tracing and write SFT JSONL files to this directory (default: traces/v3)",
2528
+ )
2529
+ @click.option(
2530
+ "--trace-db",
2531
+ "trace_db",
2532
+ type=click.Path(),
2533
+ default=None,
2534
+ help="Override local trace DB path (default: traces/v3/synth_ai.db)",
2535
+ )
841
2536
  def serve_task_group(
842
2537
  app_id: str | None,
843
2538
  host: str,
844
- port: int,
2539
+ port: int | None,
845
2540
  env_file: Sequence[str],
846
2541
  reload_flag: bool,
847
2542
  force: bool,
848
2543
  trace_dir: str | None,
849
2544
  trace_db: str | None,
850
2545
  ) -> None:
2546
+ """Serve a TaskAppConfig-based task app using uvicorn."""
2547
+ import contextlib
2548
+
2549
+ if not host:
2550
+ host = "0.0.0.0"
2551
+
2552
+ if port is None:
2553
+ port = 8001
2554
+
2555
+ # Auto-enable tracing by default
2556
+ try:
2557
+ auto_trace = os.getenv("SYNTH_AUTO_TRACE", "1")
2558
+ auto_trace_enabled = auto_trace not in {"0", "false", "False", ""}
2559
+ except Exception:
2560
+ auto_trace_enabled = True
2561
+
2562
+ if auto_trace_enabled:
2563
+ demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
2564
+ if trace_dir is None:
2565
+ default_trace_dir = (demo_base / "traces" / "v3").resolve()
2566
+ with contextlib.suppress(Exception):
2567
+ default_trace_dir.mkdir(parents=True, exist_ok=True)
2568
+ trace_dir = str(default_trace_dir)
2569
+ click.echo(f"[trace] Using trace directory: {trace_dir}")
2570
+ if trace_dir and trace_db is None:
2571
+ default_trace_db = (Path(trace_dir) / "synth_ai.db").resolve()
2572
+ with contextlib.suppress(Exception):
2573
+ default_trace_db.parent.mkdir(parents=True, exist_ok=True)
2574
+ trace_db = str(default_trace_db)
2575
+ click.echo(f"[trace] Using trace DB: {trace_db}")
2576
+
2577
+ # Select and serve the app
851
2578
  choice = _select_app_choice(app_id, purpose="serve")
852
2579
  entry = choice.ensure_entry()
853
- _serve_entry(entry, host, port, env_file, reload_flag, force, trace_dir=trace_dir, trace_db=trace_db)
2580
+ _serve_entry(
2581
+ entry,
2582
+ host,
2583
+ port,
2584
+ env_file,
2585
+ reload_flag,
2586
+ force,
2587
+ trace_dir=trace_dir,
2588
+ trace_db=trace_db,
2589
+ )
854
2590
 
855
- def _determine_env_files(entry: TaskAppEntry, user_env_files: Sequence[str]) -> list[Path]:
2591
+
2592
+
2593
+ def _determine_env_files(
2594
+ entry: TaskAppEntryType, user_env_files: Sequence[str], *, original_path: Path | None = None
2595
+ ) -> list[Path]:
856
2596
  resolved: list[Path] = []
857
2597
  for candidate in user_env_files:
858
2598
  p = Path(candidate).expanduser()
@@ -862,19 +2602,46 @@ def _determine_env_files(entry: TaskAppEntry, user_env_files: Sequence[str]) ->
862
2602
  if resolved:
863
2603
  return resolved
864
2604
 
865
- defaults = [Path(path).expanduser() for path in (entry.env_files or []) if Path(path).expanduser().exists()]
866
- if defaults:
867
- return defaults
868
-
869
- env_candidates = sorted(REPO_ROOT.glob('**/*.env'))
870
- if not env_candidates:
871
- raise click.ClickException('No env file found. Pass --env-file explicitly.')
872
-
873
- click.echo('Select env file to load:')
874
- for idx, path in enumerate(env_candidates, start=1):
875
- click.echo(f" {idx}) {path}")
876
- choice = click.prompt('Enter choice', type=click.IntRange(1, len(env_candidates)))
877
- return [env_candidates[choice - 1]]
2605
+ declared: list[Path] = []
2606
+ for candidate in getattr(entry, "env_files", ()) or ():
2607
+ try:
2608
+ p = Path(candidate).expanduser()
2609
+ except Exception:
2610
+ continue
2611
+ if p.exists() and p.is_file():
2612
+ declared.append(p)
2613
+ if declared:
2614
+ return declared
2615
+
2616
+ def _append_candidate(collection: list[Path], candidate: Path) -> None:
2617
+ if candidate.exists() and candidate.is_file() and candidate not in collection:
2618
+ collection.append(candidate)
2619
+
2620
+ auto_candidates: list[Path] = []
2621
+
2622
+ search_dirs: list[Path] = []
2623
+ if original_path is not None:
2624
+ search_dirs.append(original_path.parent.resolve())
2625
+ for parent in original_path.parent.resolve().parents:
2626
+ search_dirs.append(parent)
2627
+ cwd = Path.cwd().resolve()
2628
+ if cwd not in search_dirs:
2629
+ search_dirs.append(cwd)
2630
+ repo_root = REPO_ROOT.resolve()
2631
+ if repo_root not in search_dirs:
2632
+ search_dirs.append(repo_root)
2633
+
2634
+ for directory in search_dirs:
2635
+ _append_candidate(auto_candidates, directory / ".env")
2636
+ for candidate in sorted(directory.glob("*.env")):
2637
+ _append_candidate(auto_candidates, candidate)
2638
+
2639
+ if auto_candidates:
2640
+ return [auto_candidates[0]]
2641
+
2642
+ raise click.ClickException(
2643
+ "No .env file discovered automatically. Pass --env-file /path/to/.env or generate one with `uvx synth-ai setup`."
2644
+ )
878
2645
 
879
2646
 
880
2647
  def _ensure_port_free(port: int, host: str, *, force: bool) -> None:
@@ -889,7 +2656,9 @@ def _ensure_port_free(port: int, host: str, *, force: bool) -> None:
889
2656
  return
890
2657
 
891
2658
  try:
892
- out = subprocess.run(["lsof", "-ti", f"TCP:{port}"], capture_output=True, text=True, check=False)
2659
+ out = subprocess.run(
2660
+ ["lsof", "-ti", f"TCP:{port}"], capture_output=True, text=True, check=False
2661
+ )
893
2662
  pids = [pid for pid in out.stdout.strip().splitlines() if pid]
894
2663
  except FileNotFoundError:
895
2664
  pids = []
@@ -904,7 +2673,7 @@ def _ensure_port_free(port: int, host: str, *, force: bool) -> None:
904
2673
  try:
905
2674
  os.kill(int(pid), signal.SIGTERM)
906
2675
  except Exception as exc:
907
- raise click.ClickException(f'Failed to terminate PID {pid}: {exc}')
2676
+ raise click.ClickException(f"Failed to terminate PID {pid}: {exc}") from exc
908
2677
 
909
2678
  time.sleep(0.5)
910
2679
 
@@ -916,16 +2685,134 @@ def _ensure_port_free(port: int, host: str, *, force: bool) -> None:
916
2685
  try:
917
2686
  os.kill(int(pid), signal.SIGKILL)
918
2687
  except Exception as exc:
919
- raise click.ClickException(f'Failed to force terminate PID {pid}: {exc}')
2688
+ raise click.ClickException(f"Failed to force terminate PID {pid}: {exc}") from exc
920
2689
  time.sleep(0.5)
921
2690
 
922
2691
  with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
923
2692
  in_use_after = s.connect_ex((host, port)) == 0
924
2693
  if in_use_after:
925
- raise click.ClickException(f'Port {port} is still in use after attempting to terminate processes.')
2694
+ raise click.ClickException(
2695
+ f"Port {port} is still in use after attempting to terminate processes."
2696
+ )
2697
+
2698
+
2699
+ def _save_to_env_file(env_path: Path, key: str, value: str) -> None:
2700
+ """Save or update a key-value pair in the .env file."""
2701
+ try:
2702
+ # Read existing .env
2703
+ existing_lines = []
2704
+ if env_path.exists():
2705
+ existing_lines = env_path.read_text().splitlines()
2706
+ else:
2707
+ env_path.parent.mkdir(parents=True, exist_ok=True)
2708
+
2709
+ # Check if key already exists and update it
2710
+ key_updated = False
2711
+ new_lines = []
2712
+ for line in existing_lines:
2713
+ if line.strip().startswith(f"{key}="):
2714
+ new_lines.append(f"{key}={value}")
2715
+ key_updated = True
2716
+ else:
2717
+ new_lines.append(line)
2718
+
2719
+ if key_updated:
2720
+ # Write updated lines back
2721
+ env_path.write_text("\n".join(new_lines) + "\n")
2722
+ click.echo(f"Updated {key} in {env_path}")
2723
+ else:
2724
+ # Append to .env
2725
+ with open(env_path, "a") as f:
2726
+ if existing_lines and not existing_lines[-1].strip():
2727
+ # File exists and last line is not empty
2728
+ pass
2729
+ elif existing_lines:
2730
+ # Add newline before appending
2731
+ f.write("\n")
2732
+ f.write(f"{key}={value}\n")
2733
+ click.echo(f"Saved {key} to {env_path}")
2734
+ except Exception as e:
2735
+ click.echo(f"Warning: Could not save {key} to .env: {e}", err=True)
2736
+
2737
+
2738
+ def _persist_env_api_key(env_api_key: str, env_paths: Sequence[Path] | None) -> None:
2739
+ """Persist ENVIRONMENT_API_KEY to provided env files (or default .env)."""
2740
+ targets: list[Path] = []
2741
+ seen: set[Path] = set()
2742
+ for path in env_paths or ():
2743
+ try:
2744
+ resolved = Path(path).resolve()
2745
+ except Exception:
2746
+ continue
2747
+ if resolved in seen:
2748
+ continue
2749
+ seen.add(resolved)
2750
+ targets.append(resolved)
2751
+
2752
+ if not targets:
2753
+ demo_dir = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
2754
+ targets.append((demo_dir / ".env").resolve())
2755
+
2756
+ for target in targets:
2757
+ _save_to_env_file(target, "ENVIRONMENT_API_KEY", env_api_key)
2758
+
2759
+
2760
+ def _validate_required_env_keys() -> None:
2761
+ """Validate required environment keys are set, prompting if missing."""
2762
+ # Use demo directory .env file if set, otherwise current directory
2763
+ demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
2764
+ env_file = demo_base / ".env"
2765
+
2766
+ if env_file.exists():
2767
+ try:
2768
+ from dotenv import load_dotenv
2769
+
2770
+ load_dotenv(env_file, override=False)
2771
+ except Exception:
2772
+ pass # Best effort
2773
+
2774
+ env_api_key = os.environ.get("ENVIRONMENT_API_KEY", "").strip()
2775
+
2776
+ if not env_api_key:
2777
+ env_api_key = input("Please enter your RL Environment API key:\n> ").strip()
2778
+ if not env_api_key:
2779
+ raise click.ClickException("RL Environment API key is required to start the server")
2780
+ os.environ["ENVIRONMENT_API_KEY"] = env_api_key
2781
+ _save_to_env_file(env_file, "ENVIRONMENT_API_KEY", env_api_key)
2782
+
2783
+ # Check for Groq API key
2784
+ groq_api_key = os.environ.get("GROQ_API_KEY", "").strip()
2785
+
2786
+ if not groq_api_key:
2787
+ click.echo("\nInference API key configuration:")
2788
+ click.echo("This workflow requires a Groq API key.")
2789
+ groq_api_key = input("Groq API key (or press Enter to skip): ").strip()
2790
+ if groq_api_key:
2791
+ os.environ["GROQ_API_KEY"] = groq_api_key
2792
+ _save_to_env_file(env_file, "GROQ_API_KEY", groq_api_key)
2793
+
2794
+
2795
+ def _print_demo_next_steps_if_applicable() -> None:
2796
+ """Print next steps if currently in a demo directory."""
2797
+ try:
2798
+ cwd = Path.cwd().resolve()
2799
+ demo_dir = _load_demo_directory()
2800
+
2801
+ if demo_dir and demo_dir == cwd and (cwd / "run_local_rollout_traced.py").exists():
2802
+ click.echo("\n" + "=" * 60)
2803
+ click.echo("Next step: Collect traced rollouts")
2804
+ click.echo("=" * 60)
2805
+ click.echo("\nIn another terminal, run:")
2806
+ click.echo(f" cd {cwd}")
2807
+ click.echo(" uv run python run_local_rollout_traced.py")
2808
+ click.echo("\nRun this 5-10 times to collect diverse traces.")
2809
+ click.echo("=" * 60 + "\n")
2810
+ except Exception:
2811
+ pass
2812
+
926
2813
 
927
2814
  def _serve_entry(
928
- entry: TaskAppEntry,
2815
+ entry: TaskAppEntryType,
929
2816
  host: str,
930
2817
  port: int,
931
2818
  env_file: Sequence[str],
@@ -940,33 +2827,51 @@ def _serve_entry(
940
2827
 
941
2828
  trace_enabled = trace_dir is not None or trace_db is not None
942
2829
  if trace_enabled:
943
- os.environ['TASKAPP_TRACING_ENABLED'] = '1'
2830
+ os.environ["TASKAPP_TRACING_ENABLED"] = "1"
2831
+
2832
+ # Ensure paths are absolute relative to demo directory
2833
+ demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
2834
+
944
2835
  if trace_dir is not None:
945
2836
  dir_path = Path(trace_dir).expanduser()
2837
+ if not dir_path.is_absolute():
2838
+ dir_path = (demo_base / dir_path).resolve()
946
2839
  try:
947
2840
  dir_path.mkdir(parents=True, exist_ok=True)
948
2841
  except Exception as exc:
949
- raise click.ClickException(f"Failed to create trace directory {dir_path}: {exc}") from exc
950
- os.environ['TASKAPP_SFT_OUTPUT_DIR'] = str(dir_path)
2842
+ raise click.ClickException(
2843
+ f"Failed to create trace directory {dir_path}: {exc}"
2844
+ ) from exc
2845
+ os.environ["TASKAPP_SFT_OUTPUT_DIR"] = str(dir_path)
951
2846
  click.echo(f"Tracing enabled. SFT JSONL will be written to {dir_path}")
952
2847
  if trace_db is not None:
953
2848
  db_path = Path(trace_db).expanduser()
954
- os.environ['SQLD_DB_PATH'] = str(db_path)
955
- os.environ.pop('TURSO_LOCAL_DB_URL', None)
2849
+ if not db_path.is_absolute():
2850
+ db_path = (demo_base / db_path).resolve()
2851
+ # Construct the sqlite URL from the absolute path
2852
+ db_url = f"sqlite+aiosqlite:///{db_path}"
2853
+ os.environ["SQLD_DB_PATH"] = str(db_path)
2854
+ os.environ["TURSO_LOCAL_DB_URL"] = db_url
956
2855
  click.echo(f"Tracing DB path set to {db_path}")
957
- from synth_ai.tracing_v3.config import CONFIG as TRACE_CONFIG
958
- # recompute db_url based on current environment
959
- new_db_url = os.getenv('TURSO_LOCAL_DB_URL') or TRACE_CONFIG.db_url
960
- TRACE_CONFIG.db_url = new_db_url
961
- if new_db_url:
962
- os.environ['TURSO_LOCAL_DB_URL'] = new_db_url
963
- click.echo(f"Tracing DB URL resolved to {new_db_url}")
964
- elif os.getenv('TASKAPP_TRACING_ENABLED'):
2856
+ tracing_config_module = _maybe_import("synth_ai.tracing_v3.config")
2857
+ if tracing_config_module is not None:
2858
+ trace_config = tracing_config_module.CONFIG
2859
+ new_db_url = os.getenv("TURSO_LOCAL_DB_URL") or trace_config.db_url
2860
+ trace_config.db_url = new_db_url
2861
+ if new_db_url:
2862
+ click.echo(f"Tracing DB URL resolved to {new_db_url}")
2863
+ elif os.getenv("TASKAPP_TRACING_ENABLED"):
965
2864
  click.echo("Tracing enabled via environment variables")
966
2865
 
967
2866
  _ensure_port_free(port, host, force=force)
968
2867
 
969
- _preflight_env_key()
2868
+ _validate_required_env_keys()
2869
+ env_path_objs = [Path(p) for p in env_files if p]
2870
+ _preflight_env_key(env_path_objs)
2871
+
2872
+ # Print next steps if in demo context
2873
+ if trace_enabled:
2874
+ _print_demo_next_steps_if_applicable()
970
2875
 
971
2876
  run_task_app(
972
2877
  entry.config_factory,
@@ -977,91 +2882,168 @@ def _serve_entry(
977
2882
  )
978
2883
 
979
2884
 
980
- @task_app_group.command('deploy')
981
- @click.argument("app_id", type=str, required=False)
982
- @click.option("--name", "modal_name", default=None, help="Override Modal app name")
983
- @click.option("--dry-run", is_flag=True, help="Print modal deploy command without executing")
984
- @click.option("--modal-cli", default="modal", help="Path to modal CLI executable")
985
- @click.option('--env-file', multiple=True, type=click.Path(), help='Env file to load into the container (can be repeated)')
986
- def deploy_app(app_id: str | None, modal_name: str | None, dry_run: bool, modal_cli: str, env_file: Sequence[str]) -> None:
987
- """Deploy a task app to Modal."""
988
-
989
- choice = _select_app_choice(app_id, purpose="deploy")
990
-
991
- if choice.modal_script:
992
- env_paths = _resolve_env_paths_for_script(choice.modal_script, env_file)
993
- click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))
994
- _run_modal_script(choice.modal_script, modal_cli, "deploy", env_paths, modal_name=modal_name, dry_run=dry_run)
995
- return
996
-
997
- entry = choice.ensure_entry()
998
- _deploy_entry(entry, modal_name, dry_run, modal_cli, env_file)
999
-
1000
- @task_app_group.command('modal-serve')
1001
- @click.argument('app_id', type=str, required=False)
1002
- @click.option('--modal-cli', default='modal', help='Path to modal CLI executable')
1003
- @click.option('--name', 'modal_name', default=None, help='Override Modal app name (optional)')
1004
- @click.option('--env-file', multiple=True, type=click.Path(), help='Env file to load into the container (can be repeated)')
1005
- def modal_serve_app(app_id: str | None, modal_cli: str, modal_name: str | None, env_file: Sequence[str]) -> None:
1006
- choice = _select_app_choice(app_id, purpose="modal-serve")
1007
-
1008
- if choice.modal_script:
1009
- env_paths = _resolve_env_paths_for_script(choice.modal_script, env_file)
1010
- click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))
1011
- _run_modal_script(choice.modal_script, modal_cli, "serve", env_paths, modal_name=modal_name)
1012
- return
1013
-
1014
- entry = choice.ensure_entry()
1015
- _modal_serve_entry(entry, modal_name, modal_cli, env_file)
1016
-
1017
-
1018
2885
  def _write_modal_entrypoint(
1019
- entry: TaskAppEntry,
1020
- modal_cfg: ModalDeploymentConfig,
2886
+ entry: TaskAppEntryType,
2887
+ modal_cfg: ModalDeploymentConfigType,
1021
2888
  override_name: str | None,
1022
2889
  *,
1023
2890
  dotenv_paths: Sequence[str] | None = None,
2891
+ original_path: Path | None = None,
2892
+ inline_secret_values: dict[str, str] | None = None,
1024
2893
  ) -> Path:
1025
2894
  modal_name = override_name or modal_cfg.app_name
1026
2895
 
2896
+ # For dynamically discovered apps, import the module by its package path
2897
+ # Compute the module name relative to the mounted repo root (/opt/synth_ai_repo)
2898
+ remote_file_str: str | None = None
2899
+ if original_path:
2900
+ try:
2901
+ # Build lookup of local->remote mounts
2902
+ mount_map: list[tuple[Path, Path]] = [
2903
+ (Path(local).resolve(), Path(remote))
2904
+ for (local, remote) in modal_cfg.extra_local_dirs
2905
+ ]
2906
+ orig = Path(original_path).resolve()
2907
+ for local_src, remote_dst in mount_map:
2908
+ with contextlib.suppress(Exception):
2909
+ if orig.is_relative_to(local_src): # py311+
2910
+ remote_file_str = str((remote_dst / orig.relative_to(local_src)).resolve())
2911
+ break
2912
+ try:
2913
+ rel = orig.relative_to(local_src)
2914
+ remote_file_str = str((remote_dst / rel).resolve())
2915
+ break
2916
+ except Exception:
2917
+ pass
2918
+ except Exception:
2919
+ remote_file_str = None
1027
2920
  module_name = entry.config_factory.__module__
2921
+
2922
+ # Prefer a guaranteed mount for the discovered file to avoid package import issues
2923
+ guaranteed_file_str: str | None = None
2924
+ if original_path:
2925
+ guaranteed_file_str = str(
2926
+ (Path("/opt/synth_ai_repo/__local_task_app__") / Path(original_path).stem).with_suffix(
2927
+ ".py"
2928
+ )
2929
+ )
2930
+
1028
2931
  dotenv_paths = [str(Path(path)) for path in (dotenv_paths or [])]
1029
2932
 
1030
2933
  pip_packages = list(modal_cfg.pip_packages)
1031
-
2934
+ # Ensure synth-ai (matching host version if available) is installed in the container
2935
+ synth_pkg = "synth-ai"
2936
+ host_synth = _maybe_import("synth_ai")
2937
+ if host_synth is not None:
2938
+ host_ver = getattr(host_synth, "__version__", None)
2939
+ if host_ver:
2940
+ synth_pkg = f"synth-ai=={host_ver}"
2941
+ if not any(str(p).startswith("synth-ai") for p in pip_packages):
2942
+ pip_packages.insert(0, synth_pkg)
2943
+
2944
+ apt_packages = list(modal_cfg.apt_packages)
2945
+ click.echo(f"[DEBUG] modal_cfg.apt_packages type: {type(modal_cfg.apt_packages)}")
2946
+ click.echo(f"[DEBUG] modal_cfg.apt_packages value: {modal_cfg.apt_packages}")
2947
+ click.echo(f"[DEBUG] apt_packages after list(): {apt_packages}")
2948
+
1032
2949
  local_dirs = [(str(Path(src)), dst) for src, dst in modal_cfg.extra_local_dirs]
2950
+ # Also mount the host synth_ai source if available to ensure latest code is used
2951
+ if host_synth is not None:
2952
+ try:
2953
+ host_synth_dir = Path(host_synth.__file__).resolve().parent
2954
+ sy_dst = "/opt/synth_ai_repo/synth_ai"
2955
+ candidate = (str(host_synth_dir), sy_dst)
2956
+ if candidate not in local_dirs:
2957
+ local_dirs.insert(0, candidate)
2958
+ except Exception:
2959
+ pass
2960
+ # Ensure the discovered app directory is mounted, regardless of modal_cfg
2961
+ if original_path:
2962
+ discovered_dir = str(Path(original_path).resolve().parent)
2963
+ mount_dst = "/opt/synth_ai_repo/__local_task_app__"
2964
+ if (discovered_dir, mount_dst) not in local_dirs:
2965
+ local_dirs.append((discovered_dir, mount_dst))
1033
2966
  secret_names = list(modal_cfg.secret_names)
1034
2967
  volume_mounts = [(name, mount) for name, mount in modal_cfg.volume_mounts]
2968
+ inline_secret_values = {k: v for k, v in (inline_secret_values or {}).items() if v}
1035
2969
 
1036
2970
  script = f"""from __future__ import annotations
1037
2971
 
1038
2972
  import importlib
2973
+ import importlib.util
1039
2974
  import sys
2975
+ import os
2976
+ import shutil
2977
+ import tempfile
2978
+ from pathlib import Path as _Path
2979
+ import fnmatch
1040
2980
  sys.path.insert(0, '/opt/synth_ai_repo')
1041
2981
 
1042
2982
  from modal import App, Image, Secret, Volume, asgi_app
1043
2983
 
1044
- from synth_ai.task.apps import registry
1045
- from synth_ai.task.server import create_task_app
2984
+ # Defer importing synth_ai until inside fastapi_app to avoid local import errors
1046
2985
 
1047
2986
  ENTRY_ID = {entry.app_id!r}
1048
2987
  MODAL_APP_NAME = {modal_name!r}
1049
2988
  MODULE_NAME = {module_name!r}
2989
+ MODULE_FILE = {guaranteed_file_str or remote_file_str!r}
1050
2990
  DOTENV_PATHS = {dotenv_paths!r}
2991
+ INLINE_SECRET_VALUES = {inline_secret_values!r}
1051
2992
 
1052
2993
  image = Image.debian_slim(python_version={modal_cfg.python_version!r})
1053
2994
 
2995
+ # CRITICAL: Install iverilog for Verilog task app (hardcoded to prevent config issues)
2996
+ if {entry.app_id!r} == "grpo-verilog":
2997
+ image = image.apt_install("iverilog")
2998
+
2999
+ # Install apt packages first (before pip)
3000
+ apt_packages = {apt_packages!r}
3001
+ if apt_packages:
3002
+ image = image.apt_install(*apt_packages)
3003
+
1054
3004
  pip_packages = {pip_packages!r}
1055
3005
  if pip_packages:
1056
3006
  image = image.pip_install(*pip_packages)
1057
3007
 
1058
3008
  local_dirs = {local_dirs!r}
3009
+
3010
+ def _copy_tree_filtered(src_dir: str) -> str:
3011
+ src = _Path(src_dir)
3012
+ temp_dir = _Path(tempfile.mkdtemp(prefix='synth_mount_'))
3013
+
3014
+ exclude_dirs = {".cache", ".git", "__pycache__"}
3015
+ exclude_globs = ['*.db', '*.db-journal', '*-wal', '*-shm']
3016
+
3017
+ for root, dirs, files in os.walk(src):
3018
+ rel_root = _Path(root).relative_to(src)
3019
+ # filter dirs in-place
3020
+ dirs[:] = [d for d in dirs if d not in exclude_dirs]
3021
+ # ensure target directory exists
3022
+ target_dir = (temp_dir / rel_root)
3023
+ target_dir.mkdir(parents=True, exist_ok=True)
3024
+ # copy files with filtering
3025
+ for name in files:
3026
+ if any(fnmatch.fnmatch(name, pat) for pat in exclude_globs):
3027
+ continue
3028
+ src_file = _Path(root) / name
3029
+ dst_file = target_dir / name
3030
+ try:
3031
+ shutil.copy2(src_file, dst_file)
3032
+ except Exception:
3033
+ # ignore problematic files
3034
+ continue
3035
+ return str(temp_dir)
3036
+
1059
3037
  for local_src, remote_dst in local_dirs:
1060
- image = image.add_local_dir(local_src, remote_dst)
3038
+ safe_src = _copy_tree_filtered(local_src)
3039
+ image = image.add_local_dir(safe_src, remote_dst)
1061
3040
 
1062
3041
  secrets = {secret_names!r}
1063
3042
  secret_objs = [Secret.from_name(name) for name in secrets]
1064
3043
 
3044
+ if INLINE_SECRET_VALUES:
3045
+ secret_objs.append(Secret.from_dict(INLINE_SECRET_VALUES))
3046
+
1065
3047
  if DOTENV_PATHS:
1066
3048
  secret_objs.extend(Secret.from_dotenv(path) for path in DOTENV_PATHS)
1067
3049
 
@@ -1070,13 +3052,6 @@ volume_map = {{}}
1070
3052
  for vol_name, mount_path in volume_mounts:
1071
3053
  volume_map[mount_path] = Volume.from_name(vol_name, create_if_missing=True)
1072
3054
 
1073
- importlib.import_module(MODULE_NAME)
1074
-
1075
- entry = registry.get(ENTRY_ID)
1076
- modal_cfg = entry.modal
1077
- if modal_cfg is None:
1078
- raise RuntimeError("Modal configuration missing for task app {entry.app_id}")
1079
-
1080
3055
  app = App(MODAL_APP_NAME)
1081
3056
 
1082
3057
  @app.function(
@@ -1091,17 +3066,69 @@ app = App(MODAL_APP_NAME)
1091
3066
  )
1092
3067
  @asgi_app()
1093
3068
  def fastapi_app():
3069
+ # Import the module to trigger registration (inside container)
3070
+ import os
3071
+ # Prefer mounted source over any preinstalled site-packages version
3072
+ import sys as _sys
3073
+ for k in list(_sys.modules.keys()):
3074
+ if k == 'synth_ai' or k.startswith('synth_ai.'):
3075
+ _sys.modules.pop(k, None)
3076
+ import importlib as _importlib
3077
+ _importlib.invalidate_caches()
3078
+ try:
3079
+ if MODULE_FILE and os.path.exists(MODULE_FILE):
3080
+ spec = importlib.util.spec_from_file_location(MODULE_NAME or 'task_app_module', MODULE_FILE)
3081
+ if not spec or not spec.loader:
3082
+ raise RuntimeError("Failed to prepare spec for: " + str(MODULE_FILE))
3083
+ mod = importlib.util.module_from_spec(spec)
3084
+ sys.modules[MODULE_NAME or 'task_app_module'] = mod
3085
+ spec.loader.exec_module(mod)
3086
+ else:
3087
+ try:
3088
+ importlib.import_module(MODULE_NAME)
3089
+ except Exception:
3090
+ fallback_file = '/opt/synth_ai_repo/__local_task_app__/' + (MODULE_NAME.split('.')[-1] if MODULE_NAME else 'task_app') + '.py'
3091
+ if os.path.exists(fallback_file):
3092
+ spec = importlib.util.spec_from_file_location(MODULE_NAME or 'task_app_module', fallback_file)
3093
+ if not spec or not spec.loader:
3094
+ raise RuntimeError("Failed to prepare fallback spec for: " + str(fallback_file))
3095
+ mod = importlib.util.module_from_spec(spec)
3096
+ sys.modules[MODULE_NAME or 'task_app_module'] = mod
3097
+ spec.loader.exec_module(mod)
3098
+ else:
3099
+ raise
3100
+ except Exception as e:
3101
+ raise RuntimeError("Task app import failed: " + str(e))
3102
+
3103
+ # Get the entry from registry (now that it's registered)
3104
+ from synth_ai.task.apps import registry
3105
+ from synth_ai.task.server import create_task_app
3106
+ entry = registry.get(ENTRY_ID)
3107
+ cfg = entry.modal
3108
+ if cfg is None:
3109
+ raise RuntimeError("Modal configuration missing for task app " + ENTRY_ID)
1094
3110
  config = entry.config_factory()
1095
3111
  return create_task_app(config)
1096
3112
  """
1097
3113
 
1098
- tmp = tempfile.NamedTemporaryFile("w", suffix=f"_{entry.app_id}_modal.py", delete=False)
1099
- tmp.write(script)
1100
- tmp.flush()
1101
- tmp.close()
1102
- return Path(tmp.name)
3114
+ with tempfile.NamedTemporaryFile("w", suffix=f"_{entry.app_id}_modal.py", delete=False) as tmp:
3115
+ tmp.write(script)
3116
+ tmp.flush()
3117
+ name = tmp.name
3118
+ return Path(name)
1103
3119
 
1104
3120
 
1105
3121
  def register(cli: click.Group) -> None:
1106
3122
  cli.add_command(serve_command)
1107
3123
  cli.add_command(task_app_group)
3124
+ cli.add_command(eval_command)
3125
+ cli.add_command(filter_command)
3126
+
3127
+
3128
+ eval_command = eval_core.command
3129
+
3130
+ filter_command = filter_core.command
3131
+
3132
+
3133
+ def register_eval(cli: click.Group) -> None:
3134
+ cli.add_command(eval_command)