synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (353) hide show
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +23 -17
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +53 -52
  22. examples/rl/run_rl_and_save.py +29 -12
  23. examples/rl/task_app/math_single_step.py +180 -41
  24. examples/rl/task_app/math_task_app.py +14 -6
  25. examples/sft/README.md +139 -0
  26. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  27. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  28. examples/sft/evaluate.py +117 -0
  29. examples/sft/export_dataset.py +117 -0
  30. examples/sft/generate_traces.py +162 -0
  31. examples/swe/__init__.py +12 -0
  32. examples/swe/task_app/README.md +105 -0
  33. examples/swe/task_app/__init__.py +2 -0
  34. examples/swe/task_app/grpo_swe_mini.py +571 -0
  35. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  36. examples/swe/task_app/hosted/README.md +173 -0
  37. examples/swe/task_app/hosted/__init__.py +5 -0
  38. examples/swe/task_app/hosted/branching.py +143 -0
  39. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  40. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  41. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  42. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  43. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  44. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  45. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  46. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  47. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  50. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  53. examples/swe/task_app/hosted/hosted_app.py +204 -0
  54. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  55. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  56. examples/swe/task_app/hosted/main.py +100 -0
  57. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  58. examples/swe/task_app/hosted/registry.py +195 -0
  59. examples/swe/task_app/hosted/rollout.py +1869 -0
  60. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  61. examples/swe/task_app/hosted/storage/volume.py +211 -0
  62. examples/swe/task_app/hosted/test_agents.py +161 -0
  63. examples/swe/task_app/hosted/test_service.py +137 -0
  64. examples/swe/task_app/hosted/utils.py +62 -0
  65. examples/vlm/README.md +68 -0
  66. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  67. examples/vlm/crafter_image_only_agent.py +207 -0
  68. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  69. examples/vlm/filter_image_rows.py +63 -0
  70. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  71. examples/warming_up_to_rl/analyze_trace_db.py +12 -10
  72. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  73. examples/warming_up_to_rl/export_trace_sft.py +218 -36
  74. examples/warming_up_to_rl/groq_test.py +15 -8
  75. examples/warming_up_to_rl/manage_secrets.py +29 -25
  76. examples/warming_up_to_rl/readme.md +9 -2
  77. examples/warming_up_to_rl/run_eval.py +137 -61
  78. examples/warming_up_to_rl/run_fft_and_save.py +131 -60
  79. examples/warming_up_to_rl/run_local_rollout.py +88 -39
  80. examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
  81. examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
  82. examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
  83. examples/warming_up_to_rl/run_rl_and_save.py +35 -12
  84. examples/warming_up_to_rl/run_rollout_remote.py +44 -19
  85. examples/warming_up_to_rl/task_app/README.md +6 -2
  86. examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
  87. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  111. synth/__init__.py +14 -0
  112. synth_ai/__init__.py +20 -4
  113. synth_ai/api/models/supported.py +376 -0
  114. synth_ai/api/train/builders.py +157 -26
  115. synth_ai/api/train/cli.py +213 -57
  116. synth_ai/api/train/config_finder.py +65 -5
  117. synth_ai/api/train/env_resolver.py +33 -15
  118. synth_ai/api/train/pollers.py +13 -4
  119. synth_ai/api/train/supported_algos.py +139 -0
  120. synth_ai/api/train/task_app.py +5 -3
  121. synth_ai/api/train/utils.py +33 -48
  122. synth_ai/cli/__init__.py +19 -4
  123. synth_ai/cli/_modal_wrapper.py +28 -0
  124. synth_ai/cli/_typer_patch.py +49 -0
  125. synth_ai/cli/balance.py +2 -3
  126. synth_ai/cli/calc.py +1 -1
  127. synth_ai/cli/demo.py +21 -6
  128. synth_ai/cli/recent.py +2 -2
  129. synth_ai/cli/rl_demo.py +77 -17
  130. synth_ai/cli/root.py +116 -39
  131. synth_ai/cli/status.py +2 -2
  132. synth_ai/cli/task_apps.py +1709 -243
  133. synth_ai/cli/traces.py +7 -4
  134. synth_ai/cli/turso.py +73 -0
  135. synth_ai/cli/watch.py +12 -18
  136. synth_ai/core/experiment.py +0 -2
  137. synth_ai/demo_registry.py +68 -31
  138. synth_ai/demos/core/cli.py +516 -194
  139. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  140. synth_ai/demos/demo_task_apps/core.py +64 -28
  141. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  142. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
  143. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  144. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  145. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  146. synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
  147. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  148. synth_ai/environments/examples/bandit/engine.py +12 -4
  149. synth_ai/environments/examples/bandit/taskset.py +4 -4
  150. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  151. synth_ai/environments/reproducibility/tree.py +5 -6
  152. synth_ai/environments/service/app.py +11 -12
  153. synth_ai/environments/service/core_routes.py +10 -9
  154. synth_ai/environments/stateful/engine.py +1 -1
  155. synth_ai/environments/tasks/core.py +1 -0
  156. synth_ai/environments/tasks/filters.py +5 -6
  157. synth_ai/environments/tasks/utils.py +4 -5
  158. synth_ai/evals/base.py +0 -2
  159. synth_ai/handshake.py +11 -9
  160. synth_ai/http.py +1 -1
  161. synth_ai/http_client.py +43 -11
  162. synth_ai/inference/__init__.py +0 -2
  163. synth_ai/inference/client.py +20 -6
  164. synth_ai/jobs/client.py +103 -78
  165. synth_ai/learning/__init__.py +41 -6
  166. synth_ai/learning/algorithms.py +14 -0
  167. synth_ai/learning/client.py +121 -29
  168. synth_ai/learning/config.py +2 -40
  169. synth_ai/learning/constants.py +0 -2
  170. synth_ai/learning/ft_client.py +4 -56
  171. synth_ai/learning/health.py +13 -7
  172. synth_ai/learning/jobs.py +43 -47
  173. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  174. synth_ai/learning/rl/client.py +267 -0
  175. synth_ai/learning/rl/config.py +31 -0
  176. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  177. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  178. synth_ai/learning/rl/secrets.py +13 -0
  179. synth_ai/learning/rl_client.py +2 -253
  180. synth_ai/learning/sft/__init__.py +29 -0
  181. synth_ai/learning/sft/client.py +68 -0
  182. synth_ai/learning/sft/config.py +270 -0
  183. synth_ai/learning/sft/data.py +295 -0
  184. synth_ai/learning/sse.py +25 -26
  185. synth_ai/learning/validators.py +25 -24
  186. synth_ai/lm/__init__.py +21 -47
  187. synth_ai/task/__init__.py +26 -27
  188. synth_ai/task/apps/__init__.py +18 -19
  189. synth_ai/task/auth.py +35 -23
  190. synth_ai/task/client.py +15 -13
  191. synth_ai/task/contracts.py +37 -35
  192. synth_ai/task/datasets.py +9 -6
  193. synth_ai/task/errors.py +11 -10
  194. synth_ai/task/health.py +17 -11
  195. synth_ai/task/json.py +58 -24
  196. synth_ai/task/proxy.py +15 -14
  197. synth_ai/task/rubrics.py +22 -15
  198. synth_ai/task/server.py +43 -17
  199. synth_ai/task/tracing_utils.py +12 -7
  200. synth_ai/task/validators.py +0 -1
  201. synth_ai/task/vendors.py +5 -7
  202. synth_ai/tracing_v3/__init__.py +2 -0
  203. synth_ai/tracing_v3/abstractions.py +21 -4
  204. synth_ai/tracing_v3/db_config.py +26 -1
  205. synth_ai/tracing_v3/decorators.py +18 -15
  206. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  207. synth_ai/tracing_v3/hooks.py +6 -4
  208. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  209. synth_ai/tracing_v3/replica_sync.py +1 -0
  210. synth_ai/tracing_v3/session_tracer.py +63 -16
  211. synth_ai/tracing_v3/storage/base.py +89 -1
  212. synth_ai/tracing_v3/storage/config.py +21 -8
  213. synth_ai/tracing_v3/storage/factory.py +10 -8
  214. synth_ai/tracing_v3/storage/utils.py +4 -2
  215. synth_ai/tracing_v3/turso/daemon.py +7 -2
  216. synth_ai/tracing_v3/turso/models.py +5 -2
  217. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  218. synth_ai/tracing_v3/utils.py +4 -3
  219. synth_ai/v0/api/__init__.py +8 -0
  220. synth_ai/v0/api/models/__init__.py +8 -0
  221. synth_ai/v0/api/models/supported.py +8 -0
  222. synth_ai/v0/config/__init__.py +15 -0
  223. synth_ai/v0/config/base_url.py +12 -0
  224. synth_ai/v0/lm/__init__.py +51 -0
  225. synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
  226. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  227. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  228. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  229. synth_ai/{lm → v0/lm}/config.py +6 -1
  230. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  231. synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
  232. synth_ai/{lm → v0/lm}/core/main.py +19 -7
  233. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  234. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
  235. synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
  236. synth_ai/{lm → v0/lm}/overrides.py +4 -4
  237. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  238. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  239. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  240. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  241. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
  242. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  243. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  244. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
  245. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
  246. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
  247. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
  248. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  249. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  250. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  251. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  252. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  253. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  254. synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
  255. synth_ai/v0/tracing/upload.py +32 -135
  256. synth_ai/v0/tracing_v3/__init__.py +10 -0
  257. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  258. synth_ai/v0/tracing_v3/decorators.py +3 -0
  259. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  260. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  261. synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
  262. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -264
  263. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
  264. examples/common_old/backend.py +0 -21
  265. examples/evals_old/README.md +0 -98
  266. examples/evals_old/__init__.py +0 -6
  267. examples/evals_old/compare_models.py +0 -1037
  268. examples/evals_old/example_log.md +0 -145
  269. examples/evals_old/run_demo.sh +0 -126
  270. examples/evals_old/trace_analysis.py +0 -270
  271. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  272. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  273. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  274. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
  275. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  276. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  277. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  278. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  279. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  280. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
  281. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  282. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  283. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
  284. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  285. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  286. examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
  287. examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
  288. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  289. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  290. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
  291. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
  292. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
  293. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  294. examples/finetuning_old/synth_qwen_v1/util.py +0 -147
  295. examples/rl_old/task_app.py +0 -962
  296. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  297. examples/warming_up_to_rl/old/notes.md +0 -73
  298. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
  299. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  300. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  301. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  302. synth_ai/experimental/synth_oss.py +0 -446
  303. synth_ai/install_sqld.sh +0 -40
  304. synth_ai/learning/filtering.py +0 -0
  305. synth_ai/learning/offline/dpo.py +0 -0
  306. synth_ai/learning/offline/providers.py +0 -7
  307. synth_ai/learning/offline/sft.py +0 -0
  308. synth_ai/learning/offline/shared.py +0 -0
  309. synth_ai/learning/online/grpo.py +0 -0
  310. synth_ai/learning/online/irft.py +0 -0
  311. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  312. synth_ai/learning/prompts/gepa.py +0 -0
  313. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  314. synth_ai/learning/prompts/mipro.py +0 -289
  315. synth_ai/learning/prompts/random_search.py +0 -246
  316. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  317. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  318. synth_ai/rl/secrets.py +0 -19
  319. synth_ai/scripts/verify_rewards.py +0 -100
  320. synth_ai/tracing/__init__.py +0 -30
  321. synth_ai/tracing_v1/__init__.py +0 -33
  322. synth_ai/tracing_v3/turso/__init__.py +0 -25
  323. synth_ai/tracing_v3/turso/manager.py +0 -774
  324. synth_ai/zyk/__init__.py +0 -30
  325. synth_ai-0.2.9.dev4.dist-info/METADATA +0 -131
  326. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  327. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  328. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  329. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  330. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  331. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  332. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  333. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  334. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  335. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  336. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  337. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  338. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  339. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  340. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  341. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  342. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  343. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  344. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  345. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  346. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  347. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  348. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  349. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  350. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  351. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
synth_ai/cli/task_apps.py CHANGED
@@ -1,25 +1,37 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import ast
4
+ import asyncio
4
5
  import contextlib
5
- import functools
6
6
  import hashlib
7
7
  import importlib
8
8
  import importlib.util
9
9
  import inspect
10
+ import json
10
11
  import os
11
- import signal
12
12
  import shutil
13
+ import signal
13
14
  import subprocess
14
15
  import sys
15
16
  import tempfile
17
+ import textwrap
18
+ import types
19
+ from collections.abc import Callable, Iterable, Iterator, Sequence
16
20
  from dataclasses import dataclass
17
21
  from pathlib import Path
18
- from typing import Callable, Iterable, Sequence
22
+ from typing import Any, cast
23
+
24
+ try: # Python 3.11+
25
+ import tomllib as _toml
26
+ except Exception: # pragma: no cover - fallback
27
+ _toml = None # type: ignore
28
+ import uuid
19
29
 
20
30
  import click
31
+
32
+ from synth_ai.config.base_url import PROD_BASE_URL_DEFAULT
21
33
  from synth_ai.task.apps import ModalDeploymentConfig, TaskAppConfig, TaskAppEntry, registry
22
- from synth_ai.task.server import run_task_app
34
+ from synth_ai.task.server import create_task_app, run_task_app
23
35
 
24
36
  REPO_ROOT = Path(__file__).resolve().parents[2]
25
37
 
@@ -37,6 +49,8 @@ DEFAULT_IGNORE_DIRS = {
37
49
 
38
50
  DEFAULT_SEARCH_RELATIVE = (
39
51
  Path("."),
52
+ Path("examples"),
53
+ Path("synth_ai"),
40
54
  )
41
55
 
42
56
 
@@ -63,6 +77,73 @@ class AppChoice:
63
77
  return entry
64
78
 
65
79
 
80
+ def _temporary_sys_path(paths: Sequence[Path]):
81
+ """Context manager to prepend entries to sys.path temporarily."""
82
+
83
+ @contextlib.contextmanager
84
+ def _manager() -> Iterator[None]:
85
+ added: list[str] = []
86
+ for p in paths:
87
+ try:
88
+ resolved = str(p.resolve())
89
+ except Exception:
90
+ continue
91
+ if resolved in sys.path:
92
+ continue
93
+ sys.path.insert(0, resolved)
94
+ added.append(resolved)
95
+ try:
96
+ yield None
97
+ finally:
98
+ for entry in added:
99
+ with contextlib.suppress(ValueError):
100
+ sys.path.remove(entry)
101
+
102
+ return _manager()
103
+
104
+
105
+ def _possible_module_names(
106
+ path: Path, module_search_roots: Sequence[Path]
107
+ ) -> list[tuple[str, Path]]:
108
+ """Return potential module names based on candidate roots."""
109
+
110
+ candidates: list[tuple[str, Path]] = []
111
+ for root in module_search_roots:
112
+ try:
113
+ resolved_root = root.resolve()
114
+ except Exception:
115
+ continue
116
+ if not resolved_root.exists() or not path.is_relative_to(resolved_root):
117
+ continue
118
+ relative = path.relative_to(resolved_root)
119
+ stem = relative.with_suffix("")
120
+ parts = list(stem.parts)
121
+ if not parts:
122
+ continue
123
+ module_name = ".".join(parts)
124
+ if module_name:
125
+ candidates.append((module_name, resolved_root))
126
+ return candidates
127
+
128
+
129
+ def _ensure_parent_namespace(module_name: str, search_root: Path) -> None:
130
+ """Ensure namespace packages exist for dotted module names."""
131
+
132
+ parts = module_name.split(".")
133
+ for depth in range(1, len(parts)):
134
+ parent_name = ".".join(parts[:depth])
135
+ if parent_name in sys.modules:
136
+ continue
137
+ parent_module = types.ModuleType(parent_name)
138
+ candidate_dir = search_root.joinpath(*parts[:depth])
139
+ try:
140
+ resolved = candidate_dir.resolve()
141
+ except Exception:
142
+ resolved = search_root.resolve()
143
+ parent_module.__path__ = [str(resolved)] # type: ignore[attr-defined]
144
+ sys.modules[parent_name] = parent_module
145
+
146
+
66
147
  def _should_ignore_path(path: Path) -> bool:
67
148
  return any(part in DEFAULT_IGNORE_DIRS for part in path.parts)
68
149
 
@@ -70,7 +151,19 @@ def _should_ignore_path(path: Path) -> bool:
70
151
  def _candidate_search_roots() -> list[Path]:
71
152
  """Only search for task apps in the current working directory and subdirectories."""
72
153
  roots: list[Path] = []
73
-
154
+
155
+ # Prioritize demo directory if it exists
156
+ try:
157
+ from synth_ai.demos.demo_task_apps.core import load_demo_dir
158
+
159
+ demo_dir = load_demo_dir()
160
+ if demo_dir:
161
+ demo_path = Path(demo_dir)
162
+ if demo_path.exists() and demo_path.is_dir():
163
+ roots.append(demo_path.resolve())
164
+ except Exception:
165
+ pass
166
+
74
167
  # Allow explicit search paths via environment variable
75
168
  env_paths = os.environ.get("SYNTH_TASK_APP_SEARCH_PATH")
76
169
  if env_paths:
@@ -82,6 +175,13 @@ def _candidate_search_roots() -> list[Path]:
82
175
  cwd = Path.cwd().resolve()
83
176
  roots.append(cwd)
84
177
 
178
+ for rel in DEFAULT_SEARCH_RELATIVE:
179
+ try:
180
+ candidate = (cwd / rel).resolve()
181
+ except Exception:
182
+ continue
183
+ roots.append(candidate)
184
+
85
185
  # Remove duplicates while preserving order
86
186
  seen: set[Path] = set()
87
187
  ordered: list[Path] = []
@@ -97,6 +197,49 @@ def _candidate_search_roots() -> list[Path]:
97
197
  return ordered
98
198
 
99
199
 
200
+ def _eval_config_sort_key(path: Path) -> tuple[int, int, int, str]:
201
+ name = path.name.lower()
202
+ parent_names = {p.name.lower() for p in path.parents}
203
+ in_configs = 0 if "configs" in parent_names else 1
204
+ in_examples = 0 if "examples" in parent_names else 1
205
+ starts_eval = 0 if name.startswith("eval") else 1
206
+ return (in_configs, in_examples, starts_eval, str(path))
207
+
208
+
209
def _discover_eval_config_paths() -> list[Path]:
    """Find candidate eval TOML files under the task-app search roots.

    Every directory returned by ``_candidate_search_roots()`` is walked
    recursively for ``*.toml`` files whose lower-cased file name contains
    ``"eval"``.  Paths rejected by ``_should_ignore_path`` and entries that
    are not regular files are dropped.  Results are resolved, de-duplicated
    (search roots may overlap) and ordered with ``_eval_config_sort_key``.

    Returns:
        Resolved paths of the matching files, best candidates first.
    """

    candidates: list[Path] = []
    seen: set[Path] = set()
    for root in _candidate_search_roots():
        # is_dir() is already False for a missing path.
        if not root.is_dir():
            continue
        try:
            root = root.resolve()
        except Exception:
            # Best effort: an unusable root is skipped, not fatal.
            continue
        for path in root.rglob("*.toml"):
            # "evaluation" contains "eval", so one substring test covers both.
            if "eval" not in path.name.lower():
                continue
            # Pure path checks come first; is_file() needs a stat call.
            if _should_ignore_path(path) or not path.is_file():
                continue
            try:
                resolved = path.resolve()
            except Exception:
                continue
            if resolved in seen:
                continue
            seen.add(resolved)
            candidates.append(resolved)

    candidates.sort(key=_eval_config_sort_key)
    return candidates
241
+
242
+
100
243
  class _TaskAppConfigVisitor(ast.NodeVisitor):
101
244
  def __init__(self) -> None:
102
245
  self.matches: list[tuple[str, int]] = []
@@ -115,16 +258,18 @@ class _TaskAppConfigVisitor(ast.NodeVisitor):
115
258
 
116
259
  def _is_task_app_config_call(node: ast.Call) -> bool:
117
260
  func = node.func
118
- if isinstance(func, ast.Name) and func.id == "TaskAppConfig":
119
- return True
120
- if isinstance(func, ast.Attribute) and func.attr == "TaskAppConfig":
121
- return True
122
- return False
261
+ return (isinstance(func, ast.Name) and func.id == "TaskAppConfig") or (
262
+ isinstance(func, ast.Attribute) and func.attr == "TaskAppConfig"
263
+ )
123
264
 
124
265
 
125
266
  def _extract_app_id(node: ast.Call) -> str | None:
126
267
  for kw in node.keywords:
127
- if kw.arg == "app_id" and isinstance(kw.value, ast.Constant) and isinstance(kw.value.value, str):
268
+ if (
269
+ kw.arg == "app_id"
270
+ and isinstance(kw.value, ast.Constant)
271
+ and isinstance(kw.value.value, str)
272
+ ):
128
273
  return kw.value.value
129
274
  if node.args:
130
275
  first = node.args[0]
@@ -135,11 +280,9 @@ def _extract_app_id(node: ast.Call) -> str | None:
135
280
 
136
281
  def _is_register_task_app_call(node: ast.Call) -> bool:
137
282
  func = node.func
138
- if isinstance(func, ast.Name) and func.id == "register_task_app":
139
- return True
140
- if isinstance(func, ast.Attribute) and func.attr == "register_task_app":
141
- return True
142
- return False
283
+ return (isinstance(func, ast.Name) and func.id == "register_task_app") or (
284
+ isinstance(func, ast.Attribute) and func.attr == "register_task_app"
285
+ )
143
286
 
144
287
 
145
288
  def _extract_register_app_id(node: ast.Call) -> str | None:
@@ -149,7 +292,11 @@ def _extract_register_app_id(node: ast.Call) -> str | None:
149
292
  entry_call = kw.value
150
293
  if isinstance(entry_call.func, ast.Name) and entry_call.func.id == "TaskAppEntry":
151
294
  for entry_kw in entry_call.keywords:
152
- if entry_kw.arg == "app_id" and isinstance(entry_kw.value, ast.Constant) and isinstance(entry_kw.value.value, str):
295
+ if (
296
+ entry_kw.arg == "app_id"
297
+ and isinstance(entry_kw.value, ast.Constant)
298
+ and isinstance(entry_kw.value.value, str)
299
+ ):
153
300
  return entry_kw.value.value
154
301
  return None
155
302
 
@@ -180,7 +327,11 @@ class _ModalAppVisitor(ast.NodeVisitor):
180
327
  if name:
181
328
  self.matches.append((name, getattr(node, "lineno", 0)))
182
329
  elif isinstance(func, ast.Attribute):
183
- if isinstance(func.value, ast.Name) and func.value.id in self.modal_aliases and func.attr == "App":
330
+ if (
331
+ isinstance(func.value, ast.Name)
332
+ and func.value.id in self.modal_aliases
333
+ and func.attr == "App"
334
+ ):
184
335
  name = _extract_modal_app_name(node)
185
336
  if name:
186
337
  self.matches.append((name, getattr(node, "lineno", 0)))
@@ -189,7 +340,11 @@ class _ModalAppVisitor(ast.NodeVisitor):
189
340
 
190
341
  def _extract_modal_app_name(node: ast.Call) -> str | None:
191
342
  for kw in node.keywords:
192
- if kw.arg in {"name", "app_name"} and isinstance(kw.value, ast.Constant) and isinstance(kw.value.value, str):
343
+ if (
344
+ kw.arg in {"name", "app_name"}
345
+ and isinstance(kw.value, ast.Constant)
346
+ and isinstance(kw.value.value, str)
347
+ ):
193
348
  return kw.value.value
194
349
  if node.args:
195
350
  first = node.args[0]
@@ -201,7 +356,7 @@ def _extract_modal_app_name(node: ast.Call) -> str | None:
201
356
  def _collect_task_app_choices() -> list[AppChoice]:
202
357
  # Clear registry to avoid duplicate registration errors
203
358
  registry.clear()
204
-
359
+
205
360
  choices: list[AppChoice] = []
206
361
  with contextlib.suppress(Exception):
207
362
  import synth_ai.demos.demo_task_apps # noqa: F401
@@ -224,6 +379,7 @@ def _collect_task_app_choices() -> list[AppChoice]:
224
379
  continue
225
380
  unique[key] = choice
226
381
  ordered.append(choice)
382
+ ordered.sort(key=_app_choice_sort_key)
227
383
  return ordered
228
384
 
229
385
 
@@ -254,6 +410,10 @@ def _collect_scanned_task_configs() -> list[AppChoice]:
254
410
  results: list[AppChoice] = []
255
411
  seen: set[tuple[str, Path]] = set()
256
412
  for root in _candidate_search_roots():
413
+ try:
414
+ root_resolved = root.resolve()
415
+ except Exception:
416
+ continue
257
417
  if not root.exists() or not root.is_dir():
258
418
  continue
259
419
  for path in root.rglob("*.py"):
@@ -283,7 +443,11 @@ def _collect_scanned_task_configs() -> list[AppChoice]:
283
443
  path=path.resolve(),
284
444
  source="discovered",
285
445
  description=f"TaskAppConfig in {path.name} (line {lineno})",
286
- entry_loader=lambda p=path.resolve(), a=app_id: _load_entry_from_path(p, a),
446
+ entry_loader=lambda p=path.resolve(),
447
+ a=app_id,
448
+ roots=(root_resolved,): _load_entry_from_path(
449
+ p, a, module_search_roots=roots
450
+ ),
287
451
  lineno=lineno,
288
452
  )
289
453
  )
@@ -330,15 +494,67 @@ def _collect_modal_scripts() -> list[AppChoice]:
330
494
  return results
331
495
 
332
496
 
497
def _app_choice_sort_key(choice: "AppChoice") -> tuple[int, int, int, int, int, str, str]:
    """Ranking heuristic so wrapper-style task apps surface first.

    Tuples compare element by element, so earlier components dominate.
    Every rank is an int where a lower value sorts earlier:

    * ``demo_rank`` -- 0 when the app lives under the directory returned by
      ``load_demo_dir()``, otherwise 1.
    * ``cwd_rank`` -- 0 when the app is at most one directory below the
      current working directory, otherwise 1.
    * ``modal_rank`` -- 0 when ``choice.modal_script`` is falsy, otherwise 1.
    * ``file_rank`` -- 0 for ``*task_app.py``, 1 for ``*_app.py`` or any name
      containing ``task_app``, 2 for other ``.py`` files, 3 for anything else.
    * ``directory_rank`` -- 0 when the parent directory is named ``task_app``
      or ``task_apps``, otherwise 1.

    ``choice.app_id`` and ``str(choice.path)`` are appended as tie-breakers.

    Args:
        choice: Candidate exposing ``path``, ``modal_script`` and ``app_id``.

    Returns:
        The seven-element key described above.
    """
    # Containment checks are purely lexical, so compare resolved paths on both
    # sides; a relative ``choice.path`` would otherwise never match the
    # (absolute) working or demo directory.
    try:
        resolved_path = choice.path.resolve()
    except (OSError, RuntimeError):
        resolved_path = choice.path

    # Prioritize apps in the current working directory (demo or otherwise).
    cwd_rank = 1
    try:
        rel_parts = resolved_path.relative_to(Path.cwd().resolve()).parts
    except (OSError, RuntimeError, ValueError):
        # cwd unavailable, or the app is simply not below it.
        pass
    else:
        # Immediate directory or one level deep only; deeper trees such as
        # examples/... are not boosted.
        if len(rel_parts) <= 2:
            cwd_rank = 0

    # Further prioritize apps in the demo directory if one is set.
    demo_rank = 1
    try:
        from synth_ai.demos.demo_task_apps.core import load_demo_dir

        demo_dir = load_demo_dir()
        if demo_dir and resolved_path.is_relative_to(Path(demo_dir).resolve()):
            demo_rank = 0
    except Exception:
        # Best effort: the demo helpers are optional and may fail to import or
        # to load their state; ranking must never break discovery.
        pass

    modal_rank = 1 if choice.modal_script else 0

    name = choice.path.name.lower()
    if name.endswith("task_app.py"):  # also covers "*_task_app.py"
        file_rank = 0
    elif name.endswith("_app.py") or "task_app" in name:
        file_rank = 1
    elif name.endswith(".py"):
        file_rank = 2
    else:
        file_rank = 3

    directory_rank = 0 if choice.path.parent.name.lower() in {"task_app", "task_apps"} else 1

    return (
        demo_rank,
        cwd_rank,
        modal_rank,
        file_rank,
        directory_rank,
        choice.app_id,
        str(choice.path),
    )
551
+
552
+
333
553
  def _choice_matches_identifier(choice: AppChoice, identifier: str) -> bool:
334
554
  ident = identifier.strip()
335
555
  if not ident:
336
556
  return False
337
- if ident == choice.app_id or ident == choice.label:
338
- return True
339
- if ident in choice.aliases:
340
- return True
341
- return False
557
+ return ident == choice.app_id or ident == choice.label or ident in choice.aliases
342
558
 
343
559
 
344
560
  def _choice_has_modal_support(choice: AppChoice) -> bool:
@@ -357,21 +573,26 @@ def _has_modal_support_in_file(path: Path) -> bool:
357
573
  try:
358
574
  source = path.read_text(encoding="utf-8")
359
575
  tree = ast.parse(source, filename=str(path))
360
-
576
+
361
577
  # Look for ModalDeploymentConfig in register_task_app calls
362
578
  for node in ast.walk(tree):
363
- if isinstance(node, ast.Call):
364
- if _is_register_task_app_call(node):
365
- # Check if the entry has modal=ModalDeploymentConfig(...)
366
- for kw in node.keywords:
367
- if kw.arg == "entry" and isinstance(kw.value, ast.Call):
368
- entry_call = kw.value
369
- if isinstance(entry_call.func, ast.Name) and entry_call.func.id == "TaskAppEntry":
370
- for entry_kw in entry_call.keywords:
371
- if entry_kw.arg == "modal" and isinstance(entry_kw.value, ast.Call):
372
- modal_call = entry_kw.value
373
- if isinstance(modal_call.func, ast.Name) and modal_call.func.id == "ModalDeploymentConfig":
374
- return True
579
+ if isinstance(node, ast.Call) and _is_register_task_app_call(node):
580
+ # Check if the entry has modal=ModalDeploymentConfig(...)
581
+ for kw in node.keywords:
582
+ if kw.arg == "entry" and isinstance(kw.value, ast.Call):
583
+ entry_call = kw.value
584
+ if (
585
+ isinstance(entry_call.func, ast.Name)
586
+ and entry_call.func.id == "TaskAppEntry"
587
+ ):
588
+ for entry_kw in entry_call.keywords:
589
+ if entry_kw.arg == "modal" and isinstance(entry_kw.value, ast.Call):
590
+ modal_call = entry_kw.value
591
+ if (
592
+ isinstance(modal_call.func, ast.Name)
593
+ and modal_call.func.id == "ModalDeploymentConfig"
594
+ ):
595
+ return True
375
596
  except Exception:
376
597
  pass
377
598
  return False
@@ -382,22 +603,27 @@ def _extract_modal_config_from_file(path: Path) -> ModalDeploymentConfig | None:
382
603
  try:
383
604
  source = path.read_text(encoding="utf-8")
384
605
  tree = ast.parse(source, filename=str(path))
385
-
606
+
386
607
  # Look for ModalDeploymentConfig in register_task_app calls
387
608
  for node in ast.walk(tree):
388
- if isinstance(node, ast.Call):
389
- if _is_register_task_app_call(node):
390
- # Check if the entry has modal=ModalDeploymentConfig(...)
391
- for kw in node.keywords:
392
- if kw.arg == "entry" and isinstance(kw.value, ast.Call):
393
- entry_call = kw.value
394
- if isinstance(entry_call.func, ast.Name) and entry_call.func.id == "TaskAppEntry":
395
- for entry_kw in entry_call.keywords:
396
- if entry_kw.arg == "modal" and isinstance(entry_kw.value, ast.Call):
397
- modal_call = entry_kw.value
398
- if isinstance(modal_call.func, ast.Name) and modal_call.func.id == "ModalDeploymentConfig":
399
- # Extract the arguments to ModalDeploymentConfig
400
- return _build_modal_config_from_ast(modal_call)
609
+ if isinstance(node, ast.Call) and _is_register_task_app_call(node):
610
+ # Check if the entry has modal=ModalDeploymentConfig(...)
611
+ for kw in node.keywords:
612
+ if kw.arg == "entry" and isinstance(kw.value, ast.Call):
613
+ entry_call = kw.value
614
+ if (
615
+ isinstance(entry_call.func, ast.Name)
616
+ and entry_call.func.id == "TaskAppEntry"
617
+ ):
618
+ for entry_kw in entry_call.keywords:
619
+ if entry_kw.arg == "modal" and isinstance(entry_kw.value, ast.Call):
620
+ modal_call = entry_kw.value
621
+ if (
622
+ isinstance(modal_call.func, ast.Name)
623
+ and modal_call.func.id == "ModalDeploymentConfig"
624
+ ):
625
+ # Extract the arguments to ModalDeploymentConfig
626
+ return _build_modal_config_from_ast(modal_call)
401
627
  except Exception:
402
628
  pass
403
629
  return None
@@ -411,43 +637,44 @@ def _build_modal_config_from_ast(modal_call: ast.Call) -> ModalDeploymentConfig
411
637
  for kw in modal_call.keywords:
412
638
  if kw.arg and isinstance(kw.value, ast.Constant):
413
639
  kwargs[kw.arg] = kw.value.value
414
- elif kw.arg == "pip_packages" and isinstance(kw.value, (ast.List, ast.Tuple)):
640
+ elif kw.arg == "pip_packages" and isinstance(kw.value, ast.List | ast.Tuple):
415
641
  # Handle pip_packages list/tuple
416
642
  packages = []
417
643
  for elt in kw.value.elts:
418
644
  if isinstance(elt, ast.Constant):
419
645
  packages.append(elt.value)
420
646
  kwargs[kw.arg] = tuple(packages)
421
- elif kw.arg == "extra_local_dirs" and isinstance(kw.value, (ast.List, ast.Tuple)):
647
+ elif kw.arg == "extra_local_dirs" and isinstance(kw.value, ast.List | ast.Tuple):
422
648
  # Handle extra_local_dirs list/tuple of tuples
423
649
  dirs = []
424
650
  for elt in kw.value.elts:
425
- if isinstance(elt, (ast.List, ast.Tuple)) and len(elt.elts) == 2:
651
+ if isinstance(elt, ast.List | ast.Tuple) and len(elt.elts) == 2:
426
652
  src = elt.elts[0].value if isinstance(elt.elts[0], ast.Constant) else None
427
653
  dst = elt.elts[1].value if isinstance(elt.elts[1], ast.Constant) else None
428
654
  if src and dst:
429
655
  dirs.append((src, dst))
430
656
  kwargs[kw.arg] = tuple(dirs)
431
- elif kw.arg == "secret_names" and isinstance(kw.value, (ast.List, ast.Tuple)):
657
+ elif kw.arg == "secret_names" and isinstance(kw.value, ast.List | ast.Tuple):
432
658
  # Handle secret_names list/tuple
433
659
  secrets = []
434
660
  for elt in kw.value.elts:
435
661
  if isinstance(elt, ast.Constant):
436
662
  secrets.append(elt.value)
437
663
  kwargs[kw.arg] = tuple(secrets)
438
- elif kw.arg == "volume_mounts" and isinstance(kw.value, (ast.List, ast.Tuple)):
664
+ elif kw.arg == "volume_mounts" and isinstance(kw.value, ast.List | ast.Tuple):
439
665
  # Handle volume_mounts list/tuple of tuples
440
666
  mounts = []
441
667
  for elt in kw.value.elts:
442
- if isinstance(elt, (ast.List, ast.Tuple)) and len(elt.elts) == 2:
668
+ if isinstance(elt, ast.List | ast.Tuple) and len(elt.elts) == 2:
443
669
  name = elt.elts[0].value if isinstance(elt.elts[0], ast.Constant) else None
444
670
  mount = elt.elts[1].value if isinstance(elt.elts[1], ast.Constant) else None
445
671
  if name and mount:
446
672
  mounts.append((name, mount))
447
673
  kwargs[kw.arg] = tuple(mounts)
448
-
674
+
449
675
  # Create ModalDeploymentConfig with extracted arguments
450
676
  from synth_ai.task.apps import ModalDeploymentConfig
677
+
451
678
  return ModalDeploymentConfig(**kwargs)
452
679
  except Exception:
453
680
  return None
@@ -465,20 +692,29 @@ def _choice_has_local_support(choice: AppChoice) -> bool:
465
692
 
466
693
  def _format_choice(choice: AppChoice, index: int | None = None) -> str:
467
694
  prefix = f"[{index}] " if index is not None else ""
468
- rel_path: str
695
+ # Get file modification timestamp
469
696
  try:
470
- rel_path = str(choice.path.relative_to(REPO_ROOT))
697
+ from datetime import datetime
698
+
699
+ mtime = choice.path.stat().st_mtime
700
+ modified_str = datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M:%S")
701
+ details = f"Modified: {modified_str}"
471
702
  except Exception:
472
- rel_path = str(choice.path)
473
- details = choice.description or f"Located at {rel_path}"
474
- return f"{prefix}{choice.app_id} ({choice.source}) {details}"
703
+ # Fallback if timestamp unavailable
704
+ details = choice.description or "No timestamp available"
705
+ # Format: single line with timestamp
706
+ main_line = f"{prefix}{choice.app_id} ({choice.source}) – {details}"
707
+ return main_line
475
708
 
476
709
 
477
710
  def _prompt_user_for_choice(choices: list[AppChoice]) -> AppChoice:
478
711
  click.echo("Select a task app:")
479
712
  for idx, choice in enumerate(choices, start=1):
480
713
  click.echo(_format_choice(choice, idx))
481
- response = click.prompt("Enter choice", default="1", type=str).strip() or "1"
714
+ try:
715
+ response = click.prompt("Enter choice", default="1", type=str).strip() or "1"
716
+ except (click.exceptions.Abort, EOFError, KeyboardInterrupt) as exc:
717
+ raise click.ClickException("Task app selection cancelled by user") from exc
482
718
  if not response.isdigit():
483
719
  raise click.ClickException("Selection must be a number")
484
720
  index = int(response)
@@ -489,7 +725,7 @@ def _prompt_user_for_choice(choices: list[AppChoice]) -> AppChoice:
489
725
 
490
726
  def _select_app_choice(app_id: str | None, purpose: str) -> AppChoice:
491
727
  choices = _collect_task_app_choices()
492
- if purpose == "serve":
728
+ if purpose in {"serve", "eval"}:
493
729
  filtered = [c for c in choices if not c.modal_script]
494
730
  elif purpose in {"deploy", "modal-serve"}:
495
731
  filtered = []
@@ -499,6 +735,8 @@ def _select_app_choice(app_id: str | None, purpose: str) -> AppChoice:
499
735
  else:
500
736
  filtered = choices
501
737
 
738
+ filtered.sort(key=_app_choice_sort_key)
739
+
502
740
  if not filtered:
503
741
  raise click.ClickException("No task apps discovered for this command.")
504
742
 
@@ -526,22 +764,90 @@ def _select_app_choice(app_id: str | None, purpose: str) -> AppChoice:
526
764
  return _prompt_user_for_choice(filtered)
527
765
 
528
766
 
529
- def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
530
- resolved = path.resolve()
531
- module_name = f"_synth_task_app_{hashlib.md5(str(resolved).encode(), usedforsecurity=False).hexdigest()}"
767
+ def _import_task_app_module(
768
+ resolved: Path,
769
+ module_name: str,
770
+ *,
771
+ namespace_root: Path | None,
772
+ sys_path_roots: Sequence[Path],
773
+ ensure_namespace: bool = True,
774
+ ) -> types.ModuleType:
532
775
  spec = importlib.util.spec_from_file_location(module_name, str(resolved))
533
776
  if spec is None or spec.loader is None:
534
777
  raise click.ClickException(f"Unable to load Python module from {resolved}")
778
+
535
779
  module = importlib.util.module_from_spec(spec)
536
780
  sys.modules[module_name] = module
537
-
538
- # Clear registry before importing to avoid duplicate registration errors
539
- registry.clear()
540
-
541
- try:
542
- spec.loader.exec_module(module)
543
- except Exception as exc:
544
- raise click.ClickException(f"Failed to import {resolved}: {exc}") from exc
781
+
782
+ with _temporary_sys_path(sys_path_roots):
783
+ if ensure_namespace and namespace_root is not None and "." in module_name:
784
+ _ensure_parent_namespace(module_name, namespace_root)
785
+
786
+ # Clear registry before importing to avoid duplicate registration errors
787
+ registry.clear()
788
+
789
+ try:
790
+ spec.loader.exec_module(module)
791
+ except Exception:
792
+ # Remove partially-imported module to avoid reuse
793
+ sys.modules.pop(module_name, None)
794
+ raise
795
+
796
+ return module
797
+
798
+
799
+ def _load_entry_from_path(
800
+ path: Path, app_id: str, module_search_roots: Sequence[Path] | None = None
801
+ ) -> TaskAppEntry:
802
+ resolved = path.resolve()
803
+ search_roots: list[Path] = []
804
+ seen_roots: set[Path] = set()
805
+
806
+ def _append_root(candidate: Path) -> None:
807
+ try:
808
+ resolved_root = candidate.resolve()
809
+ except Exception:
810
+ return
811
+ if resolved_root in seen_roots:
812
+ return
813
+ seen_roots.add(resolved_root)
814
+ search_roots.append(resolved_root)
815
+
816
+ for root in module_search_roots or []:
817
+ _append_root(root)
818
+ _append_root(resolved.parent)
819
+ _append_root(REPO_ROOT)
820
+
821
+ last_error: Exception | None = None
822
+ module: types.ModuleType | None = None
823
+
824
+ for module_name, namespace_root in _possible_module_names(resolved, search_roots):
825
+ try:
826
+ module = _import_task_app_module(
827
+ resolved,
828
+ module_name,
829
+ namespace_root=namespace_root,
830
+ sys_path_roots=search_roots,
831
+ ensure_namespace=True,
832
+ )
833
+ break
834
+ except Exception as exc: # pragma: no cover - best-effort fallbacks
835
+ last_error = exc
836
+ continue
837
+
838
+ if module is None:
839
+ hashed_name = f"_synth_task_app_{hashlib.md5(str(resolved).encode(), usedforsecurity=False).hexdigest()}"
840
+ try:
841
+ module = _import_task_app_module(
842
+ resolved,
843
+ hashed_name,
844
+ namespace_root=None,
845
+ sys_path_roots=search_roots,
846
+ ensure_namespace=False,
847
+ )
848
+ except Exception as exc: # pragma: no cover - propagate meaningful error
849
+ detail = last_error or exc
850
+ raise click.ClickException(f"Failed to import {resolved}: {detail}") from detail
545
851
 
546
852
  config_obj: TaskAppConfig | None = None
547
853
  factory_callable: Callable[[], TaskAppConfig] | None = None
@@ -553,7 +859,11 @@ def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
553
859
  continue
554
860
  if isinstance(attr, TaskAppConfig) and attr.app_id == app_id:
555
861
  config_obj = attr
556
- factory_callable = lambda cfg=attr: cfg
862
+
863
+ def _return_config(cfg: TaskAppConfig = attr) -> TaskAppConfig:
864
+ return cfg
865
+
866
+ factory_callable = _return_config
557
867
  break
558
868
 
559
869
  if factory_callable is None:
@@ -572,7 +882,11 @@ def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
572
882
  continue
573
883
  has_required = False
574
884
  for param in sig.parameters.values():
575
- if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD) and param.default is inspect._empty:
885
+ if (
886
+ param.kind
887
+ in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
888
+ and param.default is inspect._empty
889
+ ):
576
890
  has_required = True
577
891
  break
578
892
  if has_required:
@@ -582,9 +896,15 @@ def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
582
896
  except Exception:
583
897
  continue
584
898
  if isinstance(result, TaskAppConfig) and result.app_id == app_id:
585
- def _factory() -> TaskAppConfig:
586
- return attr() # type: ignore[call-arg]
587
- factory_callable = _factory
899
+ # Bind attr to a local and close over it without exposing parameters
900
+ bound_func: Callable[[], TaskAppConfig] = cast(Callable[[], TaskAppConfig], attr) # type: ignore[assignment]
901
+
902
+ def _factory_noargs(
903
+ func: Callable[[], TaskAppConfig] = bound_func,
904
+ ) -> TaskAppConfig:
905
+ return func()
906
+
907
+ factory_callable = _factory_noargs
588
908
  config_obj = result
589
909
  break
590
910
 
@@ -594,10 +914,10 @@ def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
594
914
  # Check if the app was registered in the registry
595
915
  entry = registry.get(app_id)
596
916
  return entry
597
- except KeyError:
917
+ except KeyError as exc:
598
918
  raise click.ClickException(
599
919
  f"Could not locate TaskAppConfig for '{app_id}' in {resolved}."
600
- )
920
+ ) from exc
601
921
 
602
922
  modal_cfg: ModalDeploymentConfig | None = None
603
923
  for attr_name in dir(module):
@@ -608,7 +928,7 @@ def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
608
928
  if isinstance(attr, ModalDeploymentConfig):
609
929
  modal_cfg = attr
610
930
  break
611
-
931
+
612
932
  # If no ModalDeploymentConfig found, try to detect it via AST parsing
613
933
  if modal_cfg is None:
614
934
  modal_cfg = _extract_modal_config_from_file(resolved)
@@ -640,34 +960,131 @@ def _resolve_env_paths_for_script(script_path: Path, explicit: Sequence[str]) ->
640
960
  # Always prompt for env file selection instead of auto-loading defaults
641
961
  script_dir = script_path.parent.resolve()
642
962
  cwd = Path.cwd()
643
-
963
+
644
964
  # Look for env files in current working directory first, then repo root
645
965
  env_candidates = []
646
-
966
+
647
967
  # Add CWD env files first (prioritized)
648
- cwd_env_files = sorted(cwd.glob('**/*.env'))
968
+ cwd_env_files = sorted(cwd.glob("**/*.env"))
649
969
  env_candidates.extend(cwd_env_files)
650
-
970
+
651
971
  # Add repo root env files
652
- repo_env_files = sorted(REPO_ROOT.glob('**/*.env'))
972
+ repo_env_files = sorted(REPO_ROOT.glob("**/*.env"))
653
973
  # Avoid duplicates
654
974
  for repo_file in repo_env_files:
655
975
  if repo_file not in env_candidates:
656
976
  env_candidates.append(repo_file)
657
-
977
+
658
978
  if not env_candidates:
659
979
  created = _interactive_create_env(script_dir)
660
980
  if created is None:
661
981
  raise click.ClickException("Env file required (--env-file) for this task app")
662
982
  return [created]
663
983
 
664
- click.echo('Select env file to load:')
984
+ click.echo("Select env file to load:")
665
985
  for idx, path in enumerate(env_candidates, start=1):
666
- click.echo(f" {idx}) {path}")
667
- choice = click.prompt('Enter choice', type=click.IntRange(1, len(env_candidates)))
986
+ click.echo(f" {idx}) {path.resolve()}")
987
+ choice = click.prompt("Enter choice", type=click.IntRange(1, len(env_candidates)), default=1)
668
988
  return [env_candidates[choice - 1]]
669
989
 
670
990
 
991
def _modal_command_prefix(modal_cli: str) -> list[str]:
    """Build the argv prefix used to launch the Modal CLI.

    When the default CLI name (``"modal"``) is requested and the ``modal``
    package is importable in this interpreter, the prefix runs the bundled
    ``synth_ai.cli._modal_wrapper`` module with the current Python executable.
    Otherwise ``modal_cli`` is looked up with ``shutil.which``.

    Args:
        modal_cli: CLI name or path supplied by the caller.

    Returns:
        The list of leading command-line arguments.

    Raises:
        click.ClickException: If no usable Modal CLI can be located.
    """
    wants_default_cli = modal_cli == "modal"

    if wants_default_cli and importlib.util.find_spec("modal") is not None:
        return [sys.executable, "-m", "synth_ai.cli._modal_wrapper"]

    located = shutil.which(modal_cli)
    if located is not None:
        return [located]

    if wants_default_cli:
        message = (
            "Modal CLI not found. Install the 'modal' package in this environment or pass "
            "--modal-cli with an explicit path."
        )
    else:
        message = f"Modal CLI not found (looked for '{modal_cli}')"
    raise click.ClickException(message)
1006
+
1007
+
1008
def _build_modal_app_wrapper(original_script: Path) -> tuple[Path, Path]:
    """Write a temporary wrapper script that loads a Modal task app.

    The generated script:

    * puts the script's directory, that directory's parent and ``REPO_ROOT``
      on ``sys.path``;
    * executes ``original_script`` as the module ``_synth_modal_target``;
    * if the module exposes an ``image`` that is a ``modal.Image``, tries to
      add the script directory and ``REPO_ROOT / "synth_ai"`` to it as local
      dirs (failures are ignored);
    * falls back to the module's ``modal_app`` when ``app`` is not a
      ``modal.App`` and exits if neither is one;
    * re-exports the module's non-dunder globals, including ``app``.

    Args:
        original_script: Path to the user's Modal task app script.

    Returns:
        ``(wrapper_path, temp_root)`` -- the generated ``__modal_wrapper__.py``
        and the fresh temporary directory that contains it. The directory is
        not removed here; the caller should delete it when finished.

    Raises:
        OSError: If the temporary directory or wrapper file cannot be written.
    """
    source_dir = original_script.parent.resolve()
    repo_root = REPO_ROOT
    # Every value interpolated into the generated source goes through repr()
    # so quotes or backslashes in a path cannot break the script.
    load_error = f"Unable to load modal task app from {original_script}"

    wrapper_source = textwrap.dedent(
        f"""
        from importlib import util as _util
        from pathlib import Path as _Path
        import sys as _sys

        _source_dir = _Path({str(source_dir)!r}).resolve()
        _module_path = _source_dir / {original_script.name!r}
        _package_name = _source_dir.name
        _repo_root = _Path({str(repo_root)!r}).resolve()
        _synth_dir = _repo_root / "synth_ai"

        for _path in (str(_source_dir), str(_source_dir.parent), str(_repo_root)):
            if _path not in _sys.path:
                _sys.path.insert(0, _path)

        _spec = _util.spec_from_file_location("_synth_modal_target", str(_module_path))
        if _spec is None or _spec.loader is None:
            raise SystemExit({load_error!r})
        _module = _util.module_from_spec(_spec)
        _sys.modules.setdefault("_synth_modal_target", _module)
        _spec.loader.exec_module(_module)

        try:
            from modal import App as _ModalApp
            from modal import Image as _ModalImage
        except Exception:
            _ModalApp = None  # type: ignore[assignment]
            _ModalImage = None  # type: ignore[assignment]

        def _apply_local_mounts(image):
            if _ModalImage is None or not isinstance(image, _ModalImage):
                return image
            mounts = [
                (str(_source_dir), f"/root/{{_package_name}}"),
                (str(_synth_dir), "/root/synth_ai"),
            ]
            for local_path, remote_path in mounts:
                try:
                    image = image.add_local_dir(local_path, remote_path=remote_path)
                except Exception:
                    pass
            return image

        if hasattr(_module, "image"):
            _module.image = _apply_local_mounts(getattr(_module, "image"))

        _candidate = getattr(_module, "app", None)
        if _ModalApp is None or not isinstance(_candidate, _ModalApp):
            candidate_modal_app = getattr(_module, "modal_app", None)
            if _ModalApp is not None and isinstance(candidate_modal_app, _ModalApp):
                _candidate = candidate_modal_app
                setattr(_module, "app", _candidate)

        if _ModalApp is not None and not isinstance(_candidate, _ModalApp):
            raise SystemExit(
                "Modal task app must expose an 'app = modal.App(...)' (or modal_app) attribute."
            )

        for remote_path in ("/root/synth_ai", f"/root/{{_package_name}}"):
            if remote_path not in _sys.path:
                _sys.path.insert(0, remote_path)

        globals().update({{k: v for k, v in vars(_module).items() if not k.startswith("__")}})
        app = getattr(_module, "app")
        """
    ).strip()

    # Create the directory only once the source is ready, and remove it again
    # if the write fails so no empty temp directory is left behind.
    temp_root = Path(tempfile.mkdtemp(prefix="synth_modal_app_"))
    wrapper_path = temp_root / "__modal_wrapper__.py"
    try:
        wrapper_path.write_text(wrapper_source + "\n", encoding="utf-8")
    except OSError:
        shutil.rmtree(temp_root, ignore_errors=True)
        raise
    return wrapper_path, temp_root
1085
+
1086
+
1087
+
671
1088
  def _run_modal_script(
672
1089
  script_path: Path,
673
1090
  modal_cli: str,
@@ -677,43 +1094,143 @@ def _run_modal_script(
677
1094
  modal_name: str | None = None,
678
1095
  dry_run: bool = False,
679
1096
  ) -> None:
680
- modal_path = shutil.which(modal_cli)
681
- if modal_path is None:
682
- raise click.ClickException(f"Modal CLI not found (looked for '{modal_cli}')")
683
-
684
1097
  env_paths_list = [Path(p).resolve() for p in env_paths]
685
1098
  path_strings = [str(p) for p in env_paths_list]
686
1099
  _load_env_files_into_process(path_strings)
687
1100
  _ensure_env_values(env_paths_list, script_path.parent)
688
1101
  _load_env_values(env_paths_list)
1102
+ # Ensure ENVIRONMENT_API_KEY is uploaded to backend for this org (matches registry path behavior)
1103
+ try:
1104
+ _preflight_env_key(env_paths_list, crash_on_failure=True)
1105
+ except Exception as _pf_err:
1106
+ raise click.ClickException(str(_pf_err))
689
1107
 
690
- cmd = [modal_path, command, str(script_path)]
691
- if modal_name:
1108
+ proc_env = os.environ.copy()
1109
+ pythonpath_entries: list[str] = []
1110
+ script_dir = script_path.parent.resolve()
1111
+ pythonpath_entries.append(str(script_dir))
1112
+ if (script_dir / "__init__.py").exists():
1113
+ # Script lives inside a package; ensure the parent package directory is importable.
1114
+ pythonpath_entries.append(str(script_dir.parent.resolve()))
1115
+ pythonpath_entries.append(str(REPO_ROOT))
1116
+ existing_pp = proc_env.get("PYTHONPATH")
1117
+ if existing_pp:
1118
+ pythonpath_entries.append(existing_pp)
1119
+ unique_paths = list(dict.fromkeys(pythonpath_entries))
1120
+ proc_env["PYTHONPATH"] = os.pathsep.join(unique_paths)
1121
+
1122
+ wrapper_info: tuple[Path, Path] | None = None
1123
+ target_script = script_path
1124
+ if command in {"serve", "deploy"}:
1125
+ wrapper_path, temp_root = _build_modal_app_wrapper(script_path)
1126
+ wrapper_info = (wrapper_path, temp_root)
1127
+ target_script = wrapper_path
1128
+
1129
+ # Ensure the wrapper has access to the Synth AI source for intra-repo imports
1130
+ if "PYTHONPATH" in proc_env:
1131
+ proc_env["PYTHONPATH"] = os.pathsep.join(
1132
+ [str(REPO_ROOT)] + proc_env["PYTHONPATH"].split(os.pathsep)
1133
+ )
1134
+ else:
1135
+ proc_env["PYTHONPATH"] = str(REPO_ROOT)
1136
+
1137
+ cmd = [*_modal_command_prefix(modal_cli), command, str(target_script)]
1138
+ if modal_name and command == "deploy":
692
1139
  cmd.extend(["--name", modal_name])
693
1140
  if dry_run:
694
1141
  click.echo("Dry run: " + " ".join(cmd))
695
1142
  return
696
1143
  try:
697
- subprocess.run(cmd, check=True)
1144
+ # Stream output live for better diagnostics
1145
+ proc = subprocess.Popen(
1146
+ cmd,
1147
+ stdout=subprocess.PIPE,
1148
+ stderr=subprocess.STDOUT,
1149
+ text=True,
1150
+ bufsize=1,
1151
+ env=proc_env,
1152
+ )
1153
+ task_app_url = None
1154
+ assert proc.stdout is not None
1155
+ for line in proc.stdout:
1156
+ click.echo(line, nl=False)
1157
+ if task_app_url is None and ("modal.run" in line and "=>" in line):
1158
+ parts = line.split("=>")
1159
+ if len(parts) >= 2:
1160
+ task_app_url = parts[-1].strip()
1161
+ if task_app_url and env_paths_list:
1162
+ env_file = env_paths_list[0]
1163
+ _save_to_env_file(env_file, "TASK_APP_BASE_URL", task_app_url)
1164
+ click.echo(f"\n✓ Task app URL: {task_app_url}\n")
1165
+ rc = proc.wait()
1166
+ if rc != 0:
1167
+ raise subprocess.CalledProcessError(rc, cmd)
698
1168
  except subprocess.CalledProcessError as exc:
699
- raise click.ClickException(f"modal {command} failed with exit code {exc.returncode}") from exc
1169
+ raise click.ClickException(
1170
+ f"modal {command} failed with exit code {exc.returncode}"
1171
+ ) from exc
1172
+ finally:
1173
+ if wrapper_info is not None:
1174
+ wrapper_path, temp_root = wrapper_info
1175
+ try:
1176
+ wrapper_path.unlink(missing_ok=True)
1177
+ except Exception:
1178
+ pass
1179
+ shutil.rmtree(temp_root, ignore_errors=True)
700
1180
 
701
1181
 
702
- def _preflight_env_key(crash_on_failure: bool = False) -> None:
1182
+ def _preflight_env_key(env_paths: Sequence[Path] | None = None, *, crash_on_failure: bool = False) -> None:
703
1183
  try:
704
- raw_backend = os.environ.get("BACKEND_BASE_URL") or os.environ.get("SYNTH_BASE_URL") or "http://localhost:8000/api"
705
- backend_base = raw_backend.rstrip('/')
706
- if not backend_base.endswith('/api'):
707
- backend_base = backend_base + '/api'
1184
+ raw_backend = (
1185
+ os.environ.get("BACKEND_BASE_URL")
1186
+ or os.environ.get("SYNTH_BASE_URL")
1187
+ or f"{PROD_BASE_URL_DEFAULT}/api"
1188
+ )
1189
+ backend_base = raw_backend.rstrip("/")
1190
+ if not backend_base.endswith("/api"):
1191
+ backend_base = backend_base + "/api"
708
1192
  synth_key = os.environ.get("SYNTH_API_KEY") or ""
709
1193
  env_api_key = (
710
- os.environ.get("ENVIRONMENT_API_KEY")
711
- or os.environ.get("dev_environment_api_key")
712
- or os.environ.get("DEV_ENVIRONMENT_API_KEY")
713
- or ""
714
- )
1194
+ os.environ.get("ENVIRONMENT_API_KEY") or os.environ.get("DEV_ENVIRONMENT_API_KEY") or ""
1195
+ ).strip()
1196
+
1197
+ def _preview(value: str) -> str:
1198
+ if len(value) <= 10:
1199
+ return value
1200
+ return f"{value[:6]}...{value[-4:]}"
1201
+
1202
+ minted = False
1203
+ if not env_api_key:
1204
+ try:
1205
+ from synth_ai.learning.rl.secrets import mint_environment_api_key
1206
+
1207
+ env_api_key = mint_environment_api_key()
1208
+ os.environ["ENVIRONMENT_API_KEY"] = env_api_key
1209
+ os.environ.setdefault("DEV_ENVIRONMENT_API_KEY", env_api_key)
1210
+ minted = True
1211
+ click.echo(
1212
+ f"[preflight] minted ENVIRONMENT_API_KEY ({_preview(env_api_key)})"
1213
+ )
1214
+ except Exception as mint_err:
1215
+ if crash_on_failure:
1216
+ raise click.ClickException(
1217
+ f"[CRITICAL] Failed to mint ENVIRONMENT_API_KEY: {mint_err}"
1218
+ ) from mint_err
1219
+ click.echo(
1220
+ f"[WARN] Failed to mint ENVIRONMENT_API_KEY automatically ({mint_err}); proceeding without upload"
1221
+ )
1222
+
1223
+ if env_api_key and not os.environ.get("ENVIRONMENT_API_KEY"):
1224
+ os.environ["ENVIRONMENT_API_KEY"] = env_api_key
1225
+ if env_api_key and not os.environ.get("DEV_ENVIRONMENT_API_KEY"):
1226
+ os.environ["DEV_ENVIRONMENT_API_KEY"] = env_api_key
1227
+
1228
+ if minted:
1229
+ _persist_env_api_key(env_api_key, env_paths)
1230
+
715
1231
  if synth_key and env_api_key:
716
1232
  import base64
1233
+
717
1234
  import httpx
718
1235
 
719
1236
  click.echo(f"[preflight] backend={backend_base}")
@@ -725,34 +1242,107 @@ def _preflight_env_key(crash_on_failure: bool = False) -> None:
725
1242
  try:
726
1243
  from nacl.public import PublicKey, SealedBox
727
1244
 
728
- pub = PublicKey(base64.b64decode(pk, validate=True))
1245
+ # Decode public key and build sealed box
1246
+ pk_bytes = base64.b64decode(pk, validate=True)
1247
+ pub = PublicKey(pk_bytes)
729
1248
  sb = SealedBox(pub)
730
- ct_b64 = base64.b64encode(sb.encrypt(env_api_key.encode('utf-8'))).decode()
1249
+
1250
+ # Encrypt plaintext key
1251
+ ct_b64 = base64.b64encode(sb.encrypt(env_api_key.encode("utf-8"))).decode()
731
1252
  payload = {"name": "ENVIRONMENT_API_KEY", "ciphertext_b64": ct_b64}
732
- with httpx.Client(timeout=15.0, headers={"Authorization": f"Bearer {synth_key}", "Content-Type": "application/json"}) as c:
1253
+
1254
+ # Emit diagnostic logging (safe previews + hashes only)
1255
+ try:
1256
+ import hashlib as _hash
1257
+
1258
+ # Backend URL context
1259
+ click.echo(f"[preflight] posting to {backend_base.rstrip('/')}/v1/env-keys")
1260
+
1261
+ # Public key diagnostics
1262
+ pk_sha256 = _hash.sha256(pk_bytes).hexdigest()
1263
+ click.echo(
1264
+ f"[preflight] public_key: b64_len={len(pk)} sha256={pk_sha256} head={pk[:16]} tail={pk[-16:]}"
1265
+ )
1266
+
1267
+ # Plaintext diagnostics (never print full secret)
1268
+ _plain = env_api_key
1269
+ _plen = len(_plain)
1270
+ _ppref = (_plain[:6] + "…") if _plen > 10 else _plain
1271
+ _psuf = ("…" + _plain[-4:]) if _plen > 10 else ""
1272
+ _has_ws = any(ch.isspace() for ch in _plain)
1273
+ click.echo(
1274
+ f"[preflight] plaintext: len={_plen} preview={_ppref}{_psuf} has_ws={bool(_has_ws)}"
1275
+ )
1276
+
1277
+ # Ciphertext diagnostics
1278
+ try:
1279
+ _ct_bytes = base64.b64decode(ct_b64, validate=True)
1280
+ _ct_sha256 = _hash.sha256(_ct_bytes).hexdigest()
1281
+ click.echo(
1282
+ f"[preflight] ciphertext: b64_len={len(ct_b64)} sha256={_ct_sha256} head={ct_b64[:16]} tail={ct_b64[-16:]}"
1283
+ )
1284
+ except Exception:
1285
+ click.echo("[preflight] ciphertext: invalid base64 (unexpected)")
1286
+ except Exception:
1287
+ # Best-effort logging only
1288
+ pass
1289
+ with httpx.Client(
1290
+ timeout=15.0,
1291
+ headers={
1292
+ "Authorization": f"Bearer {synth_key}",
1293
+ "Content-Type": "application/json",
1294
+ },
1295
+ ) as c:
733
1296
  click.echo("[preflight] upserting env key…")
734
1297
  up = c.post(f"{backend_base.rstrip('/')}/v1/env-keys", json=payload)
735
- click.echo(f"[preflight] upsert status={up.status_code}")
736
- click.echo("[preflight] verifying env key presence…")
737
- ver = c.get(f"{backend_base.rstrip('/')}/v1/env-keys/verify")
738
- if ver.status_code == 200 and (ver.json() or {}).get("present"):
739
- # Show first and last 5 chars of the API key for verification
740
- key_preview = f"{env_api_key[:5]}...{env_api_key[-5:]}" if len(env_api_key) > 10 else env_api_key
741
- click.echo(f"✅ ENVIRONMENT_API_KEY upserted and verified in backend ({key_preview})")
1298
+ body_snip = ""
1299
+ try:
1300
+ body_snip = up.text[:400] if up.text else ""
1301
+ except Exception:
1302
+ body_snip = ""
1303
+ click.echo(f"[preflight] upsert status={up.status_code}{(' body='+body_snip) if body_snip else ''}")
1304
+
1305
+ # If upload succeeded (2xx), consider it successful even if verification fails
1306
+ # This handles cases where verification endpoint has issues
1307
+ if 200 <= up.status_code < 300:
1308
+ key_preview = (
1309
+ _preview(env_api_key)
1310
+ )
1311
+ click.echo(
1312
+ f"✅ ENVIRONMENT_API_KEY uploaded successfully ({key_preview})"
1313
+ )
1314
+
1315
+ # Try verification, but don't fail if it doesn't work
1316
+ click.echo("[preflight] verifying env key presence…")
1317
+ try:
1318
+ ver = c.get(f"{backend_base.rstrip('/')}/v1/env-keys/verify")
1319
+ if ver.status_code == 200 and (ver.json() or {}).get("present"):
1320
+ click.echo("✅ Key verified in backend")
1321
+ else:
1322
+ click.echo(
1323
+ f"⚠️ Verification returned {ver.status_code}, but upload succeeded - proceeding"
1324
+ )
1325
+ except Exception as verify_err:
1326
+ click.echo(
1327
+ f"⚠️ Verification check failed ({verify_err}), but upload succeeded - proceeding"
1328
+ )
742
1329
  else:
743
- error_msg = "ENVIRONMENT_API_KEY verification failed"
1330
+ error_msg = (
1331
+ f"ENVIRONMENT_API_KEY upload failed with status {up.status_code}"
1332
+ + (f" body={body_snip}" if body_snip else "")
1333
+ )
744
1334
  if crash_on_failure:
745
1335
  raise click.ClickException(f"[CRITICAL] {error_msg}")
746
1336
  click.echo(f"[WARN] {error_msg}; proceeding anyway")
747
1337
  except Exception as e:
748
1338
  error_msg = f"Failed to encrypt/upload ENVIRONMENT_API_KEY: {e}"
749
1339
  if crash_on_failure:
750
- raise click.ClickException(f"[CRITICAL] {error_msg}")
1340
+ raise click.ClickException(f"[CRITICAL] {error_msg}") from e
751
1341
  click.echo(f"[WARN] {error_msg}; proceeding anyway")
752
1342
  except Exception as e:
753
1343
  error_msg = f"Backend preflight for ENVIRONMENT_API_KEY failed: {e}"
754
1344
  if crash_on_failure:
755
- raise click.ClickException(f"[CRITICAL] {error_msg}")
1345
+ raise click.ClickException(f"[CRITICAL] {error_msg}") from e
756
1346
  click.echo(f"[WARN] {error_msg}; proceeding anyway")
757
1347
 
758
1348
 
@@ -767,17 +1357,33 @@ def _run_modal_with_entry(
767
1357
  dry_run: bool = False,
768
1358
  original_path: Path | None = None,
769
1359
  ) -> None:
770
- modal_path = shutil.which(modal_cli)
771
- if modal_path is None:
772
- raise click.ClickException(f"Modal CLI not found (looked for '{modal_cli}')")
773
-
774
1360
  env_paths_list = [Path(p).resolve() for p in env_paths]
775
1361
  dotenv_paths = [str(p) for p in env_paths_list]
776
1362
  _load_env_files_into_process(dotenv_paths)
777
1363
  fallback_dir = env_paths_list[0].parent if env_paths_list else Path.cwd()
778
1364
  _ensure_env_values(env_paths_list, fallback_dir)
779
1365
  _load_env_values(env_paths_list)
780
- _preflight_env_key(crash_on_failure=True)
1366
+ _preflight_env_key(env_paths_list, crash_on_failure=True)
1367
+
1368
+ inline_secret_values: dict[str, str] = {}
1369
+ env_key = os.environ.get("ENVIRONMENT_API_KEY", "").strip()
1370
+ if env_key:
1371
+ inline_secret_values["ENVIRONMENT_API_KEY"] = env_key
1372
+ inline_secret_values.setdefault("DEV_ENVIRONMENT_API_KEY", env_key)
1373
+ aliases = os.environ.get("ENVIRONMENT_API_KEY_ALIASES", "").strip()
1374
+ if aliases:
1375
+ inline_secret_values["ENVIRONMENT_API_KEY_ALIASES"] = aliases
1376
+ for vendor_key in ("GROQ_API_KEY", "OPENAI_API_KEY"):
1377
+ val = os.environ.get(vendor_key, "").strip()
1378
+ if val:
1379
+ inline_secret_values[vendor_key] = val
1380
+
1381
+ if inline_secret_values:
1382
+ preview = inline_secret_values.get("ENVIRONMENT_API_KEY", "")
1383
+ shown = f"{preview[:6]}...{preview[-4:]}" if preview and len(preview) > 10 else preview
1384
+ click.echo(f"[deploy] inline ENVIRONMENT_API_KEY prepared ({shown})")
1385
+ else:
1386
+ click.echo("[deploy] no inline ENVIRONMENT_API_KEY found; relying on Modal secrets/dotenv")
781
1387
 
782
1388
  script_path = _write_modal_entrypoint(
783
1389
  entry,
@@ -785,8 +1391,22 @@ def _run_modal_with_entry(
785
1391
  modal_name,
786
1392
  dotenv_paths=dotenv_paths,
787
1393
  original_path=original_path,
1394
+ inline_secret_values=inline_secret_values,
788
1395
  )
789
- cmd = [modal_path, command, str(script_path)]
1396
+ cmd = [*_modal_command_prefix(modal_cli), command, str(script_path)]
1397
+
1398
+ if modal_name and command == "deploy":
1399
+ cmd.extend(["--name", modal_name])
1400
+
1401
+ proc_env = os.environ.copy()
1402
+ pythonpath_entries: list[str] = [str(REPO_ROOT)]
1403
+ if original_path is not None:
1404
+ source_dir = Path(original_path).resolve().parent
1405
+ pythonpath_entries.insert(0, str(source_dir))
1406
+ existing_pp = proc_env.get("PYTHONPATH")
1407
+ if existing_pp:
1408
+ pythonpath_entries.append(existing_pp)
1409
+ proc_env["PYTHONPATH"] = os.pathsep.join(list(dict.fromkeys(pythonpath_entries)))
790
1410
 
791
1411
  if dry_run:
792
1412
  click.echo("Dry run: " + " ".join(cmd))
@@ -794,15 +1414,41 @@ def _run_modal_with_entry(
794
1414
  return
795
1415
 
796
1416
  try:
797
- subprocess.run(cmd, check=True)
1417
+ # Stream output live for better diagnostics
1418
+ proc = subprocess.Popen(
1419
+ cmd,
1420
+ stdout=subprocess.PIPE,
1421
+ stderr=subprocess.STDOUT,
1422
+ text=True,
1423
+ bufsize=1,
1424
+ env=proc_env,
1425
+ )
1426
+ task_app_url = None
1427
+ assert proc.stdout is not None
1428
+ for line in proc.stdout:
1429
+ # Echo lines as they arrive
1430
+ click.echo(line, nl=False)
1431
+ # Look for lines containing modal.run URLs
1432
+ if task_app_url is None and ("modal.run" in line and "=>" in line):
1433
+ parts = line.split("=>")
1434
+ if len(parts) >= 2:
1435
+ task_app_url = parts[-1].strip()
1436
+ # Save URL immediately for convenience
1437
+ if task_app_url and env_paths_list:
1438
+ env_file = env_paths_list[0]
1439
+ _save_to_env_file(env_file, "TASK_APP_BASE_URL", task_app_url)
1440
+ click.echo(f"\n✓ Task app URL: {task_app_url}\n")
1441
+ rc = proc.wait()
1442
+ if rc != 0:
1443
+ raise subprocess.CalledProcessError(rc, cmd)
798
1444
  except subprocess.CalledProcessError as exc:
799
- raise click.ClickException(f"modal {command} failed with exit code {exc.returncode}") from exc
1445
+ raise click.ClickException(
1446
+ f"modal {command} failed with exit code {exc.returncode}"
1447
+ ) from exc
800
1448
  finally:
801
1449
  script_path.unlink(missing_ok=True)
802
1450
 
803
1451
 
804
-
805
-
806
1452
  def _load_env_values(paths: list[Path], *, allow_empty: bool = False) -> dict[str, str]:
807
1453
  values: dict[str, str] = {}
808
1454
  for p in paths:
@@ -811,15 +1457,17 @@ def _load_env_values(paths: list[Path], *, allow_empty: bool = False) -> dict[st
811
1457
  except FileNotFoundError:
812
1458
  continue
813
1459
  for line in content.splitlines():
814
- if not line or line.lstrip().startswith('#') or '=' not in line:
1460
+ if not line or line.lstrip().startswith("#") or "=" not in line:
815
1461
  continue
816
- key, value = line.split('=', 1)
1462
+ key, value = line.split("=", 1)
817
1463
  if key and key not in values:
818
1464
  values[key.strip()] = value.strip()
819
1465
  if not allow_empty and not values:
820
1466
  raise click.ClickException("No environment values found")
821
1467
  os.environ.update({k: v for k, v in values.items() if k and v})
822
1468
  return values
1469
+
1470
+
823
1471
  def _interactive_create_env(target_dir: Path) -> Path | None:
824
1472
  env_path = (target_dir / ".env").resolve()
825
1473
  if env_path.exists():
@@ -838,9 +1486,9 @@ def _parse_env_file(path: Path) -> dict[str, str]:
838
1486
  data: dict[str, str] = {}
839
1487
  try:
840
1488
  for line in path.read_text(encoding="utf-8").splitlines():
841
- if not line or line.lstrip().startswith('#') or '=' not in line:
1489
+ if not line or line.lstrip().startswith("#") or "=" not in line:
842
1490
  continue
843
- key, value = line.split('=', 1)
1491
+ key, value = line.split("=", 1)
844
1492
  data[key.strip()] = value.strip()
845
1493
  except FileNotFoundError:
846
1494
  pass
@@ -853,7 +1501,9 @@ def _interactive_fill_env(env_path: Path) -> Path | None:
853
1501
  def _prompt(label: str, *, default: str = "", required: bool) -> str | None:
854
1502
  while True:
855
1503
  try:
856
- value = click.prompt(label, default=default, show_default=bool(default) or not required).strip()
1504
+ value = click.prompt(
1505
+ label, default=default, show_default=bool(default) or not required
1506
+ ).strip()
857
1507
  except (click.exceptions.Abort, EOFError, KeyboardInterrupt):
858
1508
  click.echo("Aborted env creation.")
859
1509
  return None
@@ -904,11 +1554,22 @@ def _deploy_entry(
904
1554
  ) -> None:
905
1555
  modal_cfg = entry.modal
906
1556
  if modal_cfg is None:
907
- raise click.ClickException(f"Task app '{entry.app_id}' does not define Modal deployment settings")
1557
+ raise click.ClickException(
1558
+ f"Task app '{entry.app_id}' does not define Modal deployment settings"
1559
+ )
908
1560
 
909
1561
  env_paths = _determine_env_files(entry, env_file)
910
- click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))
911
- _run_modal_with_entry(entry, modal_cfg, modal_cli, modal_name, env_paths, command="deploy", dry_run=dry_run, original_path=original_path)
1562
+ click.echo("Using env file(s): " + ", ".join(str(p.resolve()) for p in env_paths))
1563
+ _run_modal_with_entry(
1564
+ entry,
1565
+ modal_cfg,
1566
+ modal_cli,
1567
+ modal_name,
1568
+ env_paths,
1569
+ command="deploy",
1570
+ dry_run=dry_run,
1571
+ original_path=original_path,
1572
+ )
912
1573
 
913
1574
 
914
1575
  def _modal_serve_entry(
@@ -920,21 +1581,29 @@ def _modal_serve_entry(
920
1581
  ) -> None:
921
1582
  modal_cfg = entry.modal
922
1583
  if modal_cfg is None:
923
- raise click.ClickException(f"Task app '{entry.app_id}' does not define Modal deployment settings")
1584
+ raise click.ClickException(
1585
+ f"Task app '{entry.app_id}' does not define Modal deployment settings"
1586
+ )
924
1587
 
925
1588
  env_paths = _determine_env_files(entry, env_file)
926
- click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))
927
- _run_modal_with_entry(entry, modal_cfg, modal_cli, modal_name, env_paths, command="serve", original_path=original_path)
1589
+ click.echo("Using env file(s): " + ", ".join(str(p.resolve()) for p in env_paths))
1590
+ _run_modal_with_entry(
1591
+ entry,
1592
+ modal_cfg,
1593
+ modal_cli,
1594
+ modal_name,
1595
+ env_paths,
1596
+ command="serve",
1597
+ original_path=original_path,
1598
+ )
928
1599
 
929
- @click.group(
930
- name='task-app',
931
- help='Utilities for serving and deploying Synth task apps.'
932
- )
1600
+
1601
+ @click.group(name="task-app", help="Utilities for serving and deploying Synth task apps.")
933
1602
  def task_app_group() -> None:
934
1603
  pass
935
1604
 
936
1605
 
937
- @task_app_group.command('list')
1606
+ @task_app_group.command("list")
938
1607
  def list_apps() -> None:
939
1608
  """List registered task apps."""
940
1609
 
@@ -945,6 +1614,8 @@ def list_apps() -> None:
945
1614
  for entry in entries:
946
1615
  aliases = f" (aliases: {', '.join(entry.aliases)})" if entry.aliases else ""
947
1616
  click.echo(f"- {entry.app_id}{aliases}: {entry.description}")
1617
+
1618
+
948
1619
  def _load_env_files_into_process(paths: Sequence[str]) -> None:
949
1620
  for p in paths:
950
1621
  try:
@@ -952,9 +1623,9 @@ def _load_env_files_into_process(paths: Sequence[str]) -> None:
952
1623
  except Exception:
953
1624
  continue
954
1625
  for line in txt.splitlines():
955
- if not line or line.startswith('#') or '=' not in line:
1626
+ if not line or line.startswith("#") or "=" not in line:
956
1627
  continue
957
- k, v = line.split('=', 1)
1628
+ k, v = line.split("=", 1)
958
1629
  key = k.strip()
959
1630
  val = v.strip().strip('"').strip("'")
960
1631
  # Load into process, but allow overriding if the current value is empty
@@ -964,53 +1635,251 @@ def _load_env_files_into_process(paths: Sequence[str]) -> None:
964
1635
  os.environ[key] = val
965
1636
 
966
1637
 
967
-
968
- @click.command('serve')
969
- @click.argument('app_id', type=str, required=False)
970
- @click.option('--host', default='0.0.0.0', show_default=True)
971
- @click.option('--port', default=8001, show_default=True, type=int)
972
- @click.option('--env-file', multiple=True, type=click.Path(), help='Extra .env files to load')
973
- @click.option('--reload/--no-reload', 'reload_flag', default=False, help='Enable uvicorn auto-reload')
974
- @click.option('--force/--no-force', 'force', default=False, help='Kill any process already bound to the selected port before starting')
975
- @click.option('--trace', 'trace_dir', type=click.Path(), default=None, help='Enable tracing and write SFT JSONL files to this directory')
976
- @click.option('--trace-db', 'trace_db', type=click.Path(), default=None, help='Override local trace DB path (maps to SQLD_DB_PATH)')
1638
+ @click.command("serve")
1639
+ @click.argument("app_id", type=str, required=False)
1640
+ @click.option("--host", default="0.0.0.0", show_default=True)
1641
+ @click.option("--port", default=None, type=int, help="Port to serve on (default: 8001)")
1642
+ @click.option("--env-file", multiple=True, type=click.Path(), help="Extra .env files to load")
1643
+ @click.option(
1644
+ "--reload/--no-reload", "reload_flag", default=False, help="Enable uvicorn auto-reload"
1645
+ )
1646
+ @click.option(
1647
+ "--force/--no-force",
1648
+ "force",
1649
+ default=False,
1650
+ help="Kill any process already bound to the selected port before starting",
1651
+ )
1652
+ @click.option(
1653
+ "--trace",
1654
+ "trace_dir",
1655
+ type=click.Path(),
1656
+ default=None,
1657
+ help="Enable tracing and write SFT JSONL files to this directory (default: traces/v3)",
1658
+ )
1659
+ @click.option(
1660
+ "--trace-db",
1661
+ "trace_db",
1662
+ type=click.Path(),
1663
+ default=None,
1664
+ help="Override local trace DB path (default: traces/v3/synth_ai.db)",
1665
+ )
977
1666
  def serve_command(
978
1667
  app_id: str | None,
979
1668
  host: str,
980
- port: int,
1669
+ port: int | None,
981
1670
  env_file: Sequence[str],
982
1671
  reload_flag: bool,
983
1672
  force: bool,
984
1673
  trace_dir: str | None,
985
1674
  trace_db: str | None,
986
1675
  ) -> None:
1676
+ # Change to demo directory if stored (REQUIRED for demo isolation)
1677
+ from synth_ai.demos.demo_task_apps.core import load_demo_dir
1678
+
1679
+ demo_dir = load_demo_dir()
1680
+ if demo_dir:
1681
+ demo_path = Path(demo_dir)
1682
+ if not demo_path.is_dir():
1683
+ raise click.ClickException(
1684
+ f"Demo directory not found: {demo_dir}\nRun 'synth-ai setup' to create a demo."
1685
+ )
1686
+ os.chdir(demo_dir)
1687
+ click.echo(f"Using demo directory: {demo_dir}\n")
1688
+ # Store demo directory for path resolution
1689
+ os.environ["SYNTH_DEMO_DIR"] = str(demo_path.resolve())
1690
+
1691
+ # Prompt for port if not provided
1692
+ if port is None:
1693
+ port = click.prompt("Port to serve on", type=int, default=8001)
1694
+
1695
+ # Prompt for trace directory if not provided
1696
+ if trace_dir is None:
1697
+ click.echo(
1698
+ "\nTracing captures rollout data (actions, rewards, model outputs) to a local SQLite DB."
1699
+ )
1700
+ click.echo("This data can be exported to JSONL for supervised fine-tuning (SFT).")
1701
+ enable_tracing = click.confirm("Enable tracing?", default=True)
1702
+ if enable_tracing:
1703
+ demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
1704
+ default_trace_dir = str((demo_base / "traces/v3").resolve())
1705
+ trace_dir = click.prompt(
1706
+ "Trace directory", type=str, default=default_trace_dir, show_default=True
1707
+ )
1708
+ else:
1709
+ trace_dir = None
1710
+
1711
+ # Prompt for trace DB if not provided and tracing is enabled
1712
+ if trace_dir and trace_db is None:
1713
+ demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
1714
+ default_trace_db = str((demo_base / "traces/v3/synth_ai.db").resolve())
1715
+ trace_db = click.prompt(
1716
+ "Trace DB path", type=str, default=default_trace_db, show_default=True
1717
+ )
1718
+
987
1719
  choice = _select_app_choice(app_id, purpose="serve")
988
1720
  entry = choice.ensure_entry()
989
- _serve_entry(entry, host, port, env_file, reload_flag, force, trace_dir=trace_dir, trace_db=trace_db)
990
-
991
-
992
- @task_app_group.command('serve')
993
- @click.argument('app_id', type=str, required=False)
994
- @click.option('--host', default='0.0.0.0', show_default=True)
995
- @click.option('--port', default=8001, show_default=True, type=int)
996
- @click.option('--env-file', multiple=True, type=click.Path(), help='Extra .env files to load')
997
- @click.option('--reload/--no-reload', 'reload_flag', default=False, help='Enable uvicorn auto-reload')
998
- @click.option('--force/--no-force', 'force', default=False, help='Kill any process already bound to the selected port before starting')
999
- @click.option('--trace', 'trace_dir', type=click.Path(), default=None, help='Enable tracing and write SFT JSONL files to this directory')
1000
- @click.option('--trace-db', 'trace_db', type=click.Path(), default=None, help='Override local trace DB path (maps to SQLD_DB_PATH)')
1721
+ _serve_entry(
1722
+ entry, host, port, env_file, reload_flag, force, trace_dir=trace_dir, trace_db=trace_db
1723
+ )
1724
+
1725
+
1726
+ @task_app_group.command("info")
1727
+ @click.option(
1728
+ "--base",
1729
+ "base_url",
1730
+ default=None,
1731
+ help="Task app base URL (default: TASK_APP_BASE_URL or http://127.0.0.1:8001)",
1732
+ )
1733
+ @click.option(
1734
+ "--api-key",
1735
+ default=None,
1736
+ help="Environment API key (default: ENVIRONMENT_API_KEY or dev fallbacks)",
1737
+ )
1738
+ @click.option(
1739
+ "--seed",
1740
+ "seeds",
1741
+ multiple=True,
1742
+ type=int,
1743
+ help="Optional seed(s) to request specific instances (repeatable)",
1744
+ )
1745
+ def info_command(base_url: str | None, api_key: str | None, seeds: tuple[int, ...]) -> None:
1746
+ """Fetch Task App /task_info with authentication and print JSON."""
1747
+ import json as _json
1748
+ import os as _os
1749
+
1750
+ import requests as _requests
1751
+
1752
+ base = (base_url or _os.getenv("TASK_APP_BASE_URL") or "http://127.0.0.1:8001").rstrip("/")
1753
+
1754
+ # Resolve API key, permitting dev fallbacks
1755
+ try:
1756
+ from synth_ai.task.auth import normalize_environment_api_key as _norm_key
1757
+ except Exception:
1758
+ _norm_key = lambda: _os.getenv("ENVIRONMENT_API_KEY") # noqa: E731
1759
+ key = (api_key or _norm_key() or "").strip()
1760
+ if not key:
1761
+ raise click.ClickException("Missing API key. Provide --api-key or set ENVIRONMENT_API_KEY.")
1762
+
1763
+ headers: dict[str, str] = {"X-API-Key": key, "Authorization": f"Bearer {key}"}
1764
+ aliases = (_os.getenv("ENVIRONMENT_API_KEY_ALIASES") or "").strip()
1765
+ keys_csv = (
1766
+ ",".join([key] + [p.strip() for p in aliases.split(",") if p.strip()]) if aliases else key
1767
+ )
1768
+ if keys_csv:
1769
+ headers["X-API-Keys"] = keys_csv
1770
+
1771
+ params: list[tuple[str, str]] = []
1772
+ for s in seeds:
1773
+ params.append(("seed", str(int(s))))
1774
+
1775
+ url = f"{base}/task_info"
1776
+ try:
1777
+ r = _requests.get(url, headers=headers, params=params or None, timeout=30)
1778
+ except Exception as exc:
1779
+ raise click.ClickException(f"Request failed: {exc}") from exc
1780
+ if not (200 <= r.status_code < 300):
1781
+ ct = r.headers.get("content-type", "")
1782
+ detail = r.text
1783
+ if ct.startswith("application/json"):
1784
+ with contextlib.suppress(Exception):
1785
+ detail = _json.dumps(r.json(), indent=2)
1786
+ raise click.ClickException(f"{url} returned {r.status_code}:\n{detail}")
1787
+
1788
+ data = (
1789
+ r.json()
1790
+ if r.headers.get("content-type", "").startswith("application/json")
1791
+ else {"raw": r.text}
1792
+ )
1793
+ click.echo(_json.dumps(data, indent=2, sort_keys=True))
1794
+
1795
+
1796
+ @task_app_group.command("serve")
1797
+ @click.argument("app_id", type=str, required=False)
1798
+ @click.option("--host", default="0.0.0.0", show_default=True)
1799
+ @click.option("--port", default=None, type=int, help="Port to serve on (default: 8001)")
1800
+ @click.option("--env-file", multiple=True, type=click.Path(), help="Extra .env files to load")
1801
+ @click.option(
1802
+ "--reload/--no-reload", "reload_flag", default=False, help="Enable uvicorn auto-reload"
1803
+ )
1804
+ @click.option(
1805
+ "--force/--no-force",
1806
+ "force",
1807
+ default=False,
1808
+ help="Kill any process already bound to the selected port before starting",
1809
+ )
1810
+ @click.option(
1811
+ "--trace",
1812
+ "trace_dir",
1813
+ type=click.Path(),
1814
+ default=None,
1815
+ help="Enable tracing and write SFT JSONL files to this directory (default: traces/v3)",
1816
+ )
1817
+ @click.option(
1818
+ "--trace-db",
1819
+ "trace_db",
1820
+ type=click.Path(),
1821
+ default=None,
1822
+ help="Override local trace DB path (default: traces/v3/synth_ai.db)",
1823
+ )
1001
1824
  def serve_task_group(
1002
1825
  app_id: str | None,
1003
1826
  host: str,
1004
- port: int,
1827
+ port: int | None,
1005
1828
  env_file: Sequence[str],
1006
1829
  reload_flag: bool,
1007
1830
  force: bool,
1008
1831
  trace_dir: str | None,
1009
1832
  trace_db: str | None,
1010
1833
  ) -> None:
1834
+ # Change to demo directory if stored (REQUIRED for demo isolation)
1835
+ from synth_ai.demos.demo_task_apps.core import load_demo_dir
1836
+
1837
+ demo_dir = load_demo_dir()
1838
+ if demo_dir:
1839
+ demo_path = Path(demo_dir)
1840
+ if not demo_path.is_dir():
1841
+ raise click.ClickException(
1842
+ f"Demo directory not found: {demo_dir}\nRun 'synth-ai setup' to create a demo."
1843
+ )
1844
+ os.chdir(demo_dir)
1845
+ click.echo(f"Using demo directory: {demo_dir}\n")
1846
+ # Store demo directory for path resolution
1847
+ os.environ["SYNTH_DEMO_DIR"] = str(demo_path.resolve())
1848
+
1849
+ # Prompt for port if not provided
1850
+ if port is None:
1851
+ port = click.prompt("Port to serve on", type=int, default=8001)
1852
+
1853
+ # Prompt for trace directory if not provided
1854
+ if trace_dir is None:
1855
+ click.echo(
1856
+ "\nTracing captures rollout data (actions, rewards, model outputs) to a local SQLite DB."
1857
+ )
1858
+ click.echo("This data can be exported to JSONL for supervised fine-tuning (SFT).")
1859
+ enable_tracing = click.confirm("Enable tracing?", default=True)
1860
+ if enable_tracing:
1861
+ demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
1862
+ default_trace_dir = str((demo_base / "traces/v3").resolve())
1863
+ trace_dir = click.prompt(
1864
+ "Trace directory", type=str, default=default_trace_dir, show_default=True
1865
+ )
1866
+ else:
1867
+ trace_dir = None
1868
+
1869
+ # Prompt for trace DB if not provided and tracing is enabled
1870
+ if trace_dir and trace_db is None:
1871
+ demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
1872
+ default_trace_db = str((demo_base / "traces/v3/synth_ai.db").resolve())
1873
+ trace_db = click.prompt(
1874
+ "Trace DB path", type=str, default=default_trace_db, show_default=True
1875
+ )
1876
+
1011
1877
  choice = _select_app_choice(app_id, purpose="serve")
1012
1878
  entry = choice.ensure_entry()
1013
- _serve_entry(entry, host, port, env_file, reload_flag, force, trace_dir=trace_dir, trace_db=trace_db)
1879
+ _serve_entry(
1880
+ entry, host, port, env_file, reload_flag, force, trace_dir=trace_dir, trace_db=trace_db
1881
+ )
1882
+
1014
1883
 
1015
1884
  def _determine_env_files(entry: TaskAppEntry, user_env_files: Sequence[str]) -> list[Path]:
1016
1885
  resolved: list[Path] = []
@@ -1026,25 +1895,25 @@ def _determine_env_files(entry: TaskAppEntry, user_env_files: Sequence[str]) ->
1026
1895
  # Look for env files in current working directory first, then repo root
1027
1896
  cwd = Path.cwd()
1028
1897
  env_candidates = []
1029
-
1898
+
1030
1899
  # Add CWD env files first (prioritized)
1031
- cwd_env_files = sorted(cwd.glob('**/*.env'))
1900
+ cwd_env_files = sorted(cwd.glob("**/*.env"))
1032
1901
  env_candidates.extend(cwd_env_files)
1033
-
1902
+
1034
1903
  # Add repo root env files
1035
- repo_env_files = sorted(REPO_ROOT.glob('**/*.env'))
1904
+ repo_env_files = sorted(REPO_ROOT.glob("**/*.env"))
1036
1905
  # Avoid duplicates
1037
1906
  for repo_file in repo_env_files:
1038
1907
  if repo_file not in env_candidates:
1039
1908
  env_candidates.append(repo_file)
1040
-
1909
+
1041
1910
  if not env_candidates:
1042
- raise click.ClickException('No env file found. Pass --env-file explicitly.')
1911
+ raise click.ClickException("No env file found. Pass --env-file explicitly.")
1043
1912
 
1044
- click.echo('Select env file to load:')
1913
+ click.echo("Select env file to load:")
1045
1914
  for idx, path in enumerate(env_candidates, start=1):
1046
- click.echo(f" {idx}) {path}")
1047
- choice = click.prompt('Enter choice', type=click.IntRange(1, len(env_candidates)))
1915
+ click.echo(f" {idx}) {path.resolve()}")
1916
+ choice = click.prompt("Enter choice", type=click.IntRange(1, len(env_candidates)), default=1)
1048
1917
  return [env_candidates[choice - 1]]
1049
1918
 
1050
1919
 
@@ -1060,7 +1929,9 @@ def _ensure_port_free(port: int, host: str, *, force: bool) -> None:
1060
1929
  return
1061
1930
 
1062
1931
  try:
1063
- out = subprocess.run(["lsof", "-ti", f"TCP:{port}"], capture_output=True, text=True, check=False)
1932
+ out = subprocess.run(
1933
+ ["lsof", "-ti", f"TCP:{port}"], capture_output=True, text=True, check=False
1934
+ )
1064
1935
  pids = [pid for pid in out.stdout.strip().splitlines() if pid]
1065
1936
  except FileNotFoundError:
1066
1937
  pids = []
@@ -1075,7 +1946,7 @@ def _ensure_port_free(port: int, host: str, *, force: bool) -> None:
1075
1946
  try:
1076
1947
  os.kill(int(pid), signal.SIGTERM)
1077
1948
  except Exception as exc:
1078
- raise click.ClickException(f'Failed to terminate PID {pid}: {exc}')
1949
+ raise click.ClickException(f"Failed to terminate PID {pid}: {exc}") from exc
1079
1950
 
1080
1951
  time.sleep(0.5)
1081
1952
 
@@ -1087,13 +1958,139 @@ def _ensure_port_free(port: int, host: str, *, force: bool) -> None:
1087
1958
  try:
1088
1959
  os.kill(int(pid), signal.SIGKILL)
1089
1960
  except Exception as exc:
1090
- raise click.ClickException(f'Failed to force terminate PID {pid}: {exc}')
1961
+ raise click.ClickException(f"Failed to force terminate PID {pid}: {exc}") from exc
1091
1962
  time.sleep(0.5)
1092
1963
 
1093
1964
  with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
1094
1965
  in_use_after = s.connect_ex((host, port)) == 0
1095
1966
  if in_use_after:
1096
- raise click.ClickException(f'Port {port} is still in use after attempting to terminate processes.')
1967
+ raise click.ClickException(
1968
+ f"Port {port} is still in use after attempting to terminate processes."
1969
+ )
1970
+
1971
+
1972
+ def _save_to_env_file(env_path: Path, key: str, value: str) -> None:
1973
+ """Save or update a key-value pair in the .env file."""
1974
+ try:
1975
+ # Read existing .env
1976
+ existing_lines = []
1977
+ if env_path.exists():
1978
+ existing_lines = env_path.read_text().splitlines()
1979
+ else:
1980
+ env_path.parent.mkdir(parents=True, exist_ok=True)
1981
+
1982
+ # Check if key already exists and update it
1983
+ key_updated = False
1984
+ new_lines = []
1985
+ for line in existing_lines:
1986
+ if line.strip().startswith(f"{key}="):
1987
+ new_lines.append(f"{key}={value}")
1988
+ key_updated = True
1989
+ else:
1990
+ new_lines.append(line)
1991
+
1992
+ if key_updated:
1993
+ # Write updated lines back
1994
+ env_path.write_text("\n".join(new_lines) + "\n")
1995
+ click.echo(f"Updated {key} in {env_path}")
1996
+ else:
1997
+ # Append to .env
1998
+ with open(env_path, "a") as f:
1999
+ if existing_lines and not existing_lines[-1].strip():
2000
+ # File exists and last line is not empty
2001
+ pass
2002
+ elif existing_lines:
2003
+ # Add newline before appending
2004
+ f.write("\n")
2005
+ f.write(f"{key}={value}\n")
2006
+ click.echo(f"Saved {key} to {env_path}")
2007
+ except Exception as e:
2008
+ click.echo(f"Warning: Could not save {key} to .env: {e}", err=True)
2009
+
2010
+
2011
+ def _persist_env_api_key(env_api_key: str, env_paths: Sequence[Path] | None) -> None:
2012
+ """Persist ENVIRONMENT_API_KEY to provided env files (or default .env)."""
2013
+ targets: list[Path] = []
2014
+ seen: set[Path] = set()
2015
+ for path in env_paths or ():
2016
+ try:
2017
+ resolved = Path(path).resolve()
2018
+ except Exception:
2019
+ continue
2020
+ if resolved in seen:
2021
+ continue
2022
+ seen.add(resolved)
2023
+ targets.append(resolved)
2024
+
2025
+ if not targets:
2026
+ demo_dir = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
2027
+ targets.append((demo_dir / ".env").resolve())
2028
+
2029
+ for target in targets:
2030
+ _save_to_env_file(target, "ENVIRONMENT_API_KEY", env_api_key)
2031
+
2032
+
2033
+ def _validate_required_env_keys() -> None:
2034
+ """Validate required environment keys are set, prompting if missing."""
2035
+ # Use demo directory .env file if set, otherwise current directory
2036
+ demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
2037
+ env_file = demo_base / ".env"
2038
+
2039
+ if env_file.exists():
2040
+ try:
2041
+ from dotenv import load_dotenv
2042
+
2043
+ load_dotenv(env_file, override=False)
2044
+ except Exception:
2045
+ pass # Best effort
2046
+
2047
+ env_api_key = os.environ.get("ENVIRONMENT_API_KEY", "").strip()
2048
+
2049
+ if not env_api_key:
2050
+ env_api_key = input("Please enter your RL Environment API key:\n> ").strip()
2051
+ if not env_api_key:
2052
+ raise click.ClickException("RL Environment API key is required to start the server")
2053
+ os.environ["ENVIRONMENT_API_KEY"] = env_api_key
2054
+ _save_to_env_file(env_file, "ENVIRONMENT_API_KEY", env_api_key)
2055
+
2056
+ # Check for Groq API key
2057
+ groq_api_key = os.environ.get("GROQ_API_KEY", "").strip()
2058
+
2059
+ if not groq_api_key:
2060
+ click.echo("\nInference API key configuration:")
2061
+ click.echo("This workflow requires a Groq API key.")
2062
+ groq_api_key = input("Groq API key (or press Enter to skip): ").strip()
2063
+ if groq_api_key:
2064
+ os.environ["GROQ_API_KEY"] = groq_api_key
2065
+ _save_to_env_file(env_file, "GROQ_API_KEY", groq_api_key)
2066
+
2067
+
2068
+ def _print_demo_next_steps_if_applicable() -> None:
2069
+ """Print next steps if currently in a demo directory."""
2070
+ try:
2071
+ from synth_ai.demos.demo_task_apps.core import load_demo_dir
2072
+
2073
+ cwd = Path.cwd().resolve()
2074
+ demo_dir = load_demo_dir()
2075
+
2076
+ # Check if we're in the demo directory
2077
+ if (
2078
+ demo_dir
2079
+ and Path(demo_dir).resolve() == cwd
2080
+ and (cwd / "run_local_rollout_traced.py").exists()
2081
+ ):
2082
+ click.echo("\n" + "=" * 60)
2083
+ click.echo("Next step: Collect traced rollouts")
2084
+ click.echo("=" * 60)
2085
+ click.echo("\nIn another terminal, run:")
2086
+ click.echo(f" cd {cwd}")
2087
+ click.echo(" uv run python run_local_rollout_traced.py")
2088
+ click.echo("\nRun this 5-10 times to collect diverse traces.")
2089
+ click.echo("=" * 60 + "\n")
2090
+ except Exception:
2091
+ # Silently fail - this is just a helpful printout
2092
+ pass
2093
+
1097
2094
 
1098
2095
  def _serve_entry(
1099
2096
  entry: TaskAppEntry,
@@ -1111,33 +2108,51 @@ def _serve_entry(
1111
2108
 
1112
2109
  trace_enabled = trace_dir is not None or trace_db is not None
1113
2110
  if trace_enabled:
1114
- os.environ['TASKAPP_TRACING_ENABLED'] = '1'
2111
+ os.environ["TASKAPP_TRACING_ENABLED"] = "1"
2112
+
2113
+ # Ensure paths are absolute relative to demo directory
2114
+ demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
2115
+
1115
2116
  if trace_dir is not None:
1116
2117
  dir_path = Path(trace_dir).expanduser()
2118
+ if not dir_path.is_absolute():
2119
+ dir_path = (demo_base / dir_path).resolve()
1117
2120
  try:
1118
2121
  dir_path.mkdir(parents=True, exist_ok=True)
1119
2122
  except Exception as exc:
1120
- raise click.ClickException(f"Failed to create trace directory {dir_path}: {exc}") from exc
1121
- os.environ['TASKAPP_SFT_OUTPUT_DIR'] = str(dir_path)
2123
+ raise click.ClickException(
2124
+ f"Failed to create trace directory {dir_path}: {exc}"
2125
+ ) from exc
2126
+ os.environ["TASKAPP_SFT_OUTPUT_DIR"] = str(dir_path)
1122
2127
  click.echo(f"Tracing enabled. SFT JSONL will be written to {dir_path}")
1123
2128
  if trace_db is not None:
1124
2129
  db_path = Path(trace_db).expanduser()
1125
- os.environ['SQLD_DB_PATH'] = str(db_path)
1126
- os.environ.pop('TURSO_LOCAL_DB_URL', None)
2130
+ if not db_path.is_absolute():
2131
+ db_path = (demo_base / db_path).resolve()
2132
+ # Construct the sqlite URL from the absolute path
2133
+ db_url = f"sqlite+aiosqlite:///{db_path}"
2134
+ os.environ["SQLD_DB_PATH"] = str(db_path)
2135
+ os.environ["TURSO_LOCAL_DB_URL"] = db_url
1127
2136
  click.echo(f"Tracing DB path set to {db_path}")
1128
2137
  from synth_ai.tracing_v3.config import CONFIG as TRACE_CONFIG
1129
- # recompute db_url based on current environment
1130
- new_db_url = os.getenv('TURSO_LOCAL_DB_URL') or TRACE_CONFIG.db_url
2138
+
2139
+ # Use the explicitly set URL if available
2140
+ new_db_url = os.getenv("TURSO_LOCAL_DB_URL") or TRACE_CONFIG.db_url
1131
2141
  TRACE_CONFIG.db_url = new_db_url
1132
2142
  if new_db_url:
1133
- os.environ['TURSO_LOCAL_DB_URL'] = new_db_url
1134
2143
  click.echo(f"Tracing DB URL resolved to {new_db_url}")
1135
- elif os.getenv('TASKAPP_TRACING_ENABLED'):
2144
+ elif os.getenv("TASKAPP_TRACING_ENABLED"):
1136
2145
  click.echo("Tracing enabled via environment variables")
1137
2146
 
1138
2147
  _ensure_port_free(port, host, force=force)
1139
2148
 
1140
- _preflight_env_key()
2149
+ _validate_required_env_keys()
2150
+ env_path_objs = [Path(p) for p in env_files if p]
2151
+ _preflight_env_key(env_path_objs)
2152
+
2153
+ # Print next steps if in demo context
2154
+ if trace_enabled:
2155
+ _print_demo_next_steps_if_applicable()
1141
2156
 
1142
2157
  run_task_app(
1143
2158
  entry.config_factory,
@@ -1148,37 +2163,76 @@ def _serve_entry(
1148
2163
  )
1149
2164
 
1150
2165
 
1151
- @task_app_group.command('deploy')
2166
+ @task_app_group.command("deploy")
1152
2167
  @click.argument("app_id", type=str, required=False)
1153
2168
  @click.option("--name", "modal_name", default=None, help="Override Modal app name")
1154
2169
  @click.option("--dry-run", is_flag=True, help="Print modal deploy command without executing")
1155
2170
  @click.option("--modal-cli", default="modal", help="Path to modal CLI executable")
1156
- @click.option('--env-file', multiple=True, type=click.Path(), help='Env file to load into the container (can be repeated)')
1157
- def deploy_app(app_id: str | None, modal_name: str | None, dry_run: bool, modal_cli: str, env_file: Sequence[str]) -> None:
2171
+ @click.option(
2172
+ "--env-file",
2173
+ multiple=True,
2174
+ type=click.Path(),
2175
+ help="Env file to load into the container (can be repeated)",
2176
+ )
2177
+ def deploy_app(
2178
+ app_id: str | None,
2179
+ modal_name: str | None,
2180
+ dry_run: bool,
2181
+ modal_cli: str,
2182
+ env_file: Sequence[str],
2183
+ ) -> None:
1158
2184
  """Deploy a task app to Modal."""
1159
2185
 
2186
+ # Change to demo directory if stored (for consistent discovery)
2187
+ from synth_ai.demos.demo_task_apps.core import load_demo_dir
2188
+
2189
+ demo_dir = load_demo_dir()
2190
+ if demo_dir:
2191
+ demo_path = Path(demo_dir)
2192
+ if not demo_path.is_dir():
2193
+ raise click.ClickException(
2194
+ f"Demo directory not found: {demo_dir}\nRun 'synth-ai demo' to create a demo."
2195
+ )
2196
+ os.chdir(demo_dir)
2197
+ click.echo(f"Using demo directory: {demo_dir}\n")
2198
+
1160
2199
  choice = _select_app_choice(app_id, purpose="deploy")
1161
2200
 
1162
2201
  if choice.modal_script:
1163
2202
  env_paths = _resolve_env_paths_for_script(choice.modal_script, env_file)
1164
- click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))
1165
- _run_modal_script(choice.modal_script, modal_cli, "deploy", env_paths, modal_name=modal_name, dry_run=dry_run)
2203
+ click.echo("Using env file(s): " + ", ".join(str(p.resolve()) for p in env_paths))
2204
+ _run_modal_script(
2205
+ choice.modal_script,
2206
+ modal_cli,
2207
+ "deploy",
2208
+ env_paths,
2209
+ modal_name=modal_name,
2210
+ dry_run=dry_run,
2211
+ )
1166
2212
  return
1167
2213
 
1168
2214
  entry = choice.ensure_entry()
1169
2215
  _deploy_entry(entry, modal_name, dry_run, modal_cli, env_file, original_path=choice.path)
1170
2216
 
1171
- @task_app_group.command('modal-serve')
1172
- @click.argument('app_id', type=str, required=False)
1173
- @click.option('--modal-cli', default='modal', help='Path to modal CLI executable')
1174
- @click.option('--name', 'modal_name', default=None, help='Override Modal app name (optional)')
1175
- @click.option('--env-file', multiple=True, type=click.Path(), help='Env file to load into the container (can be repeated)')
1176
- def modal_serve_app(app_id: str | None, modal_cli: str, modal_name: str | None, env_file: Sequence[str]) -> None:
2217
+
2218
+ @task_app_group.command("modal-serve")
2219
+ @click.argument("app_id", type=str, required=False)
2220
+ @click.option("--modal-cli", default="modal", help="Path to modal CLI executable")
2221
+ @click.option("--name", "modal_name", default=None, help="Override Modal app name (optional)")
2222
+ @click.option(
2223
+ "--env-file",
2224
+ multiple=True,
2225
+ type=click.Path(),
2226
+ help="Env file to load into the container (can be repeated)",
2227
+ )
2228
+ def modal_serve_app(
2229
+ app_id: str | None, modal_cli: str, modal_name: str | None, env_file: Sequence[str]
2230
+ ) -> None:
1177
2231
  choice = _select_app_choice(app_id, purpose="modal-serve")
1178
2232
 
1179
2233
  if choice.modal_script:
1180
2234
  env_paths = _resolve_env_paths_for_script(choice.modal_script, env_file)
1181
- click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))
2235
+ click.echo("Using env file(s): " + ", ".join(str(p.resolve()) for p in env_paths))
1182
2236
  _run_modal_script(choice.modal_script, modal_cli, "serve", env_paths, modal_name=modal_name)
1183
2237
  return
1184
2238
 
@@ -1193,6 +2247,7 @@ def _write_modal_entrypoint(
1193
2247
  *,
1194
2248
  dotenv_paths: Sequence[str] | None = None,
1195
2249
  original_path: Path | None = None,
2250
+ inline_secret_values: dict[str, str] | None = None,
1196
2251
  ) -> Path:
1197
2252
  modal_name = override_name or modal_cfg.app_name
1198
2253
 
@@ -1203,7 +2258,8 @@ def _write_modal_entrypoint(
1203
2258
  try:
1204
2259
  # Build lookup of local->remote mounts
1205
2260
  mount_map: list[tuple[Path, Path]] = [
1206
- (Path(local).resolve(), Path(remote)) for (local, remote) in modal_cfg.extra_local_dirs
2261
+ (Path(local).resolve(), Path(remote))
2262
+ for (local, remote) in modal_cfg.extra_local_dirs
1207
2263
  ]
1208
2264
  orig = Path(original_path).resolve()
1209
2265
  for local_src, remote_dst in mount_map:
@@ -1220,32 +2276,77 @@ def _write_modal_entrypoint(
1220
2276
  except Exception:
1221
2277
  remote_file_str = None
1222
2278
  module_name = entry.config_factory.__module__
1223
-
2279
+
2280
+ # Prefer a guaranteed mount for the discovered file to avoid package import issues
2281
+ guaranteed_file_str: str | None = None
2282
+ if original_path:
2283
+ guaranteed_file_str = str(
2284
+ (Path("/opt/synth_ai_repo/__local_task_app__") / Path(original_path).stem).with_suffix(
2285
+ ".py"
2286
+ )
2287
+ )
2288
+
1224
2289
  dotenv_paths = [str(Path(path)) for path in (dotenv_paths or [])]
1225
2290
 
1226
2291
  pip_packages = list(modal_cfg.pip_packages)
2292
+ # Ensure synth-ai (matching host version if available) is installed in the container
2293
+ synth_pkg = "synth-ai"
2294
+ try:
2295
+ import synth_ai as _host_synth
2296
+
2297
+ host_ver = getattr(_host_synth, "__version__", None)
2298
+ if host_ver:
2299
+ synth_pkg = f"synth-ai=={host_ver}"
2300
+ except Exception:
2301
+ pass
2302
+ if not any(str(p).startswith("synth-ai") for p in pip_packages):
2303
+ pip_packages.insert(0, synth_pkg)
1227
2304
 
1228
2305
  local_dirs = [(str(Path(src)), dst) for src, dst in modal_cfg.extra_local_dirs]
2306
+ # Also mount the host synth_ai source if available to ensure latest code is used
2307
+ try:
2308
+ import synth_ai as _host_synth
2309
+
2310
+ host_synth_dir = Path(_host_synth.__file__).resolve().parent
2311
+ # host_synth_dir points to .../synth_ai; mount that directory
2312
+ sy_dst = "/opt/synth_ai_repo/synth_ai"
2313
+ candidate = (str(host_synth_dir), sy_dst)
2314
+ if candidate not in local_dirs:
2315
+ local_dirs.insert(0, candidate)
2316
+ except Exception:
2317
+ pass
2318
+ # Ensure the discovered app directory is mounted, regardless of modal_cfg
2319
+ if original_path:
2320
+ discovered_dir = str(Path(original_path).resolve().parent)
2321
+ mount_dst = "/opt/synth_ai_repo/__local_task_app__"
2322
+ if (discovered_dir, mount_dst) not in local_dirs:
2323
+ local_dirs.append((discovered_dir, mount_dst))
1229
2324
  secret_names = list(modal_cfg.secret_names)
1230
2325
  volume_mounts = [(name, mount) for name, mount in modal_cfg.volume_mounts]
2326
+ inline_secret_values = {k: v for k, v in (inline_secret_values or {}).items() if v}
1231
2327
 
1232
2328
  script = f"""from __future__ import annotations
1233
2329
 
1234
2330
  import importlib
1235
2331
  import importlib.util
1236
2332
  import sys
2333
+ import os
2334
+ import shutil
2335
+ import tempfile
2336
+ from pathlib import Path as _Path
2337
+ import fnmatch
1237
2338
  sys.path.insert(0, '/opt/synth_ai_repo')
1238
2339
 
1239
2340
  from modal import App, Image, Secret, Volume, asgi_app
1240
2341
 
1241
- from synth_ai.task.apps import registry
1242
- from synth_ai.task.server import create_task_app
2342
+ # Defer importing synth_ai until inside fastapi_app to avoid local import errors
1243
2343
 
1244
2344
  ENTRY_ID = {entry.app_id!r}
1245
2345
  MODAL_APP_NAME = {modal_name!r}
1246
2346
  MODULE_NAME = {module_name!r}
1247
- MODULE_FILE = {remote_file_str!r}
2347
+ MODULE_FILE = {guaranteed_file_str or remote_file_str!r}
1248
2348
  DOTENV_PATHS = {dotenv_paths!r}
2349
+ INLINE_SECRET_VALUES = {inline_secret_values!r}
1249
2350
 
1250
2351
  image = Image.debian_slim(python_version={modal_cfg.python_version!r})
1251
2352
 
@@ -1254,12 +2355,44 @@ if pip_packages:
1254
2355
  image = image.pip_install(*pip_packages)
1255
2356
 
1256
2357
  local_dirs = {local_dirs!r}
2358
+
2359
+ def _copy_tree_filtered(src_dir: str) -> str:
2360
+ src = _Path(src_dir)
2361
+ temp_dir = _Path(tempfile.mkdtemp(prefix='synth_mount_'))
2362
+
2363
+ exclude_dirs = {".cache", ".git", "__pycache__"}
2364
+ exclude_globs = ['*.db', '*.db-journal', '*-wal', '*-shm']
2365
+
2366
+ for root, dirs, files in os.walk(src):
2367
+ rel_root = _Path(root).relative_to(src)
2368
+ # filter dirs in-place
2369
+ dirs[:] = [d for d in dirs if d not in exclude_dirs]
2370
+ # ensure target directory exists
2371
+ target_dir = (temp_dir / rel_root)
2372
+ target_dir.mkdir(parents=True, exist_ok=True)
2373
+ # copy files with filtering
2374
+ for name in files:
2375
+ if any(fnmatch.fnmatch(name, pat) for pat in exclude_globs):
2376
+ continue
2377
+ src_file = _Path(root) / name
2378
+ dst_file = target_dir / name
2379
+ try:
2380
+ shutil.copy2(src_file, dst_file)
2381
+ except Exception:
2382
+ # ignore problematic files
2383
+ continue
2384
+ return str(temp_dir)
2385
+
1257
2386
  for local_src, remote_dst in local_dirs:
1258
- image = image.add_local_dir(local_src, remote_dst)
2387
+ safe_src = _copy_tree_filtered(local_src)
2388
+ image = image.add_local_dir(safe_src, remote_dst)
1259
2389
 
1260
2390
  secrets = {secret_names!r}
1261
2391
  secret_objs = [Secret.from_name(name) for name in secrets]
1262
2392
 
2393
+ if INLINE_SECRET_VALUES:
2394
+ secret_objs.append(Secret.from_dict(INLINE_SECRET_VALUES))
2395
+
1263
2396
  if DOTENV_PATHS:
1264
2397
  secret_objs.extend(Secret.from_dotenv(path) for path in DOTENV_PATHS)
1265
2398
 
@@ -1268,24 +2401,6 @@ volume_map = {{}}
1268
2401
  for vol_name, mount_path in volume_mounts:
1269
2402
  volume_map[mount_path] = Volume.from_name(vol_name, create_if_missing=True)
1270
2403
 
1271
- # Import the module to trigger registration
1272
- if MODULE_FILE:
1273
- spec = importlib.util.spec_from_file_location(MODULE_NAME or 'task_app_module', MODULE_FILE)
1274
- if spec and spec.loader:
1275
- mod = importlib.util.module_from_spec(spec)
1276
- sys.modules[MODULE_NAME or 'task_app_module'] = mod
1277
- spec.loader.exec_module(mod)
1278
- else:
1279
- raise RuntimeError("Failed to import task app from file: " + str(MODULE_FILE))
1280
- else:
1281
- importlib.import_module(MODULE_NAME)
1282
-
1283
- # Get the entry from registry (now that it's registered)
1284
- entry = registry.get(ENTRY_ID)
1285
- modal_cfg = entry.modal
1286
- if modal_cfg is None:
1287
- raise RuntimeError("Modal configuration missing for task app {entry.app_id}")
1288
-
1289
2404
  app = App(MODAL_APP_NAME)
1290
2405
 
1291
2406
  @app.function(
@@ -1300,17 +2415,368 @@ app = App(MODAL_APP_NAME)
1300
2415
  )
1301
2416
  @asgi_app()
1302
2417
  def fastapi_app():
2418
+ # Import the module to trigger registration (inside container)
2419
+ import os
2420
+ # Prefer mounted source over any preinstalled site-packages version
2421
+ import sys as _sys
2422
+ for k in list(_sys.modules.keys()):
2423
+ if k == 'synth_ai' or k.startswith('synth_ai.'):
2424
+ _sys.modules.pop(k, None)
2425
+ import importlib as _importlib
2426
+ _importlib.invalidate_caches()
2427
+ try:
2428
+ if MODULE_FILE and os.path.exists(MODULE_FILE):
2429
+ spec = importlib.util.spec_from_file_location(MODULE_NAME or 'task_app_module', MODULE_FILE)
2430
+ if not spec or not spec.loader:
2431
+ raise RuntimeError("Failed to prepare spec for: " + str(MODULE_FILE))
2432
+ mod = importlib.util.module_from_spec(spec)
2433
+ sys.modules[MODULE_NAME or 'task_app_module'] = mod
2434
+ spec.loader.exec_module(mod)
2435
+ else:
2436
+ try:
2437
+ importlib.import_module(MODULE_NAME)
2438
+ except Exception:
2439
+ fallback_file = '/opt/synth_ai_repo/__local_task_app__/' + (MODULE_NAME.split('.')[-1] if MODULE_NAME else 'task_app') + '.py'
2440
+ if os.path.exists(fallback_file):
2441
+ spec = importlib.util.spec_from_file_location(MODULE_NAME or 'task_app_module', fallback_file)
2442
+ if not spec or not spec.loader:
2443
+ raise RuntimeError("Failed to prepare fallback spec for: " + str(fallback_file))
2444
+ mod = importlib.util.module_from_spec(spec)
2445
+ sys.modules[MODULE_NAME or 'task_app_module'] = mod
2446
+ spec.loader.exec_module(mod)
2447
+ else:
2448
+ raise
2449
+ except Exception as e:
2450
+ raise RuntimeError("Task app import failed: " + str(e))
2451
+
2452
+ # Get the entry from registry (now that it's registered)
2453
+ from synth_ai.task.apps import registry
2454
+ from synth_ai.task.server import create_task_app
2455
+ entry = registry.get(ENTRY_ID)
2456
+ cfg = entry.modal
2457
+ if cfg is None:
2458
+ raise RuntimeError("Modal configuration missing for task app " + ENTRY_ID)
1303
2459
  config = entry.config_factory()
1304
2460
  return create_task_app(config)
1305
2461
  """
1306
2462
 
1307
- tmp = tempfile.NamedTemporaryFile("w", suffix=f"_{entry.app_id}_modal.py", delete=False)
1308
- tmp.write(script)
1309
- tmp.flush()
1310
- tmp.close()
1311
- return Path(tmp.name)
2463
+ with tempfile.NamedTemporaryFile("w", suffix=f"_{entry.app_id}_modal.py", delete=False) as tmp:
2464
+ tmp.write(script)
2465
+ tmp.flush()
2466
+ name = tmp.name
2467
+ return Path(name)
1312
2468
 
1313
2469
 
1314
2470
  def register(cli: click.Group) -> None:
1315
2471
  cli.add_command(serve_command)
1316
2472
  cli.add_command(task_app_group)
2473
+ cli.add_command(eval_command)
2474
+
2475
+
2476
+ @click.command("eval")
2477
+ @click.argument("app_id", type=str, required=False)
2478
+ @click.option("--config", type=click.Path(), default=None, help="Path to eval TOML (short schema)")
2479
+ @click.option(
2480
+ "--url",
2481
+ "task_app_url",
2482
+ type=str,
2483
+ default=None,
2484
+ help="Base URL of a running task app (skip in-process server)",
2485
+ )
2486
+ @click.option("--seeds", default="0,1,2,3,4", help="Comma-separated seeds/indices to evaluate")
2487
+ @click.option("--split", default="train", show_default=True, help="Dataset split to use")
2488
+ @click.option("--model", default=None, help="Model identifier (prompted if omitted)")
2489
+ @click.option("--env-file", multiple=True, type=click.Path(), help="Env file(s) for keys")
2490
+ def eval_command(
2491
+ app_id: str | None,
2492
+ config: str | None,
2493
+ task_app_url: str | None,
2494
+ seeds: str,
2495
+ split: str,
2496
+ model: str | None,
2497
+ env_file: Sequence[str],
2498
+ ) -> None:
2499
+ """Run local rollouts against a task app using in-process ASGI and summarize results."""
2500
+ cfg: dict[str, Any] = {}
2501
+ config_path: Path | None = None
2502
+ if config:
2503
+ config_path = Path(config)
2504
+ else:
2505
+ auto_configs = _discover_eval_config_paths()
2506
+ if auto_configs:
2507
+ config_path = auto_configs[0]
2508
+ click.echo(f"Using eval config: {config_path}")
2509
+
2510
+ if config_path:
2511
+ if _toml is None:
2512
+ raise click.ClickException(
2513
+ "TOML parser not available; use Python 3.11+ or install tomli"
2514
+ )
2515
+ if not config_path.exists():
2516
+ raise click.ClickException(f"Eval config not found: {config_path}")
2517
+ try:
2518
+ data = config_path.read_bytes()
2519
+ parsed = _toml.loads(data.decode("utf-8"))
2520
+ if isinstance(parsed, dict):
2521
+ section = parsed.get("eval")
2522
+ cfg = dict(section) if isinstance(section, dict) else dict(parsed)
2523
+ except Exception as exc:
2524
+ raise click.ClickException(f"Failed to parse TOML '{config_path}': {exc}") from exc
2525
+
2526
+ app_id = app_id or (cfg.get("app_id") if isinstance(cfg.get("app_id"), str) else None) # type: ignore
2527
+
2528
+ # Determine selection params (CLI takes precedence; TOML only fills unset model/seeds/env)
2529
+ if cfg.get("model") and not model:
2530
+ model = str(cfg["model"]) # type: ignore[index]
2531
+ if cfg.get("seeds") and seeds == "0,1,2,3,4":
2532
+ val = cfg["seeds"]
2533
+ if isinstance(val, list):
2534
+ with contextlib.suppress(Exception):
2535
+ seeds = ",".join(str(int(x)) for x in val)
2536
+ elif isinstance(val, str):
2537
+ seeds = val
2538
+ elif isinstance(val, int):
2539
+ seeds = str(val)
2540
+ if cfg.get("env_file") and not env_file:
2541
+ ef = cfg["env_file"]
2542
+ if isinstance(ef, str):
2543
+ env_file = (ef,) # type: ignore[assignment]
2544
+ elif isinstance(ef, list):
2545
+ env_file = tuple(str(x) for x in ef) # type: ignore[assignment]
2546
+
2547
+ entry: TaskAppEntry | None = None
2548
+ if task_app_url is None:
2549
+ choice = _select_app_choice(app_id, purpose="eval")
2550
+ entry = choice.ensure_entry()
2551
+
2552
+ env_paths: list[Path] = []
2553
+ if entry is not None:
2554
+ env_paths = _determine_env_files(entry, env_file)
2555
+ else:
2556
+ if not env_file:
2557
+ raise click.ClickException("--env-file is required when using --url")
2558
+ for candidate in env_file:
2559
+ p = Path(candidate).expanduser()
2560
+ if not p.exists():
2561
+ raise click.ClickException(f"Env file not found: {p}")
2562
+ env_paths.append(p)
2563
+
2564
+ click.echo("Using env file(s): " + ", ".join(str(p) for p in env_paths))
2565
+ _load_env_files_into_process([str(Path(p)) for p in env_paths])
2566
+
2567
+ if task_app_url is None:
2568
+ config = entry.config_factory() # type: ignore[union-attr]
2569
+ # Help the type checker; runtime check also enforced in server.run_task_app
2570
+ if not isinstance(config, TaskAppConfig):
2571
+ raise click.ClickException(
2572
+ "Invalid task app: config_factory did not return TaskAppConfig"
2573
+ )
2574
+ app = create_task_app(config)
2575
+
2576
+ # Determine supported models
2577
+ supported: list[str] = []
2578
+ if task_app_url is None:
2579
+ try:
2580
+ supported = list((config.base_task_info.inference or {}).get("models") or []) # type: ignore[union-attr]
2581
+ except Exception:
2582
+ supported = []
2583
+ else:
2584
+ try:
2585
+ import httpx as _hx
2586
+
2587
+ headers = {}
2588
+ api_key = (os.environ.get("ENVIRONMENT_API_KEY") or "").strip()
2589
+ if api_key:
2590
+ headers["X-API-Key"] = api_key
2591
+ with _hx.Client(base_url=task_app_url, headers=headers, timeout=15.0) as c:
2592
+ info = c.get("/info").json()
2593
+ inf = info.get("inference") if isinstance(info, dict) else None
2594
+ if isinstance(inf, dict):
2595
+ m = inf.get("models")
2596
+ if isinstance(m, list):
2597
+ supported = [str(x) for x in m]
2598
+ if not supported:
2599
+ providers = inf.get("providers")
2600
+ if isinstance(providers, list):
2601
+ if "openai" in providers:
2602
+ supported.append("gpt-5")
2603
+ if "groq" in providers:
2604
+ supported.append("groq:llama-3.1-70b-versatile")
2605
+ supported.append("synth:qwen-0.6b")
2606
+ except Exception:
2607
+ supported = []
2608
+ if not supported:
2609
+ # Only fall back to local config-derived providers when running in-process
2610
+ if task_app_url is None:
2611
+ try:
2612
+ providers = list((config.base_task_info.inference or {}).get("providers") or []) # type: ignore[union-attr]
2613
+ except Exception:
2614
+ providers = []
2615
+ if "openai" in providers:
2616
+ supported.append("gpt-5")
2617
+ if "groq" in providers:
2618
+ supported.append("groq:llama-3.1-70b-versatile")
2619
+ # Always include a local synth model option for smoke tests
2620
+ supported.append("synth:qwen-0.6b")
2621
+
2622
+ selected_model = model
2623
+ if not selected_model:
2624
+ if not supported:
2625
+ raise click.ClickException(
2626
+ "No supported models; supply --model or add base_task_info.inference.models"
2627
+ )
2628
+ click.echo("Select model to evaluate:")
2629
+ for idx, m in enumerate(supported, start=1):
2630
+ click.echo(f" {idx}) {m}")
2631
+ choice_idx = click.prompt("Enter choice", type=click.IntRange(1, len(supported)))
2632
+ selected_model = supported[choice_idx - 1]
2633
+
2634
+ try:
2635
+ seed_values = [int(s.strip()) for s in seeds.split(",") if s.strip()]
2636
+ except Exception as exc:
2637
+ raise click.ClickException("Invalid --seeds; expected comma-separated integers") from exc
2638
+
2639
+ import httpx
2640
+
2641
+ headers = {}
2642
+ api_key = (os.environ.get("ENVIRONMENT_API_KEY") or "").strip()
2643
+ if api_key:
2644
+ headers["X-API-Key"] = api_key
2645
+
2646
+ successes = 0
2647
+ failures = 0
2648
+ # Aggregate outcome stats across successful seeds
2649
+ outcome_sum: float = 0.0
2650
+ outcome_count: int = 0
2651
+ outcome_correct: int = 0
2652
+ if task_app_url is None:
2653
+ transport = httpx.ASGITransport(app=app) # type: ignore[name-defined]
2654
+ # Newer httpx types consider ASGITransport under httpx._transports; cast to satisfy type checker
2655
+ client = httpx.Client(
2656
+ transport=cast(Any, transport),
2657
+ base_url="http://eval.local",
2658
+ timeout=60.0,
2659
+ headers=headers,
2660
+ )
2661
+ else:
2662
+ client = httpx.Client(base_url=task_app_url, timeout=60.0, headers=headers)
2663
+ try:
2664
+ with contextlib.suppress(Exception):
2665
+ client.get("/task_info")
2666
+ # Precompute optional policy overrides from TOML
2667
+ policy_overrides: dict[str, Any] = {}
2668
+ try:
2669
+ # Accept [eval.policy] table or top-level keys for convenience
2670
+ if isinstance(cfg.get("policy"), dict):
2671
+ policy_overrides.update(dict(cfg["policy"]))
2672
+ # Back-compat: allow temperature/max_tokens at top level
2673
+ for k in (
2674
+ "temperature",
2675
+ "max_tokens",
2676
+ "reasoning_effort",
2677
+ "system_hint",
2678
+ "tool_choice",
2679
+ ):
2680
+ if k in cfg and k not in policy_overrides:
2681
+ policy_overrides[k] = cfg.get(k)
2682
+ except Exception:
2683
+ policy_overrides = {}
2684
+
2685
+ for seed_val in seed_values:
2686
+ body = {
2687
+ "run_id": str(uuid.uuid4()),
2688
+ "env": {"config": {"split": split, "index": seed_val}, "seed": seed_val},
2689
+ "policy": {
2690
+ "policy_name": selected_model,
2691
+ "config": {"model": selected_model, **policy_overrides},
2692
+ },
2693
+ "ops": [],
2694
+ }
2695
+ try:
2696
+ resp = client.post("/rollout", json=body)
2697
+ ok = 200 <= resp.status_code < 300
2698
+ if ok:
2699
+ successes += 1
2700
+ else:
2701
+ failures += 1
2702
+
2703
+ # Print summary with any available metrics/tool calls
2704
+ summary = [f"seed={seed_val}", f"status={resp.status_code}"]
2705
+ try:
2706
+ data = resp.json()
2707
+ except Exception:
2708
+ data = None
2709
+ if isinstance(data, dict):
2710
+ metrics = data.get("metrics") if isinstance(data.get("metrics"), dict) else None
2711
+ if metrics:
2712
+ mean_return = metrics.get("mean_return") or metrics.get("total_reward")
2713
+ outcome = metrics.get("outcome_score")
2714
+ if mean_return is not None:
2715
+ summary.append(f"mean_return={mean_return}")
2716
+ if outcome is not None:
2717
+ summary.append(f"outcome={outcome}")
2718
+ # Aggregate outcome stats
2719
+ try:
2720
+ val = float(outcome)
2721
+ outcome_sum += val
2722
+ outcome_count += 1
2723
+ if val >= 0.5:
2724
+ outcome_correct += 1
2725
+ except Exception:
2726
+ pass
2727
+ # Try to infer tool call count from first trajectory step
2728
+ trajs = (
2729
+ data.get("trajectories")
2730
+ if isinstance(data.get("trajectories"), list)
2731
+ else None
2732
+ )
2733
+ if trajs:
2734
+ first = trajs[0] if trajs else None
2735
+ steps = first.get("steps") if isinstance(first, dict) else None
2736
+ if isinstance(steps, list) and steps:
2737
+ step0 = steps[0]
2738
+ tool_calls = step0.get("tool_calls") or step0.get("tools") or []
2739
+ if isinstance(tool_calls, list):
2740
+ summary.append(f"tool_calls={len(tool_calls)}")
2741
+ click.echo(" ".join(summary))
2742
+ # Print the full response JSON (trace, trajectories, metrics)
2743
+ with contextlib.suppress(Exception):
2744
+ click.echo(json.dumps(data, indent=2))
2745
+ else:
2746
+ click.echo(" ".join(summary))
2747
+ except Exception as exc:
2748
+ failures += 1
2749
+ click.echo(f"seed={seed_val} error={exc}")
2750
+
2751
+ finally:
2752
+ try:
2753
+ client.close()
2754
+ except AttributeError:
2755
+ transport_obj = getattr(client, "_transport", None)
2756
+ if transport_obj and hasattr(transport_obj, "aclose"):
2757
+ try:
2758
+ asyncio.run(transport_obj.aclose())
2759
+ except RuntimeError:
2760
+ # Fallback when already inside a running loop (rare for CLI).
2761
+ new_loop = asyncio.new_event_loop()
2762
+ try:
2763
+ new_loop.run_until_complete(transport_obj.aclose())
2764
+ finally:
2765
+ new_loop.close()
2766
+ except Exception:
2767
+ pass
2768
+
2769
+ click.echo(
2770
+ f"Eval complete: {successes} ok, {failures} failed; model={selected_model}, split={split}"
2771
+ )
2772
+ # Print outcome summary if any successes
2773
+ if outcome_count > 0:
2774
+ mean_outcome = outcome_sum / float(outcome_count)
2775
+ frac_right = outcome_correct / float(outcome_count)
2776
+ click.echo(
2777
+ f"Outcome summary: correct={outcome_correct}/{outcome_count} ({frac_right:.2%}), mean_outcome={mean_outcome:.3f}"
2778
+ )
2779
+
2780
+
2781
+ def register_eval(cli: click.Group) -> None:
2782
+ cli.add_command(eval_command)