synth-ai 0.2.14__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (1091)
  1. synth_ai/__init__.py +19 -40
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +105 -70
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/baseline/__init__.py +12 -0
  25. synth_ai/cli/commands/baseline/core.py +636 -0
  26. synth_ai/cli/commands/baseline/list.py +94 -0
  27. synth_ai/cli/commands/demo/__init__.py +3 -0
  28. synth_ai/cli/commands/demo/core.py +153 -0
  29. synth_ai/cli/commands/eval/__init__.py +19 -0
  30. synth_ai/cli/commands/eval/core.py +1113 -0
  31. synth_ai/cli/commands/eval/errors.py +81 -0
  32. synth_ai/cli/commands/eval/validation.py +133 -0
  33. synth_ai/cli/commands/filter/__init__.py +12 -0
  34. synth_ai/cli/commands/filter/core.py +424 -0
  35. synth_ai/cli/commands/filter/errors.py +55 -0
  36. synth_ai/cli/commands/filter/validation.py +77 -0
  37. synth_ai/cli/commands/help/__init__.py +185 -0
  38. synth_ai/cli/commands/help/core.py +72 -0
  39. synth_ai/cli/commands/scan/__init__.py +19 -0
  40. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  41. synth_ai/cli/commands/scan/core.py +344 -0
  42. synth_ai/cli/commands/scan/health_checker.py +242 -0
  43. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  44. synth_ai/cli/commands/scan/models.py +83 -0
  45. synth_ai/cli/commands/smoke/__init__.py +7 -0
  46. synth_ai/cli/commands/smoke/core.py +1438 -0
  47. synth_ai/cli/commands/status/__init__.py +66 -0
  48. synth_ai/cli/commands/status/client.py +192 -0
  49. synth_ai/cli/commands/status/config.py +92 -0
  50. synth_ai/cli/commands/status/errors.py +20 -0
  51. synth_ai/cli/commands/status/formatters.py +164 -0
  52. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  53. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  54. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  55. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  56. synth_ai/cli/commands/status/subcommands/pricing.py +23 -0
  57. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  58. synth_ai/cli/commands/status/subcommands/session.py +182 -0
  59. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  60. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  61. synth_ai/cli/commands/status/utils.py +114 -0
  62. synth_ai/cli/commands/train/__init__.py +53 -0
  63. synth_ai/cli/commands/train/core.py +22 -0
  64. synth_ai/cli/commands/train/errors.py +117 -0
  65. synth_ai/cli/commands/train/judge_schemas.py +201 -0
  66. synth_ai/cli/commands/train/judge_validation.py +305 -0
  67. synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
  68. synth_ai/cli/commands/train/validation.py +392 -0
  69. synth_ai/cli/demo_apps/__init__.py +10 -0
  70. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  71. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  72. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  73. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  74. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  75. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  76. synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  79. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +742 -0
  80. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  81. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  82. synth_ai/cli/demo_apps/math/_common.py +16 -0
  83. synth_ai/cli/demo_apps/math/app.py +38 -0
  84. synth_ai/cli/demo_apps/math/config.toml +76 -0
  85. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  86. synth_ai/cli/demo_apps/math/modal_task_app.py +702 -0
  87. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  88. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  89. synth_ai/cli/demo_apps/mipro/task_app.py +933 -0
  90. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  91. synth_ai/cli/demos/__init__.py +12 -0
  92. synth_ai/cli/demos/demo.py +32 -0
  93. synth_ai/cli/demos/rl_demo.py +254 -0
  94. synth_ai/cli/deploy.py +216 -0
  95. synth_ai/cli/infra/__init__.py +14 -0
  96. synth_ai/cli/infra/balance.py +216 -0
  97. synth_ai/cli/infra/mcp.py +35 -0
  98. synth_ai/cli/infra/modal_app.py +36 -0
  99. synth_ai/cli/infra/setup.py +69 -0
  100. synth_ai/cli/infra/status.py +16 -0
  101. synth_ai/cli/infra/turso.py +77 -0
  102. synth_ai/cli/lib/__init__.py +10 -0
  103. synth_ai/cli/lib/agents.py +76 -0
  104. synth_ai/cli/lib/apps/modal_app.py +101 -0
  105. synth_ai/cli/lib/apps/task_app.py +643 -0
  106. synth_ai/cli/lib/bin.py +39 -0
  107. synth_ai/cli/lib/env.py +375 -0
  108. synth_ai/cli/lib/errors.py +85 -0
  109. synth_ai/cli/lib/modal.py +315 -0
  110. synth_ai/cli/lib/plotting.py +126 -0
  111. synth_ai/cli/lib/prompt_args.py +39 -0
  112. synth_ai/cli/lib/prompts.py +284 -0
  113. synth_ai/cli/lib/sqld.py +122 -0
  114. synth_ai/cli/lib/task_app_discovery.py +884 -0
  115. synth_ai/cli/lib/task_app_env.py +295 -0
  116. synth_ai/cli/lib/train_cfgs.py +300 -0
  117. synth_ai/cli/lib/tunnel_records.py +207 -0
  118. synth_ai/cli/local/__init__.py +14 -0
  119. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  120. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  121. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  122. synth_ai/cli/local/experiment_queue/config.py +128 -0
  123. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  124. synth_ai/cli/local/experiment_queue/database.py +175 -0
  125. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  126. synth_ai/cli/local/experiment_queue/models.py +231 -0
  127. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  128. synth_ai/cli/local/experiment_queue/results.py +373 -0
  129. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  130. synth_ai/cli/local/experiment_queue/service.py +344 -0
  131. synth_ai/cli/local/experiment_queue/status.py +372 -0
  132. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  133. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  134. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  135. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  136. synth_ai/cli/local/session/__init__.py +92 -0
  137. synth_ai/cli/local/session/client.py +383 -0
  138. synth_ai/cli/local/session/constants.py +63 -0
  139. synth_ai/cli/local/session/exceptions.py +105 -0
  140. synth_ai/cli/local/session/manager.py +139 -0
  141. synth_ai/cli/local/session/models.py +89 -0
  142. synth_ai/cli/local/session/query.py +110 -0
  143. synth_ai/cli/root.py +30 -6
  144. synth_ai/cli/task_apps/__init__.py +26 -0
  145. synth_ai/cli/task_apps/commands.py +3153 -0
  146. synth_ai/cli/task_apps/deploy.py +7 -0
  147. synth_ai/cli/task_apps/list.py +26 -0
  148. synth_ai/cli/task_apps/main.py +36 -0
  149. synth_ai/cli/task_apps/modal_serve.py +11 -0
  150. synth_ai/cli/task_apps/serve.py +11 -0
  151. synth_ai/cli/training/__init__.py +8 -0
  152. synth_ai/cli/training/train.py +5 -0
  153. synth_ai/cli/training/train_cfg.py +34 -0
  154. synth_ai/cli/training/watch.py +506 -0
  155. synth_ai/cli/turso.py +34 -55
  156. synth_ai/cli/usage.py +159 -0
  157. synth_ai/cli/utils/__init__.py +8 -0
  158. synth_ai/cli/utils/experiments.py +235 -0
  159. synth_ai/cli/utils/queue.py +504 -0
  160. synth_ai/cli/utils/recent.py +133 -0
  161. synth_ai/cli/utils/traces.py +164 -0
  162. synth_ai/contracts/__init__.py +67 -0
  163. synth_ai/core/__init__.py +100 -0
  164. synth_ai/core/_utils/__init__.py +54 -0
  165. synth_ai/core/_utils/base_url.py +10 -0
  166. synth_ai/core/_utils/http.py +10 -0
  167. synth_ai/core/_utils/prompts.py +14 -0
  168. synth_ai/core/_utils/task_app_state.py +12 -0
  169. synth_ai/core/_utils/user_config.py +10 -0
  170. synth_ai/core/apps/common.py +116 -0
  171. synth_ai/core/auth.py +95 -0
  172. synth_ai/core/cfgs.py +240 -0
  173. synth_ai/core/config/__init__.py +16 -0
  174. synth_ai/core/config/base.py +168 -0
  175. synth_ai/core/config/resolver.py +89 -0
  176. synth_ai/core/env.py +220 -0
  177. synth_ai/core/errors.py +126 -0
  178. synth_ai/core/http.py +230 -0
  179. synth_ai/core/integrations/__init__.py +11 -0
  180. synth_ai/core/integrations/cloudflare.py +1710 -0
  181. synth_ai/core/integrations/mcp/__init__.py +6 -0
  182. synth_ai/core/integrations/mcp/__main__.py +8 -0
  183. synth_ai/core/integrations/mcp/claude.py +36 -0
  184. synth_ai/core/integrations/mcp/main.py +254 -0
  185. synth_ai/core/integrations/mcp/setup.py +100 -0
  186. synth_ai/core/integrations/modal.py +277 -0
  187. synth_ai/core/json.py +72 -0
  188. synth_ai/core/log_filter.py +99 -0
  189. synth_ai/core/logging.py +82 -0
  190. synth_ai/core/paths.py +107 -0
  191. synth_ai/core/pricing.py +109 -0
  192. synth_ai/core/process.py +233 -0
  193. synth_ai/core/ssl.py +25 -0
  194. synth_ai/core/storage/__init__.py +71 -0
  195. synth_ai/core/task_app_state.py +318 -0
  196. synth_ai/core/telemetry.py +282 -0
  197. synth_ai/core/tracing_v3/__init__.py +99 -0
  198. synth_ai/core/tracing_v3/abstractions.py +302 -0
  199. synth_ai/core/tracing_v3/config.py +229 -0
  200. synth_ai/core/tracing_v3/constants.py +21 -0
  201. synth_ai/core/tracing_v3/db_config.py +182 -0
  202. synth_ai/core/tracing_v3/decorators.py +401 -0
  203. synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
  204. synth_ai/core/tracing_v3/migration_helper.py +119 -0
  205. synth_ai/core/tracing_v3/session_tracer.py +542 -0
  206. synth_ai/core/tracing_v3/storage/base.py +211 -0
  207. synth_ai/core/tracing_v3/storage/config.py +109 -0
  208. synth_ai/core/tracing_v3/storage/factory.py +39 -0
  209. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  210. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  211. synth_ai/core/tracing_v3/turso/models.py +470 -0
  212. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  213. synth_ai/core/tracing_v3/utils.py +108 -0
  214. synth_ai/core/urls.py +18 -0
  215. synth_ai/core/user_config.py +137 -0
  216. synth_ai/core/uvicorn.py +222 -0
  217. synth_ai/data/__init__.py +110 -0
  218. synth_ai/data/enums.py +141 -0
  219. synth_ai/data/rewards.py +152 -0
  220. synth_ai/data/specs.py +36 -0
  221. synth_ai/data/traces.py +35 -0
  222. synth_ai/products/__init__.py +6 -0
  223. synth_ai/products/graph_evolve/__init__.py +46 -0
  224. synth_ai/products/graph_evolve/client.py +226 -0
  225. synth_ai/products/graph_evolve/config.py +591 -0
  226. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  227. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  228. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  229. synth_ai/products/graph_evolve/run.py +222 -0
  230. synth_ai/sdk/__init__.py +119 -0
  231. synth_ai/sdk/api/__init__.py +1 -0
  232. synth_ai/sdk/api/models/supported.py +514 -0
  233. synth_ai/sdk/api/research_agent/__init__.py +86 -0
  234. synth_ai/sdk/api/research_agent/cli.py +428 -0
  235. synth_ai/sdk/api/research_agent/config.py +357 -0
  236. synth_ai/sdk/api/research_agent/job.py +717 -0
  237. synth_ai/sdk/api/train/__init__.py +85 -0
  238. synth_ai/sdk/api/train/builders.py +895 -0
  239. synth_ai/sdk/api/train/cli.py +2188 -0
  240. synth_ai/sdk/api/train/config_finder.py +267 -0
  241. synth_ai/sdk/api/train/configs/__init__.py +65 -0
  242. synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
  243. synth_ai/sdk/api/train/configs/rl.py +188 -0
  244. synth_ai/sdk/api/train/configs/sft.py +99 -0
  245. synth_ai/sdk/api/train/configs/shared.py +81 -0
  246. synth_ai/sdk/api/train/context_learning.py +312 -0
  247. synth_ai/sdk/api/train/env_resolver.py +418 -0
  248. synth_ai/sdk/api/train/graph_validators.py +216 -0
  249. synth_ai/sdk/api/train/graphgen.py +984 -0
  250. synth_ai/sdk/api/train/graphgen_models.py +823 -0
  251. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  252. synth_ai/sdk/api/train/pollers.py +124 -0
  253. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  254. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  255. synth_ai/sdk/api/train/progress/events.py +326 -0
  256. synth_ai/sdk/api/train/progress/results.py +428 -0
  257. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  258. synth_ai/sdk/api/train/prompt_learning.py +470 -0
  259. synth_ai/sdk/api/train/rl.py +442 -0
  260. synth_ai/sdk/api/train/sft.py +396 -0
  261. synth_ai/sdk/api/train/summary.py +522 -0
  262. synth_ai/sdk/api/train/supported_algos.py +147 -0
  263. synth_ai/sdk/api/train/task_app.py +331 -0
  264. synth_ai/sdk/api/train/utils.py +279 -0
  265. synth_ai/sdk/api/train/validators.py +2424 -0
  266. synth_ai/sdk/baseline/__init__.py +25 -0
  267. synth_ai/sdk/baseline/config.py +209 -0
  268. synth_ai/sdk/baseline/discovery.py +216 -0
  269. synth_ai/sdk/baseline/execution.py +154 -0
  270. synth_ai/sdk/graphs/__init__.py +15 -0
  271. synth_ai/sdk/graphs/completions.py +570 -0
  272. synth_ai/sdk/inference/__init__.py +6 -0
  273. synth_ai/sdk/inference/client.py +128 -0
  274. synth_ai/sdk/jobs/__init__.py +16 -0
  275. synth_ai/sdk/jobs/client.py +371 -0
  276. synth_ai/sdk/judging/__init__.py +15 -0
  277. synth_ai/sdk/judging/base.py +24 -0
  278. synth_ai/sdk/judging/client.py +191 -0
  279. synth_ai/sdk/judging/schemas.py +222 -0
  280. synth_ai/sdk/learning/__init__.py +69 -0
  281. synth_ai/sdk/learning/client.py +240 -0
  282. synth_ai/sdk/learning/ft_client.py +7 -0
  283. synth_ai/sdk/learning/health.py +49 -0
  284. synth_ai/sdk/learning/jobs.py +202 -0
  285. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  286. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  287. synth_ai/sdk/learning/prompt_learning_types.py +185 -0
  288. synth_ai/sdk/learning/rl/client.py +268 -0
  289. synth_ai/sdk/learning/rl/contracts.py +27 -0
  290. synth_ai/sdk/learning/rl/env_keys.py +166 -0
  291. synth_ai/sdk/learning/rl/secrets.py +13 -0
  292. synth_ai/sdk/learning/sft/client.py +95 -0
  293. synth_ai/sdk/learning/sft/config.py +270 -0
  294. synth_ai/sdk/learning/sft/data.py +698 -0
  295. synth_ai/sdk/learning/validators.py +52 -0
  296. synth_ai/sdk/research_agent/__init__.py +34 -0
  297. synth_ai/sdk/research_agent/container_builder.py +328 -0
  298. synth_ai/sdk/research_agent/container_spec.py +198 -0
  299. synth_ai/sdk/research_agent/defaults.py +34 -0
  300. synth_ai/sdk/research_agent/results_collector.py +69 -0
  301. synth_ai/sdk/specs/__init__.py +46 -0
  302. synth_ai/sdk/specs/dataclasses.py +149 -0
  303. synth_ai/sdk/specs/loader.py +144 -0
  304. synth_ai/sdk/specs/serializer.py +199 -0
  305. synth_ai/sdk/specs/validation.py +250 -0
  306. synth_ai/sdk/streaming/__init__.py +35 -0
  307. synth_ai/sdk/streaming/config.py +94 -0
  308. synth_ai/sdk/streaming/handlers.py +1997 -0
  309. synth_ai/sdk/streaming/streamer.py +704 -0
  310. synth_ai/sdk/streaming/types.py +112 -0
  311. synth_ai/sdk/task/__init__.py +151 -0
  312. synth_ai/sdk/task/apps/__init__.py +133 -0
  313. synth_ai/sdk/task/config.py +261 -0
  314. synth_ai/sdk/task/contracts.py +298 -0
  315. synth_ai/sdk/task/datasets.py +108 -0
  316. synth_ai/sdk/task/in_process.py +1190 -0
  317. synth_ai/sdk/task/in_process_runner.py +309 -0
  318. synth_ai/sdk/task/inference_api.py +299 -0
  319. synth_ai/sdk/task/proxy.py +287 -0
  320. synth_ai/sdk/task/rubrics/__init__.py +55 -0
  321. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  322. synth_ai/sdk/task/rubrics.py +219 -0
  323. synth_ai/sdk/task/server.py +580 -0
  324. synth_ai/sdk/task/trace_correlation_helpers.py +506 -0
  325. synth_ai/sdk/task/tracing_utils.py +95 -0
  326. synth_ai/sdk/task/validators.py +456 -0
  327. synth_ai/sdk/tracing/__init__.py +39 -0
  328. synth_ai/sdk/training/__init__.py +102 -0
  329. synth_ai/sdk/usage/__init__.py +37 -0
  330. synth_ai/sdk/usage/client.py +171 -0
  331. synth_ai/sdk/usage/models.py +261 -0
  332. synth_ai/utils/__init__.py +213 -0
  333. synth_ai-0.4.1.dist-info/METADATA +195 -0
  334. synth_ai-0.4.1.dist-info/RECORD +379 -0
  335. synth_ai-0.4.1.dist-info/top_level.txt +1 -0
  336. examples/__init__.py +0 -16
  337. examples/analyze_semantic_words.sh +0 -17
  338. examples/crafter_debug_render.py +0 -186
  339. examples/dev/qwen3_32b_qlora_4xh100.toml +0 -40
  340. examples/multi_step/configs/README_verilog_rl.md +0 -77
  341. examples/multi_step/configs/VERILOG_REWARDS.md +0 -90
  342. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +0 -183
  343. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +0 -35
  344. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +0 -36
  345. examples/multi_step/configs/crafter_rl_outcome.toml +0 -74
  346. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +0 -187
  347. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +0 -83
  348. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +0 -78
  349. examples/multi_step/configs/crafter_synth_backend.md +0 -40
  350. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +0 -31
  351. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +0 -33
  352. examples/multi_step/configs/verilog_rl_lora.toml +0 -190
  353. examples/multi_step/crafter_rl_lora.md +0 -70
  354. examples/multi_step/judges/crafter_backend_judge.py +0 -220
  355. examples/multi_step/judges/verilog_backend_judge.py +0 -234
  356. examples/multi_step/readme.md +0 -48
  357. examples/multi_step/sse_metrics_streaming_notes.md +0 -357
  358. examples/multi_step/task_app_config_notes.md +0 -494
  359. examples/multi_step/verilog_rl_lora.md +0 -218
  360. examples/qwen_coder/README.md +0 -102
  361. examples/qwen_coder/_shared.py +0 -113
  362. examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
  363. examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
  364. examples/qwen_coder/configs/coder_lora_small.toml +0 -58
  365. examples/qwen_coder/generate_dataset.py +0 -98
  366. examples/qwen_coder/infer_ft_smoke.py +0 -65
  367. examples/qwen_coder/infer_prod_proxy.py +0 -73
  368. examples/qwen_coder/infer_via_synth.py +0 -87
  369. examples/qwen_coder/scripts/infer_coder.sh +0 -19
  370. examples/qwen_coder/scripts/train_coder_30b.sh +0 -22
  371. examples/qwen_coder/sft_full_17b.py +0 -103
  372. examples/qwen_coder/sft_lora_30b.py +0 -110
  373. examples/qwen_coder/subset_jsonl.py +0 -39
  374. examples/qwen_coder/todos.md +0 -38
  375. examples/qwen_coder/validate_jsonl.py +0 -60
  376. examples/rl/README.md +0 -169
  377. examples/rl/download_dataset.py +0 -80
  378. examples/run_crafter_demo.sh +0 -10
  379. examples/sft/README.md +0 -139
  380. examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
  381. examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
  382. examples/sft/evaluate.py +0 -119
  383. examples/sft/export_dataset.py +0 -117
  384. examples/sft/generate_traces.py +0 -164
  385. examples/swe/__init__.py +0 -12
  386. examples/swe/task_app/README.md +0 -105
  387. examples/swe/task_app/__init__.py +0 -2
  388. examples/swe/task_app/grpo_swe_mini.py +0 -601
  389. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
  390. examples/swe/task_app/hosted/README.md +0 -173
  391. examples/swe/task_app/hosted/__init__.py +0 -5
  392. examples/swe/task_app/hosted/branching.py +0 -143
  393. examples/swe/task_app/hosted/environment_routes.py +0 -1289
  394. examples/swe/task_app/hosted/envs/__init__.py +0 -1
  395. examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
  396. examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
  397. examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
  398. examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
  399. examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
  400. examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
  401. examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
  402. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
  403. examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
  404. examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
  405. examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
  406. examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
  407. examples/swe/task_app/hosted/hosted_app.py +0 -204
  408. examples/swe/task_app/hosted/inference/__init__.py +0 -5
  409. examples/swe/task_app/hosted/inference/openai_client.py +0 -618
  410. examples/swe/task_app/hosted/main.py +0 -100
  411. examples/swe/task_app/hosted/policy_routes.py +0 -1079
  412. examples/swe/task_app/hosted/registry.py +0 -195
  413. examples/swe/task_app/hosted/rollout.py +0 -1911
  414. examples/swe/task_app/hosted/storage/__init__.py +0 -5
  415. examples/swe/task_app/hosted/storage/volume.py +0 -211
  416. examples/swe/task_app/hosted/test_agents.py +0 -161
  417. examples/swe/task_app/hosted/test_service.py +0 -136
  418. examples/swe/task_app/hosted/utils.py +0 -62
  419. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +0 -258
  420. examples/task_apps/TESTING.md +0 -275
  421. examples/task_apps/crafter/CREATE_SFT_DATASET.md +0 -273
  422. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +0 -152
  423. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +0 -174
  424. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +0 -268
  425. examples/task_apps/crafter/QUERY_EXAMPLES.md +0 -203
  426. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +0 -316
  427. examples/task_apps/crafter/__init__.py +0 -0
  428. examples/task_apps/crafter/eval_image_only_gpt4o.toml +0 -28
  429. examples/task_apps/crafter/eval_text_only_groq_llama.toml +0 -36
  430. examples/task_apps/crafter/filter_sft_dataset.toml +0 -16
  431. examples/task_apps/crafter/task_app/README.md +0 -42
  432. examples/task_apps/crafter/task_app/__init__.py +0 -5
  433. examples/task_apps/crafter/task_app/grpo_crafter.py +0 -973
  434. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +0 -146
  435. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +0 -173
  436. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +0 -5
  437. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +0 -143
  438. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +0 -1226
  439. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +0 -1
  440. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
  441. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
  442. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -532
  443. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -547
  444. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -123
  445. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
  446. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
  447. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +0 -204
  448. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +0 -5
  449. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +0 -704
  450. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +0 -100
  451. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +0 -1152
  452. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +0 -195
  453. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +0 -2160
  454. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +0 -5
  455. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +0 -211
  456. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +0 -161
  457. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +0 -136
  458. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +0 -218
  459. examples/task_apps/dev/pokemon_emerald/__init__.py +0 -2
  460. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +0 -811
  461. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +0 -120
  462. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +0 -160
  463. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +0 -155
  464. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +0 -69
  465. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +0 -96
  466. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +0 -1502
  467. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +0 -4
  468. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +0 -68
  469. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +0 -216
  470. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +0 -35
  471. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +0 -631
  472. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +0 -1544
  473. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +0 -1428
  474. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +0 -4848
  475. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +0 -41
  476. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +0 -298
  477. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +0 -95
  478. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +0 -204
  479. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  480. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +0 -2152
  481. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +0 -429
  482. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +0 -155
  483. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +0 -78
  484. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  485. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +0 -122
  486. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +0 -76
  487. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +0 -413
  488. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +0 -204
  489. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +0 -133
  490. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +0 -229
  491. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +0 -300
  492. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +0 -205
  493. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +0 -200
  494. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +0 -284
  495. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +0 -468
  496. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +0 -575
  497. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +0 -311
  498. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +0 -259
  499. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  500. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +0 -372
  501. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +0 -296
  502. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +0 -275
  503. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +0 -22
  504. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +0 -44
  505. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +0 -514
  506. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +0 -415
  507. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +0 -1763
  508. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +0 -33
  509. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +0 -106
  510. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +0 -334
  511. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +0 -1020
  512. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +0 -188
  513. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +0 -1481
  514. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +0 -862
  515. examples/task_apps/dev/pokemon_emerald/modal_app.py +0 -114
  516. examples/task_apps/dev/pokemon_emerald/task_app/README.md +0 -81
  517. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +0 -6
  518. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +0 -685
  519. examples/task_apps/enron/__init__.py +0 -1
  520. examples/task_apps/enron/eval_groq_qwen32.toml +0 -16
  521. examples/task_apps/enron/filter_sft.toml +0 -5
  522. examples/task_apps/enron/task_app/README.md +0 -14
  523. examples/task_apps/enron/task_app/__init__.py +0 -1
  524. examples/task_apps/enron/task_app/grpo_enron.py +0 -906
  525. examples/task_apps/enron/task_app/grpo_enron_task_app.py +0 -146
  526. examples/task_apps/enron/tests/__init__.py +0 -4
  527. examples/task_apps/enron/tests/conftest.py +0 -115
  528. examples/task_apps/enron/tests/integration/__init__.py +0 -4
  529. examples/task_apps/enron/tests/integration/test_enron_eval.py +0 -179
  530. examples/task_apps/enron/tests/integration/test_enron_rollout.py +0 -135
  531. examples/task_apps/enron/tests/unit/__init__.py +0 -4
  532. examples/task_apps/enron/tests/unit/test_enron_environment.py +0 -126
  533. examples/task_apps/math/README.md +0 -22
  534. examples/task_apps/math/__init__.py +0 -0
  535. examples/task_apps/math/math_single_step.py +0 -1000
  536. examples/task_apps/math/math_task_app.py +0 -115
  537. examples/task_apps/pokemon_battle/__init__.py +0 -2
  538. examples/task_apps/pokemon_battle/modal_app.py +0 -104
  539. examples/task_apps/pokemon_battle/task_app/README.md +0 -68
  540. examples/task_apps/pokemon_battle/task_app/__init__.py +0 -6
  541. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +0 -932
  542. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +0 -283
  543. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +0 -155
  544. examples/task_apps/pokemon_red/README.md +0 -357
  545. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +0 -415
  546. examples/task_apps/pokemon_red/__init__.py +0 -3
  547. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +0 -29
  548. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +0 -225
  549. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +0 -75
  550. examples/task_apps/pokemon_red/task_app.py +0 -799
  551. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +0 -193
  552. examples/task_apps/sokoban/README.md +0 -307
  553. examples/task_apps/sokoban/__init__.py +0 -3
  554. examples/task_apps/sokoban/eval_groq_qwen32.toml +0 -16
  555. examples/task_apps/sokoban/eval_openai_gpt5.toml +0 -16
  556. examples/task_apps/sokoban/filter_sft.toml +0 -5
  557. examples/task_apps/sokoban/task_app.py +0 -1058
  558. examples/task_apps/sokoban/tests/__init__.py +0 -4
  559. examples/task_apps/sokoban/tests/conftest.py +0 -113
  560. examples/task_apps/sokoban/tests/integration/__init__.py +0 -4
  561. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +0 -57
  562. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +0 -198
  563. examples/task_apps/sokoban/tests/unit/__init__.py +0 -4
  564. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +0 -114
  565. examples/task_apps/verilog/__init__.py +0 -1
  566. examples/task_apps/verilog/eval_groq_qwen32b.toml +0 -24
  567. examples/task_apps/verilog/filter_sft.toml +0 -5
  568. examples/task_apps/verilog/task_app/README.md +0 -12
  569. examples/task_apps/verilog/task_app/__init__.py +0 -1
  570. examples/task_apps/verilog/task_app/grpo_verilog.py +0 -1166
  571. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +0 -145
  572. examples/task_apps/verilog/tests/__init__.py +0 -4
  573. examples/task_apps/verilog/tests/conftest.py +0 -115
  574. examples/task_apps/verilog/tests/integration/__init__.py +0 -4
  575. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +0 -181
  576. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +0 -55
  577. examples/task_apps/verilog/tests/unit/__init__.py +0 -4
  578. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +0 -118
  579. examples/vlm/PROPOSAL.md +0 -53
  580. examples/vlm/README.md +0 -68
  581. examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
  582. examples/vlm/crafter_image_only_agent.py +0 -207
  583. examples/vlm/crafter_openai_vlm_agent.py +0 -277
  584. examples/vlm/filter_image_rows.py +0 -63
  585. examples/vlm/run_crafter_vlm_benchmark.py +0 -316
  586. examples/warming_up_to_rl/analyze_trace_db.py +0 -422
  587. examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
  588. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
  589. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
  590. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
  591. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
  592. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +0 -35
  593. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +0 -26
  594. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +0 -36
  595. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +0 -32
  596. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
  597. examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
  598. examples/warming_up_to_rl/export_trace_sft.py +0 -723
  599. examples/warming_up_to_rl/groq_test.py +0 -97
  600. examples/warming_up_to_rl/manage_secrets.py +0 -131
  601. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  602. examples/warming_up_to_rl/old/notes.md +0 -73
  603. examples/warming_up_to_rl/readme.md +0 -179
  604. examples/warming_up_to_rl/run_eval.py +0 -736
  605. examples/warming_up_to_rl/run_fft_and_save.py +0 -380
  606. examples/warming_up_to_rl/run_local_rollout.py +0 -239
  607. examples/warming_up_to_rl/run_local_rollout_modal.py +0 -248
  608. examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -405
  609. examples/warming_up_to_rl/run_local_rollout_traced.py +0 -477
  610. examples/warming_up_to_rl/run_rl_and_save.py +0 -124
  611. examples/warming_up_to_rl/run_rollout_remote.py +0 -156
  612. examples/workflows/__init__.py +0 -0
  613. examples/workflows/math_rl/__init__.py +0 -0
  614. examples/workflows/math_rl/configs/eval_base_qwen.toml +0 -15
  615. examples/workflows/math_rl/configs/eval_rl_qwen.toml +0 -11
  616. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +0 -35
  617. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +0 -74
  618. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +0 -35
  619. examples/workflows/math_rl/download_dataset.py +0 -80
  620. examples/workflows/math_rl/run_eval.py +0 -436
  621. examples/workflows/math_rl/run_rl_and_save.py +0 -111
  622. synth_ai/api/models/supported.py +0 -377
  623. synth_ai/api/train/__init__.py +0 -5
  624. synth_ai/api/train/builders.py +0 -351
  625. synth_ai/api/train/cli.py +0 -635
  626. synth_ai/api/train/config_finder.py +0 -228
  627. synth_ai/api/train/configs/__init__.py +0 -44
  628. synth_ai/api/train/configs/rl.py +0 -134
  629. synth_ai/api/train/configs/sft.py +0 -95
  630. synth_ai/api/train/configs/shared.py +0 -24
  631. synth_ai/api/train/env_resolver.py +0 -349
  632. synth_ai/api/train/pollers.py +0 -75
  633. synth_ai/api/train/supported_algos.py +0 -147
  634. synth_ai/api/train/task_app.py +0 -195
  635. synth_ai/api/train/utils.py +0 -225
  636. synth_ai/cli/_modal_wrapper.py +0 -29
  637. synth_ai/cli/_storage.py +0 -20
  638. synth_ai/cli/_typer_patch.py +0 -49
  639. synth_ai/cli/_validate_task_app.py +0 -11
  640. synth_ai/cli/balance.py +0 -216
  641. synth_ai/cli/calc.py +0 -84
  642. synth_ai/cli/demo.py +0 -165
  643. synth_ai/cli/legacy_root_backup.py +0 -468
  644. synth_ai/cli/man.py +0 -106
  645. synth_ai/cli/recent.py +0 -132
  646. synth_ai/cli/rl_demo.py +0 -254
  647. synth_ai/cli/status.py +0 -134
  648. synth_ai/cli/task_apps.py +0 -4523
  649. synth_ai/cli/traces.py +0 -164
  650. synth_ai/cli/tui.py +0 -57
  651. synth_ai/cli/watch.py +0 -506
  652. synth_ai/compound/cais.py +0 -0
  653. synth_ai/config/base_url.py +0 -107
  654. synth_ai/core/experiment.py +0 -13
  655. synth_ai/core/system.py +0 -15
  656. synth_ai/demo_registry.py +0 -295
  657. synth_ai/demos/core/__init__.py +0 -1
  658. synth_ai/demos/core/cli.py +0 -1718
  659. synth_ai/demos/demo_task_apps/core.py +0 -440
  660. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -184
  661. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  662. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
  663. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
  664. synth_ai/environments/__init__.py +0 -31
  665. synth_ai/environments/environment/__init__.py +0 -1
  666. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  667. synth_ai/environments/environment/artifacts/base.py +0 -52
  668. synth_ai/environments/environment/core.py +0 -67
  669. synth_ai/environments/environment/db/__init__.py +0 -1
  670. synth_ai/environments/environment/db/sqlite.py +0 -45
  671. synth_ai/environments/environment/registry.py +0 -233
  672. synth_ai/environments/environment/resources/sqlite.py +0 -45
  673. synth_ai/environments/environment/results.py +0 -1
  674. synth_ai/environments/environment/rewards/__init__.py +0 -1
  675. synth_ai/environments/environment/rewards/core.py +0 -29
  676. synth_ai/environments/environment/shared_engine.py +0 -26
  677. synth_ai/environments/environment/tools/__init__.py +0 -200
  678. synth_ai/environments/examples/__init__.py +0 -1
  679. synth_ai/environments/examples/bandit/__init__.py +0 -33
  680. synth_ai/environments/examples/bandit/engine.py +0 -302
  681. synth_ai/environments/examples/bandit/environment.py +0 -194
  682. synth_ai/environments/examples/bandit/taskset.py +0 -200
  683. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  684. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  685. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  686. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  687. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  688. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  689. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  690. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  691. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  692. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  693. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  694. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  695. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  696. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  697. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  698. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  699. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  700. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  701. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  702. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  703. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  704. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  705. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  706. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  707. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  708. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  709. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  710. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  711. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  712. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  713. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  714. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  715. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  716. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  717. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  718. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  719. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  720. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  721. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  722. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  723. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  724. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  725. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  726. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  727. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  728. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  729. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  730. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  731. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  732. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  733. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  734. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  735. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  736. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  737. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  738. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  739. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  740. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  741. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  742. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  743. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  744. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  745. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  746. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  747. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  748. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  749. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  750. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  751. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  752. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  753. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  754. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  755. synth_ai/environments/examples/crafter_classic/environment.py +0 -495
  756. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  757. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  758. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  759. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  760. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  761. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  762. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  763. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  764. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  765. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  766. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  767. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  768. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  769. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  770. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  771. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  772. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  773. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  774. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  775. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  776. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  777. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  778. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  779. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  780. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  781. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  782. synth_ai/environments/examples/enron/engine.py +0 -300
  783. synth_ai/environments/examples/enron/environment.py +0 -234
  784. synth_ai/environments/examples/enron/taskset.py +0 -112
  785. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  786. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  787. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  788. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  789. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  790. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  791. synth_ai/environments/examples/minigrid/engine.py +0 -589
  792. synth_ai/environments/examples/minigrid/environment.py +0 -274
  793. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  794. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  795. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  796. synth_ai/environments/examples/nethack/__init__.py +0 -7
  797. synth_ai/environments/examples/nethack/achievements.py +0 -337
  798. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  799. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  800. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  801. synth_ai/environments/examples/nethack/engine.py +0 -739
  802. synth_ai/environments/examples/nethack/environment.py +0 -256
  803. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  804. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  805. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  806. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  807. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  808. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  809. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  810. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  811. synth_ai/environments/examples/nethack/taskset.py +0 -323
  812. synth_ai/environments/examples/red/__init__.py +0 -7
  813. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  814. synth_ai/environments/examples/red/config_logging.py +0 -110
  815. synth_ai/environments/examples/red/engine.py +0 -721
  816. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  817. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -35
  818. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  819. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  820. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  821. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  822. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  823. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  824. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  825. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  826. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  827. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +0 -477
  828. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  829. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  830. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  831. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  832. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  833. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -172
  834. synth_ai/environments/examples/red/environment.py +0 -298
  835. synth_ai/environments/examples/red/taskset.py +0 -79
  836. synth_ai/environments/examples/red/units/__init__.py +0 -1
  837. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  838. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  839. synth_ai/environments/examples/sokoban/engine.py +0 -678
  840. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  841. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  842. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  843. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  844. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  845. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  846. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  847. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  848. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  849. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  850. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  851. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  852. synth_ai/environments/examples/sokoban/environment.py +0 -229
  853. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  854. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  855. synth_ai/environments/examples/sokoban/taskset.py +0 -544
  856. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  857. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  858. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  859. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  860. synth_ai/environments/examples/verilog/__init__.py +0 -10
  861. synth_ai/environments/examples/verilog/engine.py +0 -421
  862. synth_ai/environments/examples/verilog/environment.py +0 -350
  863. synth_ai/environments/examples/verilog/taskset.py +0 -420
  864. synth_ai/environments/examples/wordle/__init__.py +0 -29
  865. synth_ai/environments/examples/wordle/engine.py +0 -398
  866. synth_ai/environments/examples/wordle/environment.py +0 -159
  867. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  868. synth_ai/environments/examples/wordle/taskset.py +0 -230
  869. synth_ai/environments/reproducibility/core.py +0 -42
  870. synth_ai/environments/reproducibility/helpers.py +0 -0
  871. synth_ai/environments/reproducibility/tree.py +0 -363
  872. synth_ai/environments/service/app.py +0 -97
  873. synth_ai/environments/service/core_routes.py +0 -1021
  874. synth_ai/environments/service/external_registry.py +0 -56
  875. synth_ai/environments/service/registry.py +0 -9
  876. synth_ai/environments/stateful/__init__.py +0 -1
  877. synth_ai/environments/stateful/core.py +0 -163
  878. synth_ai/environments/stateful/engine.py +0 -21
  879. synth_ai/environments/stateful/state.py +0 -7
  880. synth_ai/environments/tasks/api.py +0 -19
  881. synth_ai/environments/tasks/core.py +0 -81
  882. synth_ai/environments/tasks/filters.py +0 -40
  883. synth_ai/environments/tasks/utils.py +0 -90
  884. synth_ai/environments/v0_observability/history.py +0 -3
  885. synth_ai/environments/v0_observability/log.py +0 -2
  886. synth_ai/evals/__init__.py +0 -15
  887. synth_ai/evals/base.py +0 -13
  888. synth_ai/evals/client.py +0 -82
  889. synth_ai/handshake.py +0 -109
  890. synth_ai/http.py +0 -26
  891. synth_ai/http_client.py +0 -136
  892. synth_ai/inference/__init__.py +0 -5
  893. synth_ai/inference/client.py +0 -34
  894. synth_ai/jobs/client.py +0 -295
  895. synth_ai/judge_schemas.py +0 -127
  896. synth_ai/learning/__init__.py +0 -59
  897. synth_ai/learning/client.py +0 -241
  898. synth_ai/learning/ft_client.py +0 -7
  899. synth_ai/learning/health.py +0 -49
  900. synth_ai/learning/jobs.py +0 -201
  901. synth_ai/learning/rl/client.py +0 -267
  902. synth_ai/learning/rl/contracts.py +0 -27
  903. synth_ai/learning/rl/env_keys.py +0 -166
  904. synth_ai/learning/rl/secrets.py +0 -13
  905. synth_ai/learning/sft/client.py +0 -68
  906. synth_ai/learning/sft/config.py +0 -270
  907. synth_ai/learning/sft/data.py +0 -295
  908. synth_ai/learning/validators.py +0 -49
  909. synth_ai/lm/__init__.py +0 -25
  910. synth_ai/task/__init__.py +0 -121
  911. synth_ai/task/apps/__init__.py +0 -129
  912. synth_ai/task/config.py +0 -257
  913. synth_ai/task/contracts.py +0 -236
  914. synth_ai/task/datasets.py +0 -108
  915. synth_ai/task/proxy.py +0 -251
  916. synth_ai/task/rubrics/__init__.py +0 -56
  917. synth_ai/task/rubrics/loaders.py +0 -152
  918. synth_ai/task/server.py +0 -432
  919. synth_ai/task/trace_correlation_helpers.py +0 -315
  920. synth_ai/task/tracing_utils.py +0 -84
  921. synth_ai/task/validators.py +0 -418
  922. synth_ai/tracing_v3/__init__.py +0 -97
  923. synth_ai/tracing_v3/abstractions.py +0 -302
  924. synth_ai/tracing_v3/config.py +0 -84
  925. synth_ai/tracing_v3/db_config.py +0 -194
  926. synth_ai/tracing_v3/decorators.py +0 -398
  927. synth_ai/tracing_v3/llm_call_record_helpers.py +0 -391
  928. synth_ai/tracing_v3/migration_helper.py +0 -120
  929. synth_ai/tracing_v3/session_tracer.py +0 -540
  930. synth_ai/tracing_v3/storage/base.py +0 -210
  931. synth_ai/tracing_v3/storage/config.py +0 -75
  932. synth_ai/tracing_v3/storage/factory.py +0 -39
  933. synth_ai/tracing_v3/trace_utils.py +0 -317
  934. synth_ai/tracing_v3/turso/daemon.py +0 -151
  935. synth_ai/tracing_v3/turso/models.py +0 -469
  936. synth_ai/tracing_v3/turso/native_manager.py +0 -1209
  937. synth_ai/tracing_v3/utils.py +0 -108
  938. synth_ai/tui/__init__.py +0 -5
  939. synth_ai/tui/__main__.py +0 -13
  940. synth_ai/tui/cli/__init__.py +0 -1
  941. synth_ai/tui/cli/query_experiments.py +0 -164
  942. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  943. synth_ai/tui/dashboard.py +0 -906
  944. synth_ai/v0/api/__init__.py +0 -8
  945. synth_ai/v0/api/models/__init__.py +0 -8
  946. synth_ai/v0/api/models/supported.py +0 -8
  947. synth_ai/v0/config/__init__.py +0 -15
  948. synth_ai/v0/config/base_url.py +0 -12
  949. synth_ai/v0/lm/__init__.py +0 -51
  950. synth_ai/v0/lm/caching/__init__.py +0 -0
  951. synth_ai/v0/lm/caching/constants.py +0 -6
  952. synth_ai/v0/lm/caching/dbs.py +0 -0
  953. synth_ai/v0/lm/caching/ephemeral.py +0 -100
  954. synth_ai/v0/lm/caching/handler.py +0 -137
  955. synth_ai/v0/lm/caching/initialize.py +0 -11
  956. synth_ai/v0/lm/caching/persistent.py +0 -114
  957. synth_ai/v0/lm/config.py +0 -115
  958. synth_ai/v0/lm/constants.py +0 -32
  959. synth_ai/v0/lm/core/__init__.py +0 -8
  960. synth_ai/v0/lm/core/all.py +0 -73
  961. synth_ai/v0/lm/core/exceptions.py +0 -5
  962. synth_ai/v0/lm/core/main.py +0 -331
  963. synth_ai/v0/lm/core/main_v3.py +0 -594
  964. synth_ai/v0/lm/core/synth_models.py +0 -35
  965. synth_ai/v0/lm/core/vendor_clients.py +0 -190
  966. synth_ai/v0/lm/cost/__init__.py +0 -0
  967. synth_ai/v0/lm/cost/monitor.py +0 -1
  968. synth_ai/v0/lm/cost/statefulness.py +0 -1
  969. synth_ai/v0/lm/injection.py +0 -80
  970. synth_ai/v0/lm/overrides.py +0 -206
  971. synth_ai/v0/lm/provider_support/__init__.py +0 -8
  972. synth_ai/v0/lm/provider_support/anthropic.py +0 -972
  973. synth_ai/v0/lm/provider_support/openai.py +0 -1139
  974. synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
  975. synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
  976. synth_ai/v0/lm/structured_outputs/handler.py +0 -440
  977. synth_ai/v0/lm/structured_outputs/inject.py +0 -297
  978. synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
  979. synth_ai/v0/lm/tools/__init__.py +0 -3
  980. synth_ai/v0/lm/tools/base.py +0 -172
  981. synth_ai/v0/lm/unified_interface.py +0 -202
  982. synth_ai/v0/lm/vendors/__init__.py +0 -0
  983. synth_ai/v0/lm/vendors/base.py +0 -81
  984. synth_ai/v0/lm/vendors/core/__init__.py +0 -0
  985. synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
  986. synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
  987. synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
  988. synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
  989. synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
  990. synth_ai/v0/lm/vendors/local/__init__.py +0 -0
  991. synth_ai/v0/lm/vendors/local/ollama.py +0 -0
  992. synth_ai/v0/lm/vendors/openai_standard.py +0 -782
  993. synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
  994. synth_ai/v0/lm/vendors/retries.py +0 -22
  995. synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
  996. synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
  997. synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
  998. synth_ai/v0/lm/vendors/supported/grok.py +0 -75
  999. synth_ai/v0/lm/vendors/supported/groq.py +0 -16
  1000. synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
  1001. synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
  1002. synth_ai/v0/lm/vendors/supported/together.py +0 -11
  1003. synth_ai/v0/lm/vendors/synth_client.py +0 -835
  1004. synth_ai/v0/lm/warmup.py +0 -186
  1005. synth_ai/v0/tracing/__init__.py +0 -0
  1006. synth_ai/v0/tracing/abstractions.py +0 -224
  1007. synth_ai/v0/tracing/base_client.py +0 -91
  1008. synth_ai/v0/tracing/client_manager.py +0 -131
  1009. synth_ai/v0/tracing/config.py +0 -142
  1010. synth_ai/v0/tracing/context.py +0 -146
  1011. synth_ai/v0/tracing/decorators.py +0 -682
  1012. synth_ai/v0/tracing/events/__init__.py +0 -0
  1013. synth_ai/v0/tracing/events/manage.py +0 -147
  1014. synth_ai/v0/tracing/events/scope.py +0 -86
  1015. synth_ai/v0/tracing/events/store.py +0 -228
  1016. synth_ai/v0/tracing/immediate_client.py +0 -151
  1017. synth_ai/v0/tracing/local.py +0 -18
  1018. synth_ai/v0/tracing/log_client_base.py +0 -73
  1019. synth_ai/v0/tracing/retry_queue.py +0 -186
  1020. synth_ai/v0/tracing/trackers.py +0 -515
  1021. synth_ai/v0/tracing/upload.py +0 -409
  1022. synth_ai/v0/tracing/utils.py +0 -9
  1023. synth_ai/v0/tracing_v1/__init__.py +0 -16
  1024. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  1025. synth_ai/v0/tracing_v1/base_client.py +0 -91
  1026. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  1027. synth_ai/v0/tracing_v1/config.py +0 -142
  1028. synth_ai/v0/tracing_v1/context.py +0 -146
  1029. synth_ai/v0/tracing_v1/decorators.py +0 -703
  1030. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  1031. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  1032. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  1033. synth_ai/v0/tracing_v1/events/store.py +0 -228
  1034. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  1035. synth_ai/v0/tracing_v1/local.py +0 -18
  1036. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  1037. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  1038. synth_ai/v0/tracing_v1/trackers.py +0 -515
  1039. synth_ai/v0/tracing_v1/upload.py +0 -527
  1040. synth_ai/v0/tracing_v1/utils.py +0 -9
  1041. synth_ai/v0/tracing_v3/__init__.py +0 -10
  1042. synth_ai/v0/tracing_v3/abstractions.py +0 -3
  1043. synth_ai/v0/tracing_v3/decorators.py +0 -3
  1044. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
  1045. synth_ai/v0/tracing_v3/session_tracer.py +0 -3
  1046. synth_ai-0.2.14.dist-info/METADATA +0 -139
  1047. synth_ai-0.2.14.dist-info/RECORD +0 -762
  1048. synth_ai-0.2.14.dist-info/top_level.txt +0 -2
  1049. /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
  1050. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
  1051. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
  1052. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
  1053. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  1054. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
  1055. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
  1056. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/config.toml +0 -0
  1057. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
  1058. {examples/task_apps → synth_ai/core/apps}/__init__.py +0 -0
  1059. /synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +0 -0
  1060. /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
  1061. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  1062. /synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +0 -0
  1063. /synth_ai/{tracing_v3 → core/tracing_v3}/serialization.py +0 -0
  1064. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  1065. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  1066. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  1067. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +0 -0
  1068. /synth_ai/{tracing_v3 → core/tracing_v3}/turso/__init__.py +0 -0
  1069. /synth_ai/{evals → sdk/judging}/types.py +0 -0
  1070. /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
  1071. /synth_ai/{learning → sdk/learning}/config.py +0 -0
  1072. /synth_ai/{learning → sdk/learning}/constants.py +0 -0
  1073. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  1074. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  1075. /synth_ai/{learning → sdk/learning}/rl/__init__.py +0 -0
  1076. /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
  1077. /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
  1078. /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
  1079. /synth_ai/{learning → sdk/learning}/sse.py +0 -0
  1080. /synth_ai/{task → sdk/task}/auth.py +0 -0
  1081. /synth_ai/{task → sdk/task}/client.py +0 -0
  1082. /synth_ai/{task → sdk/task}/errors.py +0 -0
  1083. /synth_ai/{task → sdk/task}/health.py +0 -0
  1084. /synth_ai/{task → sdk/task}/json.py +0 -0
  1085. /synth_ai/{task → sdk/task}/rubrics/models.py +0 -0
  1086. /synth_ai/{task → sdk/task}/rubrics/scoring.py +0 -0
  1087. /synth_ai/{task → sdk/task}/rubrics/strict.py +0 -0
  1088. /synth_ai/{task → sdk/task}/vendors.py +0 -0
  1089. {synth_ai-0.2.14.dist-info → synth_ai-0.4.1.dist-info}/WHEEL +0 -0
  1090. {synth_ai-0.2.14.dist-info → synth_ai-0.4.1.dist-info}/entry_points.txt +0 -0
  1091. {synth_ai-0.2.14.dist-info → synth_ai-0.4.1.dist-info}/licenses/LICENSE +0 -0
synth_ai/cli/task_apps.py DELETED
@@ -1,4523 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import argparse
4
- import ast
5
- import asyncio
6
- import contextlib
7
- import functools
8
- import hashlib
9
- import importlib
10
- import importlib.util
11
- import inspect
12
- import json
13
- import os
14
- import shlex
15
- import shutil
16
- import signal
17
- import sqlite3
18
- import subprocess
19
- import sys
20
- import tempfile
21
- import textwrap
22
- import time
23
- import types
24
- import uuid
25
- from collections.abc import Callable, Iterable, Iterator, Sequence
26
- from dataclasses import dataclass
27
- from datetime import datetime, timezone
28
- from pathlib import Path
29
- from typing import Any, Optional, cast
30
-
31
- try: # Python 3.11+
32
- import tomllib as _toml
33
- except Exception: # pragma: no cover - fallback
34
- _toml = None # type: ignore
35
-
36
- import click
37
- from click.exceptions import Abort
38
-
39
- # Tracing imports - make conditional for optional dependencies
40
- try:
41
- from synth_ai.tracing_v3 import ( # type: ignore[import-untyped]
42
- BaseEvent,
43
- EnvironmentEvent,
44
- RuntimeEvent,
45
- SessionEventMarkovBlanketMessage,
46
- SessionMessageContent,
47
- SessionTimeStep,
48
- SessionTracer,
49
- TimeRecord,
50
- )
51
- from synth_ai.tracing_v3 import ( # type: ignore[import-untyped]
52
- SessionTrace as V3SessionTrace,
53
- )
54
- _TRACING_AVAILABLE = True
55
- except (ImportError, ModuleNotFoundError, TypeError):
56
- # Tracing system not available (missing optional dependencies)
57
- BaseEvent = EnvironmentEvent = RuntimeEvent = None # type: ignore
58
- SessionEventMarkovBlanketMessage = SessionMessageContent = None # type: ignore
59
- SessionTimeStep = SessionTracer = TimeRecord = None # type: ignore
60
- V3SessionTrace = None # type: ignore
61
- _TRACING_AVAILABLE = False
62
-
63
- # ---------------------------------------------------------------------------
64
- # Dynamic imports to avoid hard dependencies during type checking.
65
- # ---------------------------------------------------------------------------
66
- ModalDeploymentConfigType = TaskAppConfigType = TaskAppEntryType = Any
67
-
68
- try: # Resolve base URL defaults lazily
69
- _config_module = cast(
70
- Any, importlib.import_module("synth_ai.config.base_url")
71
- )
72
- PROD_BASE_URL_DEFAULT = cast(str, _config_module.PROD_BASE_URL_DEFAULT)
73
- except Exception: # pragma: no cover - fallback
74
- PROD_BASE_URL_DEFAULT = "https://agent-learning.onrender.com"
75
-
76
- try:
77
- _task_apps_module = cast(Any, importlib.import_module("synth_ai.task.apps"))
78
- ModalDeploymentConfig = cast(
79
- type[ModalDeploymentConfigType], _task_apps_module.ModalDeploymentConfig
80
- )
81
- TaskAppConfig = cast(type[TaskAppConfigType], _task_apps_module.TaskAppConfig)
82
- TaskAppEntry = cast(type[TaskAppEntryType], _task_apps_module.TaskAppEntry)
83
- registry = _task_apps_module.registry
84
- except Exception as exc: # pragma: no cover - critical dependency
85
- raise RuntimeError("Unable to load task app registry") from exc
86
-
87
- try:
88
- _task_server_module = cast(Any, importlib.import_module("synth_ai.task.server"))
89
- create_task_app = cast(Callable[..., Any], _task_server_module.create_task_app)
90
- run_task_app = cast(Callable[..., Any], _task_server_module.run_task_app)
91
- except Exception as exc: # pragma: no cover - critical dependency
92
- raise RuntimeError("Unable to load task app server utilities") from exc
93
-
94
-
95
- def _load_demo_directory() -> Optional[Path]:
96
- """Return the demo task apps directory if available."""
97
-
98
- try:
99
- module = cast(
100
- Any, importlib.import_module("synth_ai.demos.demo_task_apps.core")
101
- )
102
- loader = cast(Callable[[], Optional[str | Path]], module.load_demo_dir)
103
- demo_dir = loader()
104
- if isinstance(demo_dir, str | Path):
105
- demo_path = Path(demo_dir)
106
- if demo_path.exists():
107
- return demo_path.resolve()
108
- except Exception:
109
- return None
110
- return None
111
-
112
-
113
- def _maybe_import(name: str) -> Any:
114
- """Safely import a module by name and return it, or None on failure."""
115
-
116
- try:
117
- return importlib.import_module(name)
118
- except Exception:
119
- return None
120
-
121
- REPO_ROOT = Path(__file__).resolve().parents[2]
122
-
123
# Directory names that disqualify a path from scanning when they appear
# anywhere among its components.
DEFAULT_IGNORE_DIRS = {
    ".git",
    ".mypy_cache",
    ".pytest_cache",
    ".venv",
    "__pycache__",
    "build",
    "dist",
    "node_modules",
    "venv",
}

# Locations, relative to the current working directory, that are added to the
# search roots.
DEFAULT_SEARCH_RELATIVE = tuple(Path(name) for name in (".", "examples", "synth_ai"))
140
-
141
-
142
- def _pearson(xs: Sequence[float], ys: Sequence[float]) -> Optional[float]:
143
- if len(xs) != len(ys) or len(xs) < 2:
144
- return None
145
- mean_x = sum(xs) / len(xs)
146
- mean_y = sum(ys) / len(ys)
147
- num = 0.0
148
- denom_x = 0.0
149
- denom_y = 0.0
150
- for x, y in zip(xs, ys, strict=False):
151
- dx = x - mean_x
152
- dy = y - mean_y
153
- num += dx * dy
154
- denom_x += dx * dx
155
- denom_y += dy * dy
156
- if denom_x <= 0 or denom_y <= 0:
157
- return None
158
- return num / (denom_x ** 0.5 * denom_y ** 0.5)
159
-
160
-
161
@dataclass
class AppChoice:
    """One selectable task-app candidate and how to load its entry."""

    app_id: str
    label: str
    path: Path
    source: str
    description: Optional[str] = None
    aliases: tuple[str, ...] = ()
    # Already-loaded entry, if any; filled in by ``ensure_entry``.
    entry: TaskAppEntryType | None = None
    # Zero-argument callable used to load ``entry`` on demand.
    entry_loader: Callable[[], TaskAppEntryType] | None = None
    modal_script: Path | None = None
    lineno: int | None = None

    def ensure_entry(self) -> TaskAppEntryType:
        """Return the entry, loading and caching it on first use.

        Raises:
            click.ClickException: if no entry is cached and no loader is set.
        """
        if self.entry is None:
            if self.entry_loader is None:
                raise click.ClickException(f"Unable to load task app '{self.app_id}' from {self.path}")
            self.entry = self.entry_loader()
        return self.entry
182
-
183
-
184
@dataclass
class JudgeSpec:
    """A named callable bundled with keyword arguments for it."""

    name: str
    fn: Callable[..., Any]
    kwargs: dict[str, Any]
189
-
190
-
191
- def _parse_datetime_for_trace(value: Any) -> Optional[datetime]:
192
- if isinstance(value, datetime):
193
- return value if value.tzinfo else value.replace(tzinfo=timezone.utc)
194
- if isinstance(value, str):
195
- value = value.replace("Z", "+00:00")
196
- try:
197
- dt = datetime.fromisoformat(value)
198
- except ValueError:
199
- try:
200
- dt = datetime.fromtimestamp(float(value), tz=timezone.utc)
201
- except Exception:
202
- return None
203
- return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
204
- if isinstance(value, int | float):
205
- return datetime.fromtimestamp(float(value), tz=timezone.utc)
206
- return None
207
-
208
-
209
def _time_record_from_dict(payload: dict[str, Any] | None) -> TimeRecord:
    """Build a ``TimeRecord`` from a (possibly missing) mapping.

    ``event_time`` is kept when it is already an int/float, otherwise it is
    converted with ``float()``; if that fails the current time is used.
    ``message_time`` is converted with ``int()`` and becomes ``None`` when it
    is absent or not convertible.
    """
    data = payload or {}

    raw_event_time = data.get("event_time")
    if isinstance(raw_event_time, int | float):
        event_time = raw_event_time
    else:
        try:
            event_time = float(raw_event_time)
        except Exception:
            event_time = float(time.time())

    message_time = None
    raw_message_time = data.get("message_time")
    if raw_message_time is not None:
        try:
            message_time = int(raw_message_time)
        except Exception:
            message_time = None

    return TimeRecord(event_time=event_time, message_time=message_time)
224
-
225
-
226
def _event_from_dict(payload: dict[str, Any]) -> BaseEvent:
    """Rebuild an event object from its dictionary form.

    The concrete class is chosen from the keys present: ``actions`` selects
    ``RuntimeEvent``; any of ``reward`` / ``terminated`` / ``truncated``
    selects ``EnvironmentEvent``; otherwise a plain ``BaseEvent`` is built.
    """
    common = {
        "system_instance_id": payload.get("system_instance_id", ""),
        "time_record": _time_record_from_dict(payload.get("time_record")),
        "metadata": payload.get("metadata") or {},
        "event_metadata": payload.get("event_metadata"),
    }

    if "actions" in payload:
        return RuntimeEvent(actions=payload.get("actions") or [], **common)

    environment_keys = ("reward", "terminated", "truncated")
    if not any(key in payload for key in environment_keys):
        return BaseEvent(**common)

    return EnvironmentEvent(
        reward=float(payload.get("reward", 0.0) or 0.0),
        terminated=bool(payload.get("terminated", False)),
        truncated=bool(payload.get("truncated", False)),
        system_state_before=payload.get("system_state_before"),
        system_state_after=payload.get("system_state_after"),
        **common,
    )
245
-
246
-
247
def _markov_message_from_dict(payload: dict[str, Any]) -> SessionEventMarkovBlanketMessage:
    """Rebuild a Markov-blanket message from its dictionary form.

    The lower-cased ``message_type`` is normalised: ``observation`` becomes
    ``system``, ``action`` becomes ``assistant``, the known role names pass
    through unchanged, and anything else falls back to ``system``.
    """
    renamed = {"observation": "system", "action": "assistant"}
    passthrough = {"user", "assistant", "system", "tool_use", "tool_result"}

    raw_type = (payload.get("message_type") or "").lower()
    if raw_type in renamed:
        normalized_type = renamed[raw_type]
    elif raw_type in passthrough:
        normalized_type = raw_type
    else:
        normalized_type = "system"

    body = payload.get("content") or {}
    content = SessionMessageContent(
        text=body.get("text"),
        json_payload=body.get("json_payload"),
    )

    return SessionEventMarkovBlanketMessage(
        content=content,
        message_type=normalized_type,
        time_record=_time_record_from_dict(payload.get("time_record")),
        metadata=payload.get("metadata") or {},
    )
269
-
270
-
271
def _step_from_dict(payload: dict[str, Any]) -> SessionTimeStep:
    """Rebuild a ``SessionTimeStep`` from its dictionary form.

    Non-dict items under ``events`` and ``markov_blanket_messages`` are
    skipped. A missing or unparseable ``timestamp`` defaults to the current
    UTC time; ``completed_at`` stays ``None`` when it cannot be parsed.
    """
    events = []
    for raw_event in payload.get("events", []):
        if isinstance(raw_event, dict):
            events.append(_event_from_dict(raw_event))

    messages = []
    for raw_message in payload.get("markov_blanket_messages", []):
        if isinstance(raw_message, dict):
            messages.append(_markov_message_from_dict(raw_message))

    started = _parse_datetime_for_trace(payload.get("timestamp"))
    if not started:
        started = datetime.now(timezone.utc)
    finished = _parse_datetime_for_trace(payload.get("completed_at"))

    return SessionTimeStep(
        step_id=payload.get("step_id", ""),
        step_index=int(payload.get("step_index", 0) or 0),
        timestamp=started,
        turn_number=payload.get("turn_number"),
        events=events,
        markov_blanket_messages=messages,
        step_metadata=payload.get("step_metadata") or {},
        completed_at=finished,
    )
294
-
295
-
296
- def _session_trace_from_dict(payload: dict[str, Any]) -> Optional[V3SessionTrace]:
297
- if not isinstance(payload, dict):
298
- return None
299
- steps = [
300
- _step_from_dict(step)
301
- for step in payload.get("session_time_steps", [])
302
- if isinstance(step, dict)
303
- ]
304
- events = [
305
- _event_from_dict(event)
306
- for event in payload.get("event_history", [])
307
- if isinstance(event, dict)
308
- ]
309
- markov_history = [
310
- _markov_message_from_dict(msg)
311
- for msg in payload.get("markov_blanket_message_history", [])
312
- if isinstance(msg, dict)
313
- ]
314
- created_at = _parse_datetime_for_trace(payload.get("created_at")) or datetime.now(timezone.utc)
315
- metadata = payload.get("metadata") or {}
316
- session_metadata = payload.get("session_metadata")
317
- return V3SessionTrace(
318
- session_id=payload.get("session_id", ""),
319
- created_at=created_at,
320
- session_time_steps=steps,
321
- event_history=events,
322
- markov_blanket_message_history=markov_history,
323
- metadata=metadata,
324
- session_metadata=session_metadata,
325
- )
326
-
327
-
328
- async def _store_trace(
329
- tracer: SessionTracer | None,
330
- trace_namespace: dict[str, Any] | None,
331
- extra_metadata: dict[str, Any] | None = None,
332
- ):
333
- import logging
334
- _logger = logging.getLogger(__name__)
335
-
336
- _logger.info(f"[STORE_TRACE_DEBUG] Called with tracer={tracer is not None}, trace_namespace={trace_namespace is not None}")
337
-
338
- if tracer is None or not isinstance(trace_namespace, dict):
339
- _logger.warning(f"[STORE_TRACE_DEBUG] Early return: tracer={tracer is not None}, trace_namespace type={type(trace_namespace)}")
340
- return
341
-
342
- _logger.info(f"[STORE_TRACE_DEBUG] trace_namespace keys: {list(trace_namespace.keys())}")
343
-
344
- session_payload = trace_namespace.get("session_trace")
345
- if not isinstance(session_payload, dict):
346
- _logger.warning(f"[STORE_TRACE_DEBUG] No session_trace found or wrong type: {type(session_payload)}")
347
- return
348
-
349
- _logger.info(f"[STORE_TRACE_DEBUG] session_payload keys: {list(session_payload.keys())}")
350
- msg_count = len(session_payload.get("markov_blanket_message_history", []))
351
- _logger.info(f"[STORE_TRACE_DEBUG] Found {msg_count} messages in session_payload")
352
-
353
- trace_obj = _session_trace_from_dict(session_payload)
354
- if trace_obj is None:
355
- _logger.warning(f"[STORE_TRACE_DEBUG] _session_trace_from_dict returned None")
356
- return
357
-
358
- _logger.info(f"[STORE_TRACE_DEBUG] Created SessionTrace object with {len(trace_obj.markov_blanket_message_history)} messages")
359
-
360
- if tracer.db is None:
361
- await tracer.initialize()
362
- meta = dict(trace_obj.metadata or {})
363
- if extra_metadata:
364
- meta.update(extra_metadata)
365
- trace_obj.metadata = meta
366
-
367
- _logger.info(f"[STORE_TRACE_DEBUG] Calling insert_session_trace for session_id={trace_obj.session_id}")
368
- await tracer.db.insert_session_trace(trace_obj)
369
- _logger.info(f"[STORE_TRACE_DEBUG] Successfully inserted trace")
370
-
371
- def _temporary_sys_path(paths: Sequence[Path]):
372
- """Context manager to prepend entries to sys.path temporarily."""
373
-
374
- @contextlib.contextmanager
375
- def _manager() -> Iterator[None]:
376
- added: list[str] = []
377
- for p in paths:
378
- try:
379
- resolved = str(p.resolve())
380
- except Exception:
381
- continue
382
- if resolved in sys.path:
383
- continue
384
- sys.path.insert(0, resolved)
385
- added.append(resolved)
386
- try:
387
- yield None
388
- finally:
389
- for entry in added:
390
- with contextlib.suppress(ValueError):
391
- sys.path.remove(entry)
392
-
393
- return _manager()
394
-
395
-
396
- def _possible_module_names(
397
- path: Path, module_search_roots: Sequence[Path]
398
- ) -> list[tuple[str, Path]]:
399
- """Return potential module names based on candidate roots."""
400
-
401
- candidates: list[tuple[str, Path]] = []
402
- for root in module_search_roots:
403
- try:
404
- resolved_root = root.resolve()
405
- except Exception:
406
- continue
407
- if not resolved_root.exists() or not path.is_relative_to(resolved_root):
408
- continue
409
- relative = path.relative_to(resolved_root)
410
- stem = relative.with_suffix("")
411
- parts = list(stem.parts)
412
- if not parts:
413
- continue
414
- module_name = ".".join(parts)
415
- if module_name:
416
- candidates.append((module_name, resolved_root))
417
- return candidates
418
-
419
-
420
- def _ensure_parent_namespace(module_name: str, search_root: Path) -> None:
421
- """Ensure namespace packages exist for dotted module names."""
422
-
423
- parts = module_name.split(".")
424
- for depth in range(1, len(parts)):
425
- parent_name = ".".join(parts[:depth])
426
- if parent_name in sys.modules:
427
- continue
428
- parent_module = types.ModuleType(parent_name)
429
- candidate_dir = search_root.joinpath(*parts[:depth])
430
- try:
431
- resolved = candidate_dir.resolve()
432
- except Exception:
433
- resolved = search_root.resolve()
434
- parent_module.__path__ = [str(resolved)] # type: ignore[attr-defined]
435
- sys.modules[parent_name] = parent_module
436
-
437
-
438
def _should_ignore_path(path: Path) -> bool:
    """Return True when any component of ``path`` is in ``DEFAULT_IGNORE_DIRS``."""
    for component in path.parts:
        if component in DEFAULT_IGNORE_DIRS:
            return True
    return False
440
-
441
-
442
def _candidate_search_roots() -> list[Path]:
    """Return the ordered, de-duplicated directories to scan for task apps.

    Order of precedence: the demo directory (when one is available), entries
    from the ``SYNTH_TASK_APP_SEARCH_PATH`` environment variable, the current
    working directory, then each ``DEFAULT_SEARCH_RELATIVE`` entry below it.
    Roots that fail to resolve or do not exist are dropped.
    """
    collected: list[Path] = []

    demo_dir = _load_demo_directory()
    if demo_dir is not None and demo_dir.is_dir():
        collected.append(demo_dir)

    # Explicit extra roots, separated by the platform path separator.
    extra = os.environ.get("SYNTH_TASK_APP_SEARCH_PATH")
    if extra:
        collected.extend(
            Path(piece).expanduser() for piece in extra.split(os.pathsep) if piece
        )

    here = Path.cwd().resolve()
    collected.append(here)
    for relative in DEFAULT_SEARCH_RELATIVE:
        try:
            collected.append((here / relative).resolve())
        except Exception:
            continue

    # Drop duplicates and missing paths while keeping first-seen order.
    visited: set[Path] = set()
    unique_roots: list[Path] = []
    for entry in collected:
        try:
            real = entry.resolve()
        except Exception:
            continue
        if real not in visited and real.exists():
            visited.add(real)
            unique_roots.append(real)
    return unique_roots
481
-
482
-
483
- def _eval_config_sort_key(path: Path) -> tuple[int, int, int, str]:
484
- name = path.name.lower()
485
- parent_names = {p.name.lower() for p in path.parents}
486
- in_configs = 0 if "configs" in parent_names else 1
487
- in_examples = 0 if "examples" in parent_names else 1
488
- starts_eval = 0 if name.startswith("eval") else 1
489
- return (in_configs, in_examples, starts_eval, str(path))
490
-
491
-
492
def _discover_eval_config_paths() -> list[Path]:
    """Find TOML files whose name contains ``eval`` under the search roots.

    Results are resolved, de-duplicated, filtered through
    ``_should_ignore_path`` and sorted with ``_eval_config_sort_key``.
    """
    found: list[Path] = []
    visited: set[Path] = set()
    for root in _candidate_search_roots():
        if not (root.exists() and root.is_dir()):
            continue
        try:
            base = root.resolve()
        except Exception:
            continue
        for toml_path in base.rglob("*.toml"):
            if not toml_path.is_file() or _should_ignore_path(toml_path):
                continue
            # A name containing "evaluation" necessarily contains "eval",
            # so one substring check covers both.
            if "eval" not in toml_path.name.lower():
                continue
            try:
                real = toml_path.resolve()
            except Exception:
                continue
            if real not in visited:
                visited.add(real)
                found.append(real)

    found.sort(key=_eval_config_sort_key)
    return found
524
-
525
-
526
class _TaskAppConfigVisitor(ast.NodeVisitor):
    """AST visitor that records ``(app_id, lineno)`` for task-app declarations.

    A call counts when it is a ``TaskAppConfig(...)`` call or a
    ``register_task_app(...)`` call from which a literal app id can be read.
    """

    def __init__(self) -> None:
        self.matches: list[tuple[str, int]] = []

    def visit_Call(self, node: ast.Call) -> None:  # noqa: D401
        if _is_task_app_config_call(node):
            app_id = _extract_app_id(node)
        elif _is_register_task_app_call(node):
            app_id = _extract_register_app_id(node)
        else:
            app_id = None
        if app_id:
            self.matches.append((app_id, getattr(node, "lineno", 0)))
        # Keep descending so nested calls are inspected as well.
        self.generic_visit(node)
540
-
541
-
542
- def _is_task_app_config_call(node: ast.Call) -> bool:
543
- func = node.func
544
- return (isinstance(func, ast.Name) and func.id == "TaskAppConfig") or (
545
- isinstance(func, ast.Attribute) and func.attr == "TaskAppConfig"
546
- )
547
-
548
-
549
- def _extract_app_id(node: ast.Call) -> str | None:
550
- for kw in node.keywords:
551
- if (
552
- kw.arg == "app_id"
553
- and isinstance(kw.value, ast.Constant)
554
- and isinstance(kw.value.value, str)
555
- ):
556
- return kw.value.value
557
- if node.args:
558
- first = node.args[0]
559
- if isinstance(first, ast.Constant) and isinstance(first.value, str):
560
- return first.value
561
- return None
562
-
563
-
564
- def _is_register_task_app_call(node: ast.Call) -> bool:
565
- func = node.func
566
- return (isinstance(func, ast.Name) and func.id == "register_task_app") or (
567
- isinstance(func, ast.Attribute) and func.attr == "register_task_app"
568
- )
569
-
570
-
571
- def _extract_register_app_id(node: ast.Call) -> str | None:
572
- # Look for entry=TaskAppEntry(app_id="...", ...)
573
- for kw in node.keywords:
574
- if kw.arg == "entry" and isinstance(kw.value, ast.Call):
575
- entry_call = kw.value
576
- if isinstance(entry_call.func, ast.Name) and entry_call.func.id == "TaskAppEntry":
577
- for entry_kw in entry_call.keywords:
578
- if (
579
- entry_kw.arg == "app_id"
580
- and isinstance(entry_kw.value, ast.Constant)
581
- and isinstance(entry_kw.value.value, str)
582
- ):
583
- return entry_kw.value.value
584
- return None
585
-
586
-
587
class _ModalAppVisitor(ast.NodeVisitor):
    """AST visitor that records ``(app_name, lineno)`` for Modal ``App(...)`` calls.

    It tracks the local names bound by ``from modal import App`` and by
    ``import modal`` (including ``as`` aliases) and matches calls made through
    those names. Aliases are collected as nodes are visited.
    """

    def __init__(self) -> None:
        self.app_aliases: set[str] = set()
        self.modal_aliases: set[str] = set()
        self.matches: list[tuple[str, int]] = []

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:  # noqa: D401
        if node.module == "modal":
            self.app_aliases.update(
                alias.asname or alias.name
                for alias in node.names
                if alias.name == "App"
            )
        self.generic_visit(node)

    def visit_Import(self, node: ast.Import) -> None:  # noqa: D401
        self.modal_aliases.update(
            alias.asname or alias.name
            for alias in node.names
            if alias.name == "modal"
        )
        self.generic_visit(node)

    def visit_Call(self, node: ast.Call) -> None:  # noqa: D401
        callee = node.func
        # ``App(...)`` through a name imported from modal.
        direct = isinstance(callee, ast.Name) and callee.id in self.app_aliases
        # ``modal.App(...)`` through an imported modal module alias.
        qualified = (
            isinstance(callee, ast.Attribute)
            and isinstance(callee.value, ast.Name)
            and callee.value.id in self.modal_aliases
            and callee.attr == "App"
        )
        if direct or qualified:
            name = _extract_modal_app_name(node)
            if name:
                self.matches.append((name, getattr(node, "lineno", 0)))
        self.generic_visit(node)
622
-
623
-
624
- def _extract_modal_app_name(node: ast.Call) -> str | None:
625
- for kw in node.keywords:
626
- if (
627
- kw.arg in {"name", "app_name"}
628
- and isinstance(kw.value, ast.Constant)
629
- and isinstance(kw.value.value, str)
630
- ):
631
- return kw.value.value
632
- if node.args:
633
- first = node.args[0]
634
- if isinstance(first, ast.Constant) and isinstance(first.value, str):
635
- return first.value
636
- return None
637
-
638
-
639
def _collect_task_app_choices() -> list[AppChoice]:
    """Gather every discoverable task-app choice, de-duplicated and ranked.

    The registry is cleared first, then choices come from scanned
    ``TaskAppConfig`` declarations and from Modal scripts. Choices sharing the
    same ``(app_id, resolved path)`` are collapsed to one, preferring a
    ``"registered"`` source over any other, and the result is sorted with
    ``_app_choice_sort_key``.
    """
    # Clear registry to avoid duplicate registration errors
    registry.clear()

    with contextlib.suppress(Exception):
        _maybe_import("synth_ai.demos.demo_task_apps")

    # Only use discovered task apps, not registered ones (since we moved them to examples)
    gathered: list[AppChoice] = []
    gathered.extend(_collect_scanned_task_configs())
    gathered.extend(_collect_modal_scripts())

    by_key: dict[tuple[str, Path], AppChoice] = {}
    ordered: list[AppChoice] = []
    for choice in gathered:
        key = (choice.app_id, choice.path.resolve())
        existing = by_key.get(key)
        if existing is None:
            by_key[key] = choice
            ordered.append(choice)
            continue
        # Duplicate key: only a registered choice may displace a
        # non-registered one, and it takes over the same list position.
        if choice.source == "registered" and existing.source != "registered":
            by_key[key] = choice
            ordered[ordered.index(existing)] = choice

    ordered.sort(key=_app_choice_sort_key)
    return ordered
667
-
668
-
669
def _collect_registered_choices() -> list[AppChoice]:
    """Build an ``AppChoice`` for every entry currently in the registry.

    Each choice's path is the file of the module that defines the entry's
    ``config_factory``; ``REPO_ROOT`` is used when that module has no
    ``__file__``.
    """
    choices: list[AppChoice] = []
    for entry in registry.list():
        factory_module_name = entry.config_factory.__module__
        factory_module = sys.modules.get(factory_module_name)
        if factory_module is None:
            factory_module = importlib.import_module(factory_module_name)

        source_file = getattr(factory_module, "__file__", None)
        if source_file:
            location = Path(source_file).resolve()
        else:
            location = REPO_ROOT

        choice = AppChoice(
            app_id=entry.app_id,
            label=entry.app_id,
            path=location,
            source="registered",
            description=entry.description,
            aliases=tuple(entry.aliases),
            entry=entry,
        )
        choices.append(choice)
    return choices
690
-
691
-
692
def _collect_scanned_task_configs() -> list[AppChoice]:
    """Scan Python files under the search roots for task-app declarations.

    Every readable, parseable ``.py`` file that is not ignored by
    ``_should_ignore_path`` is walked with ``_TaskAppConfigVisitor``. Each
    distinct ``(app_id, resolved file)`` pair becomes one ``"discovered"``
    choice whose entry is loaded lazily from that file.
    """
    discovered: list[AppChoice] = []
    visited: set[tuple[str, Path]] = set()
    for root in _candidate_search_roots():
        try:
            root_resolved = root.resolve()
        except Exception:
            continue
        if not (root.exists() and root.is_dir()):
            continue
        for path in root.rglob("*.py"):
            if not path.is_file() or _should_ignore_path(path):
                continue
            try:
                source = path.read_text(encoding="utf-8")
            except Exception:
                continue
            try:
                tree = ast.parse(source, filename=str(path))
            except SyntaxError:
                continue
            visitor = _TaskAppConfigVisitor()
            visitor.visit(tree)
            for app_id, lineno in visitor.matches:
                resolved_path = path.resolve()
                key = (app_id, resolved_path)
                if key in visited:
                    continue
                visited.add(key)
                # Loop values are bound as lambda defaults so each loader
                # keeps its own path, app id and search root.
                loader = (
                    lambda p=resolved_path, a=app_id, roots=(root_resolved,):
                    _load_entry_from_path(p, a, module_search_roots=roots)
                )
                discovered.append(
                    AppChoice(
                        app_id=app_id,
                        label=app_id,
                        path=resolved_path,
                        source="discovered",
                        description=f"TaskAppConfig in {path.name} (line {lineno})",
                        entry_loader=loader,
                        lineno=lineno,
                    )
                )
    return discovered
738
-
739
-
740
def _collect_modal_scripts() -> list[AppChoice]:
    """Scan Python files under the search roots for Modal ``App(...)`` definitions.

    Every readable, parseable ``.py`` file that is not ignored by
    ``_should_ignore_path`` is walked with ``_ModalAppVisitor``. Each distinct
    ``(app name, resolved file)`` pair becomes one ``"modal-script"`` choice.
    """
    scripts: list[AppChoice] = []
    visited: set[tuple[str, Path]] = set()
    for root in _candidate_search_roots():
        if not (root.exists() and root.is_dir()):
            continue
        for path in root.rglob("*.py"):
            if not path.is_file() or _should_ignore_path(path):
                continue
            try:
                source = path.read_text(encoding="utf-8")
            except Exception:
                continue
            try:
                tree = ast.parse(source, filename=str(path))
            except SyntaxError:
                continue
            visitor = _ModalAppVisitor()
            visitor.visit(tree)
            for app_name, lineno in visitor.matches:
                script_path = path.resolve()
                key = (app_name, script_path)
                if key in visited:
                    continue
                visited.add(key)
                scripts.append(
                    AppChoice(
                        app_id=app_name,
                        label=app_name,
                        path=script_path,
                        source="modal-script",
                        description=f"Modal App '{app_name}' in {path.name} (line {lineno})",
                        modal_script=script_path,
                        lineno=lineno,
                    )
                )
    return scripts
778
-
779
-
780
- def _app_choice_sort_key(choice: AppChoice) -> tuple[int, int, int, int, int, str, str]:
781
- """Ranking heuristic so wrapper-style task apps surface first."""
782
-
783
- # Prioritize apps in the current working directory (demo or otherwise)
784
- cwd_rank = 1
785
- try:
786
- cwd = Path.cwd().resolve()
787
- if choice.path.is_relative_to(cwd):
788
- # Check if this is directly in CWD (not in subdirectories like examples/)
789
- try:
790
- rel_path = choice.path.relative_to(cwd)
791
- # If it's in the immediate directory or one level deep, prioritize it
792
- if len(rel_path.parts) <= 2:
793
- cwd_rank = 0
794
- except Exception:
795
- pass
796
- except Exception:
797
- pass
798
-
799
- # Further prioritize apps in the demo directory if one is set
800
- demo_rank = 1
801
- demo_dir = _load_demo_directory()
802
- if demo_dir and choice.path.is_relative_to(demo_dir):
803
- demo_rank = 0
804
-
805
- modal_rank = 1 if choice.modal_script else 0
806
-
807
- name = choice.path.name.lower()
808
- file_rank = 3
809
- if name.endswith("_task_app.py") or name.endswith("task_app.py"):
810
- file_rank = 0
811
- elif name.endswith("_app.py") or "task_app" in name:
812
- file_rank = 1
813
- elif name.endswith(".py"):
814
- file_rank = 2
815
-
816
- directory_rank = 0 if choice.path.parent.name.lower() in {"task_app", "task_apps"} else 1
817
-
818
- return (
819
- demo_rank,
820
- cwd_rank,
821
- modal_rank,
822
- file_rank,
823
- directory_rank,
824
- choice.app_id,
825
- str(choice.path),
826
- )
827
-
828
-
829
- def _choice_matches_identifier(choice: AppChoice, identifier: str) -> bool:
830
- ident = identifier.strip()
831
- if not ident:
832
- return False
833
- return ident == choice.app_id or ident == choice.label or ident in choice.aliases
834
-
835
-
836
- def _choice_has_modal_support(choice: AppChoice) -> bool:
837
- if choice.modal_script:
838
- return True
839
- try:
840
- entry = choice.ensure_entry()
841
- except click.ClickException:
842
- # If we can't load the entry, try to detect Modal support via AST parsing
843
- return _has_modal_support_in_file(choice.path)
844
- return entry.modal is not None
845
-
846
-
847
- def _has_modal_support_in_file(path: Path) -> bool:
848
- """Detect if a file has Modal deployment support by parsing the AST."""
849
- try:
850
- source = path.read_text(encoding="utf-8")
851
- tree = ast.parse(source, filename=str(path))
852
-
853
- # Look for ModalDeploymentConfig in register_task_app calls
854
- for node in ast.walk(tree):
855
- if isinstance(node, ast.Call) and _is_register_task_app_call(node):
856
- # Check if the entry has modal=ModalDeploymentConfig(...)
857
- for kw in node.keywords:
858
- if kw.arg == "entry" and isinstance(kw.value, ast.Call):
859
- entry_call = kw.value
860
- if (
861
- isinstance(entry_call.func, ast.Name)
862
- and entry_call.func.id == "TaskAppEntry"
863
- ):
864
- for entry_kw in entry_call.keywords:
865
- if entry_kw.arg == "modal" and isinstance(entry_kw.value, ast.Call):
866
- modal_call = entry_kw.value
867
- if (
868
- isinstance(modal_call.func, ast.Name)
869
- and modal_call.func.id == "ModalDeploymentConfig"
870
- ):
871
- return True
872
- except Exception:
873
- pass
874
- return False
875
-
876
-
877
- def _extract_modal_config_from_file(path: Path) -> ModalDeploymentConfigType | None:
878
- """Extract ModalDeploymentConfig from a file by parsing the AST."""
879
- try:
880
- source = path.read_text(encoding="utf-8")
881
- tree = ast.parse(source, filename=str(path))
882
-
883
- # Look for ModalDeploymentConfig in register_task_app calls
884
- for node in ast.walk(tree):
885
- if isinstance(node, ast.Call) and _is_register_task_app_call(node):
886
- # Check if the entry has modal=ModalDeploymentConfig(...)
887
- for kw in node.keywords:
888
- if kw.arg == "entry" and isinstance(kw.value, ast.Call):
889
- entry_call = kw.value
890
- if (
891
- isinstance(entry_call.func, ast.Name)
892
- and entry_call.func.id == "TaskAppEntry"
893
- ):
894
- for entry_kw in entry_call.keywords:
895
- if entry_kw.arg == "modal" and isinstance(entry_kw.value, ast.Call):
896
- modal_call = entry_kw.value
897
- if (
898
- isinstance(modal_call.func, ast.Name)
899
- and modal_call.func.id == "ModalDeploymentConfig"
900
- ):
901
- # Extract the arguments to ModalDeploymentConfig
902
- return _build_modal_config_from_ast(modal_call)
903
- except Exception:
904
- pass
905
- return None
906
-
907
-
908
- def _build_modal_config_from_ast(modal_call: ast.Call) -> ModalDeploymentConfigType | None:
909
- """Build a ModalDeploymentConfig from an AST Call node."""
910
- try:
911
- # Extract keyword arguments
912
- kwargs = {}
913
- for kw in modal_call.keywords:
914
- if kw.arg and isinstance(kw.value, ast.Constant):
915
- kwargs[kw.arg] = kw.value.value
916
- elif kw.arg == "pip_packages" and isinstance(kw.value, (ast.List, ast.Tuple)):
917
- # Handle pip_packages list/tuple
918
- packages: list[str] = []
919
- value_node = kw.value
920
- if isinstance(value_node, (ast.List, ast.Tuple)):
921
- for elt in value_node.elts:
922
- if isinstance(elt, ast.Constant):
923
- packages.append(elt.value)
924
- kwargs[kw.arg] = tuple(packages)
925
- elif kw.arg == "extra_local_dirs" and isinstance(kw.value, (ast.List, ast.Tuple)):
926
- # Handle extra_local_dirs list/tuple of tuples
927
- dirs = []
928
- value_node = kw.value
929
- if isinstance(value_node, (ast.List, ast.Tuple)):
930
- for elt in value_node.elts:
931
- if isinstance(elt, (ast.List, ast.Tuple)) and len(elt.elts) == 2:
932
- src = elt.elts[0].value if isinstance(elt.elts[0], ast.Constant) else None
933
- dst = elt.elts[1].value if isinstance(elt.elts[1], ast.Constant) else None
934
- if src and dst:
935
- dirs.append((src, dst))
936
- kwargs[kw.arg] = tuple(dirs)
937
- elif kw.arg == "secret_names" and isinstance(kw.value, (ast.List, ast.Tuple)):
938
- # Handle secret_names list/tuple
939
- secrets = []
940
- value_node = kw.value
941
- if isinstance(value_node, (ast.List, ast.Tuple)):
942
- for elt in value_node.elts:
943
- if isinstance(elt, ast.Constant):
944
- secrets.append(elt.value)
945
- kwargs[kw.arg] = tuple(secrets)
946
- elif kw.arg == "volume_mounts" and isinstance(kw.value, (ast.List, ast.Tuple)):
947
- # Handle volume_mounts list/tuple of tuples
948
- mounts = []
949
- value_node = kw.value
950
- if isinstance(value_node, (ast.List, ast.Tuple)):
951
- for elt in value_node.elts:
952
- if isinstance(elt, (ast.List, ast.Tuple)) and len(elt.elts) == 2:
953
- name = elt.elts[0].value if isinstance(elt.elts[0], ast.Constant) else None
954
- mount = elt.elts[1].value if isinstance(elt.elts[1], ast.Constant) else None
955
- if name and mount:
956
- mounts.append((name, mount))
957
- kwargs[kw.arg] = tuple(mounts)
958
-
959
- return ModalDeploymentConfig(**kwargs)
960
- except Exception:
961
- return None
962
-
963
-
964
- def _choice_has_local_support(choice: AppChoice) -> bool:
965
- if choice.modal_script:
966
- return False
967
- try:
968
- choice.ensure_entry()
969
- except click.ClickException:
970
- return False
971
- return True
972
-
973
-
974
- def _format_choice(choice: AppChoice, index: int | None = None) -> str:
975
- prefix = f"[{index}] " if index is not None else ""
976
- # Get file modification timestamp
977
- try:
978
- from datetime import datetime
979
-
980
- mtime = choice.path.stat().st_mtime
981
- modified_str = datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M:%S")
982
- details = f"Modified: {modified_str}"
983
- except Exception:
984
- # Fallback if timestamp unavailable
985
- details = choice.description or "No timestamp available"
986
- # Format: single line with timestamp
987
- main_line = f"{prefix}{choice.app_id} ({choice.source}) – {details}"
988
- return main_line
989
-
990
-
991
- def _prompt_user_for_choice(choices: list[AppChoice]) -> AppChoice:
992
- click.echo("Select a task app:")
993
- for idx, choice in enumerate(choices, start=1):
994
- click.echo(_format_choice(choice, idx))
995
- try:
996
- response = click.prompt("Enter choice", default="1", type=str).strip() or "1"
997
- except (Abort, EOFError, KeyboardInterrupt) as exc:
998
- raise click.ClickException("Task app selection cancelled by user") from exc
999
- if not response.isdigit():
1000
- raise click.ClickException("Selection must be a number")
1001
- index = int(response)
1002
- if not 1 <= index <= len(choices):
1003
- raise click.ClickException("Selection out of range")
1004
- return choices[index - 1]
1005
-
1006
-
1007
- def _select_app_choice(app_id: str | None, purpose: str) -> AppChoice:
1008
- choices = _collect_task_app_choices()
1009
- if purpose in {"serve", "eval"}:
1010
- filtered = [c for c in choices if not c.modal_script]
1011
- elif purpose in {"deploy", "modal-serve"}:
1012
- filtered = []
1013
- for choice in choices:
1014
- if choice.modal_script or _choice_has_modal_support(choice):
1015
- filtered.append(choice)
1016
- else:
1017
- filtered = choices
1018
-
1019
- filtered.sort(key=_app_choice_sort_key)
1020
-
1021
- if not filtered:
1022
- raise click.ClickException("No task apps discovered for this command.")
1023
-
1024
- if app_id:
1025
- matches = [c for c in filtered if _choice_matches_identifier(c, app_id)]
1026
- if not matches:
1027
- available = ", ".join(sorted({c.app_id for c in filtered}))
1028
- raise click.ClickException(f"Task app '{app_id}' not found. Available: {available}")
1029
- exact_matches = [c for c in matches if c.app_id == app_id]
1030
- if len(exact_matches) == 1:
1031
- return exact_matches[0]
1032
- if len(matches) == 1:
1033
- return matches[0]
1034
- # Prefer entries with modal support when required
1035
- if purpose in {"deploy", "modal-serve"}:
1036
- modal_matches = [c for c in matches if _choice_has_modal_support(c)]
1037
- if len(modal_matches) == 1:
1038
- return modal_matches[0]
1039
- if modal_matches:
1040
- matches = modal_matches
1041
- return _prompt_user_for_choice(matches)
1042
-
1043
- if len(filtered) == 1:
1044
- choice = filtered[0]
1045
- click.echo(_format_choice(choice))
1046
- return choice
1047
-
1048
- return _prompt_user_for_choice(filtered)
1049
-
1050
-
1051
- def _import_task_app_module(
1052
- resolved: Path,
1053
- module_name: str,
1054
- *,
1055
- namespace_root: Path | None,
1056
- sys_path_roots: Sequence[Path],
1057
- ensure_namespace: bool = True,
1058
- ) -> types.ModuleType:
1059
- spec = importlib.util.spec_from_file_location(module_name, str(resolved))
1060
- if spec is None or spec.loader is None:
1061
- raise click.ClickException(f"Unable to load Python module from {resolved}")
1062
-
1063
- module = importlib.util.module_from_spec(spec)
1064
- sys.modules[module_name] = module
1065
-
1066
- with _temporary_sys_path(sys_path_roots):
1067
- if ensure_namespace and namespace_root is not None and "." in module_name:
1068
- _ensure_parent_namespace(module_name, namespace_root)
1069
-
1070
- # Clear registry before importing to avoid duplicate registration errors
1071
- registry.clear()
1072
-
1073
- try:
1074
- spec.loader.exec_module(module)
1075
- except Exception:
1076
- # Remove partially-imported module to avoid reuse
1077
- sys.modules.pop(module_name, None)
1078
- raise
1079
-
1080
- return module
1081
-
1082
-
1083
- @contextlib.contextmanager
1084
- def _safe_import_context() -> Iterator[None]:
1085
- """Guard module imports against argparse/uvicorn side effects."""
1086
-
1087
- original_argv = sys.argv[:]
1088
- sys.argv = [original_argv[0]] if original_argv else ["python"]
1089
-
1090
- parser_cls = argparse.ArgumentParser
1091
- old_parse_args = parser_cls.parse_args
1092
-
1093
- def _parse_noargs(self, args=None, namespace=None): # type: ignore[override]
1094
- if args is None:
1095
- args = []
1096
- if namespace is None:
1097
- namespace = argparse.Namespace()
1098
- try:
1099
- return old_parse_args(self, args, namespace)
1100
- except SystemExit:
1101
- return namespace
1102
-
1103
- parser_cls.parse_args = _parse_noargs # type: ignore[assignment]
1104
-
1105
- uvicorn_run = None
1106
- run_task_app_orig = None
1107
- try:
1108
- import uvicorn # type: ignore
1109
-
1110
- uvicorn_run = uvicorn.run
1111
- uvicorn.run = lambda *args, **kwargs: None # type: ignore[assignment]
1112
- except Exception:
1113
- uvicorn_run = None
1114
-
1115
- try:
1116
- _task_server_patch = cast(
1117
- Any, importlib.import_module("synth_ai.task.server")
1118
- )
1119
- run_task_app_orig = cast(Callable[..., Any], _task_server_patch.run_task_app)
1120
- _task_server_patch.run_task_app = ( # type: ignore[assignment]
1121
- lambda *args, **kwargs: None
1122
- )
1123
- except Exception:
1124
- run_task_app_orig = None
1125
-
1126
- try:
1127
- yield
1128
- finally:
1129
- sys.argv = original_argv
1130
- parser_cls.parse_args = old_parse_args # type: ignore[assignment]
1131
- if uvicorn_run is not None:
1132
- try:
1133
- import uvicorn # type: ignore
1134
-
1135
- uvicorn.run = uvicorn_run # type: ignore[assignment]
1136
- except Exception:
1137
- pass
1138
- if run_task_app_orig is not None:
1139
- try:
1140
- _task_server_patch = cast(
1141
- Any, importlib.import_module("synth_ai.task.server")
1142
- )
1143
- _task_server_patch.run_task_app = run_task_app_orig # type: ignore[assignment]
1144
- except Exception:
1145
- pass
1146
-
1147
-
1148
- def _load_entry_from_path(
1149
- path: Path, app_id: str, module_search_roots: Sequence[Path] | None = None
1150
- ) -> TaskAppEntryType:
1151
- resolved = path.resolve()
1152
- search_roots: list[Path] = []
1153
- seen_roots: set[Path] = set()
1154
-
1155
- def _append_root(candidate: Path) -> None:
1156
- try:
1157
- resolved_root = candidate.resolve()
1158
- except Exception:
1159
- return
1160
- if resolved_root in seen_roots:
1161
- return
1162
- seen_roots.add(resolved_root)
1163
- search_roots.append(resolved_root)
1164
-
1165
- for root in module_search_roots or []:
1166
- _append_root(root)
1167
- _append_root(resolved.parent)
1168
- _append_root(REPO_ROOT)
1169
-
1170
- last_error: Exception | None = None
1171
- module: types.ModuleType | None = None
1172
-
1173
- for module_name, namespace_root in _possible_module_names(resolved, search_roots):
1174
- try:
1175
- with _safe_import_context():
1176
- module = _import_task_app_module(
1177
- resolved,
1178
- module_name,
1179
- namespace_root=namespace_root,
1180
- sys_path_roots=search_roots,
1181
- ensure_namespace=True,
1182
- )
1183
- break
1184
- except Exception as exc: # pragma: no cover - best-effort fallbacks
1185
- last_error = exc
1186
- continue
1187
-
1188
- if module is None:
1189
- hashed_name = f"_synth_task_app_{hashlib.md5(str(resolved).encode(), usedforsecurity=False).hexdigest()}"
1190
- try:
1191
- with _safe_import_context():
1192
- module = _import_task_app_module(
1193
- resolved,
1194
- hashed_name,
1195
- namespace_root=None,
1196
- sys_path_roots=search_roots,
1197
- ensure_namespace=False,
1198
- )
1199
- except Exception as exc: # pragma: no cover - propagate meaningful error
1200
- detail = last_error or exc
1201
- raise click.ClickException(f"Failed to import {resolved}: {detail}") from detail
1202
-
1203
- config_obj: TaskAppConfigType | None = None
1204
- factory_callable: Callable[[], TaskAppConfigType] | None = None
1205
-
1206
- for attr_name in dir(module):
1207
- try:
1208
- attr = getattr(module, attr_name)
1209
- except Exception:
1210
- continue
1211
- if isinstance(attr, TaskAppConfig) and attr.app_id == app_id:
1212
- config_obj = attr
1213
-
1214
- def _return_config(cfg: TaskAppConfigType = attr) -> TaskAppConfigType:
1215
- return cfg
1216
-
1217
- factory_callable = _return_config
1218
- break
1219
-
1220
- if factory_callable is None:
1221
- for attr_name in dir(module):
1222
- if attr_name.startswith("_"):
1223
- continue
1224
- try:
1225
- attr = getattr(module, attr_name)
1226
- except Exception:
1227
- continue
1228
- if not callable(attr):
1229
- continue
1230
- try:
1231
- sig = inspect.signature(attr)
1232
- except (TypeError, ValueError):
1233
- continue
1234
- has_required = False
1235
- for param in sig.parameters.values():
1236
- if (
1237
- param.kind
1238
- in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
1239
- and param.default is inspect._empty
1240
- ):
1241
- has_required = True
1242
- break
1243
- if has_required:
1244
- continue
1245
- try:
1246
- with _safe_import_context():
1247
- result = attr()
1248
- except SystemExit:
1249
- continue
1250
- except Exception:
1251
- continue
1252
- if isinstance(result, TaskAppConfig) and result.app_id == app_id:
1253
- # Bind attr to a local and close over it without exposing parameters
1254
- bound_func: Callable[[], TaskAppConfig] = cast(Callable[[], TaskAppConfig], attr) # type: ignore[assignment]
1255
-
1256
- def _factory_noargs(
1257
- func: Callable[[], TaskAppConfigType] = bound_func,
1258
- ) -> TaskAppConfigType:
1259
- return func()
1260
-
1261
- factory_callable = _factory_noargs
1262
- config_obj = result
1263
- break
1264
-
1265
- # If no TaskAppConfig found directly, check if it was registered via register_task_app
1266
- if factory_callable is None or config_obj is None:
1267
- try:
1268
- # Check if the app was registered in the registry
1269
- entry = registry.get(app_id)
1270
- return entry
1271
- except KeyError as exc:
1272
- raise click.ClickException(
1273
- f"Could not locate TaskAppConfig for '{app_id}' in {resolved}."
1274
- ) from exc
1275
-
1276
- modal_cfg: ModalDeploymentConfigType | None = None
1277
- for attr_name in dir(module):
1278
- try:
1279
- attr = getattr(module, attr_name)
1280
- except Exception:
1281
- continue
1282
- if isinstance(attr, ModalDeploymentConfig):
1283
- modal_cfg = attr
1284
- break
1285
-
1286
- # If no ModalDeploymentConfig found, try to detect it via AST parsing
1287
- if modal_cfg is None:
1288
- modal_cfg = _extract_modal_config_from_file(resolved)
1289
-
1290
- description = inspect.getdoc(module) or f"Discovered task app in {resolved.name}"
1291
- env_files: Iterable[str] = getattr(module, "ENV_FILES", ()) # type: ignore[arg-type]
1292
-
1293
- entry = TaskAppEntry(
1294
- app_id=app_id,
1295
- description=description,
1296
- config_factory=factory_callable,
1297
- aliases=(),
1298
- env_files=tuple(str(Path(p)) for p in env_files if p),
1299
- modal=modal_cfg,
1300
- )
1301
- return entry
1302
-
1303
-
1304
- def _resolve_env_paths_for_script(script_path: Path, explicit: Sequence[str]) -> list[Path]:
1305
- if explicit:
1306
- resolved: list[Path] = []
1307
- for candidate in explicit:
1308
- p = Path(candidate).expanduser()
1309
- if not p.exists():
1310
- raise click.ClickException(f"Env file not found: {p}")
1311
- resolved.append(p)
1312
- return resolved
1313
-
1314
- # Always prompt for env file selection instead of auto-loading defaults
1315
- script_dir = script_path.parent.resolve()
1316
- cwd = Path.cwd()
1317
-
1318
- # Look for env files in current working directory first, then repo root
1319
- env_candidates = []
1320
-
1321
- # Add CWD env files first (prioritized)
1322
- cwd_env_files = sorted(cwd.glob("**/*.env"))
1323
- env_candidates.extend(cwd_env_files)
1324
-
1325
- # Add repo root env files
1326
- repo_env_files = sorted(REPO_ROOT.glob("**/*.env"))
1327
- # Avoid duplicates
1328
- for repo_file in repo_env_files:
1329
- if repo_file not in env_candidates:
1330
- env_candidates.append(repo_file)
1331
-
1332
- if not env_candidates:
1333
- created = _interactive_create_env(script_dir)
1334
- if created is None:
1335
- raise click.ClickException("Env file required (--env-file) for this task app")
1336
- return [created]
1337
-
1338
- click.echo("Select env file to load:")
1339
- for idx, path in enumerate(env_candidates, start=1):
1340
- click.echo(f" {idx}) {path.resolve()}")
1341
- choice = click.prompt("Enter choice", type=click.IntRange(1, len(env_candidates)), default=1)
1342
- return [env_candidates[choice - 1]]
1343
-
1344
-
1345
- def _path_is_within(child: Path, parent: Path) -> bool:
1346
- try:
1347
- child.resolve().relative_to(parent.resolve())
1348
- return True
1349
- except Exception:
1350
- return False
1351
-
1352
-
1353
- @functools.lru_cache(maxsize=16)
1354
- def _is_modal_shim(path_str: str) -> bool:
1355
- """Return True if the candidate CLI path refers to the synth-ai shim."""
1356
-
1357
- path = Path(path_str)
1358
- try:
1359
- resolved = path.resolve(strict=True)
1360
- except Exception:
1361
- resolved = path
1362
-
1363
- if not resolved.exists() or resolved.is_dir():
1364
- return False
1365
-
1366
- snippet = ""
1367
- try:
1368
- snippet = resolved.read_bytes()[:4096].decode("utf-8", errors="ignore")
1369
- except Exception:
1370
- snippet = ""
1371
-
1372
- shim_markers = (
1373
- "synth_ai.cli._modal_wrapper",
1374
- "from modal.__main__ import main",
1375
- "import modal.__main__",
1376
- "run_module('modal.__main__'",
1377
- )
1378
- if snippet and any(marker in snippet for marker in shim_markers):
1379
- return True
1380
-
1381
- try:
1382
- size = resolved.stat().st_size
1383
- except Exception:
1384
- size = None
1385
-
1386
- if (
1387
- size is not None
1388
- and size < 2048
1389
- and "python" in (snippet.splitlines() or [""])[0]
1390
- and (
1391
- "modal.__main__" in snippet
1392
- or "modal.__main__" in snippet.replace(" ", "")
1393
- )
1394
- ):
1395
- return True
1396
-
1397
- virtual_env = os.environ.get("VIRTUAL_ENV")
1398
- if virtual_env and _path_is_within(resolved, Path(virtual_env)):
1399
- return True
1400
-
1401
- if _path_is_within(resolved, REPO_ROOT):
1402
- return True
1403
-
1404
- uv_tools_dir = Path.home() / ".local" / "share" / "uv" / "tools"
1405
- return uv_tools_dir.exists() and _path_is_within(resolved, uv_tools_dir)
1406
-
1407
-
1408
- def _find_modal_executable(modal_cli: str) -> tuple[str | None, str | None]:
1409
- """Return the first non-shim executable and the first shim discovered on PATH."""
1410
-
1411
- if not modal_cli:
1412
- modal_cli = "modal"
1413
-
1414
- candidate_path = Path(modal_cli).expanduser()
1415
- if candidate_path.is_absolute() or len(candidate_path.parts) > 1:
1416
- resolved_candidate = candidate_path
1417
- if not resolved_candidate.is_absolute():
1418
- resolved_candidate = (Path.cwd() / resolved_candidate).resolve()
1419
- else:
1420
- resolved_candidate = resolved_candidate.resolve()
1421
- if not resolved_candidate.exists():
1422
- raise click.ClickException(f"--modal-cli path does not exist: {resolved_candidate}")
1423
- if not os.access(resolved_candidate, os.X_OK):
1424
- raise click.ClickException(f"--modal-cli is not executable: {resolved_candidate}")
1425
- return str(resolved_candidate), None
1426
-
1427
- path_env = os.environ.get("PATH", "")
1428
- if not path_env:
1429
- return None, None
1430
-
1431
- seen_dirs: set[str] = set()
1432
- seen_candidates: set[str] = set()
1433
- shim_path: str | None = None
1434
-
1435
- for raw_entry in path_env.split(os.pathsep):
1436
- if not raw_entry:
1437
- continue
1438
- try:
1439
- resolved_entry = str(Path(raw_entry).resolve())
1440
- except Exception:
1441
- resolved_entry = os.path.normpath(raw_entry)
1442
- if resolved_entry in seen_dirs:
1443
- continue
1444
- seen_dirs.add(resolved_entry)
1445
-
1446
- candidate = shutil.which(modal_cli, path=raw_entry)
1447
- if candidate is None:
1448
- continue
1449
- if candidate in seen_candidates:
1450
- continue
1451
- seen_candidates.add(candidate)
1452
-
1453
- if _is_modal_shim(candidate):
1454
- if shim_path is None:
1455
- shim_path = candidate
1456
- continue
1457
- return candidate, shim_path
1458
-
1459
- return None, shim_path
1460
-
1461
-
1462
- def _modal_command_prefix(modal_cli: str) -> list[str]:
1463
- """Resolve a command prefix for invoking the Modal CLI within the active environment."""
1464
-
1465
- force_wrapper_env = os.environ.get("SYNTH_FORCE_MODAL_WRAPPER", "").strip().lower()
1466
- if force_wrapper_env in {"1", "true", "yes"}:
1467
- click.secho(
1468
- "[modal-prefix] SYNTH_FORCE_MODAL_WRAPPER=1 -> using in-process wrapper",
1469
- fg="yellow",
1470
- )
1471
- return [sys.executable, "-m", "synth_ai.cli._modal_wrapper"]
1472
-
1473
- lookup = modal_cli or "modal"
1474
- spec = importlib.util.find_spec("modal") if lookup == "modal" else None
1475
-
1476
- preferred, shim_candidate = _find_modal_executable(lookup)
1477
- if preferred is not None:
1478
- detail = f"[modal-prefix] modal_cli={lookup} selected={preferred}"
1479
- if lookup == "modal":
1480
- detail += f" spec={'yes' if spec else 'no'}"
1481
- click.secho(detail, fg="cyan")
1482
- return [preferred]
1483
-
1484
- if lookup != "modal":
1485
- raise click.ClickException(f"Modal CLI not found (looked for '{lookup}')")
1486
-
1487
- if spec is not None:
1488
- warning = "[modal-prefix] Using synth-ai modal shim; pass --modal-cli /path/to/modal to override."
1489
- if shim_candidate is not None:
1490
- warning = (
1491
- f"[modal-prefix] Using synth-ai modal shim at {shim_candidate}; "
1492
- "pass --modal-cli /path/to/modal to override."
1493
- )
1494
- click.secho(warning, fg="yellow")
1495
- click.secho(
1496
- "[modal-prefix] modal_cli=modal selected=module-wrapper spec=yes",
1497
- fg="yellow",
1498
- )
1499
- return [sys.executable, "-m", "synth_ai.cli._modal_wrapper"]
1500
-
1501
- if shim_candidate is not None:
1502
- raise click.ClickException(
1503
- "Modal CLI resolution found the synth-ai shim but the 'modal' package "
1504
- "is not importable in this environment. Install the official Modal CLI "
1505
- "or pass --modal-cli with its path."
1506
- )
1507
-
1508
- raise click.ClickException(
1509
- "Modal CLI not found. Install the 'modal' package in this environment or pass "
1510
- "--modal-cli with an explicit path."
1511
- )
1512
-
1513
-
1514
- def _build_modal_app_wrapper(original_script: Path) -> tuple[Path, Path]:
1515
- source_dir = original_script.parent.resolve()
1516
- repo_root = REPO_ROOT
1517
- temp_root = Path(tempfile.mkdtemp(prefix="synth_modal_app_"))
1518
-
1519
- wrapper_source = textwrap.dedent(
1520
- f"""
1521
- from importlib import util as _util
1522
- from pathlib import Path as _Path
1523
- import sys as _sys
1524
-
1525
- _source_dir = _Path({str(source_dir)!r}).resolve()
1526
- _module_path = _source_dir / {original_script.name!r}
1527
- _package_name = _source_dir.name
1528
- _repo_root = _Path({str(repo_root)!r}).resolve()
1529
- _synth_dir = _repo_root / "synth_ai"
1530
-
1531
- for _path in (str(_source_dir), str(_source_dir.parent), str(_repo_root)):
1532
- if _path not in _sys.path:
1533
- _sys.path.insert(0, _path)
1534
-
1535
- _spec = _util.spec_from_file_location("_synth_modal_target", str(_module_path))
1536
- if _spec is None or _spec.loader is None:
1537
- raise SystemExit("Unable to load modal task app from {original_script}")
1538
- _module = _util.module_from_spec(_spec)
1539
- _sys.modules.setdefault("_synth_modal_target", _module)
1540
- _spec.loader.exec_module(_module)
1541
-
1542
- try:
1543
- from modal import App as _ModalApp
1544
- from modal import Image as _ModalImage
1545
- except Exception:
1546
- _ModalApp = None # type: ignore[assignment]
1547
- _ModalImage = None # type: ignore[assignment]
1548
-
1549
- def _apply_local_mounts(image):
1550
- if _ModalImage is None or not isinstance(image, _ModalImage):
1551
- return image
1552
- mounts = [
1553
- (str(_source_dir), f"/root/{{_package_name}}"),
1554
- (str(_synth_dir), "/root/synth_ai"),
1555
- ]
1556
- for local_path, remote_path in mounts:
1557
- try:
1558
- image = image.add_local_dir(local_path, remote_path=remote_path)
1559
- except Exception:
1560
- pass
1561
- return image
1562
-
1563
- if hasattr(_module, "image"):
1564
- _module.image = _apply_local_mounts(getattr(_module, "image"))
1565
-
1566
- _candidate = getattr(_module, "app", None)
1567
- if _ModalApp is None or not isinstance(_candidate, _ModalApp):
1568
- candidate_modal_app = getattr(_module, "modal_app", None)
1569
- if _ModalApp is not None and isinstance(candidate_modal_app, _ModalApp):
1570
- _candidate = candidate_modal_app
1571
- setattr(_module, "app", _candidate)
1572
-
1573
- if _ModalApp is not None and not isinstance(_candidate, _ModalApp):
1574
- raise SystemExit(
1575
- "Modal task app must expose an 'app = modal.App(...)' (or modal_app) attribute."
1576
- )
1577
-
1578
- for remote_path in ("/root/synth_ai", f"/root/{{_package_name}}"):
1579
- if remote_path not in _sys.path:
1580
- _sys.path.insert(0, remote_path)
1581
-
1582
- globals().update({{k: v for k, v in vars(_module).items() if not k.startswith("__")}})
1583
- app = getattr(_module, "app")
1584
- """
1585
- ).strip()
1586
-
1587
- wrapper_path = temp_root / "__modal_wrapper__.py"
1588
- wrapper_path.write_text(wrapper_source + "\n", encoding="utf-8")
1589
- return wrapper_path, temp_root
1590
-
1591
-
1592
-
1593
- def _run_modal_script(
1594
- script_path: Path,
1595
- modal_cli: str,
1596
- command: str,
1597
- env_paths: Sequence[Path],
1598
- *,
1599
- modal_name: str | None = None,
1600
- dry_run: bool = False,
1601
- ) -> None:
1602
- env_paths_list = [Path(p).resolve() for p in env_paths]
1603
- path_strings = [str(p) for p in env_paths_list]
1604
- _load_env_files_into_process(path_strings)
1605
- _ensure_env_values(env_paths_list, script_path.parent)
1606
- _load_env_values(env_paths_list)
1607
- # Ensure ENVIRONMENT_API_KEY is uploaded to backend for this org (matches registry path behavior)
1608
- try:
1609
- _preflight_env_key(env_paths_list, crash_on_failure=True)
1610
- except Exception as _pf_err:
1611
- raise click.ClickException(str(_pf_err)) from _pf_err
1612
-
1613
- proc_env = os.environ.copy()
1614
- pythonpath_entries: list[str] = []
1615
- script_dir = script_path.parent.resolve()
1616
- pythonpath_entries.append(str(script_dir))
1617
- if (script_dir / "__init__.py").exists():
1618
- # Script lives inside a package; ensure the parent package directory is importable.
1619
- pythonpath_entries.append(str(script_dir.parent.resolve()))
1620
- pythonpath_entries.append(str(REPO_ROOT))
1621
- existing_pp = proc_env.get("PYTHONPATH")
1622
- if existing_pp:
1623
- pythonpath_entries.append(existing_pp)
1624
- unique_paths = list(dict.fromkeys(pythonpath_entries))
1625
- proc_env["PYTHONPATH"] = os.pathsep.join(unique_paths)
1626
-
1627
- wrapper_info: tuple[Path, Path] | None = None
1628
- target_script = script_path
1629
- if command in {"serve", "deploy"}:
1630
- wrapper_path, temp_root = _build_modal_app_wrapper(script_path)
1631
- wrapper_info = (wrapper_path, temp_root)
1632
- target_script = wrapper_path
1633
-
1634
- # Ensure the wrapper has access to the Synth AI source for intra-repo imports
1635
- if "PYTHONPATH" in proc_env:
1636
- proc_env["PYTHONPATH"] = os.pathsep.join(
1637
- [str(REPO_ROOT)] + proc_env["PYTHONPATH"].split(os.pathsep)
1638
- )
1639
- else:
1640
- proc_env["PYTHONPATH"] = str(REPO_ROOT)
1641
-
1642
- cmd = [*_modal_command_prefix(modal_cli), command, str(target_script)]
1643
- if modal_name and command == "deploy":
1644
- cmd.extend(["--name", modal_name])
1645
- if dry_run:
1646
- click.echo(
1647
- "Dry run: " + " ".join(shlex.quote(component) for component in cmd),
1648
- err=False,
1649
- )
1650
- return
1651
- click.secho(
1652
- "[modal-exec] " + " ".join(shlex.quote(component) for component in cmd),
1653
- fg="cyan",
1654
- )
1655
- try:
1656
- # Stream output live for better diagnostics
1657
- proc = subprocess.Popen(
1658
- cmd,
1659
- stdout=subprocess.PIPE,
1660
- stderr=subprocess.STDOUT,
1661
- text=True,
1662
- bufsize=1,
1663
- env=proc_env,
1664
- )
1665
- task_app_url = None
1666
- assert proc.stdout is not None
1667
- for line in proc.stdout:
1668
- click.echo(line, nl=False)
1669
- if task_app_url is None and ("modal.run" in line and "=>" in line):
1670
- parts = line.split("=>")
1671
- if len(parts) >= 2:
1672
- task_app_url = parts[-1].strip()
1673
- if task_app_url and env_paths_list:
1674
- env_file = env_paths_list[0]
1675
- _save_to_env_file(env_file, "TASK_APP_BASE_URL", task_app_url)
1676
- click.echo(f"\n✓ Task app URL: {task_app_url}\n")
1677
- rc = proc.wait()
1678
- if rc != 0:
1679
- raise subprocess.CalledProcessError(rc, cmd)
1680
- except subprocess.CalledProcessError as exc:
1681
- raise click.ClickException(
1682
- f"modal {command} failed with exit code {exc.returncode}"
1683
- ) from exc
1684
- finally:
1685
- if wrapper_info is not None:
1686
- wrapper_path, temp_root = wrapper_info
1687
- with contextlib.suppress(Exception):
1688
- wrapper_path.unlink(missing_ok=True)
1689
- shutil.rmtree(temp_root, ignore_errors=True)
1690
-
1691
-
1692
- def _preflight_env_key(env_paths: Sequence[Path] | None = None, *, crash_on_failure: bool = False) -> None:
1693
- try:
1694
- raw_backend = (
1695
- os.environ.get("BACKEND_BASE_URL")
1696
- or os.environ.get("SYNTH_BASE_URL")
1697
- or f"{PROD_BASE_URL_DEFAULT}/api"
1698
- )
1699
- backend_base = raw_backend.rstrip("/")
1700
- if not backend_base.endswith("/api"):
1701
- backend_base = backend_base + "/api"
1702
- synth_key = os.environ.get("SYNTH_API_KEY") or ""
1703
- env_api_key = (
1704
- os.environ.get("ENVIRONMENT_API_KEY") or os.environ.get("DEV_ENVIRONMENT_API_KEY") or ""
1705
- ).strip()
1706
-
1707
- def _preview(value: str) -> str:
1708
- if len(value) <= 10:
1709
- return value
1710
- return f"{value[:6]}...{value[-4:]}"
1711
-
1712
- minted = False
1713
- if not env_api_key:
1714
- secrets_module = _maybe_import("synth_ai.learning.rl.secrets")
1715
- try:
1716
- if secrets_module is None:
1717
- raise RuntimeError("secrets module unavailable")
1718
- mint_env_key = secrets_module.mint_environment_api_key
1719
- env_api_key = mint_env_key()
1720
- os.environ["ENVIRONMENT_API_KEY"] = env_api_key
1721
- os.environ.setdefault("DEV_ENVIRONMENT_API_KEY", env_api_key)
1722
- minted = True
1723
- click.echo(
1724
- f"[preflight] minted ENVIRONMENT_API_KEY ({_preview(env_api_key)})"
1725
- )
1726
- except Exception as mint_err:
1727
- if crash_on_failure:
1728
- raise click.ClickException(
1729
- f"[CRITICAL] Failed to mint ENVIRONMENT_API_KEY: {mint_err}"
1730
- ) from mint_err
1731
- click.echo(
1732
- f"[WARN] Failed to mint ENVIRONMENT_API_KEY automatically ({mint_err}); proceeding without upload"
1733
- )
1734
-
1735
- if env_api_key and not os.environ.get("ENVIRONMENT_API_KEY"):
1736
- os.environ["ENVIRONMENT_API_KEY"] = env_api_key
1737
- if env_api_key and not os.environ.get("DEV_ENVIRONMENT_API_KEY"):
1738
- os.environ["DEV_ENVIRONMENT_API_KEY"] = env_api_key
1739
-
1740
- if minted:
1741
- _persist_env_api_key(env_api_key, env_paths)
1742
-
1743
- if synth_key and env_api_key:
1744
- import base64
1745
-
1746
- import httpx
1747
-
1748
- click.echo(f"[preflight] backend={backend_base}")
1749
- with httpx.Client(timeout=15.0, headers={"Authorization": f"Bearer {synth_key}"}) as c:
1750
- click.echo("[preflight] fetching public key…")
1751
- rpk = c.get(f"{backend_base.rstrip('/')}/v1/crypto/public-key")
1752
- pk = (rpk.json() or {}).get("public_key") if rpk.status_code == 200 else None
1753
- if pk:
1754
- try:
1755
- from nacl.public import PublicKey, SealedBox
1756
-
1757
- # Decode public key and build sealed box
1758
- pk_bytes = base64.b64decode(pk, validate=True)
1759
- pub = PublicKey(pk_bytes)
1760
- sb = SealedBox(pub)
1761
-
1762
- # Encrypt plaintext key
1763
- ct_b64 = base64.b64encode(sb.encrypt(env_api_key.encode("utf-8"))).decode()
1764
- payload = {"name": "ENVIRONMENT_API_KEY", "ciphertext_b64": ct_b64}
1765
-
1766
- # Emit diagnostic logging (safe previews + hashes only)
1767
- try:
1768
- import hashlib as _hash
1769
-
1770
- # Backend URL context
1771
- click.echo(f"[preflight] posting to {backend_base.rstrip('/')}/v1/env-keys")
1772
-
1773
- # Public key diagnostics
1774
- pk_sha256 = _hash.sha256(pk_bytes).hexdigest()
1775
- click.echo(
1776
- f"[preflight] public_key: b64_len={len(pk)} sha256={pk_sha256} head={pk[:16]} tail={pk[-16:]}"
1777
- )
1778
-
1779
- # Plaintext diagnostics (never print full secret)
1780
- _plain = env_api_key
1781
- _plen = len(_plain)
1782
- _ppref = (_plain[:6] + "…") if _plen > 10 else _plain
1783
- _psuf = ("…" + _plain[-4:]) if _plen > 10 else ""
1784
- _has_ws = any(ch.isspace() for ch in _plain)
1785
- click.echo(
1786
- f"[preflight] plaintext: len={_plen} preview={_ppref}{_psuf} has_ws={bool(_has_ws)}"
1787
- )
1788
-
1789
- # Ciphertext diagnostics
1790
- try:
1791
- _ct_bytes = base64.b64decode(ct_b64, validate=True)
1792
- _ct_sha256 = _hash.sha256(_ct_bytes).hexdigest()
1793
- click.echo(
1794
- f"[preflight] ciphertext: b64_len={len(ct_b64)} sha256={_ct_sha256} head={ct_b64[:16]} tail={ct_b64[-16:]}"
1795
- )
1796
- except Exception:
1797
- click.echo("[preflight] ciphertext: invalid base64 (unexpected)")
1798
- except Exception:
1799
- # Best-effort logging only
1800
- pass
1801
- with httpx.Client(
1802
- timeout=15.0,
1803
- headers={
1804
- "Authorization": f"Bearer {synth_key}",
1805
- "Content-Type": "application/json",
1806
- },
1807
- ) as c:
1808
- click.echo("[preflight] upserting env key…")
1809
- up = c.post(f"{backend_base.rstrip('/')}/v1/env-keys", json=payload)
1810
- body_snip = ""
1811
- try:
1812
- body_snip = up.text[:400] if up.text else ""
1813
- except Exception:
1814
- body_snip = ""
1815
- click.echo(f"[preflight] upsert status={up.status_code}{(' body='+body_snip) if body_snip else ''}")
1816
-
1817
- # If upload succeeded (2xx), consider it successful even if verification fails
1818
- # This handles cases where verification endpoint has issues
1819
- if 200 <= up.status_code < 300:
1820
- key_preview = (
1821
- _preview(env_api_key)
1822
- )
1823
- click.echo(
1824
- f"✅ ENVIRONMENT_API_KEY uploaded successfully ({key_preview})"
1825
- )
1826
-
1827
- # Try verification, but don't fail if it doesn't work
1828
- click.echo("[preflight] verifying env key presence…")
1829
- try:
1830
- ver = c.get(f"{backend_base.rstrip('/')}/v1/env-keys/verify")
1831
- if ver.status_code == 200 and (ver.json() or {}).get("present"):
1832
- click.echo("✅ Key verified in backend")
1833
- else:
1834
- click.echo(
1835
- f"⚠️ Verification returned {ver.status_code}, but upload succeeded - proceeding"
1836
- )
1837
- except Exception as verify_err:
1838
- click.echo(
1839
- f"⚠️ Verification check failed ({verify_err}), but upload succeeded - proceeding"
1840
- )
1841
- else:
1842
- error_msg = (
1843
- f"ENVIRONMENT_API_KEY upload failed with status {up.status_code}"
1844
- + (f" body={body_snip}" if body_snip else "")
1845
- )
1846
- if crash_on_failure:
1847
- raise click.ClickException(f"[CRITICAL] {error_msg}")
1848
- click.echo(f"[WARN] {error_msg}; proceeding anyway")
1849
- except Exception as e:
1850
- error_msg = f"Failed to encrypt/upload ENVIRONMENT_API_KEY: {e}"
1851
- if crash_on_failure:
1852
- raise click.ClickException(f"[CRITICAL] {error_msg}") from e
1853
- click.echo(f"[WARN] {error_msg}; proceeding anyway")
1854
- except Exception as e:
1855
- error_msg = f"Backend preflight for ENVIRONMENT_API_KEY failed: {e}"
1856
- if crash_on_failure:
1857
- raise click.ClickException(f"[CRITICAL] {error_msg}") from e
1858
- click.echo(f"[WARN] {error_msg}; proceeding anyway")
1859
-
1860
-
1861
- def _run_modal_with_entry(
1862
- entry: TaskAppEntryType,
1863
- modal_cfg: ModalDeploymentConfigType,
1864
- modal_cli: str,
1865
- modal_name: str | None,
1866
- env_paths: list[Path],
1867
- command: str,
1868
- *,
1869
- dry_run: bool = False,
1870
- original_path: Path | None = None,
1871
- ) -> None:
1872
- env_paths_list = [Path(p).resolve() for p in env_paths]
1873
- dotenv_paths = [str(p) for p in env_paths_list]
1874
- _load_env_files_into_process(dotenv_paths)
1875
- fallback_dir = env_paths_list[0].parent if env_paths_list else Path.cwd()
1876
- _ensure_env_values(env_paths_list, fallback_dir)
1877
- _load_env_values(env_paths_list)
1878
- _preflight_env_key(env_paths_list, crash_on_failure=True)
1879
-
1880
- inline_secret_values: dict[str, str] = {}
1881
- env_key = os.environ.get("ENVIRONMENT_API_KEY", "").strip()
1882
- if env_key:
1883
- inline_secret_values["ENVIRONMENT_API_KEY"] = env_key
1884
- inline_secret_values.setdefault("DEV_ENVIRONMENT_API_KEY", env_key)
1885
- aliases = os.environ.get("ENVIRONMENT_API_KEY_ALIASES", "").strip()
1886
- if aliases:
1887
- inline_secret_values["ENVIRONMENT_API_KEY_ALIASES"] = aliases
1888
- for vendor_key in ("GROQ_API_KEY", "OPENAI_API_KEY"):
1889
- val = os.environ.get(vendor_key, "").strip()
1890
- if val:
1891
- inline_secret_values[vendor_key] = val
1892
-
1893
- if inline_secret_values:
1894
- preview = inline_secret_values.get("ENVIRONMENT_API_KEY", "")
1895
- shown = f"{preview[:6]}...{preview[-4:]}" if preview and len(preview) > 10 else preview
1896
- click.echo(f"[deploy] inline ENVIRONMENT_API_KEY prepared ({shown})")
1897
- else:
1898
- click.echo("[deploy] no inline ENVIRONMENT_API_KEY found; relying on Modal secrets/dotenv")
1899
-
1900
- script_path = _write_modal_entrypoint(
1901
- entry,
1902
- modal_cfg,
1903
- modal_name,
1904
- dotenv_paths=dotenv_paths,
1905
- original_path=original_path,
1906
- inline_secret_values=inline_secret_values,
1907
- )
1908
- cmd = [*_modal_command_prefix(modal_cli), command, str(script_path)]
1909
- if modal_name and command == "deploy":
1910
- cmd.extend(["--name", modal_name])
1911
-
1912
- proc_env = os.environ.copy()
1913
- pythonpath_entries: list[str] = [str(REPO_ROOT)]
1914
- if original_path is not None:
1915
- source_dir = Path(original_path).resolve().parent
1916
- pythonpath_entries.insert(0, str(source_dir))
1917
- existing_pp = proc_env.get("PYTHONPATH")
1918
- if existing_pp:
1919
- pythonpath_entries.append(existing_pp)
1920
- proc_env["PYTHONPATH"] = os.pathsep.join(list(dict.fromkeys(pythonpath_entries)))
1921
-
1922
- if dry_run:
1923
- click.echo("Dry run: " + " ".join(shlex.quote(component) for component in cmd))
1924
- script_path.unlink(missing_ok=True)
1925
- return
1926
- click.secho(
1927
- "[modal-exec] " + " ".join(shlex.quote(component) for component in cmd),
1928
- fg="cyan",
1929
- )
1930
-
1931
- try:
1932
- # Stream output live for better diagnostics
1933
- proc = subprocess.Popen(
1934
- cmd,
1935
- stdout=subprocess.PIPE,
1936
- stderr=subprocess.STDOUT,
1937
- text=True,
1938
- bufsize=1,
1939
- env=proc_env,
1940
- )
1941
- task_app_url = None
1942
- assert proc.stdout is not None
1943
- for line in proc.stdout:
1944
- # Echo lines as they arrive
1945
- click.echo(line, nl=False)
1946
- # Look for lines containing modal.run URLs
1947
- if task_app_url is None and ("modal.run" in line and "=>" in line):
1948
- parts = line.split("=>")
1949
- if len(parts) >= 2:
1950
- task_app_url = parts[-1].strip()
1951
- # Save URL immediately for convenience
1952
- if task_app_url and env_paths_list:
1953
- env_file = env_paths_list[0]
1954
- _save_to_env_file(env_file, "TASK_APP_BASE_URL", task_app_url)
1955
- click.echo(f"\n✓ Task app URL: {task_app_url}\n")
1956
- rc = proc.wait()
1957
- if rc != 0:
1958
- raise subprocess.CalledProcessError(rc, cmd)
1959
- except subprocess.CalledProcessError as exc:
1960
- raise click.ClickException(
1961
- f"modal {command} failed with exit code {exc.returncode}"
1962
- ) from exc
1963
- finally:
1964
- script_path.unlink(missing_ok=True)
1965
-
1966
-
1967
- def _load_env_values(paths: list[Path], *, allow_empty: bool = False) -> dict[str, str]:
1968
- values: dict[str, str] = {}
1969
- for p in paths:
1970
- try:
1971
- content = p.read_text(encoding="utf-8")
1972
- except FileNotFoundError:
1973
- continue
1974
- for line in content.splitlines():
1975
- if not line or line.lstrip().startswith("#") or "=" not in line:
1976
- continue
1977
- key, value = line.split("=", 1)
1978
- if key and key not in values:
1979
- values[key.strip()] = value.strip()
1980
- if not allow_empty and not values:
1981
- raise click.ClickException("No environment values found")
1982
- os.environ.update({k: v for k, v in values.items() if k and v})
1983
- return values
1984
-
1985
-
1986
- def _interactive_create_env(target_dir: Path) -> Path | None:
1987
- env_path = (target_dir / ".env").resolve()
1988
- if env_path.exists():
1989
- existing = _parse_env_file(env_path)
1990
- env_api = (existing.get("ENVIRONMENT_API_KEY") or "").strip()
1991
- if env_api:
1992
- return env_path
1993
- click.echo(f"Existing {env_path} is missing ENVIRONMENT_API_KEY. Let's update it.")
1994
- return _interactive_fill_env(env_path)
1995
-
1996
- click.echo("No .env found for this task app. Let's create one.")
1997
- return _interactive_fill_env(env_path)
1998
-
1999
-
2000
- def _parse_env_file(path: Path) -> dict[str, str]:
2001
- data: dict[str, str] = {}
2002
- try:
2003
- for line in path.read_text(encoding="utf-8").splitlines():
2004
- if not line or line.lstrip().startswith("#") or "=" not in line:
2005
- continue
2006
- key, value = line.split("=", 1)
2007
- data[key.strip()] = value.strip()
2008
- except FileNotFoundError:
2009
- pass
2010
- return data
2011
-
2012
-
2013
- def _interactive_fill_env(env_path: Path) -> Path | None:
2014
- if not sys.stdin.isatty():
2015
- raise click.ClickException(
2016
- "ENVIRONMENT_API_KEY missing. Provide --env-file or run `synth-ai setup` in an interactive shell to create one."
2017
- )
2018
- existing = _parse_env_file(env_path) if env_path.exists() else {}
2019
-
2020
- def _prompt(label: str, *, default: str = "", required: bool) -> str | None:
2021
- while True:
2022
- try:
2023
- value = click.prompt(
2024
- label, default=default, show_default=bool(default) or not required
2025
- ).strip()
2026
- except (Abort, EOFError, KeyboardInterrupt):
2027
- click.echo("Aborted env creation.")
2028
- return None
2029
- if value or not required:
2030
- return value
2031
- click.echo("This field is required.")
2032
-
2033
- env_default = existing.get("ENVIRONMENT_API_KEY", "").strip()
2034
- env_api_key = _prompt("ENVIRONMENT_API_KEY", default=env_default, required=True)
2035
- if env_api_key is None:
2036
- return None
2037
- synth_default = existing.get("SYNTH_API_KEY", "").strip()
2038
- openai_default = existing.get("OPENAI_API_KEY", "").strip()
2039
- synth_key = _prompt("SYNTH_API_KEY (optional)", default=synth_default, required=False) or ""
2040
- openai_key = _prompt("OPENAI_API_KEY (optional)", default=openai_default, required=False) or ""
2041
-
2042
- lines = [
2043
- f"ENVIRONMENT_API_KEY={env_api_key}",
2044
- f"SYNTH_API_KEY={synth_key}",
2045
- f"OPENAI_API_KEY={openai_key}",
2046
- ]
2047
- env_path.parent.mkdir(parents=True, exist_ok=True)
2048
- env_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
2049
- click.echo(f"Wrote credentials to {env_path}")
2050
- return env_path
2051
-
2052
-
2053
- def _ensure_env_values(env_paths: list[Path], fallback_dir: Path) -> None:
2054
- if (os.environ.get("ENVIRONMENT_API_KEY") or "").strip():
2055
- return
2056
- target = env_paths[0] if env_paths else (fallback_dir / ".env").resolve()
2057
- click.echo(
2058
- "⚠️ ENVIRONMENT_API_KEY not set. Run `uvx synth-ai setup`, "
2059
- "or pass --env-file pointing at a .env with ENVIRONMENT_API_KEY."
2060
- )
2061
- result = _interactive_fill_env(target)
2062
- if result is None:
2063
- raise click.ClickException("ENVIRONMENT_API_KEY required to continue")
2064
- # After generating .env, load it and override any previously-empty values
2065
- _load_env_values([result])
2066
- if not (os.environ.get("ENVIRONMENT_API_KEY") or "").strip():
2067
- raise click.ClickException("Failed to load ENVIRONMENT_API_KEY from generated .env")
2068
-
2069
-
2070
- def _deploy_entry(
2071
- entry: TaskAppEntryType,
2072
- modal_name: str | None,
2073
- dry_run: bool,
2074
- modal_cli: str,
2075
- env_file: Sequence[str],
2076
- original_path: Path | None = None,
2077
- ) -> None:
2078
- modal_cfg = entry.modal
2079
- if modal_cfg is None:
2080
- raise click.ClickException(
2081
- f"Task app '{entry.app_id}' does not define Modal deployment settings"
2082
- )
2083
-
2084
- env_paths = _determine_env_files(entry, env_file, original_path=original_path)
2085
- click.echo("Using env file(s): " + ", ".join(str(p.resolve()) for p in env_paths))
2086
- _run_modal_with_entry(
2087
- entry,
2088
- modal_cfg,
2089
- modal_cli,
2090
- modal_name,
2091
- env_paths,
2092
- command="deploy",
2093
- dry_run=dry_run,
2094
- original_path=original_path,
2095
- )
2096
-
2097
-
2098
- def _modal_serve_entry(
2099
- entry: TaskAppEntryType,
2100
- modal_name: str | None,
2101
- modal_cli: str,
2102
- env_file: Sequence[str],
2103
- original_path: Path | None = None,
2104
- ) -> None:
2105
- modal_cfg = entry.modal
2106
- if modal_cfg is None:
2107
- raise click.ClickException(
2108
- f"Task app '{entry.app_id}' does not define Modal deployment settings"
2109
- )
2110
-
2111
- env_paths = _determine_env_files(entry, env_file, original_path=original_path)
2112
- click.echo("Using env file(s): " + ", ".join(str(p.resolve()) for p in env_paths))
2113
- _run_modal_with_entry(
2114
- entry,
2115
- modal_cfg,
2116
- modal_cli,
2117
- modal_name,
2118
- env_paths,
2119
- command="serve",
2120
- original_path=original_path,
2121
- )
2122
-
2123
-
2124
- @click.group(name="task-app", help="Utilities for serving and deploying Synth task apps.")
2125
- def task_app_group() -> None:
2126
- pass
2127
-
2128
-
2129
- @task_app_group.command("list")
2130
- def list_apps() -> None:
2131
- """List registered task apps."""
2132
-
2133
- entries = registry.list()
2134
- if not entries:
2135
- click.echo("No task apps registered.")
2136
- return
2137
- for entry in entries:
2138
- aliases = f" (aliases: {', '.join(entry.aliases)})" if entry.aliases else ""
2139
- click.echo(f"- {entry.app_id}{aliases}: {entry.description}")
2140
-
2141
-
2142
- @task_app_group.command("validate")
2143
- @click.argument("app_id", type=str, required=True)
2144
- @click.option(
2145
- "--url",
2146
- type=str,
2147
- default=None,
2148
- help="Task app URL to validate (if not provided, starts a local server)",
2149
- )
2150
- @click.option(
2151
- "--port",
2152
- type=int,
2153
- default=8765,
2154
- help="Port to use for temporary server (default: 8765)",
2155
- )
2156
- @click.option(
2157
- "--api-key",
2158
- type=str,
2159
- default=None,
2160
- envvar="ENVIRONMENT_API_KEY",
2161
- help="API key for authentication (default: $ENVIRONMENT_API_KEY)",
2162
- )
2163
- @click.option(
2164
- "--min-instances",
2165
- type=int,
2166
- default=10,
2167
- help="Minimum number of task instances required (default: 10)",
2168
- )
2169
- @click.option(
2170
- "--verbose",
2171
- "-v",
2172
- is_flag=True,
2173
- help="Show detailed information about the task app",
2174
- )
2175
- @click.option(
2176
- "--json",
2177
- "output_json",
2178
- is_flag=True,
2179
- help="Output results as JSON",
2180
- )
2181
- def validate_task_app_cmd(
2182
- app_id: str,
2183
- url: str | None,
2184
- port: int,
2185
- api_key: str | None,
2186
- min_instances: int,
2187
- verbose: bool,
2188
- output_json: bool,
2189
- ) -> None:
2190
- """Validate a task app deployment readiness.
2191
-
2192
- This command verifies that a task app is properly configured and ready to run
2193
- by checking all required HTTP endpoints, authentication, and task availability.
2194
-
2195
- By default, it starts a temporary local server for validation. You can also
2196
- validate a remote deployment by passing --url.
2197
-
2198
- \b
2199
- What gets validated:
2200
- • Root endpoint (/) responds correctly
2201
- • Health endpoint (/health) is accessible with proper authentication
2202
- • Info endpoint (/info) returns valid task metadata
2203
- • Task info endpoint (/task_info) provides task instances
2204
- • Rollout endpoint (/rollout) is registered
2205
- • At least N task instances are available (default: 10)
2206
-
2207
- \b
2208
- Examples:
2209
-
2210
- \b
2211
- Validate grpo-crafter (starts local server automatically):
2212
- $ synth-ai task-app validate grpo-crafter
2213
-
2214
- \b
2215
- Validate sokoban with verbose output:
2216
- $ synth-ai task-app validate sokoban --verbose
2217
-
2218
- \b
2219
- Validate with custom port:
2220
- $ synth-ai task-app validate sokoban --port 9000
2221
-
2222
- \b
2223
- Validate a remote deployment:
2224
- $ synth-ai task-app validate grpo-crafter --url https://my-crafter.modal.run
2225
-
2226
- \b
2227
- Require at least 20 task instances:
2228
- $ synth-ai task-app validate grpo-crafter --min-instances 20
2229
-
2230
- \b
2231
- Get JSON output for automation:
2232
- $ synth-ai task-app validate sokoban --json
2233
-
2234
- \b
2235
- Common use cases:
2236
- • Pre-deployment verification: Check task app works before deploying to Modal
2237
- • CI/CD integration: Use --json flag for automated validation in pipelines
2238
- • Debug failing deployments: Use --verbose to see detailed endpoint responses
2239
- • Test API key configuration: Verify authentication is set up correctly
2240
- """
2241
- import asyncio
2242
- import socket
2243
- import subprocess
2244
- import tempfile
2245
- import time
2246
-
2247
- # Import the validate_task_app function defined in this module
2248
- from synth_ai.cli._validate_task_app import validate_task_app # type: ignore[attr-defined]
2249
-
2250
- proc = None
2251
- task_app_url = url
2252
-
2253
- try:
2254
- # If no URL provided, start a temporary server
2255
- if not task_app_url:
2256
- # Find an available port
2257
- def is_port_available(port: int) -> bool:
2258
- with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
2259
- try:
2260
- s.bind(("", port))
2261
- return True
2262
- except OSError:
2263
- return False
2264
-
2265
- while not is_port_available(port):
2266
- port += 1
2267
-
2268
- task_app_url = f"http://localhost:{port}"
2269
-
2270
- if not output_json:
2271
- click.echo(f"Starting temporary {app_id} server on port {port}...")
2272
-
2273
- # Start the server in background
2274
- env = os.environ.copy()
2275
- if api_key:
2276
- env["ENVIRONMENT_API_KEY"] = api_key
2277
-
2278
- # Create a temporary trace DB and trace dir to avoid prompts
2279
- import tempfile
2280
- temp_dir = tempfile.mkdtemp()
2281
- temp_trace_db = os.path.join(temp_dir, "validate_trace.db")
2282
- temp_trace_dir = os.path.join(temp_dir, "traces")
2283
- os.makedirs(temp_trace_dir, exist_ok=True)
2284
-
2285
- proc = subprocess.Popen(
2286
- [
2287
- "uv",
2288
- "run",
2289
- "synth-ai",
2290
- "task-app",
2291
- "serve",
2292
- app_id,
2293
- "--port",
2294
- str(port),
2295
- "--no-reload",
2296
- "--trace",
2297
- temp_trace_dir,
2298
- "--trace-db",
2299
- temp_trace_db,
2300
- ],
2301
- env=env,
2302
- stdin=subprocess.PIPE, # Add stdin to handle any prompts
2303
- stdout=subprocess.DEVNULL if output_json else subprocess.PIPE,
2304
- stderr=subprocess.DEVNULL if output_json else subprocess.PIPE,
2305
- text=True,
2306
- )
2307
-
2308
- # Write empty input to stdin to skip any prompts
2309
- if proc.stdin:
2310
- try:
2311
- proc.stdin.write("\n")
2312
- proc.stdin.flush()
2313
- proc.stdin.close()
2314
- except Exception:
2315
- pass
2316
-
2317
- # Wait for server to be ready
2318
- if not output_json:
2319
- click.echo("Waiting for server to start...")
2320
-
2321
- import httpx
2322
- for _attempt in range(60): # 30 seconds timeout
2323
- try:
2324
- async def check_health():
2325
- async with httpx.AsyncClient(timeout=2.0) as client:
2326
- resp = await client.get(f"{task_app_url}/")
2327
- return resp.status_code == 200
2328
-
2329
- if asyncio.run(check_health()):
2330
- break
2331
- except Exception:
2332
- pass
2333
-
2334
- # Check if process died
2335
- if proc.poll() is not None:
2336
- stderr_output = ""
2337
- if proc.stderr and not output_json:
2338
- stderr_output = proc.stderr.read()
2339
- click.echo(click.style("✗ Server process exited unexpectedly", fg="red"), err=True)
2340
- if stderr_output and not output_json:
2341
- click.echo(f"Error output:\n{stderr_output}", err=True)
2342
- sys.exit(1)
2343
-
2344
- time.sleep(0.5)
2345
- else:
2346
- click.echo(click.style("✗ Server failed to start within 30 seconds", fg="red"), err=True)
2347
- sys.exit(1)
2348
-
2349
- if not output_json:
2350
- click.echo(click.style("✓ Server started", fg="green"))
2351
- click.echo()
2352
-
2353
- # Ensure URL doesn't have trailing slash
2354
- task_app_url = task_app_url.rstrip("/")
2355
-
2356
- async def _run() -> tuple[bool, dict[str, Any]]:
2357
- return await validate_task_app(
2358
- url=task_app_url,
2359
- api_key=api_key,
2360
- min_instances=min_instances,
2361
- verbose=verbose,
2362
- )
2363
-
2364
- success, results = asyncio.run(_run())
2365
-
2366
- if output_json:
2367
- import json as _json
2368
- click.echo(_json.dumps(results, indent=2))
2369
-
2370
- sys.exit(0 if success else 1)
2371
-
2372
- finally:
2373
- # Cleanup: stop the temporary server
2374
- if proc is not None:
2375
- if not output_json:
2376
- click.echo("\nStopping temporary server...")
2377
- try:
2378
- proc.terminate()
2379
- proc.wait(timeout=5)
2380
- except Exception:
2381
- proc.kill()
2382
-
2383
- # Cleanup temp trace DB
2384
- if not url and 'temp_dir' in locals():
2385
- import contextlib
2386
- import shutil
2387
- with contextlib.suppress(Exception):
2388
- shutil.rmtree(temp_dir, ignore_errors=True)
2389
-
2390
-
2391
- def _load_env_files_into_process(paths: Sequence[str]) -> None:
2392
- for p in paths:
2393
- try:
2394
- txt = Path(p).expanduser().read_text()
2395
- except Exception:
2396
- continue
2397
- for line in txt.splitlines():
2398
- if not line or line.startswith("#") or "=" not in line:
2399
- continue
2400
- k, v = line.split("=", 1)
2401
- key = k.strip()
2402
- val = v.strip().strip('"').strip("'")
2403
- # Load into process, but allow overriding if the current value is empty
2404
- if key:
2405
- current = os.environ.get(key)
2406
- if current is None or not str(current).strip():
2407
- os.environ[key] = val
2408
-
2409
-
2410
- @click.command("serve")
2411
- @click.argument("app_id", type=str, required=False)
2412
- @click.option("--host", default="0.0.0.0", show_default=True)
2413
- @click.option("--port", default=None, type=int, help="Port to serve on (default: 8001)")
2414
- @click.option("--env-file", multiple=True, type=click.Path(), help="Extra .env files to load")
2415
- @click.option(
2416
- "--reload/--no-reload", "reload_flag", default=False, help="Enable uvicorn auto-reload"
2417
- )
2418
- @click.option(
2419
- "--force/--no-force",
2420
- "force",
2421
- default=False,
2422
- help="Kill any process already bound to the selected port before starting",
2423
- )
2424
- @click.option(
2425
- "--trace",
2426
- "trace_dir",
2427
- type=click.Path(),
2428
- default=None,
2429
- help="Enable tracing and write SFT JSONL files to this directory (default: traces/v3)",
2430
- )
2431
- @click.option(
2432
- "--trace-db",
2433
- "trace_db",
2434
- type=click.Path(),
2435
- default=None,
2436
- help="Override local trace DB path (default: traces/v3/synth_ai.db)",
2437
- )
2438
- def serve_command(
2439
- app_id: str | None,
2440
- host: str,
2441
- port: int | None,
2442
- env_file: Sequence[str],
2443
- reload_flag: bool,
2444
- force: bool,
2445
- trace_dir: str | None,
2446
- trace_db: str | None,
2447
- ) -> None:
2448
- demo_dir_path = _load_demo_directory()
2449
- if demo_dir_path:
2450
- if not demo_dir_path.is_dir():
2451
- raise click.ClickException(
2452
- f"Demo directory not found: {demo_dir_path}\nRun 'synth-ai setup' to create a demo."
2453
- )
2454
- os.chdir(demo_dir_path)
2455
- click.echo(f"Using demo directory: {demo_dir_path}\n")
2456
- os.environ["SYNTH_DEMO_DIR"] = str(demo_dir_path.resolve())
2457
-
2458
- # Prompt for port if not provided
2459
- if port is None:
2460
- port = click.prompt("Port to serve on", type=int, default=8001)
2461
-
2462
- # Prompt for trace directory if not provided
2463
- if trace_dir is None:
2464
- click.echo(
2465
- "\nTracing captures rollout data (actions, rewards, model outputs) to a local SQLite DB."
2466
- )
2467
- click.echo("This data can be exported to JSONL for supervised fine-tuning (SFT).")
2468
- enable_tracing = click.confirm("Enable tracing?", default=True)
2469
- if enable_tracing:
2470
- demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
2471
- default_trace_dir = str((demo_base / "traces/v3").resolve())
2472
- trace_dir = click.prompt(
2473
- "Trace directory", type=str, default=default_trace_dir, show_default=True
2474
- )
2475
- else:
2476
- trace_dir = None
2477
-
2478
- # Prompt for trace DB if not provided and tracing is enabled
2479
- if trace_dir and trace_db is None:
2480
- demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
2481
- default_trace_db = str((demo_base / "traces/v3/synth_ai.db").resolve())
2482
- trace_db = click.prompt(
2483
- "Trace DB path", type=str, default=default_trace_db, show_default=True
2484
- )
2485
-
2486
- choice = _select_app_choice(app_id, purpose="serve")
2487
- entry = choice.ensure_entry()
2488
- _serve_entry(
2489
- entry, host, port, env_file, reload_flag, force, trace_dir=trace_dir, trace_db=trace_db
2490
- )
2491
-
2492
-
2493
- @task_app_group.command("info")
2494
- @click.option(
2495
- "--base",
2496
- "base_url",
2497
- default=None,
2498
- help="Task app base URL (default: TASK_APP_BASE_URL or http://127.0.0.1:8001)",
2499
- )
2500
- @click.option(
2501
- "--api-key",
2502
- default=None,
2503
- help="Environment API key (default: ENVIRONMENT_API_KEY or dev fallbacks)",
2504
- )
2505
- @click.option(
2506
- "--seed",
2507
- "seeds",
2508
- multiple=True,
2509
- type=int,
2510
- help="Optional seed(s) to request specific instances (repeatable)",
2511
- )
2512
def info_command(base_url: str | None, api_key: str | None, seeds: tuple[int, ...]) -> None:
    """Fetch Task App /task_info with authentication and print JSON."""
    # base_url: task app root; falls back to TASK_APP_BASE_URL, then http://127.0.0.1:8001.
    # api_key:  explicit key; falls back to the normalized ENVIRONMENT_API_KEY.
    # seeds:    each value is sent as a repeated ``seed`` query parameter.
    # Raises click.ClickException when no key is available, when the request
    # cannot be sent, or when the server answers with a non-2xx status.
    import json as _json
    import os as _os

    import requests as _requests

    base = (base_url or _os.getenv("TASK_APP_BASE_URL") or "http://127.0.0.1:8001").rstrip("/")

    def _env_key():
        return _os.getenv("ENVIRONMENT_API_KEY")

    # Resolve API key, permitting dev fallbacks: prefer the auth module's
    # normalizer when it is importable, otherwise read the env var directly.
    auth_module = _maybe_import("synth_ai.task.auth")
    if auth_module is None:
        _norm_key = _env_key
    else:
        _norm_key = getattr(auth_module, "normalize_environment_api_key", _env_key)
    key = (api_key or _norm_key() or "").strip()
    if not key:
        raise click.ClickException("Missing API key. Provide --api-key or set ENVIRONMENT_API_KEY.")

    # The same key is offered under every header the task app may inspect;
    # optional aliases ride along in the comma-separated X-API-Keys header.
    aliases = (_os.getenv("ENVIRONMENT_API_KEY_ALIASES") or "").strip()
    alias_keys = [part.strip() for part in aliases.split(",") if part.strip()]
    headers: dict[str, str] = {
        "X-API-Key": key,
        "Authorization": f"Bearer {key}",
        "X-API-Keys": ",".join([key, *alias_keys]),
    }

    params: list[tuple[str, str]] = [("seed", str(int(s))) for s in seeds]

    url = f"{base}/task_info"
    try:
        r = _requests.get(url, headers=headers, params=params or None, timeout=30)
    except Exception as exc:
        raise click.ClickException(f"Request failed: {exc}") from exc

    is_json = r.headers.get("content-type", "").startswith("application/json")
    if not (200 <= r.status_code < 300):
        detail = r.text
        if is_json:
            # Pretty-print the error payload when possible; keep raw text otherwise.
            with contextlib.suppress(Exception):
                detail = _json.dumps(r.json(), indent=2)
        raise click.ClickException(f"{url} returned {r.status_code}:\n{detail}")

    data = {"raw": r.text}
    if is_json:
        # A body mislabelled as JSON used to surface as a raw traceback;
        # fall back to the raw-text envelope instead.
        try:
            data = r.json()
        except ValueError:
            data = {"raw": r.text}
    click.echo(_json.dumps(data, indent=2, sort_keys=True))
2562
-
2563
-
2564
@task_app_group.command("serve")
@click.argument("app_id", type=str, required=False)
@click.option("--host", default="0.0.0.0", show_default=True)
@click.option("--port", default=None, type=int, help="Port to serve on (default: 8001)")
@click.option("--env-file", multiple=True, type=click.Path(), help="Extra .env files to load")
@click.option(
    "--reload/--no-reload", "reload_flag", default=False, help="Enable uvicorn auto-reload"
)
@click.option(
    "--force/--no-force",
    "force",
    default=False,
    help="Kill any process already bound to the selected port before starting",
)
@click.option(
    "--trace",
    "trace_dir",
    type=click.Path(),
    default=None,
    help="Enable tracing and write SFT JSONL files to this directory (default: traces/v3)",
)
@click.option(
    "--trace-db",
    "trace_db",
    type=click.Path(),
    default=None,
    help="Override local trace DB path (default: traces/v3/synth_ai.db)",
)
def serve_task_group(
    app_id: str | None,
    host: str,
    port: int | None,
    env_file: Sequence[str],
    reload_flag: bool,
    force: bool,
    trace_dir: str | None,
    trace_db: str | None,
) -> None:
    # Serve a task app locally: switch into the recorded demo directory (if
    # any), interactively fill in the port / tracing options that were not
    # given on the command line, then hand off to _serve_entry.
    # Raises click.ClickException when the recorded demo directory is missing.
    demo_dir_path = _load_demo_directory()
    if demo_dir_path:
        if not demo_dir_path.is_dir():
            raise click.ClickException(
                f"Demo directory not found: {demo_dir_path}\nRun 'synth-ai setup' to create a demo."
            )
        # Resolve BEFORE changing directory: resolving a relative path after
        # chdir would re-anchor it inside the directory we just entered.
        resolved_demo_dir = demo_dir_path.resolve()
        os.chdir(demo_dir_path)
        click.echo(f"Using demo directory: {demo_dir_path}\n")
        os.environ["SYNTH_DEMO_DIR"] = str(resolved_demo_dir)

    # Prompt for port if not provided
    if port is None:
        port = click.prompt("Port to serve on", type=int, default=8001)

    # Prompt for trace directory if not provided; declining leaves it None,
    # which keeps tracing disabled downstream.
    if trace_dir is None:
        click.echo(
            "\nTracing captures rollout data (actions, rewards, model outputs) to a local SQLite DB."
        )
        click.echo("This data can be exported to JSONL for supervised fine-tuning (SFT).")
        if click.confirm("Enable tracing?", default=True):
            demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
            default_trace_dir = str((demo_base / "traces/v3").resolve())
            trace_dir = click.prompt(
                "Trace directory", type=str, default=default_trace_dir, show_default=True
            )

    # Prompt for trace DB if not provided and tracing is enabled
    if trace_dir and trace_db is None:
        demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
        default_trace_db = str((demo_base / "traces/v3/synth_ai.db").resolve())
        trace_db = click.prompt(
            "Trace DB path", type=str, default=default_trace_db, show_default=True
        )

    choice = _select_app_choice(app_id, purpose="serve")
    entry = choice.ensure_entry()
    _serve_entry(
        entry, host, port, env_file, reload_flag, force, trace_dir=trace_dir, trace_db=trace_db
    )
2645
-
2646
-
2647
- def _determine_env_files(
2648
- entry: TaskAppEntryType, user_env_files: Sequence[str], *, original_path: Path | None = None
2649
- ) -> list[Path]:
2650
- resolved: list[Path] = []
2651
- for candidate in user_env_files:
2652
- p = Path(candidate).expanduser()
2653
- if not p.exists():
2654
- raise click.ClickException(f"Env file not found: {p}")
2655
- resolved.append(p)
2656
- if resolved:
2657
- return resolved
2658
-
2659
- declared: list[Path] = []
2660
- for candidate in getattr(entry, "env_files", ()) or ():
2661
- try:
2662
- p = Path(candidate).expanduser()
2663
- except Exception:
2664
- continue
2665
- if p.exists() and p.is_file():
2666
- declared.append(p)
2667
- if declared:
2668
- return declared
2669
-
2670
- def _append_candidate(collection: list[Path], candidate: Path) -> None:
2671
- if candidate.exists() and candidate.is_file() and candidate not in collection:
2672
- collection.append(candidate)
2673
-
2674
- auto_candidates: list[Path] = []
2675
-
2676
- search_dirs: list[Path] = []
2677
- if original_path is not None:
2678
- search_dirs.append(original_path.parent.resolve())
2679
- for parent in original_path.parent.resolve().parents:
2680
- search_dirs.append(parent)
2681
- cwd = Path.cwd().resolve()
2682
- if cwd not in search_dirs:
2683
- search_dirs.append(cwd)
2684
- repo_root = REPO_ROOT.resolve()
2685
- if repo_root not in search_dirs:
2686
- search_dirs.append(repo_root)
2687
-
2688
- for directory in search_dirs:
2689
- _append_candidate(auto_candidates, directory / ".env")
2690
- for candidate in sorted(directory.glob("*.env")):
2691
- _append_candidate(auto_candidates, candidate)
2692
-
2693
- if auto_candidates:
2694
- return [auto_candidates[0]]
2695
-
2696
- raise click.ClickException(
2697
- "No .env file discovered automatically. Pass --env-file /path/to/.env or generate one with `uvx synth-ai setup`."
2698
- )
2699
-
2700
-
2701
- def _ensure_port_free(port: int, host: str, *, force: bool) -> None:
2702
- import os
2703
- import socket
2704
- import subprocess
2705
- import time
2706
-
2707
- with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
2708
- in_use = s.connect_ex((host, port)) == 0
2709
- if not in_use:
2710
- return
2711
-
2712
- try:
2713
- out = subprocess.run(
2714
- ["lsof", "-ti", f"TCP:{port}"], capture_output=True, text=True, check=False
2715
- )
2716
- pids = [pid for pid in out.stdout.strip().splitlines() if pid]
2717
- except FileNotFoundError:
2718
- pids = []
2719
-
2720
- if not force:
2721
- message = f"Port {port} appears to be in use"
2722
- if pids:
2723
- message += f" (PIDs: {', '.join(pids)})"
2724
- raise click.ClickException(message)
2725
-
2726
- for pid in pids:
2727
- try:
2728
- os.kill(int(pid), signal.SIGTERM)
2729
- except Exception as exc:
2730
- raise click.ClickException(f"Failed to terminate PID {pid}: {exc}") from exc
2731
-
2732
- time.sleep(0.5)
2733
-
2734
- with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
2735
- still_in_use = s.connect_ex((host, port)) == 0
2736
-
2737
- if still_in_use:
2738
- for pid in pids:
2739
- try:
2740
- os.kill(int(pid), signal.SIGKILL)
2741
- except Exception as exc:
2742
- raise click.ClickException(f"Failed to force terminate PID {pid}: {exc}") from exc
2743
- time.sleep(0.5)
2744
-
2745
- with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
2746
- in_use_after = s.connect_ex((host, port)) == 0
2747
- if in_use_after:
2748
- raise click.ClickException(
2749
- f"Port {port} is still in use after attempting to terminate processes."
2750
- )
2751
-
2752
-
2753
def _save_to_env_file(env_path: Path, key: str, value: str) -> None:
    """Save or update a key-value pair in the .env file.

    Every existing line that starts with ``key=`` (ignoring surrounding
    whitespace) is rewritten as ``key=value``. If there is none, the
    assignment is appended on its own line. Missing parent directories are
    created. This is best effort: any failure is reported as a warning on
    stderr rather than raised.

    Args:
        env_path: The .env file to create or modify.
        key: Variable name.
        value: Value written verbatim after ``key=``.
    """
    try:
        # Keep the raw text around: splitlines() drops the information about
        # whether the file ends with a newline, which the append branch needs.
        if env_path.exists():
            original_text = env_path.read_text()
        else:
            env_path.parent.mkdir(parents=True, exist_ok=True)
            original_text = ""

        prefix = f"{key}="
        assignment = f"{key}={value}"
        key_updated = False
        new_lines = []
        for line in original_text.splitlines():
            if line.strip().startswith(prefix):
                new_lines.append(assignment)
                key_updated = True
            else:
                new_lines.append(line)

        if key_updated:
            # Write updated lines back
            env_path.write_text("\n".join(new_lines) + "\n")
            click.echo(f"Updated {key} in {env_path}")
        else:
            # Append to .env, adding a separator only when the existing
            # content does not already end with a newline. This avoids both a
            # stray blank line and gluing the key onto an unterminated line.
            with open(env_path, "a") as f:
                if original_text and not original_text.endswith("\n"):
                    f.write("\n")
                f.write(f"{assignment}\n")
            click.echo(f"Saved {key} to {env_path}")
    except Exception as e:
        click.echo(f"Warning: Could not save {key} to .env: {e}", err=True)
2790
-
2791
-
2792
def _persist_env_api_key(env_api_key: str, env_paths: Sequence[Path] | None) -> None:
    """Write ENVIRONMENT_API_KEY into each given env file, or the default one.

    Paths are resolved and de-duplicated in their original order; paths that
    fail to resolve are skipped. When nothing usable remains, the ``.env``
    under SYNTH_DEMO_DIR (or the current directory) is used instead.
    """
    # A dict keeps first-seen order while discarding repeated resolved paths.
    unique: dict[Path, None] = {}
    for raw in env_paths or ():
        try:
            unique.setdefault(Path(raw).resolve(), None)
        except Exception:
            continue

    destinations = list(unique)
    if not destinations:
        base_dir = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
        destinations = [(base_dir / ".env").resolve()]

    for destination in destinations:
        _save_to_env_file(destination, "ENVIRONMENT_API_KEY", env_api_key)
2812
-
2813
-
2814
def _validate_required_env_keys() -> None:
    """Ensure the API keys needed for serving are present, asking for missing ones.

    Loads the ``.env`` under SYNTH_DEMO_DIR (or the current directory) without
    overriding variables that are already set. ENVIRONMENT_API_KEY is
    mandatory; GROQ_API_KEY may be skipped. Values typed in are exported to
    the process environment and saved to that ``.env``.

    Raises:
        click.ClickException: if no ENVIRONMENT_API_KEY is supplied.
    """
    base_dir = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
    env_file = base_dir / ".env"

    if env_file.exists():
        # Best effort: a missing python-dotenv or a malformed file is ignored.
        with contextlib.suppress(Exception):
            from dotenv import load_dotenv

            load_dotenv(env_file, override=False)

    if not os.environ.get("ENVIRONMENT_API_KEY", "").strip():
        entered = input("Please enter your RL Environment API key:\n> ").strip()
        if not entered:
            raise click.ClickException("RL Environment API key is required to start the server")
        os.environ["ENVIRONMENT_API_KEY"] = entered
        _save_to_env_file(env_file, "ENVIRONMENT_API_KEY", entered)

    # Check for Groq API key
    if os.environ.get("GROQ_API_KEY", "").strip():
        return

    click.echo("\nInference API key configuration:")
    click.echo("This workflow requires a Groq API key.")
    groq_key = input("Groq API key (or press Enter to skip): ").strip()
    if groq_key:
        os.environ["GROQ_API_KEY"] = groq_key
        _save_to_env_file(env_file, "GROQ_API_KEY", groq_key)
2847
-
2848
-
2849
def _print_demo_next_steps_if_applicable() -> None:
    """Show the follow-up instructions when running from inside the demo directory.

    The hint is printed only if the recorded demo directory equals the
    resolved current directory and that directory contains
    ``run_local_rollout_traced.py``. Any error is swallowed.
    """
    with contextlib.suppress(Exception):
        here = Path.cwd().resolve()
        demo_dir = _load_demo_directory()

        if not demo_dir or demo_dir != here:
            return
        if not (here / "run_local_rollout_traced.py").exists():
            return

        rule = "=" * 60
        banner = (
            "\n" + rule,
            "Next step: Collect traced rollouts",
            rule,
            "\nIn another terminal, run:",
            f" cd {here}",
            " uv run python run_local_rollout_traced.py",
            "\nRun this 5-10 times to collect diverse traces.",
            rule + "\n",
        )
        for text in banner:
            click.echo(text)
2866
-
2867
-
2868
def _serve_entry(
    entry: TaskAppEntryType,
    host: str,
    port: int,
    env_file: Sequence[str],
    reload_flag: bool,
    force: bool,
    *,
    trace_dir: str | None = None,
    trace_db: str | None = None,
) -> None:
    """Configure tracing, run the pre-flight checks and start the task app.

    Tracing is switched on when either ``trace_dir`` or ``trace_db`` is given;
    relative paths are anchored at SYNTH_DEMO_DIR (or the current directory)
    and published through environment variables. The port is then checked (or
    freed when ``force`` is set), required API keys are validated, and
    ``run_task_app`` is invoked with the entry's env files followed by the
    extra ones passed in.

    Raises:
        click.ClickException: if the trace directory cannot be created (the
            helpers called here raise their own errors).
    """
    env_files = [*entry.env_files, *env_file]

    trace_enabled = not (trace_dir is None and trace_db is None)
    if trace_enabled:
        os.environ["TASKAPP_TRACING_ENABLED"] = "1"

        # Ensure paths are absolute relative to demo directory
        anchor = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())

        def _anchored(raw: str) -> Path:
            expanded = Path(raw).expanduser()
            return expanded if expanded.is_absolute() else (anchor / expanded).resolve()

        if trace_dir is not None:
            sft_dir = _anchored(trace_dir)
            try:
                sft_dir.mkdir(parents=True, exist_ok=True)
            except Exception as exc:
                raise click.ClickException(
                    f"Failed to create trace directory {sft_dir}: {exc}"
                ) from exc
            os.environ["TASKAPP_SFT_OUTPUT_DIR"] = str(sft_dir)
            click.echo(f"Tracing enabled. SFT JSONL will be written to {sft_dir}")

        if trace_db is not None:
            sqlite_file = _anchored(trace_db)
            # Construct the sqlite URL from the absolute path
            os.environ["SQLD_DB_PATH"] = str(sqlite_file)
            os.environ["TURSO_LOCAL_DB_URL"] = f"sqlite+aiosqlite:///{sqlite_file}"
            click.echo(f"Tracing DB path set to {sqlite_file}")

        # Push the effective DB URL into the tracing config, when importable.
        tracing_config_module = _maybe_import("synth_ai.tracing_v3.config")
        if tracing_config_module is not None:
            shared_config = tracing_config_module.CONFIG
            effective_url = os.getenv("TURSO_LOCAL_DB_URL") or shared_config.db_url
            shared_config.db_url = effective_url
            if effective_url:
                click.echo(f"Tracing DB URL resolved to {effective_url}")
    elif os.getenv("TASKAPP_TRACING_ENABLED"):
        click.echo("Tracing enabled via environment variables")

    _ensure_port_free(port, host, force=force)

    _validate_required_env_keys()
    _preflight_env_key([Path(item) for item in env_files if item])

    # Print next steps if in demo context
    if trace_enabled:
        _print_demo_next_steps_if_applicable()

    run_task_app(
        entry.config_factory,
        host=host,
        port=port,
        reload=reload_flag,
        env_files=env_files,
    )
2937
-
2938
-
2939
@task_app_group.command("deploy")
@click.argument("app_id", type=str, required=False)
@click.option("--name", "modal_name", default=None, help="Override Modal app name")
@click.option("--dry-run", is_flag=True, help="Print modal deploy command without executing")
@click.option("--modal-cli", default="modal", help="Path to modal CLI executable")
@click.option(
    "--env-file",
    multiple=True,
    type=click.Path(),
    help="Env file to load into the container (can be repeated)",
)
def deploy_app(
    app_id: str | None,
    modal_name: str | None,
    dry_run: bool,
    modal_cli: str,
    env_file: Sequence[str],
) -> None:
    """Deploy a task app to Modal."""

    # Work from the recorded demo directory when there is one.
    demo_root = _load_demo_directory()
    if demo_root:
        if not demo_root.is_dir():
            raise click.ClickException(
                f"Demo directory not found: {demo_root}\nRun 'synth-ai demo' to create a demo."
            )
        os.chdir(demo_root)
        click.echo(f"Using demo directory: {demo_root}\n")

    choice = _select_app_choice(app_id, purpose="deploy")

    if not choice.modal_script:
        # Registered entry: deploy through the generated entrypoint path.
        entry = choice.ensure_entry()
        _deploy_entry(entry, modal_name, dry_run, modal_cli, env_file, original_path=choice.path)
        return

    # Stand-alone Modal script: run it through the modal CLI directly.
    env_paths = _resolve_env_paths_for_script(choice.modal_script, env_file)
    shown = ", ".join(str(p.resolve()) for p in env_paths)
    click.echo(f"Using env file(s): {shown}")
    _run_modal_script(
        choice.modal_script,
        modal_cli,
        "deploy",
        env_paths,
        modal_name=modal_name,
        dry_run=dry_run,
    )
2985
-
2986
-
2987
@task_app_group.command("modal-serve")
@click.argument("app_id", type=str, required=False)
@click.option("--modal-cli", default="modal", help="Path to modal CLI executable")
@click.option("--name", "modal_name", default=None, help="Override Modal app name (optional)")
@click.option(
    "--env-file",
    multiple=True,
    type=click.Path(),
    help="Env file to load into the container (can be repeated)",
)
def modal_serve_app(
    app_id: str | None, modal_cli: str, modal_name: str | None, env_file: Sequence[str]
) -> None:
    # Run a task app under `modal serve`: either a stand-alone Modal script,
    # or a registered entry served through a generated entrypoint.
    requested = app_id or "(auto)"
    click.echo(f"[modal-serve] requested app_id={requested} modal_cli={modal_cli}")
    try:
        choice = _select_app_choice(app_id, purpose="modal-serve")
    except SystemExit as exc:  # bubble up with context (legacy argparse would trigger this)
        raise click.ClickException(
            f"Legacy CLI intercepted modal-serve (exit {exc.code}). "
            "Make sure you're running the Click CLI (synth_ai.cli:cli)."
        ) from exc

    if not choice.modal_script:
        entry = choice.ensure_entry()
        click.echo(f"[modal-serve] serving entry {entry.app_id} from {choice.path}")
        _modal_serve_entry(entry, modal_name, modal_cli, env_file, original_path=choice.path)
        return

    env_paths = _resolve_env_paths_for_script(choice.modal_script, env_file)
    shown = ", ".join(str(p.resolve()) for p in env_paths)
    click.echo(f"Using env file(s): {shown}")
    _run_modal_script(choice.modal_script, modal_cli, "serve", env_paths, modal_name=modal_name)
3018
-
3019
-
3020
def _write_modal_entrypoint(
    entry: TaskAppEntryType,
    modal_cfg: ModalDeploymentConfigType,
    override_name: str | None,
    *,
    dotenv_paths: Sequence[str] | None = None,
    original_path: Path | None = None,
    inline_secret_values: dict[str, str] | None = None,
) -> Path:
    """Generate a temporary Modal entrypoint script for ``entry``.

    The script builds the image (apt packages, pip packages, filtered copies
    of the local directories), attaches secrets and volumes, and exposes an
    ASGI function that imports the task app module and builds the app from
    the registry entry.

    Args:
        entry: Task app entry; ``app_id`` and ``config_factory`` are read.
        modal_cfg: Modal deployment settings rendered into the script.
        override_name: Modal app name to use instead of ``modal_cfg.app_name``.
        dotenv_paths: Dotenv files the script turns into Modal secrets.
        original_path: Source file of a discovered app; its directory is
            mounted at ``/opt/synth_ai_repo/__local_task_app__`` and the file
            is loaded from there.
        inline_secret_values: Extra secret values embedded in the script;
            empty values are dropped.

    Returns:
        Path of the generated script. The file is not deleted here; removing
        it is left to the caller.
    """
    modal_name = override_name or modal_cfg.app_name
    module_name = entry.config_factory.__module__

    # The discovered file is always mounted under __local_task_app__ (see the
    # local_dirs handling below), so that location is used as the module file.
    module_file_str: str | None = None
    if original_path:
        module_file_str = str(
            (Path("/opt/synth_ai_repo/__local_task_app__") / Path(original_path).stem).with_suffix(
                ".py"
            )
        )

    dotenv_paths = [str(Path(path)) for path in (dotenv_paths or [])]

    pip_packages = list(modal_cfg.pip_packages)
    # Ensure synth-ai (matching host version if available) is installed in the container
    synth_pkg = "synth-ai"
    host_synth = _maybe_import("synth_ai")
    if host_synth is not None:
        host_ver = getattr(host_synth, "__version__", None)
        if host_ver:
            synth_pkg = f"synth-ai=={host_ver}"
    if not any(str(p).startswith("synth-ai") for p in pip_packages):
        pip_packages.insert(0, synth_pkg)

    apt_packages = list(modal_cfg.apt_packages)

    local_dirs = [(str(Path(src)), dst) for src, dst in modal_cfg.extra_local_dirs]
    # Also mount the host synth_ai source if available to ensure latest code is used
    if host_synth is not None:
        try:
            host_synth_dir = Path(host_synth.__file__).resolve().parent
            sy_dst = "/opt/synth_ai_repo/synth_ai"
            candidate = (str(host_synth_dir), sy_dst)
            if candidate not in local_dirs:
                local_dirs.insert(0, candidate)
        except Exception:
            # Best effort: without a usable __file__ the pip-installed copy is used.
            pass
    # Ensure the discovered app directory is mounted, regardless of modal_cfg
    if original_path:
        discovered_dir = str(Path(original_path).resolve().parent)
        mount_dst = "/opt/synth_ai_repo/__local_task_app__"
        if (discovered_dir, mount_dst) not in local_dirs:
            local_dirs.append((discovered_dir, mount_dst))
    secret_names = list(modal_cfg.secret_names)
    volume_mounts = [(name, mount) for name, mount in modal_cfg.volume_mounts]
    inline_secret_values = {k: v for k, v in (inline_secret_values or {}).items() if v}

    # NOTE: this is an f-string, so literal braces that belong to the
    # generated script must be doubled ({{ }}); single braces interpolate.
    script = f"""from __future__ import annotations

import importlib
import importlib.util
import sys
import os
import shutil
import tempfile
from pathlib import Path as _Path
import fnmatch
sys.path.insert(0, '/opt/synth_ai_repo')

from modal import App, Image, Secret, Volume, asgi_app

# Defer importing synth_ai until inside fastapi_app to avoid local import errors

ENTRY_ID = {entry.app_id!r}
MODAL_APP_NAME = {modal_name!r}
MODULE_NAME = {module_name!r}
MODULE_FILE = {module_file_str!r}
DOTENV_PATHS = {dotenv_paths!r}
INLINE_SECRET_VALUES = {inline_secret_values!r}

image = Image.debian_slim(python_version={modal_cfg.python_version!r})

# CRITICAL: Install iverilog for Verilog task app (hardcoded to prevent config issues)
if {entry.app_id!r} == "grpo-verilog":
    image = image.apt_install("iverilog")

# Install apt packages first (before pip)
apt_packages = {apt_packages!r}
if apt_packages:
    image = image.apt_install(*apt_packages)

pip_packages = {pip_packages!r}
if pip_packages:
    image = image.pip_install(*pip_packages)

local_dirs = {local_dirs!r}

def _copy_tree_filtered(src_dir: str) -> str:
    src = _Path(src_dir)
    temp_dir = _Path(tempfile.mkdtemp(prefix='synth_mount_'))

    exclude_dirs = {{".cache", ".git", "__pycache__"}}
    exclude_globs = ['*.db', '*.db-journal', '*-wal', '*-shm']

    for root, dirs, files in os.walk(src):
        rel_root = _Path(root).relative_to(src)
        # filter dirs in-place
        dirs[:] = [d for d in dirs if d not in exclude_dirs]
        # ensure target directory exists
        target_dir = (temp_dir / rel_root)
        target_dir.mkdir(parents=True, exist_ok=True)
        # copy files with filtering
        for name in files:
            if any(fnmatch.fnmatch(name, pat) for pat in exclude_globs):
                continue
            src_file = _Path(root) / name
            dst_file = target_dir / name
            try:
                shutil.copy2(src_file, dst_file)
            except Exception:
                # ignore problematic files
                continue
    return str(temp_dir)

for local_src, remote_dst in local_dirs:
    safe_src = _copy_tree_filtered(local_src)
    image = image.add_local_dir(safe_src, remote_dst)

secrets = {secret_names!r}
secret_objs = [Secret.from_name(name) for name in secrets]

if INLINE_SECRET_VALUES:
    secret_objs.append(Secret.from_dict(INLINE_SECRET_VALUES))

if DOTENV_PATHS:
    secret_objs.extend(Secret.from_dotenv(path) for path in DOTENV_PATHS)

volume_mounts = {volume_mounts!r}
volume_map = {{}}
for vol_name, mount_path in volume_mounts:
    volume_map[mount_path] = Volume.from_name(vol_name, create_if_missing=True)

app = App(MODAL_APP_NAME)

@app.function(
    image=image,
    timeout={modal_cfg.timeout},
    memory={modal_cfg.memory},
    cpu={modal_cfg.cpu},
    min_containers={modal_cfg.min_containers},
    max_containers={modal_cfg.max_containers},
    secrets=secret_objs,
    volumes=volume_map,
)
@asgi_app()
def fastapi_app():
    # Import the module to trigger registration (inside container)
    import os
    # Prefer mounted source over any preinstalled site-packages version
    import sys as _sys
    for k in list(_sys.modules.keys()):
        if k == 'synth_ai' or k.startswith('synth_ai.'):
            _sys.modules.pop(k, None)
    import importlib as _importlib
    _importlib.invalidate_caches()
    try:
        if MODULE_FILE and os.path.exists(MODULE_FILE):
            spec = importlib.util.spec_from_file_location(MODULE_NAME or 'task_app_module', MODULE_FILE)
            if not spec or not spec.loader:
                raise RuntimeError("Failed to prepare spec for: " + str(MODULE_FILE))
            mod = importlib.util.module_from_spec(spec)
            sys.modules[MODULE_NAME or 'task_app_module'] = mod
            spec.loader.exec_module(mod)
        else:
            try:
                importlib.import_module(MODULE_NAME)
            except Exception:
                fallback_file = '/opt/synth_ai_repo/__local_task_app__/' + (MODULE_NAME.split('.')[-1] if MODULE_NAME else 'task_app') + '.py'
                if os.path.exists(fallback_file):
                    spec = importlib.util.spec_from_file_location(MODULE_NAME or 'task_app_module', fallback_file)
                    if not spec or not spec.loader:
                        raise RuntimeError("Failed to prepare fallback spec for: " + str(fallback_file))
                    mod = importlib.util.module_from_spec(spec)
                    sys.modules[MODULE_NAME or 'task_app_module'] = mod
                    spec.loader.exec_module(mod)
                else:
                    raise
    except Exception as e:
        raise RuntimeError("Task app import failed: " + str(e))

    # Get the entry from registry (now that it's registered)
    from synth_ai.task.apps import registry
    from synth_ai.task.server import create_task_app
    entry = registry.get(ENTRY_ID)
    cfg = entry.modal
    if cfg is None:
        raise RuntimeError("Modal configuration missing for task app " + ENTRY_ID)
    config = entry.config_factory()
    return create_task_app(config)
"""

    # delete=False: the generated script must still exist after this returns.
    with tempfile.NamedTemporaryFile("w", suffix=f"_{entry.app_id}_modal.py", delete=False) as tmp:
        tmp.write(script)
        tmp.flush()
        name = tmp.name
    return Path(name)
3254
-
3255
-
3256
def register(cli: click.Group) -> None:
    """Attach this module's commands (serve, task-app group, eval, filter) to ``cli``."""
    for command in (serve_command, task_app_group, eval_command, filter_command):
        cli.add_command(command)
3261
-
3262
-
3263
- @click.command(
3264
- "eval",
3265
- help="Run one-off rollouts against a task app and print judge/eval summaries.",
3266
- )
3267
- @click.argument("app_id", type=str, required=False)
3268
- @click.option(
3269
- "--config",
3270
- type=click.Path(),
3271
- default=None,
3272
- help="Path to eval TOML (short schema). Auto-discovers the first matching file when omitted.",
3273
- )
3274
- @click.option(
3275
- "--url",
3276
- "task_app_url",
3277
- type=str,
3278
- default=None,
3279
- help="Base URL of a running task app instead of spawning locally (requires --env-file for secrets).",
3280
- )
3281
- @click.option(
3282
- "--seeds",
3283
- default="0,1,2,3,4",
3284
- help="Comma-separated seeds/indices to evaluate. Use negative numbers to wrap around the dataset.",
3285
- )
3286
- @click.option("--split", default="train", show_default=True, help="Dataset split to use")
3287
- @click.option(
3288
- "--model",
3289
- default=None,
3290
- help="Model identifier. When omitted the CLI will prompt based on task metadata.",
3291
- )
3292
- @click.option(
3293
- "--env-file",
3294
- multiple=True,
3295
- type=click.Path(),
3296
- help="Env file(s) to load (API keys, etc.). Required when using --url or remote judges.",
3297
- )
3298
- @click.option(
3299
- "--trace-db",
3300
- default="traces/v3/synth_ai.db",
3301
- show_default=True,
3302
- help="SQLite/Turso URL for storing rollout traces set to 'none' to disable persistence.",
3303
- )
3304
- @click.option(
3305
- "--metadata",
3306
- multiple=True,
3307
- help="Filter tasks by key=value metadata (e.g., --metadata difficulty=easy)",
3308
- )
3309
- @click.option(
3310
- "--metadata-sql",
3311
- default=None,
3312
- help="SQLite query that returns seeds to evaluate (e.g., SELECT seed FROM tasks WHERE difficulty='easy' LIMIT 5)",
3313
- )
3314
- def eval_command(
3315
- app_id: str | None,
3316
- config: str | None,
3317
- task_app_url: str | None,
3318
- seeds: str,
3319
- split: str,
3320
- model: str | None,
3321
- env_file: Sequence[str],
3322
- trace_db: str,
3323
- metadata: Sequence[str],
3324
- metadata_sql: str | None,
3325
- ) -> None:
3326
- """Run rollouts against a task app and report judge statistics.
3327
-
3328
- By default the command spins up the selected task app in-process, executes the
3329
- requested seeds, and prints aggregate scores (official and custom judges). When
3330
- pointing at a remote `--url`, supply matching `--env-file` values so the CLI can
3331
- forward authentication headers to the running service.
3332
- """
3333
- # Parse and validate TOML config
3334
- from synth_ai.task.config import EvalConfig
3335
-
3336
- cfg: dict[str, Any] = {}
3337
- eval_cfg: EvalConfig | None = None
3338
- config_path: Path | None = None
3339
-
3340
- if config:
3341
- config_path = Path(config)
3342
- else:
3343
- auto_configs = _discover_eval_config_paths()
3344
- if auto_configs:
3345
- config_path = auto_configs[0]
3346
- click.echo(f"Using eval config: {config_path}")
3347
-
3348
- if config_path:
3349
- if _toml is None:
3350
- raise click.ClickException(
3351
- "TOML parser not available; use Python 3.11+ or install tomli"
3352
- )
3353
- if not config_path.exists():
3354
- raise click.ClickException(f"Eval config not found: {config_path}")
3355
- try:
3356
- data = config_path.read_bytes()
3357
- parsed = _toml.loads(data.decode("utf-8"))
3358
- if isinstance(parsed, dict):
3359
- section = parsed.get("eval")
3360
- cfg = dict(section) if isinstance(section, dict) else dict(parsed)
3361
-
3362
- # Validate config with dataclass
3363
- try:
3364
- eval_cfg = EvalConfig.from_dict(cfg)
3365
- click.echo(f"✓ Config validated: {len(eval_cfg.seeds)} seeds, model={eval_cfg.model}")
3366
- except (ValueError, TypeError) as validation_error:
3367
- raise click.ClickException(f"Invalid eval config: {validation_error}") from validation_error
3368
- except click.ClickException:
3369
- raise
3370
- except Exception as exc:
3371
- raise click.ClickException(f"Failed to parse TOML '{config_path}': {exc}") from exc
3372
-
3373
- # CLI args override config
3374
- if eval_cfg:
3375
- app_id = app_id or eval_cfg.app_id
3376
- else:
3377
- app_id = app_id or (cfg.get("app_id") if isinstance(cfg.get("app_id"), str) else None) # type: ignore
3378
-
3379
- metadata_filters: dict[str, str] = {}
3380
- if eval_cfg:
3381
- metadata_filters.update(eval_cfg.metadata)
3382
- else:
3383
- cfg_metadata = cfg.get("metadata")
3384
- if isinstance(cfg_metadata, dict):
3385
- for key, value in cfg_metadata.items():
3386
- metadata_filters[str(key)] = str(value)
3387
- elif isinstance(cfg_metadata, list):
3388
- for item in cfg_metadata:
3389
- if isinstance(item, str) and "=" in item:
3390
- key, value = item.split("=", 1)
3391
- metadata_filters[key.strip()] = value.strip()
3392
-
3393
- for item in metadata or ():
3394
- if "=" not in item:
3395
- raise click.ClickException(f"Metadata filters must be key=value (got: {item})")
3396
- key, value = item.split("=", 1)
3397
- key = key.strip()
3398
- value = value.strip()
3399
- if not key or not value:
3400
- raise click.ClickException(f"Invalid metadata filter: {item}")
3401
- metadata_filters[key] = value
3402
-
3403
- metadata_sql_query: str | None = None
3404
- if eval_cfg and eval_cfg.metadata_sql:
3405
- metadata_sql_query = eval_cfg.metadata_sql
3406
- else:
3407
- cfg_metadata_sql = cfg.get("metadata_sql")
3408
- if isinstance(cfg_metadata_sql, dict):
3409
- metadata_sql_query = cfg_metadata_sql.get("query") or cfg_metadata_sql.get("sql")
3410
- elif isinstance(cfg_metadata_sql, str):
3411
- metadata_sql_query = cfg_metadata_sql
3412
-
3413
- if metadata_sql:
3414
- metadata_sql_query = metadata_sql
3415
- if metadata_sql_query is not None:
3416
- metadata_sql_query = str(metadata_sql_query)
3417
-
3418
- trace_db_url: str | None = None
3419
- trace_db = (trace_db or "").strip()
3420
- if trace_db and trace_db.lower() not in {"none", "off", "disable"}:
3421
- if "://" in trace_db:
3422
- trace_db_url = trace_db
3423
- else:
3424
- trace_path = Path(trace_db).expanduser()
3425
- trace_path.parent.mkdir(parents=True, exist_ok=True)
3426
- trace_db_url = f"sqlite+aiosqlite:///{trace_path}"
3427
- trace_tracer: SessionTracer | None = SessionTracer(db_url=trace_db_url, auto_save=True) if trace_db_url else None
3428
-
3429
- # Determine selection params (CLI takes precedence; TOML only fills unset model/seeds/env)
3430
- if cfg.get("model") and not model:
3431
- model = str(cfg["model"]) # type: ignore[index]
3432
- if cfg.get("seeds") and seeds == "0,1,2,3,4":
3433
- val = cfg["seeds"]
3434
- if isinstance(val, list):
3435
- with contextlib.suppress(Exception):
3436
- seeds = ",".join(str(int(x)) for x in val)
3437
- elif isinstance(val, str):
3438
- seeds = val
3439
- elif isinstance(val, int):
3440
- seeds = str(val)
3441
- if cfg.get("env_file") and not env_file:
3442
- ef = cfg["env_file"]
3443
- if isinstance(ef, str):
3444
- env_file = (ef,) # type: ignore[assignment]
3445
- elif isinstance(ef, list):
3446
- env_file = tuple(str(x) for x in ef) # type: ignore[assignment]
3447
-
3448
- choice_for_env: AppChoice | None = None
3449
- entry: TaskAppEntryType | None = None
3450
- if task_app_url is None:
3451
- choice_for_env = _select_app_choice(app_id, purpose="eval")
3452
- entry = choice_for_env.ensure_entry()
3453
-
3454
- env_paths: list[Path] = []
3455
- if entry is not None:
3456
- original_env_path = choice_for_env.path if choice_for_env is not None else None
3457
- env_paths = _determine_env_files(entry, env_file, original_path=original_env_path)
3458
- else:
3459
- if not env_file:
3460
- raise click.ClickException("--env-file is required when using --url")
3461
- for candidate in env_file:
3462
- p = Path(candidate).expanduser()
3463
- if not p.exists():
3464
- raise click.ClickException(f"Env file not found: {p}")
3465
- env_paths.append(p)
3466
-
3467
- click.echo("Using env file(s): " + ", ".join(str(p) for p in env_paths))
3468
- _load_env_files_into_process([str(Path(p)) for p in env_paths])
3469
-
3470
- if task_app_url is None:
3471
- config = entry.config_factory() # type: ignore[union-attr]
3472
- # Help the type checker; runtime check also enforced in server.run_task_app
3473
- if not isinstance(config, TaskAppConfig):
3474
- raise click.ClickException(
3475
- "Invalid task app: config_factory did not return TaskAppConfig"
3476
- )
3477
- app = create_task_app(config)
3478
-
3479
- # Determine supported models
3480
- inference_meta: dict[str, Any] = {}
3481
- supported: list[str] = []
3482
- seen_models: set[str] = set()
3483
-
3484
- def _add_supported_model(candidate: Any) -> None:
3485
- if not candidate:
3486
- return
3487
- text = str(candidate).strip()
3488
- if not text or text in seen_models:
3489
- return
3490
- supported.append(text)
3491
- seen_models.add(text)
3492
-
3493
- if task_app_url is None:
3494
- try:
3495
- if hasattr(config, "base_task_info") and config.base_task_info:
3496
- inf_obj = getattr(config.base_task_info, "inference", None)
3497
- if inf_obj is not None:
3498
- if hasattr(inf_obj, "model_dump"):
3499
- inference_meta = dict(inf_obj.model_dump(exclude_none=True)) # type: ignore[attr-defined]
3500
- elif isinstance(inf_obj, dict):
3501
- inference_meta = dict(inf_obj)
3502
- except Exception:
3503
- inference_meta = {}
3504
- else:
3505
- try:
3506
- import httpx as _hx
3507
-
3508
- headers = {}
3509
- api_key = (os.environ.get("ENVIRONMENT_API_KEY") or "").strip()
3510
- if api_key:
3511
- headers["X-API-Key"] = api_key
3512
- with _hx.Client(base_url=task_app_url, headers=headers, timeout=15.0) as c:
3513
- info = c.get("/info").json()
3514
- inf = info.get("inference") if isinstance(info, dict) else None
3515
- if isinstance(inf, dict):
3516
- inference_meta = dict(inf)
3517
- except Exception:
3518
- inference_meta = {}
3519
-
3520
- default_model = inference_meta.get("model")
3521
- if isinstance(default_model, str):
3522
- _add_supported_model(default_model)
3523
-
3524
- models_field = inference_meta.get("models")
3525
- if isinstance(models_field, list):
3526
- for candidate in models_field:
3527
- _add_supported_model(candidate)
3528
-
3529
- supported_models = inference_meta.get("supported_models")
3530
- if isinstance(supported_models, list):
3531
- for candidate in supported_models:
3532
- _add_supported_model(candidate)
3533
-
3534
- providers = inference_meta.get("providers")
3535
- if isinstance(providers, list):
3536
- if "openai" in providers:
3537
- _add_supported_model("gpt-5")
3538
- if "groq" in providers:
3539
- _add_supported_model("groq:llama-3.1-70b-versatile")
3540
-
3541
- _add_supported_model("synth:qwen-0.6b")
3542
-
3543
- selected_model = model
3544
- if not selected_model:
3545
- if not supported:
3546
- raise click.ClickException(
3547
- "No supported models; supply --model or add base_task_info.inference.model"
3548
- )
3549
- click.echo("Select model to evaluate:")
3550
- for idx, m in enumerate(supported, start=1):
3551
- click.echo(f" {idx}) {m}")
3552
- choice_idx = click.prompt("Enter choice", type=click.IntRange(1, len(supported)))
3553
- selected_model = supported[choice_idx - 1]
3554
-
3555
- try:
3556
- seed_values = [int(s.strip()) for s in seeds.split(",") if s.strip()]
3557
- except Exception as exc:
3558
- raise click.ClickException("Invalid --seeds; expected comma-separated integers") from exc
3559
-
3560
- import httpx
3561
-
3562
- headers = {}
3563
- api_key = (os.environ.get("ENVIRONMENT_API_KEY") or "").strip()
3564
- if api_key:
3565
- headers["X-API-Key"] = api_key
3566
-
3567
- # Precompute optional policy overrides from TOML
3568
- policy_overrides: dict[str, Any] = {}
3569
- try:
3570
- # Accept [eval.policy] table or top-level keys for convenience
3571
- if isinstance(cfg.get("policy"), dict):
3572
- policy_overrides.update(dict(cfg["policy"]))
3573
- # Back-compat: allow temperature/max_tokens at top level
3574
- for k in (
3575
- "temperature",
3576
- "max_tokens",
3577
- "reasoning_effort",
3578
- "system_hint",
3579
- "tool_choice",
3580
- "inference_url",
3581
- ):
3582
- if k in cfg and k not in policy_overrides:
3583
- policy_overrides[k] = cfg.get(k)
3584
- except Exception:
3585
- policy_overrides = {}
3586
-
3587
- raw_concurrency = cfg.get("concurrency")
3588
- try:
3589
- concurrency_limit = int(raw_concurrency) if raw_concurrency is not None else 1
3590
- except Exception:
3591
- concurrency_limit = 1
3592
- if concurrency_limit <= 0:
3593
- concurrency_limit = 1
3594
- concurrency_limit = min(concurrency_limit, max(1, len(seed_values)))
3595
-
3596
- judge_specs: list[JudgeSpec] = []
3597
-
3598
def _register_judge(name_hint: str | None, judge_cfg: dict[str, Any]) -> None:
    """Load the judge callable described by ``judge_cfg`` and append it to ``judge_specs``.

    Args:
        name_hint: Fallback display name used when ``judge_cfg`` has no ``name``.
        judge_cfg: Judge table from the eval config. Exactly one of ``module``
            (dotted import path) or ``path`` (Python file) must be set.
            ``callable``/``function`` names the attribute to use (defaults to
            ``judge``). Every other key is forwarded to the judge as a keyword
            argument.

    Raises:
        click.ClickException: If the config is inconsistent, the module cannot
            be loaded, or the resolved attribute is missing or not callable.
    """
    # Local import so the ``util`` submodule is guaranteed to be loaded;
    # ``import importlib`` alone does not promise ``importlib.util``.
    import importlib.util

    if not judge_cfg:
        return

    judge_module = judge_cfg.get("module")
    judge_path = judge_cfg.get("path")
    judge_callable_name = judge_cfg.get("callable") or judge_cfg.get("function")
    if judge_module and judge_path:
        raise click.ClickException("Judge config cannot set both 'module' and 'path'")
    if not judge_module and not judge_path:
        raise click.ClickException("Judge config requires 'module' or 'path'")

    try:
        if judge_module:
            module = importlib.import_module(str(judge_module))
        else:
            path = Path(str(judge_path)).expanduser()
            if not path.exists():
                raise click.ClickException(f"Judge module path not found: {path}")
            spec = importlib.util.spec_from_file_location(f"_eval_judge_{path.stem}", path)
            if not spec or not spec.loader:
                raise click.ClickException(f"Failed to load judge module from {path}")
            module = importlib.util.module_from_spec(spec)
            # Register before executing so the module can be found by name
            # while it runs, but roll the registration back if execution
            # fails so no half-initialised module is left in sys.modules.
            previous = sys.modules.get(spec.name)
            sys.modules[spec.name] = module
            try:
                spec.loader.exec_module(module)
            except BaseException:
                if previous is None:
                    sys.modules.pop(spec.name, None)
                else:
                    sys.modules[spec.name] = previous
                raise
    except click.ClickException:
        raise
    except Exception as exc:
        raise click.ClickException(f"Unable to load judge module: {exc}") from exc

    if judge_callable_name:
        try:
            judge_fn = getattr(module, str(judge_callable_name))
        except AttributeError as exc:
            raise click.ClickException(
                f"Judge callable '{judge_callable_name}' not found in module"
            ) from exc
    elif hasattr(module, "judge"):
        judge_fn = module.judge
    else:
        raise click.ClickException("Judge module must expose 'judge' callable")

    if not callable(judge_fn):
        raise click.ClickException("Judge callable is not callable")

    # Keys that describe how to load/name the judge are not passed to it.
    reserved_keys = {"module", "path", "callable", "function", "name"}
    judge_kwargs = {k: v for k, v in judge_cfg.items() if k not in reserved_keys}
    display_name = str(judge_cfg.get("name") or name_hint or f"judge{len(judge_specs) + 1}")
    judge_specs.append(JudgeSpec(display_name, judge_fn, judge_kwargs))
3655
-
3656
- raw_judge_cfg = cfg.get("judge")
3657
- if isinstance(raw_judge_cfg, dict) and raw_judge_cfg:
3658
- direct_keys = {"module", "path", "callable", "function", "name"}
3659
- has_direct_keys = any(key in raw_judge_cfg for key in direct_keys)
3660
- nested_candidates = [
3661
- (key, value)
3662
- for key, value in raw_judge_cfg.items()
3663
- if isinstance(value, dict)
3664
- ]
3665
- if has_direct_keys and not nested_candidates:
3666
- _register_judge(None, raw_judge_cfg)
3667
- else:
3668
- for sub_name, sub_cfg in nested_candidates:
3669
- _register_judge(sub_name, sub_cfg)
3670
-
3671
- raw_judges_list = cfg.get("judges")
3672
- if isinstance(raw_judges_list, list):
3673
- for _index, entry in enumerate(raw_judges_list, start=1):
3674
- if isinstance(entry, dict):
3675
- _register_judge(entry.get("name") or f"judge{len(judge_specs) + 1}", entry)
3676
-
3677
- records: list[dict[str, Any]] = []
3678
-
3679
- successes = 0
3680
- failures = 0
3681
- # Aggregate outcome stats across successful seeds
3682
- outcome_sum: float = 0.0
3683
- outcome_count: int = 0
3684
- outcome_correct: int = 0
3685
-
3686
- def _build_task_rows(taskset: Any) -> dict[int, dict[str, Any]]:
3687
- rows: dict[int, dict[str, Any]] = {}
3688
- if not isinstance(taskset, dict):
3689
- return rows
3690
-
3691
- scenario_ids = taskset.get("scenario_ids") or []
3692
- loop_ids = taskset.get("loop_ids") or []
3693
- thread_ids = taskset.get("thread_ids") or []
3694
- difficulty_map = taskset.get("difficulty_map") or {}
3695
-
3696
- max_len = max(len(scenario_ids), len(loop_ids), len(thread_ids))
3697
- for seed in range(max_len):
3698
- scenario_id = scenario_ids[seed] if seed < len(scenario_ids) else None
3699
- loop_id = loop_ids[seed] if seed < len(loop_ids) else None
3700
- thread_id = thread_ids[seed] if seed < len(thread_ids) else None
3701
- difficulty = None
3702
- if isinstance(difficulty_map, dict):
3703
- if scenario_id and scenario_id in difficulty_map:
3704
- difficulty = difficulty_map.get(scenario_id)
3705
- elif str(seed) in difficulty_map:
3706
- difficulty = difficulty_map.get(str(seed))
3707
-
3708
- rows[seed] = {
3709
- "seed": seed,
3710
- "scenario_id": scenario_id,
3711
- "loop_id": loop_id,
3712
- "thread_id": thread_id,
3713
- "difficulty": difficulty,
3714
- }
3715
- return rows
3716
-
3717
- def _apply_metadata_filters(
3718
- rows: dict[int, dict[str, Any]], seeds_list: list[int], filters: dict[str, str]
3719
- ) -> list[int]:
3720
- if not filters:
3721
- return seeds_list
3722
- filtered: list[int] = []
3723
- for seed in seeds_list:
3724
- row = rows.get(seed)
3725
- if not row:
3726
- continue
3727
- include = True
3728
- for key, expected in filters.items():
3729
- actual = row.get(key)
3730
- if actual is None:
3731
- include = False
3732
- break
3733
- if str(actual).lower() != expected.lower():
3734
- include = False
3735
- break
3736
- if include:
3737
- filtered.append(seed)
3738
- return filtered
3739
-
3740
- def _apply_metadata_sql(
3741
- rows: dict[int, dict[str, Any]], seeds_list: list[int], query: str
3742
- ) -> list[int]:
3743
- """Return seeds that satisfy an arbitrary SQL query.
3744
-
3745
- The query is executed against an in-memory SQLite table named `tasks`
3746
- with columns (seed INTEGER, scenario_id TEXT, loop_id TEXT, thread_id TEXT, difficulty TEXT).
3747
- Any rows whose `seed` value (or first column if `seed` is absent) appear in the result set are retained.
3748
- """
3749
- if not query:
3750
- return seeds_list
3751
- conn = sqlite3.connect(":memory:")
3752
- try:
3753
- cur = conn.cursor()
3754
- cur.execute(
3755
- "CREATE TABLE tasks (seed INTEGER, scenario_id TEXT, loop_id TEXT, thread_id TEXT, difficulty TEXT)"
3756
- )
3757
- insert_stmt = (
3758
- "INSERT INTO tasks (seed, scenario_id, loop_id, thread_id, difficulty) VALUES (?,?,?,?,?)"
3759
- )
3760
- for seed in seeds_list:
3761
- row = rows.get(seed, {})
3762
- cur.execute(
3763
- insert_stmt,
3764
- [
3765
- seed,
3766
- row.get("scenario_id"),
3767
- row.get("loop_id"),
3768
- row.get("thread_id"),
3769
- row.get("difficulty"),
3770
- ],
3771
- )
3772
-
3773
- result = cur.execute(query)
3774
- fetched = result.fetchall()
3775
- if not fetched:
3776
- return []
3777
- description = result.description or []
3778
- col_names = [col[0] for col in description]
3779
- seeds_out: list[int] = []
3780
- for entry in fetched:
3781
- value = entry[col_names.index("seed")] if "seed" in col_names else entry[0]
3782
- try:
3783
- seeds_out.append(int(value))
3784
- except Exception as exc:
3785
- raise click.ClickException(
3786
- "metadata SQL query must return seed integers"
3787
- ) from exc
3788
- seeds_set = set(seeds_out)
3789
- return [seed for seed in seeds_list if seed in seeds_set]
3790
- except sqlite3.Error as exc:
3791
- raise click.ClickException(f"Failed to execute metadata SQL query: {exc}") from exc
3792
- finally:
3793
- conn.close()
3794
-
3795
- async def _run_eval() -> None:
3796
- nonlocal successes, failures, outcome_sum, outcome_count, outcome_correct, records, seed_values
3797
-
3798
- if trace_tracer is not None and trace_tracer.db is None:
3799
- await trace_tracer.initialize()
3800
-
3801
- if task_app_url is None:
3802
- transport = httpx.ASGITransport(app=app) # type: ignore[name-defined]
3803
- async_client = httpx.AsyncClient(
3804
- transport=cast(Any, transport),
3805
- base_url="http://eval.local",
3806
- timeout=300.0,
3807
- follow_redirects=True,
3808
- headers=headers,
3809
- )
3810
- else:
3811
- async_client = httpx.AsyncClient(
3812
- base_url=task_app_url,
3813
- timeout=300.0,
3814
- follow_redirects=True,
3815
- headers=headers,
3816
- )
3817
-
3818
- try:
3819
- taskset_payload: dict[str, Any] | None = None
3820
- try:
3821
- task_info_response = await async_client.get("/task_info")
3822
- except Exception:
3823
- task_info_response = None
3824
- if task_info_response is not None and task_info_response.status_code == 200:
3825
- with contextlib.suppress(Exception):
3826
- payload_json = task_info_response.json()
3827
- if isinstance(payload_json, dict) and "taskset" in payload_json:
3828
- taskset_payload = payload_json.get("taskset")
3829
- if not isinstance(taskset_payload, dict):
3830
- taskset_payload = None
3831
- elif isinstance(payload_json, dict):
3832
- taskset_payload = payload_json
3833
-
3834
- available_seeds = list(seed_values)
3835
- if metadata_sql_query or metadata_filters:
3836
- if not taskset_payload:
3837
- raise click.ClickException(
3838
- "Task metadata filters require the task app to expose /task_info metadata"
3839
- )
3840
- rows = _build_task_rows(taskset_payload)
3841
- if metadata_sql_query:
3842
- available_seeds = _apply_metadata_sql(rows, available_seeds, metadata_sql_query)
3843
- if metadata_filters:
3844
- available_seeds = _apply_metadata_filters(rows, available_seeds, metadata_filters)
3845
- if not available_seeds:
3846
- raise click.ClickException("No seeds match the provided metadata filters")
3847
- seed_values = available_seeds
3848
-
3849
- semaphore = asyncio.Semaphore(concurrency_limit)
3850
-
3851
- async def _run_seed(seed_val: int) -> None:
3852
- nonlocal successes, failures, outcome_sum, outcome_count, outcome_correct, records
3853
- # Read env_name and policy_name from config if available
3854
- env_name = cfg.get("env_name") or (cfg.get("env", {}).get("env_name") if isinstance(cfg.get("env"), dict) else None)
3855
- policy_name = cfg.get("policy_name") or (cfg.get("policy", {}).get("policy_name") if isinstance(cfg.get("policy"), dict) else None)
3856
- env_config_overrides = cfg.get("env_config", {}) if isinstance(cfg.get("env_config"), dict) else {}
3857
- policy_config_overrides = cfg.get("policy_config", {}) if isinstance(cfg.get("policy_config"), dict) else {}
3858
-
3859
- # Debug: print config parsing
3860
- if seed_val == 0:
3861
- click.echo(f"[DEBUG] env_name from config: {env_name}")
3862
- click.echo(f"[DEBUG] policy_name from config: {policy_name}")
3863
-
3864
- # Generate default ops sequence if not provided
3865
- max_llm_calls = policy_config_overrides.get("max_llm_calls", 10)
3866
- ops_list = cfg.get("ops", [])
3867
- if not ops_list:
3868
- # Generate default "agent, env" pairs for max_llm_calls
3869
- ops_list = ["agent", "env"] * int(max_llm_calls)
3870
-
3871
- body = {
3872
- "run_id": str(uuid.uuid4()),
3873
- "env": {"config": {"split": split, "index": seed_val, **env_config_overrides}, "seed": seed_val},
3874
- "policy": {
3875
- "policy_name": policy_name or selected_model,
3876
- "config": {"model": selected_model, **policy_overrides, **policy_config_overrides},
3877
- },
3878
- "ops": ops_list,
3879
- "record": {
3880
- "return_trace": cfg.get("return_trace", True),
3881
- "trace_format": cfg.get("trace_format", "structured"),
3882
- },
3883
- "mode": "eval", # RolloutMode.EVAL: use inference URLs as-is, no transformations
3884
- }
3885
- if env_name:
3886
- body["env"]["env_name"] = env_name
3887
-
3888
- # Debug: print the body being sent
3889
- if seed_val == 0:
3890
- click.echo(f"[DEBUG] rollout body env: {body['env']}")
3891
- click.echo(f"[DEBUG] rollout body policy: {body['policy']}")
3892
- click.echo(f"[DEBUG] rollout body mode: {body.get('mode', 'NOT SET')}")
3893
- rollout_elapsed: float | None = None
3894
- rollout_start = time.perf_counter()
3895
- try:
3896
- import logging
3897
- _log = logging.getLogger(__name__)
3898
- _log.info(f"[EVAL_BODY_DEBUG] Sending body with mode={body.get('mode')}")
3899
- async with semaphore:
3900
- response = await async_client.post("/rollout", json=body)
3901
- rollout_elapsed = time.perf_counter() - rollout_start
3902
- except Exception as exc:
3903
- failures += 1
3904
- click.echo(f"seed={seed_val} error={exc}")
3905
- return
3906
-
3907
- ok = 200 <= response.status_code < 300
3908
- if ok:
3909
- successes += 1
3910
- else:
3911
- failures += 1
3912
-
3913
- summary = [f"seed={seed_val}", f"status={response.status_code}"]
3914
- data: Any
3915
- try:
3916
- data = response.json()
3917
- except Exception:
3918
- data = None
3919
-
3920
- # Debug: print validation errors
3921
- if response.status_code == 422 and data:
3922
- click.echo(f"[DEBUG] 422 Validation Error: {data}")
3923
-
3924
- metrics: dict[str, Any] | None = None
3925
- completion: str | None = None
3926
- prompt_index: int | None = None
3927
- prompt_text: str | None = None
3928
- task_id: str | None = None
3929
- task_split: str | None = None
3930
- task_rubric_id: str | None = None
3931
-
3932
- trace_namespace: dict[str, Any] | None = None
3933
- session_trace_dict: dict[str, Any] | None = None
3934
-
3935
- if isinstance(data, dict):
3936
- import logging
3937
- _logger = logging.getLogger(__name__)
3938
- _logger.info(f"[EVAL_DEBUG] Response data keys: {list(data.keys())}")
3939
- if "detail" in data:
3940
- _logger.error(f"[EVAL_DEBUG] Task app returned error: {data['detail']}")
3941
- trace_namespace = data.get("trace")
3942
- _logger.info(f"[EVAL_DEBUG] trace_namespace type: {type(trace_namespace)}, value: {trace_namespace if not isinstance(trace_namespace, dict) else 'dict with keys: ' + str(list(trace_namespace.keys()) if trace_namespace else 'None')}")
3943
- if not isinstance(trace_namespace, dict):
3944
- raise RuntimeError(
3945
- "The 'synth-ai eval' command requires trace payloads in rollout responses. "
3946
- "Ensure the rollout request includes 'trace_format': 'structured' and 'return_trace': true, "
3947
- "and that task app tracing is enabled (TASKAPP_TRACING_ENABLED=1). "
3948
- "Note: This is specific to the eval command - general rollout endpoints don't require traces."
3949
- )
3950
- # Handle both "compact" and "full" trace formats:
3951
- # - compact: trace_namespace contains {session_id, metadata, ...}
3952
- # - full: trace_namespace IS the full session_trace dict
3953
- session_trace_dict = trace_namespace.get("session_trace")
3954
- if not isinstance(session_trace_dict, dict):
3955
- # If no session_trace key, assume "full" format where trace itself is the session_trace
3956
- if "session_id" in trace_namespace:
3957
- session_trace_dict = trace_namespace
3958
- else:
3959
- raise RuntimeError(
3960
- "The 'synth-ai eval' command requires 'session_trace' in the trace payload or a valid full trace format. "
3961
- "Ensure the task app is using tracing_v3 and returning structured trace data."
3962
- )
3963
- metrics = data.get("metrics") if isinstance(data.get("metrics"), dict) else None
3964
- if metrics:
3965
- mean_return = metrics.get("mean_return") or metrics.get("total_reward")
3966
- outcome = metrics.get("outcome_score")
3967
- if mean_return is not None:
3968
- summary.append(f"mean_return={mean_return}")
3969
- if outcome is not None:
3970
- summary.append(f"outcome={outcome}")
3971
- try:
3972
- val = float(outcome)
3973
- outcome_sum += val
3974
- outcome_count += 1
3975
- if val >= 0.5:
3976
- outcome_correct += 1
3977
- except Exception:
3978
- pass
3979
- trajs = (
3980
- data.get("trajectories")
3981
- if isinstance(data.get("trajectories"), list)
3982
- else None
3983
- )
3984
- if trajs:
3985
- first = trajs[0] if trajs else None
3986
- steps = first.get("steps") if isinstance(first, dict) else None
3987
- if isinstance(steps, list) and steps:
3988
- step0 = steps[0]
3989
- tool_calls = step0.get("tool_calls") or step0.get("tools") or []
3990
- if isinstance(tool_calls, list):
3991
- summary.append(f"tool_calls={len(tool_calls)}")
3992
- obs = step0.get("obs") if isinstance(step0, dict) else None
3993
- if isinstance(obs, dict):
3994
- idx_val = obs.get("prompt_index")
3995
- if isinstance(idx_val, int):
3996
- prompt_index = idx_val
3997
- prompt_raw = obs.get("prompt")
3998
- if isinstance(prompt_raw, str):
3999
- prompt_text = prompt_raw
4000
- if task_id is None:
4001
- candidate_id = obs.get("task_id")
4002
- if isinstance(candidate_id, str) and candidate_id:
4003
- task_id = candidate_id
4004
- if task_split is None:
4005
- candidate_split = obs.get("task_split")
4006
- if isinstance(candidate_split, str) and candidate_split:
4007
- task_split = candidate_split
4008
- if task_rubric_id is None:
4009
- candidate_rid = obs.get("task_rubric_id")
4010
- if isinstance(candidate_rid, str) and candidate_rid:
4011
- task_rubric_id = candidate_rid
4012
- final = first.get("final") if isinstance(first, dict) else None
4013
- if isinstance(final, dict):
4014
- final_obs = final.get("observation")
4015
- if isinstance(final_obs, dict):
4016
- comp_val = final_obs.get("completion")
4017
- if isinstance(comp_val, str):
4018
- completion = comp_val
4019
- if task_id is None:
4020
- candidate_id = final_obs.get("task_id")
4021
- if isinstance(candidate_id, str) and candidate_id:
4022
- task_id = candidate_id
4023
- if task_split is None:
4024
- candidate_split = final_obs.get("task_split")
4025
- if isinstance(candidate_split, str) and candidate_split:
4026
- task_split = candidate_split
4027
- if task_rubric_id is None:
4028
- candidate_rid = final_obs.get("task_rubric_id")
4029
- if isinstance(candidate_rid, str) and candidate_rid:
4030
- task_rubric_id = candidate_rid
4031
- final_info = final.get("info")
4032
- if isinstance(final_info, dict):
4033
- if task_id is None:
4034
- candidate_id = final_info.get("task_id")
4035
- if isinstance(candidate_id, str) and candidate_id:
4036
- task_id = candidate_id
4037
- if task_split is None:
4038
- candidate_split = final_info.get("task_split")
4039
- if isinstance(candidate_split, str) and candidate_split:
4040
- task_split = candidate_split
4041
- if task_rubric_id is None:
4042
- candidate_rid = final_info.get("task_rubric_id")
4043
- if isinstance(candidate_rid, str) and candidate_rid:
4044
- task_rubric_id = candidate_rid
4045
- if task_id:
4046
- summary.append(f"task_id={task_id}")
4047
- click.echo(" ".join(summary))
4048
- with contextlib.suppress(Exception):
4049
- click.echo(json.dumps(data, indent=2))
4050
- else:
4051
- click.echo(" ".join(summary))
4052
-
4053
- official_score = None
4054
- if isinstance(metrics, dict):
4055
- for key in ("mean_return", "total_reward", "outcome_score"):
4056
- val = metrics.get(key)
4057
- if isinstance(val, int | float):
4058
- official_score = float(val)
4059
- break
4060
- if official_score is None and isinstance(data, dict):
4061
- try:
4062
- reward_val = data["trajectories"][0]["steps"][0].get("reward")
4063
- if isinstance(reward_val, int | float):
4064
- official_score = float(reward_val)
4065
- except Exception:
4066
- pass
4067
-
4068
- if official_score is not None:
4069
- if official_score < 0.0:
4070
- official_score = 0.0
4071
- elif official_score > 1.0:
4072
- official_score = min(1.0, official_score)
4073
-
4074
- judge_scores: dict[str, float | None] = {}
4075
- judges_timings: dict[str, float | None] = {}
4076
- timings: dict[str, Any] = {
4077
- "rollout_s": rollout_elapsed,
4078
- "judges": judges_timings,
4079
- }
4080
- if judge_specs:
4081
- for spec in judge_specs:
4082
- score_value: float | None = None
4083
- judge_elapsed: float | None = None
4084
- # Run judges for all tasks (text-based and trajectory-based)
4085
- # Text-based tasks have completion, trajectory-based tasks use response
4086
- judge_payload = {
4087
- "seed": seed_val,
4088
- "prompt_index": prompt_index,
4089
- "prompt": prompt_text,
4090
- "completion": completion,
4091
- "metrics": metrics,
4092
- "response": data,
4093
- "trace": trace_namespace,
4094
- }
4095
- try:
4096
- judge_start = time.perf_counter()
4097
- result = spec.fn(judge_payload, **spec.kwargs)
4098
- judge_elapsed = time.perf_counter() - judge_start
4099
- if isinstance(result, int | float):
4100
- score_value = float(result)
4101
- except Exception as exc:
4102
- if judge_elapsed is None:
4103
- judge_elapsed = time.perf_counter() - judge_start
4104
- click.echo(f"seed={seed_val} judge[{spec.name}]_error={exc}")
4105
- judges_timings[spec.name] = judge_elapsed
4106
- judge_scores[spec.name] = score_value
4107
-
4108
- if trace_tracer is not None and trace_namespace:
4109
- storage_metadata = {
4110
- "eval_seed": seed_val,
4111
- "prompt_index": prompt_index,
4112
- "task_id": task_id,
4113
- "task_split": task_split,
4114
- "task_rubric_id": task_rubric_id,
4115
- "official_score": official_score,
4116
- "judge_scores": judge_scores,
4117
- "model": selected_model,
4118
- "prompt": prompt_text,
4119
- "completion": completion,
4120
- }
4121
- await _store_trace(trace_tracer, trace_namespace, storage_metadata)
4122
-
4123
- records.append(
4124
- {
4125
- "seed": seed_val,
4126
- "prompt_index": prompt_index,
4127
- "task_id": task_id,
4128
- "task_split": task_split,
4129
- "task_rubric_id": task_rubric_id,
4130
- "official_score": official_score,
4131
- "judge_scores": judge_scores,
4132
- "timings": timings,
4133
- }
4134
- )
4135
-
4136
- await asyncio.gather(*[_run_seed(seed_val) for seed_val in seed_values])
4137
- finally:
4138
- await async_client.aclose()
4139
-
4140
- try:
4141
- asyncio.run(_run_eval())
4142
- finally:
4143
- if trace_tracer is not None and trace_tracer.db is not None:
4144
- asyncio.run(trace_tracer.db.close())
4145
-
4146
- click.echo(
4147
- f"Eval complete: {successes} ok, {failures} failed; model={selected_model}, split={split}"
4148
- )
4149
-
4150
- if outcome_count > 0:
4151
- mean_outcome = outcome_sum / float(outcome_count)
4152
- frac_right = outcome_correct / float(outcome_count)
4153
- click.echo(
4154
- f"Outcome summary: correct={outcome_correct}/{outcome_count} ({frac_right:.2%}), mean_outcome={mean_outcome:.3f}"
4155
- )
4156
-
4157
- if records:
4158
- judge_specs = judge_specs or [] # ensure iterable
4159
- official_scores = [
4160
- r["official_score"] for r in records if r["official_score"] is not None
4161
- ]
4162
- if official_scores:
4163
- click.echo(f" Official mean: {sum(official_scores) / len(official_scores):.3f}")
4164
- else:
4165
- click.echo(" Official mean: n/a")
4166
-
4167
- for spec in judge_specs:
4168
- spec_scores = [
4169
- record["judge_scores"].get(spec.name)
4170
- for record in records
4171
- if record["judge_scores"].get(spec.name) is not None
4172
- ]
4173
- if spec_scores:
4174
- mean_spec = sum(spec_scores) / len(spec_scores)
4175
- click.echo(f" [{spec.name}] mean: {mean_spec:.3f}")
4176
- else:
4177
- click.echo(f" [{spec.name}] mean: n/a")
4178
-
4179
- paired = [
4180
- (
4181
- record["official_score"],
4182
- record["judge_scores"].get(spec.name),
4183
- )
4184
- for record in records
4185
- if record["official_score"] is not None
4186
- and record["judge_scores"].get(spec.name) is not None
4187
- ]
4188
- if len(paired) >= 2:
4189
- corr = _pearson(
4190
- [p[0] for p in paired if p[0] is not None],
4191
- [p[1] for p in paired if p[1] is not None],
4192
- )
4193
- if corr is not None:
4194
- click.echo(f" Pearson r: {corr:.3f}")
4195
- else:
4196
- click.echo(" Pearson r: undefined (zero variance)")
4197
- else:
4198
- click.echo(" Pearson r: n/a (need ≥2 paired scores)")
4199
-
4200
- header = ["Seed", "Prompt", "Official"]
4201
- header.extend(spec.name for spec in judge_specs)
4202
- rows: list[list[str]] = []
4203
- for record in sorted(records, key=lambda r: (r["seed"], r.get("prompt_index") or -1)):
4204
- seed_val = str(record["seed"])
4205
- prompt_idx = (
4206
- str(record["prompt_index"])
4207
- if record["prompt_index"] is not None
4208
- else "-"
4209
- )
4210
- official_val = (
4211
- f"{record['official_score']:.3f}"
4212
- if record["official_score"] is not None
4213
- else "-"
4214
- )
4215
- row = [seed_val, prompt_idx, official_val]
4216
- for spec in judge_specs:
4217
- score_val = record["judge_scores"].get(spec.name)
4218
- row.append(f"{score_val:.3f}" if isinstance(score_val, int | float) else "-")
4219
- rows.append(row)
4220
-
4221
- widths = [len(col) for col in header]
4222
- for row in rows:
4223
- for idx, cell in enumerate(row):
4224
- widths[idx] = max(widths[idx], len(cell))
4225
-
4226
- click.echo("")
4227
- click.echo(" ".join(h.ljust(widths[idx]) for idx, h in enumerate(header)))
4228
- click.echo(" ".join("-" * widths[idx] for idx in range(len(header))))
4229
- for row in rows:
4230
- click.echo(" ".join(cell.ljust(widths[idx]) for idx, cell in enumerate(row)))
4231
-
4232
-
4233
-
4234
@click.command(
    "filter",
    help="Export filtered tracing sessions to SFT-ready JSONL based on a TOML config.",
)
@click.option(
    "--config",
    "config_path",
    type=click.Path(),
    required=True,
    help="Path to TOML config describing the input trace DB, score thresholds, and output JSONL.",
)
def filter_command(config_path: str) -> None:
    """Render tracing sessions that match filter rules into SFT JSONL.

    The TOML file should contain a `[filter]` table with at least:

        db = "path/to/traces.db"        # sqlite path or URL (sqlite+aiosqlite://...)
        output = "ft_data/out.jsonl"    # destination JSONL

    Optional keys such as `splits`, `task_ids`, `models`, `min_official_score`, or
    `min_judge_scores.my_judge = 0.7` allow you to narrow the dataset down to
    high-quality traces. See `customers/agora_single_file/configs/filter_local.toml`
    for a working example.

    Each accepted session contributes one JSONL record per user message
    (paired with the assistant message that follows it, when there is one).
    Sessions without stored messages fall back to the `prompt`/`completion`
    keys of the session metadata.

    Args:
        config_path: Path to the TOML file holding the `[filter]` table.

    Raises:
        click.ClickException: If no TOML parser is available, the config is
            missing, unparsable or invalid, the database has no traces, or
            no session survives the filters.
    """
    # Imported lazily, as the original code did.
    from synth_ai.task.config import FilterConfig

    if _toml is None:
        raise click.ClickException("TOML parser not available; install tomli or use Python 3.11+")

    cfg_path = Path(config_path)
    if not cfg_path.exists():
        raise click.ClickException(f"Filter config not found: {cfg_path}")

    try:
        config_data = _toml.loads(cfg_path.read_text(encoding="utf-8"))
    except Exception as exc:  # parser-specific error types are not known here
        raise click.ClickException(f"Failed to parse TOML '{cfg_path}': {exc}") from exc

    filter_cfg_dict = config_data.get("filter") if isinstance(config_data, dict) else None
    if not isinstance(filter_cfg_dict, dict):
        raise click.ClickException("Config must contain a [filter] table")

    # Validate config with dataclass
    try:
        filter_cfg = FilterConfig.from_dict(filter_cfg_dict)
    except (ValueError, TypeError) as validation_error:
        raise click.ClickException(f"Invalid filter config: {validation_error}") from validation_error

    click.echo(f"✓ Config validated: db={filter_cfg.db}, output={filter_cfg.output}")
    if filter_cfg.min_official_score is not None:
        click.echo(f" → Filtering for official score >= {filter_cfg.min_official_score}")
    if filter_cfg.limit:
        click.echo(f" → Limiting to {filter_cfg.limit} examples")

    db_url = filter_cfg.get_db_url()
    output_path = filter_cfg.get_output_path()

    splits = set(filter_cfg.splits)
    task_ids = set(filter_cfg.task_ids)
    models = set(filter_cfg.models)
    min_official = filter_cfg.min_official_score
    max_official = filter_cfg.max_official_score
    min_judge_scores = filter_cfg.min_judge_scores or {}
    max_judge_scores = filter_cfg.max_judge_scores or {}
    # Note: min_created_at and max_created_at not yet in FilterConfig dataclass,
    # so they are read from the raw table.
    min_created = _parse_datetime_for_trace(filter_cfg_dict.get("min_created_at"))
    max_created = _parse_datetime_for_trace(filter_cfg_dict.get("max_created_at"))
    limit = filter_cfg.limit
    has_limit = limit is not None and limit > 0
    official_filter_active = min_official is not None or max_official is not None

    def _score_ok(value: Any, min_val: Any, max_val: Any) -> bool:
        """Return True when ``value`` lies within the optional [min_val, max_val] bounds."""
        if value is None:
            # A missing score only passes when no lower bound is required.
            return min_val is None
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return False
        if min_val is not None and number < float(min_val):
            return False
        return not (max_val is not None and number > float(max_val))

    def _decode_json(raw: Any) -> Any:
        """Decode ``raw`` as JSON when it is a string; return it unchanged on failure."""
        if not isinstance(raw, str):
            return raw
        try:
            return json.loads(raw)
        except (ValueError, RecursionError):
            return raw

    def _load_metadata(raw: Any) -> dict[str, Any]:
        """Coerce the ``metadata`` column into a dict; anything else becomes ``{}``."""
        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except (ValueError, RecursionError):
                return {}
        # JSON such as a list or a bare string would break the ``.get`` calls below.
        return dict(raw) if isinstance(raw, dict) else {}

    def _extract_text(content: Any) -> str:
        """Flatten structured message content into plain text."""
        if isinstance(content, str):
            return content
        if isinstance(content, dict):
            # Try payload.content for user prompts
            payload = content.get("payload")
            if isinstance(payload, dict) and "content" in payload:
                return _extract_text(payload["content"])
            # Try common keys
            for key in ("text", "content", "content_text"):
                candidate = content.get(key)
                if key in content and isinstance(candidate, str):
                    return candidate
            return json.dumps(content)
        if isinstance(content, list):
            # Multimodal content - concatenate text parts
            parts = [
                item.get("text", "")
                for item in content
                if isinstance(item, dict) and item.get("type") == "text"
            ]
            return " ".join(parts) if parts else str(content)
        return str(content)

    def _judges_ok(metadata: dict[str, Any]) -> bool:
        """Check every configured per-judge minimum and maximum threshold."""
        judge_scores = metadata.get("judge_scores")
        if not isinstance(judge_scores, dict):
            judge_scores = {}
        for judge_name, threshold in min_judge_scores.items():
            if not _score_ok(judge_scores.get(judge_name), threshold, None):
                return False
        for judge_name, threshold in max_judge_scores.items():
            if not _score_ok(judge_scores.get(judge_name), None, threshold):
                return False
        return True

    def _make_record(
        user_text: str,
        assistant_text: str,
        session_id: Any,
        metadata: dict[str, Any],
        total_reward: Any,
        achievements_count: Any,
        created_at: Any,
    ) -> dict[str, Any]:
        """Build one SFT example in chat-message form plus provenance metadata."""
        return {
            "messages": [
                {"role": "user", "content": user_text},
                {"role": "assistant", "content": assistant_text},
            ],
            "metadata": {
                "session_id": session_id,
                "env_name": metadata.get("env_name"),
                "policy_name": metadata.get("policy_name"),
                "seed": metadata.get("seed"),
                "total_reward": total_reward,
                "achievements_count": achievements_count,
                "model": metadata.get("model"),
                "created_at": created_at,
            },
        }

    async def _collect_examples(tracer: Any) -> list[dict[str, Any]]:
        """Query the trace DB and return the records that pass every filter."""
        df = await tracer.db.query_traces(
            "SELECT session_id, created_at, metadata FROM session_traces ORDER BY created_at"
        )
        if getattr(df, "empty", True):
            raise click.ClickException("No traces found in database")

        accepted: list[dict[str, Any]] = []

        for row in df.to_dict("records"):
            # Only the first ``limit`` records are written, so stop querying
            # once enough have been gathered.
            if has_limit and len(accepted) >= limit:
                break

            metadata = _load_metadata(row.get("metadata"))
            created_at_raw = row.get("created_at")
            created_at_dt = _parse_datetime_for_trace(created_at_raw)
            session_id = row.get("session_id")

            if splits and metadata.get("task_split") not in splits:
                continue
            if task_ids and metadata.get("task_id") not in task_ids:
                continue
            if models and metadata.get("model") not in models:
                continue

            if min_created and (created_at_dt is None or created_at_dt < min_created):
                continue
            if max_created and (created_at_dt is None or created_at_dt > max_created):
                continue

            # Check against outcome_rewards if score filter is set
            total_reward = None
            achievements_count = None
            if official_filter_active:
                reward_rows = await tracer.db.query_traces(
                    "SELECT total_reward, achievements_count FROM outcome_rewards WHERE session_id = :session_id",
                    {"session_id": session_id},
                )
                reward_records = reward_rows.to_dict("records") if hasattr(reward_rows, "to_dict") else []
                if reward_records:
                    total_reward = reward_records[0].get("total_reward")
                    achievements_count = reward_records[0].get("achievements_count")
                    if not _score_ok(total_reward, min_official, max_official):
                        continue
                elif min_official is not None:
                    # No reward found, but score filter requires it
                    continue

            if not _judges_ok(metadata):
                continue

            created_at_out = created_at_dt.isoformat() if created_at_dt else created_at_raw

            # Query messages for this session
            messages_query = """
                SELECT message_type, content, timestamp
                FROM messages
                WHERE session_id = :session_id
                ORDER BY timestamp ASC, id ASC
            """
            msg_df = await tracer.db.query_traces(messages_query, {"session_id": session_id})
            message_rows = msg_df.to_dict("records") if hasattr(msg_df, "to_dict") else []

            if not message_rows:
                # Fallback: check if prompt/completion in metadata (old format)
                prompt = metadata.get("prompt") or ""
                completion = metadata.get("completion") or ""
                if prompt and completion:
                    accepted.append(
                        _make_record(
                            str(prompt),
                            str(completion),
                            session_id,
                            metadata,
                            total_reward,
                            achievements_count,
                            created_at_out,
                        )
                    )
                continue

            # Extract user/assistant pairs from messages
            for i, msg_row in enumerate(message_rows):
                if msg_row.get("message_type") not in ("user", "policy_user_prompt"):
                    continue

                # The search stops at the first assistant message or system
                # prompt; only an assistant message counts as the reply.
                assistant_msg = None
                for j in range(i + 1, len(message_rows)):
                    next_type = message_rows[j].get("message_type")
                    if next_type in ("assistant", "policy_system_prompt"):
                        if next_type == "assistant":
                            assistant_msg = message_rows[j]
                        break

                user_text = _extract_text(_decode_json(msg_row.get("content")))
                if not user_text:
                    continue

                # The assistant turn may not have been recorded at all.
                assistant_text = ""
                if assistant_msg is not None:
                    assistant_text = _extract_text(_decode_json(assistant_msg.get("content")))

                accepted.append(
                    _make_record(
                        user_text,
                        assistant_text if assistant_text else "[no response recorded]",
                        session_id,
                        metadata,
                        total_reward,
                        achievements_count,
                        created_at_out,
                    )
                )

        return accepted

    async def _run_filter() -> None:
        """Open the trace DB, gather matching examples and write the JSONL file."""
        tracer = SessionTracer(db_url=db_url, auto_save=False)
        await tracer.initialize()
        try:
            accepted = await _collect_examples(tracer)
        finally:
            # Close the connection on error paths too (e.g. "No traces found").
            await tracer.db.close()

        if not accepted:
            raise click.ClickException("No sessions matched the provided filters")

        if has_limit:
            accepted = accepted[:limit]

        output_path.parent.mkdir(parents=True, exist_ok=True)
        with output_path.open("w", encoding="utf-8") as handle:
            for item in accepted:
                handle.write(json.dumps(item, ensure_ascii=False))
                handle.write("\n")

        click.echo(f"Wrote {len(accepted)} examples -> {output_path}")

    asyncio.run(_run_filter())
4520
-
4521
-
4522
def register_eval(cli: click.Group) -> None:
    """Add ``eval_command`` to the given Click group ``cli``."""
    cli.add_command(eval_command)