synth-ai 0.2.14__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (1086)
  1. synth_ai/__init__.py +25 -46
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +98 -72
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/demo/__init__.py +3 -0
  25. synth_ai/cli/commands/demo/core.py +153 -0
  26. synth_ai/cli/commands/eval/__init__.py +10 -0
  27. synth_ai/cli/commands/eval/config.py +338 -0
  28. synth_ai/cli/commands/eval/core.py +258 -0
  29. synth_ai/cli/commands/eval/runner.py +704 -0
  30. synth_ai/cli/commands/eval/validation.py +60 -0
  31. synth_ai/cli/commands/filter/__init__.py +12 -0
  32. synth_ai/cli/commands/filter/core.py +424 -0
  33. synth_ai/cli/commands/filter/errors.py +55 -0
  34. synth_ai/cli/commands/filter/validation.py +77 -0
  35. synth_ai/cli/commands/help/__init__.py +185 -0
  36. synth_ai/cli/commands/help/core.py +72 -0
  37. synth_ai/cli/commands/scan/__init__.py +19 -0
  38. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  39. synth_ai/cli/commands/scan/core.py +344 -0
  40. synth_ai/cli/commands/scan/health_checker.py +242 -0
  41. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  42. synth_ai/cli/commands/scan/models.py +83 -0
  43. synth_ai/cli/commands/smoke/__init__.py +7 -0
  44. synth_ai/cli/commands/smoke/core.py +1428 -0
  45. synth_ai/cli/commands/status/__init__.py +3 -0
  46. synth_ai/cli/commands/status/client.py +91 -0
  47. synth_ai/cli/commands/status/config.py +12 -0
  48. synth_ai/cli/commands/status/errors.py +11 -0
  49. synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
  50. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  51. synth_ai/cli/commands/status/subcommands/files.py +34 -0
  52. synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
  53. synth_ai/cli/commands/status/subcommands/models.py +35 -0
  54. synth_ai/cli/commands/status/subcommands/runs.py +34 -0
  55. synth_ai/cli/commands/status/subcommands/session.py +77 -0
  56. synth_ai/cli/commands/status/subcommands/summary.py +39 -0
  57. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  58. synth_ai/cli/commands/status/utils.py +23 -0
  59. synth_ai/cli/commands/train/__init__.py +51 -0
  60. synth_ai/cli/commands/train/core.py +22 -0
  61. synth_ai/cli/commands/train/errors.py +117 -0
  62. synth_ai/cli/commands/train/prompt_learning_validation.py +632 -0
  63. synth_ai/cli/commands/train/validation.py +392 -0
  64. synth_ai/cli/commands/train/verifier_schemas.py +200 -0
  65. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  66. synth_ai/cli/demo_apps/__init__.py +10 -0
  67. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  68. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  69. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  70. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  71. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  72. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  73. synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
  74. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  75. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  76. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  79. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  80. synth_ai/cli/demo_apps/math/_common.py +16 -0
  81. synth_ai/cli/demo_apps/math/app.py +38 -0
  82. synth_ai/cli/demo_apps/math/config.toml +75 -0
  83. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  84. synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
  85. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  86. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  87. synth_ai/cli/demo_apps/mipro/task_app.py +911 -0
  88. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  89. synth_ai/cli/demos/__init__.py +12 -0
  90. synth_ai/cli/demos/demo.py +32 -0
  91. synth_ai/cli/demos/rl_demo.py +254 -0
  92. synth_ai/cli/deploy.py +216 -0
  93. synth_ai/cli/infra/__init__.py +14 -0
  94. synth_ai/cli/infra/balance.py +216 -0
  95. synth_ai/cli/infra/mcp.py +35 -0
  96. synth_ai/cli/infra/modal_app.py +36 -0
  97. synth_ai/cli/infra/setup.py +69 -0
  98. synth_ai/cli/infra/status.py +16 -0
  99. synth_ai/cli/infra/turso.py +77 -0
  100. synth_ai/cli/lib/__init__.py +10 -0
  101. synth_ai/cli/lib/agents.py +76 -0
  102. synth_ai/cli/lib/apps/modal_app.py +101 -0
  103. synth_ai/cli/lib/apps/task_app.py +642 -0
  104. synth_ai/cli/lib/bin.py +39 -0
  105. synth_ai/cli/lib/env.py +375 -0
  106. synth_ai/cli/lib/errors.py +85 -0
  107. synth_ai/cli/lib/modal.py +315 -0
  108. synth_ai/cli/lib/plotting.py +126 -0
  109. synth_ai/cli/lib/prompt_args.py +39 -0
  110. synth_ai/cli/lib/prompts.py +284 -0
  111. synth_ai/cli/lib/sqld.py +122 -0
  112. synth_ai/cli/lib/task_app_discovery.py +884 -0
  113. synth_ai/cli/lib/task_app_env.py +295 -0
  114. synth_ai/cli/lib/train_cfgs.py +300 -0
  115. synth_ai/cli/lib/tunnel_records.py +207 -0
  116. synth_ai/cli/local/__init__.py +14 -0
  117. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  118. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  119. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  120. synth_ai/cli/local/experiment_queue/config.py +128 -0
  121. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  122. synth_ai/cli/local/experiment_queue/database.py +175 -0
  123. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  124. synth_ai/cli/local/experiment_queue/models.py +231 -0
  125. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  126. synth_ai/cli/local/experiment_queue/results.py +373 -0
  127. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  128. synth_ai/cli/local/experiment_queue/service.py +344 -0
  129. synth_ai/cli/local/experiment_queue/status.py +372 -0
  130. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  131. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  132. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  133. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  134. synth_ai/cli/local/session/__init__.py +92 -0
  135. synth_ai/cli/local/session/client.py +383 -0
  136. synth_ai/cli/local/session/constants.py +63 -0
  137. synth_ai/cli/local/session/exceptions.py +105 -0
  138. synth_ai/cli/local/session/manager.py +139 -0
  139. synth_ai/cli/local/session/models.py +89 -0
  140. synth_ai/cli/local/session/query.py +110 -0
  141. synth_ai/cli/root.py +30 -6
  142. synth_ai/cli/task_apps/__init__.py +37 -0
  143. synth_ai/cli/task_apps/commands.py +3145 -0
  144. synth_ai/cli/task_apps/deploy.py +7 -0
  145. synth_ai/cli/task_apps/list.py +26 -0
  146. synth_ai/cli/task_apps/main.py +36 -0
  147. synth_ai/cli/task_apps/modal_serve.py +11 -0
  148. synth_ai/cli/task_apps/serve.py +11 -0
  149. synth_ai/cli/training/__init__.py +8 -0
  150. synth_ai/cli/training/train.py +5 -0
  151. synth_ai/cli/training/train_cfg.py +34 -0
  152. synth_ai/cli/training/watch.py +506 -0
  153. synth_ai/cli/turso.py +34 -55
  154. synth_ai/cli/utils/__init__.py +8 -0
  155. synth_ai/cli/utils/experiments.py +235 -0
  156. synth_ai/cli/utils/queue.py +504 -0
  157. synth_ai/cli/utils/recent.py +133 -0
  158. synth_ai/cli/utils/traces.py +164 -0
  159. synth_ai/contracts/__init__.py +67 -0
  160. synth_ai/core/__init__.py +100 -0
  161. synth_ai/core/_utils/__init__.py +54 -0
  162. synth_ai/core/_utils/base_url.py +10 -0
  163. synth_ai/core/_utils/http.py +10 -0
  164. synth_ai/core/_utils/prompts.py +14 -0
  165. synth_ai/core/_utils/task_app_state.py +12 -0
  166. synth_ai/core/_utils/user_config.py +10 -0
  167. synth_ai/core/apps/common.py +116 -0
  168. synth_ai/core/auth.py +95 -0
  169. synth_ai/core/cfgs.py +240 -0
  170. synth_ai/core/config/__init__.py +16 -0
  171. synth_ai/core/config/base.py +168 -0
  172. synth_ai/core/config/resolver.py +89 -0
  173. synth_ai/core/env.py +231 -0
  174. synth_ai/core/errors.py +125 -0
  175. synth_ai/core/http.py +230 -0
  176. synth_ai/core/integrations/__init__.py +11 -0
  177. synth_ai/core/integrations/cloudflare.py +1886 -0
  178. synth_ai/core/integrations/mcp/__init__.py +6 -0
  179. synth_ai/core/integrations/mcp/__main__.py +8 -0
  180. synth_ai/core/integrations/mcp/claude.py +36 -0
  181. synth_ai/core/integrations/mcp/main.py +254 -0
  182. synth_ai/core/integrations/mcp/setup.py +100 -0
  183. synth_ai/core/integrations/modal.py +277 -0
  184. synth_ai/core/json.py +72 -0
  185. synth_ai/core/log_filter.py +99 -0
  186. synth_ai/core/logging.py +82 -0
  187. synth_ai/core/paths.py +107 -0
  188. synth_ai/core/pricing.py +109 -0
  189. synth_ai/core/process.py +233 -0
  190. synth_ai/core/ssl.py +25 -0
  191. synth_ai/core/storage/__init__.py +71 -0
  192. synth_ai/core/task_app_state.py +318 -0
  193. synth_ai/core/telemetry.py +282 -0
  194. synth_ai/core/tracing_v3/__init__.py +99 -0
  195. synth_ai/core/tracing_v3/abstractions.py +348 -0
  196. synth_ai/core/tracing_v3/config.py +229 -0
  197. synth_ai/core/tracing_v3/constants.py +21 -0
  198. synth_ai/core/tracing_v3/db_config.py +182 -0
  199. synth_ai/core/tracing_v3/decorators.py +401 -0
  200. synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
  201. synth_ai/core/tracing_v3/migration_helper.py +119 -0
  202. synth_ai/core/tracing_v3/session_tracer.py +542 -0
  203. synth_ai/core/tracing_v3/storage/base.py +211 -0
  204. synth_ai/core/tracing_v3/storage/config.py +109 -0
  205. synth_ai/core/tracing_v3/storage/factory.py +39 -0
  206. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  207. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  208. synth_ai/core/tracing_v3/turso/models.py +470 -0
  209. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  210. synth_ai/core/tracing_v3/utils.py +108 -0
  211. synth_ai/core/urls.py +18 -0
  212. synth_ai/core/user_config.py +137 -0
  213. synth_ai/core/uvicorn.py +222 -0
  214. synth_ai/data/__init__.py +83 -0
  215. synth_ai/data/enums.py +122 -0
  216. synth_ai/data/rewards.py +249 -0
  217. synth_ai/data/traces.py +35 -0
  218. synth_ai/products/__init__.py +6 -0
  219. synth_ai/products/graph_evolve/__init__.py +45 -0
  220. synth_ai/products/graph_evolve/client.py +226 -0
  221. synth_ai/products/graph_evolve/config.py +591 -0
  222. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  223. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  224. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  225. synth_ai/products/graph_evolve/run.py +222 -0
  226. synth_ai/products/graph_gepa/__init__.py +23 -0
  227. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  228. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  229. synth_ai/sdk/__init__.py +129 -0
  230. synth_ai/sdk/api/__init__.py +1 -0
  231. synth_ai/sdk/api/eval/__init__.py +33 -0
  232. synth_ai/sdk/api/eval/job.py +732 -0
  233. synth_ai/sdk/api/models/supported.py +514 -0
  234. synth_ai/sdk/api/research_agent/__init__.py +296 -0
  235. synth_ai/sdk/api/train/__init__.py +85 -0
  236. synth_ai/sdk/api/train/builders.py +1076 -0
  237. synth_ai/sdk/api/train/cli.py +2196 -0
  238. synth_ai/sdk/api/train/config_finder.py +267 -0
  239. synth_ai/sdk/api/train/configs/__init__.py +67 -0
  240. synth_ai/sdk/api/train/configs/prompt_learning.py +1800 -0
  241. synth_ai/sdk/api/train/configs/rl.py +436 -0
  242. synth_ai/sdk/api/train/configs/sft.py +263 -0
  243. synth_ai/sdk/api/train/configs/shared.py +81 -0
  244. synth_ai/sdk/api/train/context_learning.py +312 -0
  245. synth_ai/sdk/api/train/env_resolver.py +418 -0
  246. synth_ai/sdk/api/train/graph_validators.py +216 -0
  247. synth_ai/sdk/api/train/graphgen.py +1102 -0
  248. synth_ai/sdk/api/train/graphgen_models.py +873 -0
  249. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  250. synth_ai/sdk/api/train/local_api.py +10 -0
  251. synth_ai/sdk/api/train/pollers.py +160 -0
  252. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  253. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  254. synth_ai/sdk/api/train/progress/events.py +326 -0
  255. synth_ai/sdk/api/train/progress/results.py +428 -0
  256. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  257. synth_ai/sdk/api/train/prompt_learning.py +800 -0
  258. synth_ai/sdk/api/train/rl.py +478 -0
  259. synth_ai/sdk/api/train/sft.py +398 -0
  260. synth_ai/sdk/api/train/summary.py +522 -0
  261. synth_ai/sdk/api/train/supported_algos.py +147 -0
  262. synth_ai/sdk/api/train/task_app.py +351 -0
  263. synth_ai/sdk/api/train/utils.py +279 -0
  264. synth_ai/sdk/api/train/validators.py +2424 -0
  265. synth_ai/sdk/graphs/__init__.py +15 -0
  266. synth_ai/sdk/graphs/completions.py +776 -0
  267. synth_ai/sdk/graphs/verifier_schemas.py +222 -0
  268. synth_ai/sdk/inference/__init__.py +6 -0
  269. synth_ai/sdk/inference/client.py +128 -0
  270. synth_ai/sdk/jobs/__init__.py +16 -0
  271. synth_ai/sdk/jobs/client.py +371 -0
  272. synth_ai/sdk/learning/__init__.py +99 -0
  273. synth_ai/sdk/learning/client.py +240 -0
  274. synth_ai/sdk/learning/context_learning_client.py +531 -0
  275. synth_ai/sdk/learning/context_learning_types.py +294 -0
  276. synth_ai/sdk/learning/ft_client.py +7 -0
  277. synth_ai/sdk/learning/health.py +49 -0
  278. synth_ai/sdk/learning/jobs.py +202 -0
  279. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  280. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  281. synth_ai/sdk/learning/prompt_learning_types.py +186 -0
  282. synth_ai/sdk/learning/rl/__init__.py +35 -0
  283. synth_ai/sdk/learning/rl/client.py +268 -0
  284. synth_ai/sdk/learning/rl/contracts.py +23 -0
  285. synth_ai/sdk/learning/rl/env_keys.py +166 -0
  286. synth_ai/sdk/learning/rl/secrets.py +13 -0
  287. synth_ai/sdk/learning/sft/client.py +95 -0
  288. synth_ai/sdk/learning/sft/config.py +270 -0
  289. synth_ai/sdk/learning/sft/data.py +698 -0
  290. synth_ai/sdk/learning/validators.py +52 -0
  291. synth_ai/sdk/localapi/__init__.py +40 -0
  292. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  293. synth_ai/sdk/localapi/client.py +10 -0
  294. synth_ai/sdk/localapi/contracts.py +10 -0
  295. synth_ai/sdk/localapi/helpers.py +519 -0
  296. synth_ai/sdk/localapi/rollouts.py +93 -0
  297. synth_ai/sdk/localapi/server.py +29 -0
  298. synth_ai/sdk/localapi/template.py +49 -0
  299. synth_ai/sdk/streaming/__init__.py +35 -0
  300. synth_ai/sdk/streaming/config.py +94 -0
  301. synth_ai/sdk/streaming/handlers.py +1997 -0
  302. synth_ai/sdk/streaming/streamer.py +708 -0
  303. synth_ai/sdk/streaming/types.py +112 -0
  304. synth_ai/sdk/task/__init__.py +164 -0
  305. synth_ai/sdk/task/apps/__init__.py +169 -0
  306. synth_ai/sdk/task/client.py +175 -0
  307. synth_ai/sdk/task/config.py +256 -0
  308. synth_ai/sdk/task/contracts.py +340 -0
  309. synth_ai/sdk/task/datasets.py +108 -0
  310. synth_ai/sdk/task/in_process.py +1200 -0
  311. synth_ai/sdk/task/in_process_runner.py +314 -0
  312. synth_ai/sdk/task/inference_api.py +299 -0
  313. synth_ai/sdk/task/proxy.py +287 -0
  314. synth_ai/sdk/task/rubrics/__init__.py +54 -0
  315. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  316. synth_ai/sdk/task/rubrics/strict.py +148 -0
  317. synth_ai/sdk/task/rubrics.py +219 -0
  318. synth_ai/sdk/task/server.py +640 -0
  319. synth_ai/sdk/task/trace_correlation_helpers.py +557 -0
  320. synth_ai/sdk/task/tracing_utils.py +95 -0
  321. synth_ai/sdk/task/validators.py +441 -0
  322. synth_ai/sdk/training/__init__.py +93 -0
  323. synth_ai/sdk/tunnels/__init__.py +118 -0
  324. synth_ai/sdk/tunnels/cleanup.py +83 -0
  325. synth_ai/sdk/tunnels/ports.py +120 -0
  326. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  327. synth_ai/utils/__init__.py +213 -0
  328. synth_ai-0.4.4.dist-info/METADATA +262 -0
  329. synth_ai-0.4.4.dist-info/RECORD +369 -0
  330. synth_ai-0.4.4.dist-info/top_level.txt +1 -0
  331. examples/__init__.py +0 -16
  332. examples/analyze_semantic_words.sh +0 -17
  333. examples/crafter_debug_render.py +0 -186
  334. examples/dev/qwen3_32b_qlora_4xh100.toml +0 -40
  335. examples/multi_step/configs/README_verilog_rl.md +0 -77
  336. examples/multi_step/configs/VERILOG_REWARDS.md +0 -90
  337. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +0 -183
  338. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +0 -35
  339. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +0 -36
  340. examples/multi_step/configs/crafter_rl_outcome.toml +0 -74
  341. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +0 -187
  342. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +0 -83
  343. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +0 -78
  344. examples/multi_step/configs/crafter_synth_backend.md +0 -40
  345. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +0 -31
  346. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +0 -33
  347. examples/multi_step/configs/verilog_rl_lora.toml +0 -190
  348. examples/multi_step/crafter_rl_lora.md +0 -70
  349. examples/multi_step/judges/crafter_backend_judge.py +0 -220
  350. examples/multi_step/judges/verilog_backend_judge.py +0 -234
  351. examples/multi_step/readme.md +0 -48
  352. examples/multi_step/sse_metrics_streaming_notes.md +0 -357
  353. examples/multi_step/task_app_config_notes.md +0 -494
  354. examples/multi_step/verilog_rl_lora.md +0 -218
  355. examples/qwen_coder/README.md +0 -102
  356. examples/qwen_coder/_shared.py +0 -113
  357. examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
  358. examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
  359. examples/qwen_coder/configs/coder_lora_small.toml +0 -58
  360. examples/qwen_coder/generate_dataset.py +0 -98
  361. examples/qwen_coder/infer_ft_smoke.py +0 -65
  362. examples/qwen_coder/infer_prod_proxy.py +0 -73
  363. examples/qwen_coder/infer_via_synth.py +0 -87
  364. examples/qwen_coder/scripts/infer_coder.sh +0 -19
  365. examples/qwen_coder/scripts/train_coder_30b.sh +0 -22
  366. examples/qwen_coder/sft_full_17b.py +0 -103
  367. examples/qwen_coder/sft_lora_30b.py +0 -110
  368. examples/qwen_coder/subset_jsonl.py +0 -39
  369. examples/qwen_coder/todos.md +0 -38
  370. examples/qwen_coder/validate_jsonl.py +0 -60
  371. examples/rl/README.md +0 -169
  372. examples/rl/download_dataset.py +0 -80
  373. examples/run_crafter_demo.sh +0 -10
  374. examples/sft/README.md +0 -139
  375. examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
  376. examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
  377. examples/sft/evaluate.py +0 -119
  378. examples/sft/export_dataset.py +0 -117
  379. examples/sft/generate_traces.py +0 -164
  380. examples/swe/__init__.py +0 -12
  381. examples/swe/task_app/README.md +0 -105
  382. examples/swe/task_app/__init__.py +0 -2
  383. examples/swe/task_app/grpo_swe_mini.py +0 -601
  384. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
  385. examples/swe/task_app/hosted/README.md +0 -173
  386. examples/swe/task_app/hosted/__init__.py +0 -5
  387. examples/swe/task_app/hosted/branching.py +0 -143
  388. examples/swe/task_app/hosted/environment_routes.py +0 -1289
  389. examples/swe/task_app/hosted/envs/__init__.py +0 -1
  390. examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
  391. examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
  392. examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
  393. examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
  394. examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
  395. examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
  396. examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
  397. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
  398. examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
  399. examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
  400. examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
  401. examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
  402. examples/swe/task_app/hosted/hosted_app.py +0 -204
  403. examples/swe/task_app/hosted/inference/__init__.py +0 -5
  404. examples/swe/task_app/hosted/inference/openai_client.py +0 -618
  405. examples/swe/task_app/hosted/main.py +0 -100
  406. examples/swe/task_app/hosted/policy_routes.py +0 -1079
  407. examples/swe/task_app/hosted/registry.py +0 -195
  408. examples/swe/task_app/hosted/rollout.py +0 -1911
  409. examples/swe/task_app/hosted/storage/__init__.py +0 -5
  410. examples/swe/task_app/hosted/storage/volume.py +0 -211
  411. examples/swe/task_app/hosted/test_agents.py +0 -161
  412. examples/swe/task_app/hosted/test_service.py +0 -136
  413. examples/swe/task_app/hosted/utils.py +0 -62
  414. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +0 -258
  415. examples/task_apps/TESTING.md +0 -275
  416. examples/task_apps/crafter/CREATE_SFT_DATASET.md +0 -273
  417. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +0 -152
  418. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +0 -174
  419. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +0 -268
  420. examples/task_apps/crafter/QUERY_EXAMPLES.md +0 -203
  421. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +0 -316
  422. examples/task_apps/crafter/__init__.py +0 -0
  423. examples/task_apps/crafter/eval_image_only_gpt4o.toml +0 -28
  424. examples/task_apps/crafter/eval_text_only_groq_llama.toml +0 -36
  425. examples/task_apps/crafter/filter_sft_dataset.toml +0 -16
  426. examples/task_apps/crafter/task_app/README.md +0 -42
  427. examples/task_apps/crafter/task_app/__init__.py +0 -5
  428. examples/task_apps/crafter/task_app/grpo_crafter.py +0 -973
  429. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +0 -146
  430. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +0 -173
  431. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +0 -5
  432. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +0 -143
  433. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +0 -1226
  434. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +0 -1
  435. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
  436. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
  437. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -532
  438. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -547
  439. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -123
  440. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
  441. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
  442. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +0 -204
  443. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +0 -5
  444. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +0 -704
  445. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +0 -100
  446. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +0 -1152
  447. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +0 -195
  448. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +0 -2160
  449. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +0 -5
  450. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +0 -211
  451. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +0 -161
  452. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +0 -136
  453. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +0 -218
  454. examples/task_apps/dev/pokemon_emerald/__init__.py +0 -2
  455. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +0 -811
  456. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +0 -120
  457. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +0 -160
  458. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +0 -155
  459. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +0 -69
  460. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +0 -96
  461. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +0 -1502
  462. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +0 -4
  463. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +0 -68
  464. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +0 -216
  465. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +0 -35
  466. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +0 -631
  467. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +0 -1544
  468. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +0 -1428
  469. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +0 -4848
  470. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +0 -41
  471. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +0 -298
  472. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +0 -95
  473. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +0 -204
  474. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  475. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +0 -2152
  476. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +0 -429
  477. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +0 -155
  478. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +0 -78
  479. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  480. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +0 -122
  481. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +0 -76
  482. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +0 -413
  483. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +0 -204
  484. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +0 -133
  485. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +0 -229
  486. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +0 -300
  487. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +0 -205
  488. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +0 -200
  489. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +0 -284
  490. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +0 -468
  491. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +0 -575
  492. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +0 -311
  493. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +0 -259
  494. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  495. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +0 -372
  496. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +0 -296
  497. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +0 -275
  498. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +0 -22
  499. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +0 -44
  500. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +0 -514
  501. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +0 -415
  502. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +0 -1763
  503. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +0 -33
  504. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +0 -106
  505. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +0 -334
  506. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +0 -1020
  507. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +0 -188
  508. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +0 -1481
  509. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +0 -862
  510. examples/task_apps/dev/pokemon_emerald/modal_app.py +0 -114
  511. examples/task_apps/dev/pokemon_emerald/task_app/README.md +0 -81
  512. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +0 -6
  513. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +0 -685
  514. examples/task_apps/enron/__init__.py +0 -1
  515. examples/task_apps/enron/eval_groq_qwen32.toml +0 -16
  516. examples/task_apps/enron/filter_sft.toml +0 -5
  517. examples/task_apps/enron/task_app/README.md +0 -14
  518. examples/task_apps/enron/task_app/__init__.py +0 -1
  519. examples/task_apps/enron/task_app/grpo_enron.py +0 -906
  520. examples/task_apps/enron/task_app/grpo_enron_task_app.py +0 -146
  521. examples/task_apps/enron/tests/__init__.py +0 -4
  522. examples/task_apps/enron/tests/conftest.py +0 -115
  523. examples/task_apps/enron/tests/integration/__init__.py +0 -4
  524. examples/task_apps/enron/tests/integration/test_enron_eval.py +0 -179
  525. examples/task_apps/enron/tests/integration/test_enron_rollout.py +0 -135
  526. examples/task_apps/enron/tests/unit/__init__.py +0 -4
  527. examples/task_apps/enron/tests/unit/test_enron_environment.py +0 -126
  528. examples/task_apps/math/README.md +0 -22
  529. examples/task_apps/math/__init__.py +0 -0
  530. examples/task_apps/math/math_single_step.py +0 -1000
  531. examples/task_apps/math/math_task_app.py +0 -115
  532. examples/task_apps/pokemon_battle/__init__.py +0 -2
  533. examples/task_apps/pokemon_battle/modal_app.py +0 -104
  534. examples/task_apps/pokemon_battle/task_app/README.md +0 -68
  535. examples/task_apps/pokemon_battle/task_app/__init__.py +0 -6
  536. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +0 -932
  537. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +0 -283
  538. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +0 -155
  539. examples/task_apps/pokemon_red/README.md +0 -357
  540. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +0 -415
  541. examples/task_apps/pokemon_red/__init__.py +0 -3
  542. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +0 -29
  543. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +0 -225
  544. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +0 -75
  545. examples/task_apps/pokemon_red/task_app.py +0 -799
  546. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +0 -193
  547. examples/task_apps/sokoban/README.md +0 -307
  548. examples/task_apps/sokoban/__init__.py +0 -3
  549. examples/task_apps/sokoban/eval_groq_qwen32.toml +0 -16
  550. examples/task_apps/sokoban/eval_openai_gpt5.toml +0 -16
  551. examples/task_apps/sokoban/filter_sft.toml +0 -5
  552. examples/task_apps/sokoban/task_app.py +0 -1058
  553. examples/task_apps/sokoban/tests/__init__.py +0 -4
  554. examples/task_apps/sokoban/tests/conftest.py +0 -113
  555. examples/task_apps/sokoban/tests/integration/__init__.py +0 -4
  556. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +0 -57
  557. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +0 -198
  558. examples/task_apps/sokoban/tests/unit/__init__.py +0 -4
  559. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +0 -114
  560. examples/task_apps/verilog/__init__.py +0 -1
  561. examples/task_apps/verilog/eval_groq_qwen32b.toml +0 -24
  562. examples/task_apps/verilog/filter_sft.toml +0 -5
  563. examples/task_apps/verilog/task_app/README.md +0 -12
  564. examples/task_apps/verilog/task_app/__init__.py +0 -1
  565. examples/task_apps/verilog/task_app/grpo_verilog.py +0 -1166
  566. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +0 -145
  567. examples/task_apps/verilog/tests/__init__.py +0 -4
  568. examples/task_apps/verilog/tests/conftest.py +0 -115
  569. examples/task_apps/verilog/tests/integration/__init__.py +0 -4
  570. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +0 -181
  571. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +0 -55
  572. examples/task_apps/verilog/tests/unit/__init__.py +0 -4
  573. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +0 -118
  574. examples/vlm/PROPOSAL.md +0 -53
  575. examples/vlm/README.md +0 -68
  576. examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
  577. examples/vlm/crafter_image_only_agent.py +0 -207
  578. examples/vlm/crafter_openai_vlm_agent.py +0 -277
  579. examples/vlm/filter_image_rows.py +0 -63
  580. examples/vlm/run_crafter_vlm_benchmark.py +0 -316
  581. examples/warming_up_to_rl/analyze_trace_db.py +0 -422
  582. examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
  583. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
  584. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
  585. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
  586. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
  587. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +0 -35
  588. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +0 -26
  589. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +0 -36
  590. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +0 -32
  591. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
  592. examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
  593. examples/warming_up_to_rl/export_trace_sft.py +0 -723
  594. examples/warming_up_to_rl/groq_test.py +0 -97
  595. examples/warming_up_to_rl/manage_secrets.py +0 -131
  596. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  597. examples/warming_up_to_rl/old/notes.md +0 -73
  598. examples/warming_up_to_rl/readme.md +0 -179
  599. examples/warming_up_to_rl/run_eval.py +0 -736
  600. examples/warming_up_to_rl/run_fft_and_save.py +0 -380
  601. examples/warming_up_to_rl/run_local_rollout.py +0 -239
  602. examples/warming_up_to_rl/run_local_rollout_modal.py +0 -248
  603. examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -405
  604. examples/warming_up_to_rl/run_local_rollout_traced.py +0 -477
  605. examples/warming_up_to_rl/run_rl_and_save.py +0 -124
  606. examples/warming_up_to_rl/run_rollout_remote.py +0 -156
  607. examples/workflows/__init__.py +0 -0
  608. examples/workflows/math_rl/__init__.py +0 -0
  609. examples/workflows/math_rl/configs/eval_base_qwen.toml +0 -15
  610. examples/workflows/math_rl/configs/eval_rl_qwen.toml +0 -11
  611. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +0 -35
  612. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +0 -74
  613. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +0 -35
  614. examples/workflows/math_rl/download_dataset.py +0 -80
  615. examples/workflows/math_rl/run_eval.py +0 -436
  616. examples/workflows/math_rl/run_rl_and_save.py +0 -111
  617. synth_ai/api/models/supported.py +0 -377
  618. synth_ai/api/train/__init__.py +0 -5
  619. synth_ai/api/train/builders.py +0 -351
  620. synth_ai/api/train/cli.py +0 -635
  621. synth_ai/api/train/config_finder.py +0 -228
  622. synth_ai/api/train/configs/__init__.py +0 -44
  623. synth_ai/api/train/configs/rl.py +0 -134
  624. synth_ai/api/train/configs/sft.py +0 -95
  625. synth_ai/api/train/configs/shared.py +0 -24
  626. synth_ai/api/train/env_resolver.py +0 -349
  627. synth_ai/api/train/pollers.py +0 -75
  628. synth_ai/api/train/supported_algos.py +0 -147
  629. synth_ai/api/train/task_app.py +0 -195
  630. synth_ai/api/train/utils.py +0 -225
  631. synth_ai/cli/_modal_wrapper.py +0 -29
  632. synth_ai/cli/_storage.py +0 -20
  633. synth_ai/cli/_typer_patch.py +0 -49
  634. synth_ai/cli/_validate_task_app.py +0 -11
  635. synth_ai/cli/balance.py +0 -216
  636. synth_ai/cli/calc.py +0 -84
  637. synth_ai/cli/demo.py +0 -165
  638. synth_ai/cli/legacy_root_backup.py +0 -468
  639. synth_ai/cli/man.py +0 -106
  640. synth_ai/cli/recent.py +0 -132
  641. synth_ai/cli/rl_demo.py +0 -254
  642. synth_ai/cli/status.py +0 -134
  643. synth_ai/cli/task_apps.py +0 -4523
  644. synth_ai/cli/traces.py +0 -164
  645. synth_ai/cli/tui.py +0 -57
  646. synth_ai/cli/watch.py +0 -506
  647. synth_ai/compound/cais.py +0 -0
  648. synth_ai/config/base_url.py +0 -107
  649. synth_ai/core/experiment.py +0 -13
  650. synth_ai/core/system.py +0 -15
  651. synth_ai/demo_registry.py +0 -295
  652. synth_ai/demos/core/__init__.py +0 -1
  653. synth_ai/demos/core/cli.py +0 -1718
  654. synth_ai/demos/demo_task_apps/core.py +0 -440
  655. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -184
  656. synth_ai/demos/demo_task_apps/math/config.toml +0 -74
  657. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  658. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
  659. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
  660. synth_ai/environments/__init__.py +0 -31
  661. synth_ai/environments/environment/__init__.py +0 -1
  662. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  663. synth_ai/environments/environment/artifacts/base.py +0 -52
  664. synth_ai/environments/environment/core.py +0 -67
  665. synth_ai/environments/environment/db/__init__.py +0 -1
  666. synth_ai/environments/environment/db/sqlite.py +0 -45
  667. synth_ai/environments/environment/registry.py +0 -233
  668. synth_ai/environments/environment/resources/sqlite.py +0 -45
  669. synth_ai/environments/environment/results.py +0 -1
  670. synth_ai/environments/environment/rewards/__init__.py +0 -1
  671. synth_ai/environments/environment/rewards/core.py +0 -29
  672. synth_ai/environments/environment/shared_engine.py +0 -26
  673. synth_ai/environments/environment/tools/__init__.py +0 -200
  674. synth_ai/environments/examples/__init__.py +0 -1
  675. synth_ai/environments/examples/bandit/__init__.py +0 -33
  676. synth_ai/environments/examples/bandit/engine.py +0 -302
  677. synth_ai/environments/examples/bandit/environment.py +0 -194
  678. synth_ai/environments/examples/bandit/taskset.py +0 -200
  679. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  680. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  681. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  682. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  683. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  684. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  685. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  686. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  687. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  688. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  689. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  690. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  691. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  692. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  693. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  694. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  695. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  696. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  697. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  698. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  699. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  700. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  701. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  702. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  703. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  704. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  705. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  706. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  707. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  708. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  709. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  710. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  711. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  712. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  713. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  714. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  715. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  716. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  717. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  718. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  719. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  720. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  721. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  722. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  723. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  724. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  725. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  726. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  727. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  728. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  729. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  730. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  731. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  732. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  733. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  734. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  735. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  736. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  737. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  738. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  739. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  740. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  741. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  742. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  743. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  744. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  745. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  746. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  747. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  748. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  749. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  750. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  751. synth_ai/environments/examples/crafter_classic/environment.py +0 -495
  752. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  753. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  754. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  755. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  756. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  757. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  758. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  759. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  760. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  761. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  762. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  763. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  764. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  765. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  766. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  767. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  768. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  769. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  770. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  771. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  772. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  773. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  774. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  775. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  776. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  777. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  778. synth_ai/environments/examples/enron/engine.py +0 -300
  779. synth_ai/environments/examples/enron/environment.py +0 -234
  780. synth_ai/environments/examples/enron/taskset.py +0 -112
  781. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  782. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  783. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  784. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  785. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  786. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  787. synth_ai/environments/examples/minigrid/engine.py +0 -589
  788. synth_ai/environments/examples/minigrid/environment.py +0 -274
  789. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  790. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  791. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  792. synth_ai/environments/examples/nethack/__init__.py +0 -7
  793. synth_ai/environments/examples/nethack/achievements.py +0 -337
  794. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  795. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  796. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  797. synth_ai/environments/examples/nethack/engine.py +0 -739
  798. synth_ai/environments/examples/nethack/environment.py +0 -256
  799. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  800. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  801. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  802. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  803. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  804. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  805. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  806. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  807. synth_ai/environments/examples/nethack/taskset.py +0 -323
  808. synth_ai/environments/examples/red/__init__.py +0 -7
  809. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  810. synth_ai/environments/examples/red/config_logging.py +0 -110
  811. synth_ai/environments/examples/red/engine.py +0 -721
  812. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  813. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -35
  814. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  815. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  816. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  817. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  818. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  819. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  820. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  821. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  822. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  823. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +0 -477
  824. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  825. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  826. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  827. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  828. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  829. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -172
  830. synth_ai/environments/examples/red/environment.py +0 -298
  831. synth_ai/environments/examples/red/taskset.py +0 -79
  832. synth_ai/environments/examples/red/units/__init__.py +0 -1
  833. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  834. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  835. synth_ai/environments/examples/sokoban/engine.py +0 -678
  836. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  837. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  838. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  839. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  840. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  841. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  842. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  843. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  844. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  845. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  846. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  847. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  848. synth_ai/environments/examples/sokoban/environment.py +0 -229
  849. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  850. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  851. synth_ai/environments/examples/sokoban/taskset.py +0 -544
  852. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  853. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  854. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  855. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  856. synth_ai/environments/examples/verilog/__init__.py +0 -10
  857. synth_ai/environments/examples/verilog/engine.py +0 -421
  858. synth_ai/environments/examples/verilog/environment.py +0 -350
  859. synth_ai/environments/examples/verilog/taskset.py +0 -420
  860. synth_ai/environments/examples/wordle/__init__.py +0 -29
  861. synth_ai/environments/examples/wordle/engine.py +0 -398
  862. synth_ai/environments/examples/wordle/environment.py +0 -159
  863. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  864. synth_ai/environments/examples/wordle/taskset.py +0 -230
  865. synth_ai/environments/reproducibility/core.py +0 -42
  866. synth_ai/environments/reproducibility/helpers.py +0 -0
  867. synth_ai/environments/reproducibility/tree.py +0 -363
  868. synth_ai/environments/service/app.py +0 -97
  869. synth_ai/environments/service/core_routes.py +0 -1021
  870. synth_ai/environments/service/external_registry.py +0 -56
  871. synth_ai/environments/service/registry.py +0 -9
  872. synth_ai/environments/stateful/__init__.py +0 -1
  873. synth_ai/environments/stateful/core.py +0 -163
  874. synth_ai/environments/stateful/engine.py +0 -21
  875. synth_ai/environments/stateful/state.py +0 -7
  876. synth_ai/environments/tasks/api.py +0 -19
  877. synth_ai/environments/tasks/core.py +0 -81
  878. synth_ai/environments/tasks/filters.py +0 -40
  879. synth_ai/environments/tasks/utils.py +0 -90
  880. synth_ai/environments/v0_observability/history.py +0 -3
  881. synth_ai/environments/v0_observability/log.py +0 -2
  882. synth_ai/evals/__init__.py +0 -15
  883. synth_ai/evals/base.py +0 -13
  884. synth_ai/evals/client.py +0 -82
  885. synth_ai/evals/types.py +0 -42
  886. synth_ai/handshake.py +0 -109
  887. synth_ai/http.py +0 -26
  888. synth_ai/http_client.py +0 -136
  889. synth_ai/inference/__init__.py +0 -5
  890. synth_ai/inference/client.py +0 -34
  891. synth_ai/jobs/client.py +0 -295
  892. synth_ai/judge_schemas.py +0 -127
  893. synth_ai/learning/__init__.py +0 -59
  894. synth_ai/learning/client.py +0 -241
  895. synth_ai/learning/ft_client.py +0 -7
  896. synth_ai/learning/health.py +0 -49
  897. synth_ai/learning/jobs.py +0 -201
  898. synth_ai/learning/rl/__init__.py +0 -39
  899. synth_ai/learning/rl/client.py +0 -267
  900. synth_ai/learning/rl/contracts.py +0 -27
  901. synth_ai/learning/rl/env_keys.py +0 -166
  902. synth_ai/learning/rl/secrets.py +0 -13
  903. synth_ai/learning/sft/client.py +0 -68
  904. synth_ai/learning/sft/config.py +0 -270
  905. synth_ai/learning/sft/data.py +0 -295
  906. synth_ai/learning/validators.py +0 -49
  907. synth_ai/lm/__init__.py +0 -25
  908. synth_ai/task/__init__.py +0 -121
  909. synth_ai/task/apps/__init__.py +0 -129
  910. synth_ai/task/client.py +0 -167
  911. synth_ai/task/config.py +0 -257
  912. synth_ai/task/contracts.py +0 -236
  913. synth_ai/task/datasets.py +0 -108
  914. synth_ai/task/proxy.py +0 -251
  915. synth_ai/task/rubrics/__init__.py +0 -56
  916. synth_ai/task/rubrics/loaders.py +0 -152
  917. synth_ai/task/rubrics/strict.py +0 -149
  918. synth_ai/task/server.py +0 -432
  919. synth_ai/task/trace_correlation_helpers.py +0 -315
  920. synth_ai/task/tracing_utils.py +0 -84
  921. synth_ai/task/validators.py +0 -418
  922. synth_ai/tracing_v3/__init__.py +0 -97
  923. synth_ai/tracing_v3/abstractions.py +0 -302
  924. synth_ai/tracing_v3/config.py +0 -84
  925. synth_ai/tracing_v3/db_config.py +0 -194
  926. synth_ai/tracing_v3/decorators.py +0 -398
  927. synth_ai/tracing_v3/llm_call_record_helpers.py +0 -391
  928. synth_ai/tracing_v3/migration_helper.py +0 -120
  929. synth_ai/tracing_v3/session_tracer.py +0 -540
  930. synth_ai/tracing_v3/storage/base.py +0 -210
  931. synth_ai/tracing_v3/storage/config.py +0 -75
  932. synth_ai/tracing_v3/storage/factory.py +0 -39
  933. synth_ai/tracing_v3/trace_utils.py +0 -317
  934. synth_ai/tracing_v3/turso/daemon.py +0 -151
  935. synth_ai/tracing_v3/turso/models.py +0 -469
  936. synth_ai/tracing_v3/turso/native_manager.py +0 -1209
  937. synth_ai/tracing_v3/utils.py +0 -108
  938. synth_ai/tui/__init__.py +0 -5
  939. synth_ai/tui/__main__.py +0 -13
  940. synth_ai/tui/cli/__init__.py +0 -1
  941. synth_ai/tui/cli/query_experiments.py +0 -164
  942. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  943. synth_ai/tui/dashboard.py +0 -906
  944. synth_ai/v0/api/__init__.py +0 -8
  945. synth_ai/v0/api/models/__init__.py +0 -8
  946. synth_ai/v0/api/models/supported.py +0 -8
  947. synth_ai/v0/config/__init__.py +0 -15
  948. synth_ai/v0/config/base_url.py +0 -12
  949. synth_ai/v0/lm/__init__.py +0 -51
  950. synth_ai/v0/lm/caching/__init__.py +0 -0
  951. synth_ai/v0/lm/caching/constants.py +0 -6
  952. synth_ai/v0/lm/caching/dbs.py +0 -0
  953. synth_ai/v0/lm/caching/ephemeral.py +0 -100
  954. synth_ai/v0/lm/caching/handler.py +0 -137
  955. synth_ai/v0/lm/caching/initialize.py +0 -11
  956. synth_ai/v0/lm/caching/persistent.py +0 -114
  957. synth_ai/v0/lm/config.py +0 -115
  958. synth_ai/v0/lm/constants.py +0 -32
  959. synth_ai/v0/lm/core/__init__.py +0 -8
  960. synth_ai/v0/lm/core/all.py +0 -73
  961. synth_ai/v0/lm/core/exceptions.py +0 -5
  962. synth_ai/v0/lm/core/main.py +0 -331
  963. synth_ai/v0/lm/core/main_v3.py +0 -594
  964. synth_ai/v0/lm/core/synth_models.py +0 -35
  965. synth_ai/v0/lm/core/vendor_clients.py +0 -190
  966. synth_ai/v0/lm/cost/__init__.py +0 -0
  967. synth_ai/v0/lm/cost/monitor.py +0 -1
  968. synth_ai/v0/lm/cost/statefulness.py +0 -1
  969. synth_ai/v0/lm/injection.py +0 -80
  970. synth_ai/v0/lm/overrides.py +0 -206
  971. synth_ai/v0/lm/provider_support/__init__.py +0 -8
  972. synth_ai/v0/lm/provider_support/anthropic.py +0 -972
  973. synth_ai/v0/lm/provider_support/openai.py +0 -1139
  974. synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
  975. synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
  976. synth_ai/v0/lm/structured_outputs/handler.py +0 -440
  977. synth_ai/v0/lm/structured_outputs/inject.py +0 -297
  978. synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
  979. synth_ai/v0/lm/tools/__init__.py +0 -3
  980. synth_ai/v0/lm/tools/base.py +0 -172
  981. synth_ai/v0/lm/unified_interface.py +0 -202
  982. synth_ai/v0/lm/vendors/__init__.py +0 -0
  983. synth_ai/v0/lm/vendors/base.py +0 -81
  984. synth_ai/v0/lm/vendors/core/__init__.py +0 -0
  985. synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
  986. synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
  987. synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
  988. synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
  989. synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
  990. synth_ai/v0/lm/vendors/local/__init__.py +0 -0
  991. synth_ai/v0/lm/vendors/local/ollama.py +0 -0
  992. synth_ai/v0/lm/vendors/openai_standard.py +0 -782
  993. synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
  994. synth_ai/v0/lm/vendors/retries.py +0 -22
  995. synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
  996. synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
  997. synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
  998. synth_ai/v0/lm/vendors/supported/grok.py +0 -75
  999. synth_ai/v0/lm/vendors/supported/groq.py +0 -16
  1000. synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
  1001. synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
  1002. synth_ai/v0/lm/vendors/supported/together.py +0 -11
  1003. synth_ai/v0/lm/vendors/synth_client.py +0 -835
  1004. synth_ai/v0/lm/warmup.py +0 -186
  1005. synth_ai/v0/tracing/__init__.py +0 -0
  1006. synth_ai/v0/tracing/abstractions.py +0 -224
  1007. synth_ai/v0/tracing/base_client.py +0 -91
  1008. synth_ai/v0/tracing/client_manager.py +0 -131
  1009. synth_ai/v0/tracing/config.py +0 -142
  1010. synth_ai/v0/tracing/context.py +0 -146
  1011. synth_ai/v0/tracing/decorators.py +0 -682
  1012. synth_ai/v0/tracing/events/__init__.py +0 -0
  1013. synth_ai/v0/tracing/events/manage.py +0 -147
  1014. synth_ai/v0/tracing/events/scope.py +0 -86
  1015. synth_ai/v0/tracing/events/store.py +0 -228
  1016. synth_ai/v0/tracing/immediate_client.py +0 -151
  1017. synth_ai/v0/tracing/local.py +0 -18
  1018. synth_ai/v0/tracing/log_client_base.py +0 -73
  1019. synth_ai/v0/tracing/retry_queue.py +0 -186
  1020. synth_ai/v0/tracing/trackers.py +0 -515
  1021. synth_ai/v0/tracing/upload.py +0 -409
  1022. synth_ai/v0/tracing/utils.py +0 -9
  1023. synth_ai/v0/tracing_v1/__init__.py +0 -16
  1024. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  1025. synth_ai/v0/tracing_v1/base_client.py +0 -91
  1026. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  1027. synth_ai/v0/tracing_v1/config.py +0 -142
  1028. synth_ai/v0/tracing_v1/context.py +0 -146
  1029. synth_ai/v0/tracing_v1/decorators.py +0 -703
  1030. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  1031. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  1032. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  1033. synth_ai/v0/tracing_v1/events/store.py +0 -228
  1034. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  1035. synth_ai/v0/tracing_v1/local.py +0 -18
  1036. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  1037. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  1038. synth_ai/v0/tracing_v1/trackers.py +0 -515
  1039. synth_ai/v0/tracing_v1/upload.py +0 -527
  1040. synth_ai/v0/tracing_v1/utils.py +0 -9
  1041. synth_ai/v0/tracing_v3/__init__.py +0 -10
  1042. synth_ai/v0/tracing_v3/abstractions.py +0 -3
  1043. synth_ai/v0/tracing_v3/decorators.py +0 -3
  1044. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
  1045. synth_ai/v0/tracing_v3/session_tracer.py +0 -3
  1046. synth_ai-0.2.14.dist-info/METADATA +0 -139
  1047. synth_ai-0.2.14.dist-info/RECORD +0 -762
  1048. synth_ai-0.2.14.dist-info/top_level.txt +0 -2
  1049. /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
  1050. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
  1051. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
  1052. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
  1053. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  1054. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
  1055. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
  1056. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
  1057. {examples/task_apps → synth_ai/core/apps}/__init__.py +0 -0
  1058. /synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +0 -0
  1059. /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
  1060. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  1061. /synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +0 -0
  1062. /synth_ai/{tracing_v3 → core/tracing_v3}/serialization.py +0 -0
  1063. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  1064. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  1065. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  1066. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +0 -0
  1067. /synth_ai/{tracing_v3 → core/tracing_v3}/turso/__init__.py +0 -0
  1068. /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
  1069. /synth_ai/{learning → sdk/learning}/config.py +0 -0
  1070. /synth_ai/{learning → sdk/learning}/constants.py +0 -0
  1071. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  1072. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  1073. /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
  1074. /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
  1075. /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
  1076. /synth_ai/{learning → sdk/learning}/sse.py +0 -0
  1077. /synth_ai/{task → sdk/task}/auth.py +0 -0
  1078. /synth_ai/{task → sdk/task}/errors.py +0 -0
  1079. /synth_ai/{task → sdk/task}/health.py +0 -0
  1080. /synth_ai/{task → sdk/task}/json.py +0 -0
  1081. /synth_ai/{task → sdk/task}/rubrics/models.py +0 -0
  1082. /synth_ai/{task → sdk/task}/rubrics/scoring.py +0 -0
  1083. /synth_ai/{task → sdk/task}/vendors.py +0 -0
  1084. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  1085. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  1086. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1997 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import json
5
+ import re
6
+ import time
7
+ from abc import ABC, abstractmethod
8
+ from collections import deque
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import Any, Callable
12
+
13
+ import click
14
+
15
+ from .types import StreamMessage, StreamType
16
+
17
+
18
+ def _mask_sensitive_urls(text: str) -> str:
19
+ """Mask S3/Wasabi URLs and sensitive paths in log messages.
20
+
21
+ Replaces full S3/Wasabi URLs with masked versions to prevent leaking
22
+ bucket names, paths, and infrastructure details in public SDK logs.
23
+
24
+ Examples:
25
+ s3://synth-artifacts/models/... -> s3://***/***/[masked]
26
+ Wasabi s3://bucket/path/file.tar.gz -> Wasabi s3://***/***/[masked]
27
+ """
28
+ if not text:
29
+ return text
30
+
31
+ # Pattern matches:
32
+ # - Optional "Wasabi " prefix
33
+ # - s3:// or http(s):// scheme
34
+ # - Any bucket/host
35
+ # - Any path
36
+ # - Common model file extensions
37
+ pattern = r'(Wasabi\s+)?((s3|https?)://[^\s]+\.(tar\.gz|zip|pt|pth|safetensors|ckpt|bin))'
38
+
39
+ def replace_url(match: re.Match) -> str:
40
+ prefix = match.group(1) or "" # "Wasabi " or empty
41
+ url = match.group(2)
42
+ # Extract just the filename
43
+ filename = url.split("/")[-1] if "/" in url else "file"
44
+ return f'{prefix}s3://***/***/[{filename}]'
45
+
46
+ return re.sub(pattern, replace_url, text, flags=re.IGNORECASE)
47
+
48
+
49
class StreamHandler(ABC):
    """Abstract consumer of ``StreamMessage`` objects emitted by the streamer.

    Subclasses must implement :meth:`handle`. :meth:`should_handle` and
    :meth:`flush` ship with permissive defaults and may be overridden.
    """

    @abstractmethod
    def handle(self, message: StreamMessage) -> None:
        """Consume a single message produced by the streamer."""

    def should_handle(self, message: StreamMessage) -> bool:  # pragma: no cover - trivial
        """Tell whether ``message`` should be processed; accepts everything by default."""
        return True

    def flush(self) -> None:  # pragma: no cover - optional
        """Flush buffered output; the base implementation has nothing to flush."""
63
+
64
+
65
class CLIHandler(StreamHandler):
    """Print stream messages to the terminal as timestamped lines.

    Each message is rendered with ``click.echo``. Status, metric and timeline
    messages produce a single line; event messages additionally print error
    details and pretty-print the payloads of a few prompt-learning events.
    """

    def __init__(
        self,
        *,
        hidden_event_types: set[str] | None = None,
        hidden_event_substrings: set[str] | None = None,
    ) -> None:
        """Configure which events are suppressed.

        Args:
            hidden_event_types: Event ``type`` values that are never printed.
            hidden_event_substrings: Case-insensitive substrings; an event is
                dropped when its type, message or JSON-encoded ``data``
                contains any of them.
        """
        self._hidden_event_types = set(hidden_event_types or set())
        self._hidden_event_substrings = {s.lower() for s in (hidden_event_substrings or set())}

    def handle(self, message: StreamMessage) -> None:
        """Render ``message`` according to its stream type."""
        if not self.should_handle(message):
            return

        timestamp = datetime.now().strftime("%H:%M:%S")
        if message.stream_type is StreamType.STATUS:
            status = str(message.data.get("status") or message.data.get("state") or "unknown")
            click.echo(f"[{timestamp}] status={status}")
        elif message.stream_type is StreamType.EVENTS:
            self._echo_event(message, timestamp)
        elif message.stream_type is StreamType.METRICS:
            self._echo_metric(message, timestamp)
        elif message.stream_type is StreamType.TIMELINE:
            phase = message.data.get("phase", "phase")
            click.echo(f"[{timestamp}] timeline={phase}")

    def _echo_event(self, message: StreamMessage, timestamp: str) -> None:
        """Print one event line plus any event-specific detail sections."""
        event_type = message.data.get("type", "event")
        if event_type in self._hidden_event_types:
            return
        level = message.data.get("level")
        msg = message.data.get("message") or ""
        # Evaluate substring filters against lower-cased concatenated text
        if self._hidden_event_substrings:
            blob = " ".join(
                [
                    str(event_type or ""),
                    str(msg),
                    # default=str keeps the filter from crashing on payloads
                    # that contain values json cannot encode natively.
                    json.dumps(message.data.get("data", ""), default=str),
                ]
            ).lower()
            if any(sub in blob for sub in self._hidden_event_substrings):
                return
        prefix = f"[{timestamp}] [{message.seq}] {event_type}"
        if level:
            prefix += f" ({level})"
        # Mask sensitive URLs before displaying; str() guards non-string messages.
        sanitized_msg = _mask_sensitive_urls(str(msg))

        payload = message.data.get("data")
        data = payload if isinstance(payload, dict) else {}

        # The event type may be missing or not a string, so test before endswith().
        is_failure = isinstance(event_type, str) and event_type.endswith(".failed")
        if level == "error" or is_failure:
            # For error events, show full details including underlying errors
            click.echo(f"{prefix}: {sanitized_msg}")
            self._echo_error_details(data, sanitized_msg)
        else:
            click.echo(f"{prefix}: {sanitized_msg}".rstrip(": "))

        if not data:
            return
        if event_type == "prompt.learning.mipro.complete":
            self._echo_best_prompt(data)
        elif event_type == "mipro.topk.evaluated":
            self._echo_topk_candidate(data)

    @staticmethod
    def _echo_error_details(data: dict[str, Any], sanitized_msg: str) -> None:
        """Print the underlying error and the tail of a traceback found in ``data``."""
        error_detail = data.get("detail") or data.get("error") or data.get("error_detail")
        if error_detail:
            # Mask first so the comparison is like-for-like with the masked
            # message and the detail never prints an unmasked URL.
            detail_text = _mask_sensitive_urls(str(error_detail))
            if detail_text != sanitized_msg:
                click.echo(f" Error details: {detail_text}")
        traceback_info = data.get("traceback") or data.get("stack")
        if traceback_info:
            # Show last few lines of traceback (most relevant)
            for line in str(traceback_info).split("\n")[-5:]:
                if line.strip():
                    click.echo(f" {_mask_sensitive_urls(line)}")

    @staticmethod
    def _echo_best_prompt(data: dict[str, Any]) -> None:
        """Print the sections of ``data["best_prompt"]`` when present and well-formed."""
        best_prompt = data.get("best_prompt")
        if not isinstance(best_prompt, dict):
            return
        sections = best_prompt.get("sections")
        if not isinstance(sections, list) or not sections:
            return
        click.echo(" --- BEST PROMPT ---")
        for section in sections:
            if not isinstance(section, dict):
                continue
            # str() so a non-string role (e.g. None) cannot break .upper().
            role = str(section.get("role", "unknown")).upper()
            name = section.get("name")
            header = f" [{role}]"
            if name:
                header += f" {name}"
            click.echo(header)
            content = section.get("content", "")
            if isinstance(content, str) and content:
                click.echo(f" {content}")
        click.echo(" -------------------")

    @staticmethod
    def _echo_topk_candidate(data: dict[str, Any]) -> None:
        """Print rank, scores, instruction and per-stage details of a top-k candidate."""
        rank = data.get("rank")
        train_score = data.get("train_score")
        test_score = data.get("test_score")
        instruction_text = data.get("instruction_text", "")
        demo_indices = data.get("demo_indices", [])
        lift_abs = data.get("lift_absolute")
        lift_pct = data.get("lift_percent")
        stage_payloads = data.get("stage_payloads", {})

        details: list[str] = []
        if rank is not None:
            details.append(f"Rank {rank}")
        if isinstance(train_score, int | float):
            train_score_float = float(train_score)
            details.append(f"train={train_score_float:.3f} ({train_score_float*100:.1f}%)")
        if isinstance(test_score, int | float):
            test_score_float = float(test_score)
            details.append(f"test={test_score_float:.3f} ({test_score_float*100:.1f}%)")
        if isinstance(lift_abs, int | float) and isinstance(lift_pct, int | float):
            details.append(f"lift={lift_abs:+.3f} ({lift_pct:+.1f}%)")
        if not details:
            # Nothing to summarise, so the whole candidate block is skipped.
            return

        click.echo(" --- TOP-K CANDIDATE ---")
        click.echo(f" {' | '.join(details)}")
        if isinstance(instruction_text, str) and instruction_text.strip():
            snippet = instruction_text.strip()
            click.echo(f" Instruction: {snippet}")
        if isinstance(demo_indices, list) and demo_indices:
            click.echo(f" Demo indices: {demo_indices}")

        # Display per-stage information if available
        if isinstance(stage_payloads, dict) and stage_payloads:
            click.echo(" Per-stage breakdown:")
            for stage_id, payload in stage_payloads.items():
                if isinstance(payload, dict):
                    module_id = payload.get("module_id", stage_id)
                    instr_ids = payload.get("instruction_indices", [])
                    demo_ids = payload.get("demo_indices", [])
                    click.echo(f" [{module_id}/{stage_id}] instr_ids={instr_ids} demo_ids={demo_ids}")

        seed_scores = data.get("test_seed_scores")
        if isinstance(seed_scores, list) and seed_scores:
            formatted_scores = ", ".join(
                f"{item.get('seed')}: {item.get('score'):.2f}"
                for item in seed_scores
                if isinstance(item, dict)
                and isinstance(item.get("seed"), int)
                and isinstance(item.get("score"), int | float)
            )
            if formatted_scores:
                click.echo(f" Test per-seed: {formatted_scores}")
        click.echo(" ----------------------")

    @staticmethod
    def _echo_metric(message: StreamMessage, timestamp: str) -> None:
        """Print one metric line, with step and sample count when available."""
        name = message.data.get("name")
        value = message.data.get("value")
        step = message.data.get("step")
        data = message.data.get("data", {})

        # Numeric values get fixed precision; anything else is printed verbatim.
        if isinstance(value, int | float):
            metric_str = f"[{timestamp}] [metric] {name}={value:.4f}"
        else:
            metric_str = f"[{timestamp}] [metric] {name}={value}"
        if step is not None:
            metric_str += f" (step={step})"

        # Add any additional context from data field
        if isinstance(data, dict):
            n = data.get("n")
            if n is not None:
                metric_str += f" n={n}"

        click.echo(metric_str)
226
+
227
+
228
class JSONHandler(StreamHandler):
    """Emit messages as JSON records suitable for machine parsing.

    Records are appended to ``output_file`` when one was given; otherwise they
    are printed with ``click.echo``.
    """

    def __init__(self, output_file: str | None = None, *, indent: int | None = None) -> None:
        """Store the destination and formatting options.

        Args:
            output_file: Path of the file to append to (``~`` is expanded), or
                ``None``/empty to print records instead.
            indent: Passed to ``json.dumps``; ``None`` yields one record per line.
        """
        self.output_file = Path(output_file).expanduser() if output_file else None
        self._indent = indent

    def handle(self, message: StreamMessage) -> None:
        """Serialize ``message`` and write it to the file or the terminal."""
        if not self.should_handle(message):
            return

        payload: dict[str, Any] = {
            "stream_type": message.stream_type.name,
            "timestamp": message.timestamp,
            "job_id": message.job_id,
            "data": message.data,
        }
        # Optional fields are only emitted when the message carries them.
        if message.seq is not None:
            payload["seq"] = message.seq
        if message.step is not None:
            payload["step"] = message.step
        if message.phase is not None:
            payload["phase"] = message.phase

        line = json.dumps(payload, indent=self._indent)
        if self.output_file:
            with self.output_file.open("a", encoding="utf-8") as fh:
                # Always terminate the record: json.dumps never ends with a
                # newline, so indented records would otherwise fuse as "}{".
                fh.write(line + "\n")
        else:
            click.echo(line)

    def flush(self) -> None:
        """Nothing is buffered: every record is written as it is handled."""
        return None
263
+
264
+
265
class CallbackHandler(StreamHandler):
    """Forward each message's ``data`` to the user callback registered for its stream type."""

    def __init__(
        self,
        *,
        on_status: Callable[[dict[str, Any]], None] | None = None,
        on_event: Callable[[dict[str, Any]], None] | None = None,
        on_metric: Callable[[dict[str, Any]], None] | None = None,
        on_timeline: Callable[[dict[str, Any]], None] | None = None,
    ) -> None:
        """Remember the optional per-stream callbacks; any of them may be omitted."""
        self._on_status = on_status
        self._on_event = on_event
        self._on_metric = on_metric
        self._on_timeline = on_timeline

    def handle(self, message: StreamMessage) -> None:
        """Invoke the callback matching ``message.stream_type``, if one was supplied."""
        if not self.should_handle(message):
            return

        routes = (
            (StreamType.STATUS, self._on_status),
            (StreamType.EVENTS, self._on_event),
            (StreamType.METRICS, self._on_metric),
            (StreamType.TIMELINE, self._on_timeline),
        )
        for stream_kind, callback in routes:
            if message.stream_type is not stream_kind:
                continue
            # A stream type without a registered callback is silently ignored.
            if callback:
                callback(message.data)
            return
293
+
294
+
295
class BufferedHandler(StreamHandler):
    """Collect messages and hand them to :meth:`process_batch` in batches.

    A batch is emitted when the buffer reaches ``max_buffer_size`` messages or
    when at least ``flush_interval`` seconds have passed since the last flush.
    The interval is only checked when a message arrives.
    """

    def __init__(self, *, flush_interval: float = 5.0, max_buffer_size: int = 100) -> None:
        """Store the batching thresholds and start with an empty buffer.

        Args:
            flush_interval: Seconds after which an incoming message triggers a flush.
            max_buffer_size: Number of buffered messages that triggers a flush.
        """
        self.flush_interval = flush_interval
        self.max_buffer_size = max_buffer_size
        self._buffer: list[StreamMessage] = []
        self._last_flush = time.time()

    def handle(self, message: StreamMessage) -> None:
        """Buffer ``message`` and flush when a size or time threshold is reached."""
        if not self.should_handle(message):
            return

        self._buffer.append(message)
        now = time.time()
        if len(self._buffer) >= self.max_buffer_size or now - self._last_flush >= self.flush_interval:
            self.flush()

    def flush(self) -> None:
        """Deliver all buffered messages to :meth:`process_batch`.

        Does nothing when the buffer is empty. If ``process_batch`` raises, the
        messages stay buffered and the exception propagates.
        """
        if not self._buffer:
            return
        # Pass a snapshot, not the live buffer: a subclass may keep the list,
        # and it must not be emptied behind its back.
        batch = list(self._buffer)
        self.process_batch(batch)
        # Remove only what was delivered, so messages appended while the batch
        # was being processed are kept for the next flush.
        del self._buffer[: len(batch)]
        self._last_flush = time.time()

    def process_batch(self, messages: list[StreamMessage]) -> None:  # pragma: no cover - abstract
        """Override to define how buffered messages should be processed."""
322
+
323
+
324
class IntegrationTestHandler(StreamHandler):
    """Record every handled message in ``messages`` for later assertions."""

    def __init__(self) -> None:
        """Start with an empty message log."""
        self.messages: list[StreamMessage] = []

    def handle(self, message: StreamMessage) -> None:
        """Append ``message`` to the log."""
        recorded = self.messages
        recorded.append(message)

    def clear(self) -> None:
        """Empty the log in place, keeping the same list object."""
        del self.messages[:]
335
+
336
+
337
class GraphGenHandler(StreamHandler):
    """Route Graph Opt job streams to a delegate handler.

    Graph Opt jobs relay events from child jobs (GEPA, MIPRO, RL, SFT, ...).
    This handler infers the child job type from event names, optionally hides
    verbose prompt-learning events, and forwards everything else to a delegate.
    The delegate is taken from ``child_handler``, else built by
    ``child_handler_factory``, else chosen automatically: a
    ``PromptLearningHandler`` for prompt-learning style jobs (and while the type
    is still unknown) and a plain ``CLIHandler`` otherwise.
    """

    # Prompt-learning events that are always shown, even when verbose filtering is on.
    _IMPORTANT_EVENTS = frozenset(
        {
            "prompt.learning.created",
            "prompt.learning.gepa.start",
            "prompt.learning.gepa.complete",
            "prompt.learning.mipro.job.started",
            "prompt.learning.mipro.optimization.exhausted",
            "prompt.learning.trial.results",
            "prompt.learning.progress",
            "prompt.learning.gepa.new_best",
            "prompt.learning.validation.summary",
            "prompt.learning.candidate.evaluated",
            "prompt.learning.candidate.evaluation.started",
            # GraphGen/graph_evolve important events
            "graph_evolve.job_started",
            "graph_evolve.generation_started",
            "graph_evolve.generation_completed",
            "graph_evolve.candidate_evaluated",
            "graph_evolve.archive_updated",
            "graph_evolve.job_completed",
            "graph_evolve.job_failed",
        }
    )
    # Substrings (matched against the lower-cased event type) marking noisy events.
    _VERBOSE_PATTERNS = (
        "gepa.transformation.proposed",
        "gepa.proposal.scored",
        "prompt.learning.proposal.scored",
        "mipro.tpe.update",
        "prompt.learning.stream.connected",
    )
    # Substrings identifying prompt-learning style events; only those are filtered.
    _PROMPT_LEARNING_KEYS = ("prompt.learning", "gepa", "mipro")

    def __init__(
        self,
        *,
        child_handler: StreamHandler | None = None,
        child_handler_factory: Callable[[str | None], StreamHandler | None] | None = None,
        show_trial_results: bool = True,
        show_transformations: bool = False,
        show_validation: bool = True,
        filter_verbose_events: bool = True,
        wrap_child_events: bool = True,
    ) -> None:
        """Store the delegate configuration.

        Args:
            child_handler: Delegate to use as is.
            child_handler_factory: Called with the detected child job type to
                build a delegate; may return ``None`` to request the default.
            show_trial_results: Forwarded to the default ``PromptLearningHandler``.
            show_transformations: Forwarded to the default ``PromptLearningHandler``.
            show_validation: Forwarded to the default ``PromptLearningHandler``.
            filter_verbose_events: Hide verbose prompt-learning events.
            wrap_child_events: When ``False``, skip filtering and transformation
                and pass events straight through.
        """
        # Either delegate source is optional.
        self.child_handler = child_handler
        self._child_handler_factory = child_handler_factory

        # Settings applied to the automatically created prompt-learning delegate.
        self._pl_show_trial_results = show_trial_results
        self._pl_show_transformations = show_transformations
        self._pl_show_validation = show_validation

        self.filter_verbose_events = filter_verbose_events
        self.wrap_child_events = wrap_child_events

        # Child job type inferred from events (gepa/mipro/rl/sft/...), once known.
        self.child_job_type: str | None = None
        # True while the current delegate is one this handler picked by itself.
        self._delegate_auto_created = False

    def handle(self, message: StreamMessage) -> None:
        """Detect the child type, filter and transform events, then delegate."""
        if not self.should_handle(message):
            return

        if message.stream_type is StreamType.EVENTS:
            self._detect_child_job_type(message)
            self._maybe_reset_delegate_for_child_type()

            if self.wrap_child_events:
                if self.filter_verbose_events and self._should_filter_event(message):
                    return
                message = self._transform_event_message(message)

        delegate = self._get_child_handler()
        if delegate:
            delegate.handle(message)

    def _get_child_handler(self) -> StreamHandler:
        """Return the current delegate, creating and caching one when needed."""
        existing = self.child_handler
        if existing:
            return existing

        factory = self._child_handler_factory
        delegate: StreamHandler | None = factory(self.child_job_type) if factory else None

        if delegate is None:
            # Fall back to a default that suits the detected child job type.
            if self._is_prompt_learning_type(self.child_job_type):
                delegate = PromptLearningHandler(
                    show_trial_results=self._pl_show_trial_results,
                    show_transformations=self._pl_show_transformations,
                    show_validation=self._pl_show_validation,
                )
            else:
                delegate = CLIHandler()

        self.child_handler = delegate
        # A delegate always exists here, so only the factory's absence matters.
        self._delegate_auto_created = factory is None
        return delegate

    def _detect_child_job_type(self, message: StreamMessage) -> None:
        """Infer the child job type from the event type; the first detection sticks."""
        if self.child_job_type:
            return

        event_type = str(message.data.get("type") or "").lower()
        if not event_type:
            return

        if event_type.startswith("graph_evolve."):
            detected = "graph_evolve"
        elif "mipro" in event_type:
            detected = "mipro"
        elif "gepa" in event_type or event_type.startswith("prompt.learning"):
            detected = "prompt_learning"
        elif event_type.startswith("rl.") or ".rl." in event_type:
            detected = "rl"
        elif event_type.startswith("sft.") or ".sft." in event_type:
            detected = "sft"
        else:
            # Use the leading dotted segment as a hint (e.g., "graphgen.child_type").
            detected = event_type.partition(".")[0]
        self.child_job_type = detected

    def _maybe_reset_delegate_for_child_type(self) -> None:
        """Drop an auto-created delegate that no longer fits the detected child type."""
        if not (self.child_handler and self._delegate_auto_created):
            return

        wants_prompt_learning = self._is_prompt_learning_type(self.child_job_type)
        has_prompt_learning_handler = isinstance(self.child_handler, PromptLearningHandler)

        # A mismatch in either direction discards the delegate; the next
        # _get_child_handler() call builds a fitting one.
        if wants_prompt_learning != has_prompt_learning_handler:
            self.child_handler = None
            self._delegate_auto_created = False

    def _should_filter_event(self, message: StreamMessage) -> bool:
        """Return True when the event is verbose prompt-learning noise to hide."""
        event_type = message.data.get("type", "") or ""
        lowered = event_type.lower()

        # graph_evolve events are never hidden - they matter for GraphGen jobs.
        if event_type.startswith("graph_evolve."):
            return False

        # Events of other job types are left untouched.
        if not any(key in lowered for key in self._PROMPT_LEARNING_KEYS):
            return False

        if event_type in self._IMPORTANT_EVENTS:
            return False

        return any(pattern in lowered for pattern in self._VERBOSE_PATTERNS)

    def _transform_event_message(self, message: StreamMessage) -> StreamMessage:
        """Adapt an event for the Graph Opt context (currently returns it unchanged)."""
        return message

    def flush(self) -> None:
        """Flush the delegate, ignoring any error it raises."""
        delegate = self.child_handler
        if delegate and hasattr(delegate, "flush"):
            with contextlib.suppress(Exception):
                delegate.flush()

    @staticmethod
    def _is_prompt_learning_type(job_type: str | None) -> bool:
        """Return True for prompt-learning job types and for a still-unknown type."""
        return job_type is None or job_type in ("gepa", "mipro", "prompt_learning", "prompt-learning")
520
+
521
+
522
class LossCurveHandler(StreamHandler):
    """Render a live-updating sparkline of one metric inside a Rich panel.

    The panel title shows the metric name and the latest status or timeline
    phase; the body shows the sparkline, the latest value and the last event.
    """

    # Glyph drawn for NaN/inf points, which cannot be placed on the scale.
    _NON_FINITE_GLYPH = "×"

    def __init__(
        self,
        *,
        metric_name: str = "train.loss",
        max_points: int = 200,
        width: int = 60,
        console: Any | None = None,
        live: Any | None = None,
    ) -> None:
        """Set up the Rich console and live display.

        Args:
            metric_name: Name of the metric to plot; other metrics are ignored.
            max_points: Maximum number of points kept in memory.
            width: Maximum number of points drawn in the sparkline.
            console: Rich console to draw on; a new one is created when omitted.
            live: Rich live display to update; a new one is created when omitted.

        Raises:
            RuntimeError: If the optional ``rich`` package is not installed.
        """
        try:
            from rich.console import Console
            from rich.live import Live
            from rich.panel import Panel
            from rich.text import Text
        except ImportError as exc:  # pragma: no cover - optional dependency guard
            raise RuntimeError(
                "LossCurveHandler requires the 'rich' package. Install synth-ai[all] or rich>=13."
            ) from exc

        self.metric_name = metric_name
        self.max_points = max_points
        self.width = width

        self._console_class = Console
        self._panel_class = Panel
        self._text_class = Text

        self._console = console or Console()
        self._live = live or Live(console=self._console, transient=False, refresh_per_second=8)
        self._started = False

        self._steps: list[int] = []
        self._values: list[float] = []
        self._status = "waiting"
        self._last_event: str | None = None

    def handle(self, message: StreamMessage) -> None:
        """Update status, last event or the metric series, redrawing on change."""
        updated = False

        if message.stream_type is StreamType.STATUS:
            status = str(message.data.get("status") or message.data.get("state") or "unknown")
            if status != self._status:
                self._status = status
                updated = True

        elif message.stream_type is StreamType.EVENTS:
            event_type = message.data.get("type", "")
            msg = message.data.get("message") or ""
            level = message.data.get("level")
            summary = f"{event_type}".strip()
            if level:
                summary += f" ({level})"
            if msg:
                summary += f": {msg}"
            if summary != self._last_event:
                self._last_event = summary
                updated = True

        elif message.stream_type is StreamType.METRICS:
            if message.data.get("name") != self.metric_name:
                return
            value = message.data.get("value")
            step = message.data.get("step")
            if not isinstance(value, int | float) or not isinstance(step, int):
                return
            self._values.append(float(value))
            self._steps.append(step)
            # Keep only the most recent max_points samples.
            if len(self._values) > self.max_points:
                self._values = self._values[-self.max_points :]
                self._steps = self._steps[-self.max_points :]
            updated = True

        elif message.stream_type is StreamType.TIMELINE:
            phase = message.data.get("phase")
            if phase:
                self._status = str(phase)
                updated = True

        if updated:
            self._refresh()

    def flush(self) -> None:
        """Stop the live display if it is running; errors while stopping are ignored."""
        if self._started:
            with contextlib.suppress(Exception):
                self._live.stop()
            self._started = False

    def _ensure_live(self) -> None:
        """Start the live display on first use; errors while starting are ignored."""
        if not self._started:
            with contextlib.suppress(Exception):
                self._live.start()
            self._started = True

    def _refresh(self) -> None:
        """Redraw the panel with the current title and body."""
        self._ensure_live()
        body = self._build_body()
        title = f"{self.metric_name} | status={self._status}"
        self._live.update(self._panel_class(body, title=title, border_style="cyan"))

    def _build_body(self) -> Any:
        """Return the panel body: a placeholder text or the chart with latest value."""
        if not self._values:
            return self._text_class("Waiting for metrics…", style="yellow")

        chart = self._render_sparkline()
        last_value = self._values[-1]
        lines = [
            chart,
            f"latest: {last_value:.4f} (step {self._steps[-1]})",
        ]
        if self._last_event:
            lines.append(f"event: {self._last_event}")
        return "\n".join(lines)

    def _render_sparkline(self) -> str:
        """Return ``"<min> <sparkline> <max>"`` for the most recent points.

        NaN and infinite values (for example a diverged loss) are drawn with a
        placeholder glyph and excluded from the min/max scale, so they cannot
        crash the renderer.
        """
        import math

        blocks = "▁▂▃▄▅▆▇█"
        tail_len = min(self.width, len(self._values))
        tail = self._values[-tail_len:]

        finite = [v for v in tail if math.isfinite(v)]
        if not finite:
            # No usable point to scale against.
            return f"nan {self._NON_FINITE_GLYPH * len(tail)} nan"

        minimum = min(finite)
        maximum = max(finite)
        if maximum == minimum and len(finite) == len(tail):
            level = blocks[0]
            return f"{minimum:.2f} {level * tail_len} {maximum:.2f}"

        # A flat finite series mixed with non-finite points has zero span, so
        # every finite point maps to the lowest block.
        scale = (len(blocks) - 1) / (maximum - minimum) if maximum != minimum else 0.0
        chars = "".join(
            blocks[int((v - minimum) * scale + 0.5)] if math.isfinite(v) else self._NON_FINITE_GLYPH
            for v in tail
        )
        return f"{minimum:.2f} {chars} {maximum:.2f}"

    def __del__(self) -> None:  # pragma: no cover - defensive cleanup
        with contextlib.suppress(Exception):
            self.flush()
654
+
655
class RichHandler(StreamHandler):
    """Rich powered handler with live progress and metrics table.

    Status messages update the progress-bar description, event messages are
    appended to a bounded "recent events" log (and drive the bar when they
    carry step information), metric messages fill a "Latest Metrics" table,
    and timeline messages are logged for phases other than training/running.
    """

    def __init__(
        self,
        *,
        event_log_size: int = 20,
        console: Any | None = None,
    ) -> None:
        """Create the progress display.

        Args:
            event_log_size: Maximum number of recent event lines retained.
            console: Console object to render to. When omitted (or falsy) a
                new ``rich.console.Console`` is created.

        Raises:
            RuntimeError: If the optional ``rich`` package is not installed.
        """
        try:
            from rich.console import Console
            from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn
            from rich.table import Table
        except ImportError as exc:  # pragma: no cover - requires optional dependency
            raise RuntimeError(
                "RichHandler requires the 'rich' package. Install synth-ai[all] or rich>=13."
            ) from exc

        # Keep the lazily imported classes around so later methods can build
        # rich objects without importing again.
        self._console_class = Console
        self._progress_class = Progress
        self._spinner_column = SpinnerColumn
        self._text_column = TextColumn
        self._bar_column = BarColumn
        self._table_class = Table

        self._console = console or Console()
        self._progress = Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            # NOTE(review): the completed/total column is only populated when
            # a console was passed in explicitly - confirm this is intended.
            TextColumn("{task.completed}/{task.total}" if console else ""),
            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
            transient=False,
            console=self._console,
        )
        self._task_id: int | None = None
        self._current_status = "unknown"
        self._latest_metrics: dict[str, Any] = {}
        self._event_log: deque[str] = deque(maxlen=event_log_size)
        self._progress_started = False

    def handle(self, message: StreamMessage) -> None:
        """Update the display for one stream message.

        Messages rejected by ``should_handle`` are ignored. Every accepted
        STATUS, EVENTS, METRICS or TIMELINE message ends with a re-render of
        the summary.
        """
        if not self.should_handle(message):
            return

        stream_type = message.stream_type
        payload = message.data

        if stream_type is StreamType.STATUS:
            # Fall back to "unknown" (as the sibling handlers do) rather than
            # rendering the literal string "None" when neither key is present.
            status = payload.get("status") or payload.get("state") or "unknown"
            self._current_status = str(status)
            self._ensure_progress_started()
            if self._task_id is not None:
                description = f"Status: {self._current_status}"
                self._progress.update(self._task_id, description=description)  # type: ignore[arg-type]
            self._render_summary()
            return

        if stream_type is StreamType.EVENTS:
            event_type = payload.get("type", "event")
            summary = payload.get("message") or ""
            level = payload.get("level")
            # Mask sensitive URLs before displaying
            sanitized_summary = _mask_sensitive_urls(summary)
            formatted = f"[{event_type}] {sanitized_summary}".strip()
            if level:
                formatted = f"{formatted} ({level})"
            self._event_log.append(formatted)
            self._update_step_progress(payload.get("data"))
            self._render_summary()
            return

        if stream_type is StreamType.METRICS:
            name = payload.get("name", "")
            if name:
                self._latest_metrics[name] = payload.get("value")
            self._render_summary()
            return

        if stream_type is StreamType.TIMELINE:
            phase = payload.get("phase", "")
            # str() keeps a non-string phase from raising on .lower().
            if phase and str(phase).lower() not in {"training", "running"}:
                self._event_log.append(f"[timeline] {phase}")
            self._render_summary()

    def flush(self) -> None:
        """Stop the live progress display (if running) and render a final summary."""
        if self._progress_started:
            self._progress.stop()
            self._progress_started = False
        self._render_summary(force=True)

    def _update_step_progress(self, data: Any) -> None:
        """Advance the progress bar from an event's step fields, when usable."""
        if not isinstance(data, dict):
            return

        # Look the keys up explicitly so that a legitimate step of 0 is not
        # discarded the way an ``or`` chain would discard it.
        step = data.get("step")
        if step is None:
            step = data.get("current_step")
        total_steps = data.get("total_steps") or data.get("max_steps")
        if step is None or not total_steps:
            return

        try:
            completed, total = int(step), int(total_steps)
        except (TypeError, ValueError):
            # Malformed step data must not crash the display handler.
            return

        self._ensure_progress_started(total)
        if self._task_id is not None:
            self._progress.update(self._task_id, completed=completed, total=total)  # type: ignore[arg-type]

    def _ensure_progress_started(self, total: int | float | None = None) -> None:
        """Start the progress display and create its single task on first use.

        When the task already exists and ``total`` is given, the task total is
        updated instead.
        """
        if not self._progress_started:
            self._progress.start()
            self._progress_started = True
        if self._task_id is None:
            self._task_id = self._progress.add_task(
                f"Status: {self._current_status}", total=total or 100
            )
        elif total is not None:
            self._progress.update(self._task_id, total=total)  # type: ignore[arg-type]

    def _render_summary(self, force: bool = False) -> None:
        """Print the metrics table followed by the recent-events log.

        Args:
            force: When true and the progress display is running, refresh it
                before printing.
        """
        if force and self._progress_started:
            self._progress.refresh()

        table = self._table_class(title="Latest Metrics")
        table.add_column("Metric")
        table.add_column("Value")

        if not self._latest_metrics:
            table.add_row("—", "—")
        else:
            for name, value in sorted(self._latest_metrics.items()):
                table.add_row(str(name), str(value))

        if self._progress_started:
            self._progress.console.print(table)
        else:
            self._console.print(table)

        if self._event_log:
            self._console.print("\nRecent events:")
            for entry in self._event_log:
                self._console.print(f" • {entry}")
783
+
784
class ContextLearningHandler(StreamHandler):
    """CLI-friendly handler for Context Learning jobs.

    Emits high-signal progress similar to other infra job handlers,
    specialized for generation-based bash context optimization.
    """

    def __init__(self) -> None:
        """Start with no score recorded and generation 0."""
        self.best_score_so_far = 0.0
        self.current_generation = 0

    def handle(self, message: StreamMessage) -> None:
        """Echo one stream message to the terminal.

        STATUS, METRICS and EVENTS messages are printed with an ``HH:MM:SS``
        prefix; other stream types produce no output. Messages rejected by
        ``should_handle`` are ignored.
        """
        if not self.should_handle(message):
            return

        timestamp = datetime.now().strftime("%H:%M:%S")
        stream_type = message.stream_type

        if stream_type is StreamType.STATUS:
            status = str(message.data.get("status") or message.data.get("state") or "unknown")
            click.echo(f"[{timestamp}] status={status}")
        elif stream_type is StreamType.METRICS:
            self._echo_metric(timestamp, message.data)
        elif stream_type is StreamType.EVENTS:
            self._echo_event(timestamp, message.data)

    def _echo_metric(self, timestamp: str, payload: dict[str, Any]) -> None:
        """Print a metric, tracking the best value and latest generation."""
        name = payload.get("name")
        value = payload.get("value")
        step = payload.get("step")

        if isinstance(value, int | float):
            try:
                val_f = float(value)
            except OverflowError:
                # Only an int too large for a float can fail here; such a
                # value falls through to the generic metric line below.
                val_f = None
            if val_f is not None:
                if val_f > self.best_score_so_far:
                    self.best_score_so_far = val_f
                if isinstance(step, int):
                    self.current_generation = max(self.current_generation, step)
                click.echo(f"[{timestamp}] gen={step} best={val_f:.3f}")
                return

        click.echo(f"[{timestamp}] metric {name}={value}")

    def _echo_event(self, timestamp: str, payload: dict[str, Any]) -> None:
        """Print an event line, with a dedicated format for completed generations."""
        event_type = str(payload.get("type") or "")
        # Mask sensitive URLs before displaying, as the sibling handlers do.
        msg = _mask_sensitive_urls(payload.get("message") or "")
        data = payload.get("data")
        if not isinstance(data, dict):
            # A missing or malformed payload is treated as empty.
            data = {}

        if event_type == "context.learning.generation.completed":
            gen = data.get("generation") or data.get("gen") or self.current_generation
            score = data.get("best_score") or data.get("score") or self.best_score_so_far
            try:
                score_f = float(score)
            except (TypeError, ValueError):
                click.echo(f"[{timestamp}] generation {gen} completed")
                return
            if score_f > self.best_score_so_far:
                self.best_score_so_far = score_f
            click.echo(f"[{timestamp}] generation {gen} best={score_f:.3f}")
            return

        # Failure events keep the "type: message" shape even when the message
        # is empty; every other event drops the separator in that case.
        if msg or event_type.endswith(".failed"):
            click.echo(f"[{timestamp}] {event_type}: {msg}")
        else:
            click.echo(f"[{timestamp}] {event_type}")
849
+
850
+
851
+ class PromptLearningHandler(StreamHandler):
852
+ """Enhanced handler for GEPA/MIPRO prompt optimization jobs with rich formatting and metrics tracking.
853
+
854
+ This handler processes streaming events from both GEPA (Genetic Evolutionary Prompt
855
+ Algorithm) and MIPRO (Meta-Instruction PROposer) optimization jobs. It provides:
856
+
857
+ - **Real-time progress tracking**: Shows trial results, rollouts, iterations, and budget usage
858
+ - **Optimization curve tracking**: Maintains a history of best scores over time
859
+ - **GEPA-specific features**: Tracks transformations, rollouts, and validation results
860
+ - **MIPRO-specific features**: Tracks iterations, trials, minibatch/full evaluations, and budget
861
+ - **Dual output**: Writes to both console (via click.echo) and optional log file
862
+
863
+ The handler filters verbose events (like TPE updates, proposed instructions) to keep
864
+ output readable while preserving important progress information. It formats output
865
+ consistently between GEPA and MIPRO for easier comparison.
866
+
867
+ Example:
868
+ >>> handler = PromptLearningHandler(
869
+ ... show_trial_results=True,
870
+ ... max_tokens=1_000_000,
871
+ ... log_file=Path("optimization.log")
872
+ ... )
873
+ >>> # Handler is used by JobStreamer to process events
874
+ """
875
+
876
+ def __init__(
877
+ self,
878
+ *,
879
+ show_trial_results: bool = True,
880
+ show_transformations: bool = False,
881
+ show_validation: bool = True,
882
+ max_tokens: int | None = None,
883
+ max_time_seconds: float | None = None,
884
+ max_rollouts: int | None = None,
885
+ log_file: Path | None = None,
886
+ ):
887
+ """Initialize the prompt learning handler.
888
+
889
+ Args:
890
+ show_trial_results: Whether to display individual trial scores (default: True).
891
+ When True, shows each trial's score and best score so far.
892
+ show_transformations: Whether to display transformation/proposal details
893
+ (default: False). When True, shows verbose transformation events.
894
+ show_validation: Whether to display validation summaries (default: True).
895
+ Shows validation results comparing candidates against baseline.
896
+ max_tokens: Maximum token budget for MIPRO (from TOML termination_config).
897
+ Used to track progress and enforce limits.
898
+ max_time_seconds: Maximum time budget in seconds (from TOML termination_config).
899
+ Used to track elapsed time and ETA.
900
+ max_rollouts: Maximum rollouts budget (from TOML termination_config).
901
+ Used to track rollout progress for both GEPA and MIPRO.
902
+ log_file: Optional path to log file for persistent logging. If provided,
903
+ all output is written to both console and file. File is opened in
904
+ append mode and remains open for streaming.
905
+ """
906
+ self.show_trial_results = show_trial_results
907
+ self.show_transformations = show_transformations
908
+ self.show_validation = show_validation
909
+ self.optimization_curve: list[tuple[int, float]] = []
910
+ self.trial_counter = 0
911
+ self.best_score_so_far = 0.0
912
+
913
+ # MIPRO progress tracking
914
+ self.mipro_start_time: float | None = None
915
+ self.mipro_total_trials: int | None = None
916
+ self.mipro_completed_trials: int = 0
917
+ self.mipro_total_tokens: int = 0
918
+ self.mipro_policy_tokens: int = 0 # Rollout tokens (policy only)
919
+ self.mipro_max_tokens: int | None = max_tokens # From TOML termination_config
920
+ self.mipro_total_cost: float = 0.0
921
+ self.mipro_max_cost: float | None = None
922
+ self.mipro_current_iteration: int = 0
923
+ self.mipro_num_iterations: int | None = None
924
+ self.mipro_trials_per_iteration: int | None = None
925
+ self.mipro_best_score: float = 0.0 # Track best full eval score
926
+ self.mipro_baseline_score: float | None = None # Track baseline for comparison
927
+ self.mipro_batch_size: int | None = None # Track minibatch size (N for minibatch scores)
928
+ self.mipro_rollouts_completed: int = 0 # Total rollouts completed
929
+ self.mipro_max_rollouts: int | None = max_rollouts # From TOML termination_config
930
+ self.mipro_max_time_seconds: float | None = max_time_seconds # From TOML termination_config
931
+ self._last_progress_emit_time: float | None = None # Throttle progress updates
932
+ self._progress_emit_interval: float = 5.0 # Emit progress at most every 5 seconds
933
+
934
+ # Log file for real-time streaming
935
+ self.log_file: Path | None = log_file
936
+ self._log_file_handle = None
937
+ if self.log_file:
938
+ try:
939
+ # Create parent directory if needed
940
+ self.log_file.parent.mkdir(parents=True, exist_ok=True)
941
+ # Open file in append mode for live streaming
942
+ # Note: File must remain open for streaming, so we can't use context manager
943
+ from datetime import datetime
944
+ self._log_file_handle = open(self.log_file, "a", encoding="utf-8") # noqa: SIM115
945
+ # Write header
946
+ self._log_file_handle.write("=" * 80 + "\n")
947
+ self._log_file_handle.write("PROMPT LEARNING VERBOSE LOG\n")
948
+ self._log_file_handle.write(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
949
+ self._log_file_handle.write("=" * 80 + "\n\n")
950
+ self._log_file_handle.flush()
951
+ except Exception as e:
952
+ # If we can't open the log file, continue without it
953
+ click.echo(f"⚠️ Could not open log file {log_file}: {e}", err=True)
954
+ self.log_file = None
955
+ self._log_file_handle = None
956
+
957
def _write_log(self, text: str) -> None:
    """Echo ``text`` to the console and append it to the log file, if one is open.

    A failed file write closes the handle (ignoring errors from the close
    itself) and clears it, so later calls only write to the console.
    """
    click.echo(text)

    handle = self._log_file_handle
    if not handle:
        return

    try:
        handle.write(text + "\n")
        handle.flush()
    except Exception:
        # Writing failed: release the handle and carry on without file logging.
        try:
            handle.close()
        except Exception:
            pass
        self._log_file_handle = None
970
+
971
def handle(self, message: StreamMessage) -> None:
    """Handle a stream message from the prompt learning job.

    Routes messages based on stream type:
    - STATUS: logs ``status=<value>``
    - EVENTS: runs the specialized tracker for known event types, then
      logs a default one-line rendering of the event
    - METRICS: logs ``name=value`` plus step / sample count when present
    - TIMELINE: logs the phase

    ``mipro.tpe.update`` events are dropped entirely because they are very
    frequent and carry little value.

    Args:
        message: StreamMessage containing event data from the backend
    """
    if not self.should_handle(message):
        return

    timestamp = datetime.now().strftime("%H:%M:%S")
    stream_type = message.stream_type
    payload = message.data

    if stream_type is StreamType.STATUS:
        status = str(payload.get("status") or payload.get("state") or "unknown")
        self._write_log(f"[{timestamp}] status={status}")
        return

    if stream_type is StreamType.EVENTS:
        event_type = payload.get("type", "event")
        level = payload.get("level")
        msg = payload.get("message") or ""

        # Only the most verbose MIPRO events are hidden.
        if event_type in {"mipro.tpe.update"}:
            return

        # Event type -> name of the tracker method that receives the payload.
        # Names are resolved with getattr only for the event that arrives, so
        # an event never touches trackers it does not need.
        tracker_methods = {
            "mipro.job.started": "_handle_mipro_job_started",
            "mipro.budget.update": "_handle_mipro_budget_update",
            "mipro.trial.complete": "_handle_mipro_trial_complete",
            "prompt.learning.trial.results": "_handle_trial_results",
            "prompt.learning.progress": "_handle_progress",
            "mipro.iteration.start": "_handle_mipro_iteration_start",
            "mipro.iteration.complete": "_handle_mipro_iteration_complete",
            "mipro.fulleval.complete": "_handle_mipro_fulleval_complete",
            "mipro.new_incumbent": "_handle_mipro_new_incumbent",
            "prompt.learning.rollouts.start": "_handle_rollouts_start",
            "prompt.learning.gepa.new_best": "_handle_gepa_new_best",
            "prompt.learning.phase.changed": "_handle_phase_changed",
            "prompt.learning.stream.connected": "_handle_stream_connected",
            "prompt.learning.proposal.scored": "_handle_proposal_scored",
        }
        if event_type == "prompt.learning.validation.summary":
            if self.show_validation:
                self._handle_validation_summary(payload)
        elif event_type == "mipro.optimization.exhausted":
            # Graceful conclusion - show final progress
            self._emit_mipro_progress()
        elif event_type in tracker_methods:
            getattr(self, tracker_methods[event_type])(payload)

        # Default event display: every event that was not hidden is also
        # logged as a single line.
        prefix = f"[{timestamp}] {event_type}"
        if level:
            prefix += f" ({level})"
        sanitized_msg = _mask_sensitive_urls(msg)

        # Include key data fields if message is empty or short
        if not sanitized_msg or len(sanitized_msg) < 50:
            data = payload.get("data", {})
            if isinstance(data, dict):
                useful_fields = []
                for key in (
                    "score",
                    "accuracy",
                    "mean",
                    "step",
                    "iteration",
                    "trial",
                    "completed",
                    "total",
                    "version_id",
                ):
                    if key not in data:
                        continue
                    value = data[key]
                    if isinstance(value, float):
                        useful_fields.append(f"{key}={value:.4f}")
                    else:
                        useful_fields.append(f"{key}={value}")
                if useful_fields:
                    # Limit to 5 fields
                    sanitized_msg = (
                        sanitized_msg
                        + (" " if sanitized_msg else "")
                        + " ".join(useful_fields[:5])
                    )

        # Drop the ": " separator only when there is nothing after it.
        # rstrip(": ") would strip a *set* of characters and so also eat a
        # colon that legitimately ends the message text.
        body = sanitized_msg.rstrip(" ")
        self._write_log(f"{prefix}: {body}" if body else prefix)
        return

    if stream_type is StreamType.METRICS:
        name = payload.get("name")
        value = payload.get("value")
        step = payload.get("step")
        data = payload.get("data", {})

        if isinstance(value, int | float):
            metric_str = f"[{timestamp}] [metric] {name}={value:.4f}"
        else:
            metric_str = f"[{timestamp}] [metric] {name}={value}"
        if step is not None:
            metric_str += f" (step={step})"

        if isinstance(data, dict):
            n = data.get("n")
            if n is not None:
                metric_str += f" n={n}"

        self._write_log(metric_str)
        return

    if stream_type is StreamType.TIMELINE:
        phase = payload.get("phase", "phase")
        self._write_log(f"[{timestamp}] timeline={phase}")
1146
+
1147
+ def _handle_trial_results(self, event_data: dict[str, Any]) -> None:
1148
+ """Handle GEPA trial results events and track optimization curve.
1149
+
1150
+ Processes trial completion events from GEPA optimization, tracking:
1151
+ - Mean score for the trial
1152
+ - Best score achieved so far
1153
+ - Number of rollouts completed (N)
1154
+ - Optimization curve data points
1155
+
1156
+ Updates the optimization curve with (trial_number, best_score) tuples
1157
+ for visualization. Displays trial results if show_trial_results is True.
1158
+
1159
+ Args:
1160
+ event_data: Event data dictionary containing:
1161
+ - data.mean: Mean score for this trial
1162
+ - data.completed: Number of rollouts completed
1163
+ - data.total: Total rollouts planned
1164
+ """
1165
+ data = event_data.get("data", {})
1166
+ if not isinstance(data, dict):
1167
+ return
1168
+
1169
+ mean_score = data.get("mean")
1170
+ if mean_score is not None:
1171
+ self.trial_counter += 1
1172
+ self.best_score_so_far = max(self.best_score_so_far, float(mean_score))
1173
+ self.optimization_curve.append((self.trial_counter, self.best_score_so_far))
1174
+
1175
+ if self.show_trial_results:
1176
+ timestamp = datetime.now().strftime("%H:%M:%S")
1177
+
1178
+ # Extract N (number of rollouts)
1179
+ completed = data.get("completed")
1180
+ total = data.get("total")
1181
+
1182
+ n_str = f" N={completed}/{total}" if completed is not None and total is not None else (f" N={completed}" if completed is not None else "")
1183
+
1184
+ self._write_log(f"[{timestamp}] [Trial {self.trial_counter}] Score: {mean_score:.4f} (Best: {self.best_score_so_far:.4f}){n_str}")
1185
+
1186
+ def _handle_validation_summary(self, event_data: dict[str, Any]) -> None:
1187
+ """Handle validation summary events showing candidate performance.
1188
+
1189
+ Displays validation results comparing optimized prompts against a baseline.
1190
+ Shows baseline score, number of candidates evaluated (N), and top candidate
1191
+ scores. Only displayed if show_validation is True.
1192
+
1193
+ Args:
1194
+ event_data: Event data dictionary containing:
1195
+ - data.baseline: Baseline score (dict with accuracy/score or number)
1196
+ - data.results: List of candidate results with accuracy/score fields
1197
+ """
1198
+ data = event_data.get("data", {})
1199
+ if not isinstance(data, dict):
1200
+ return
1201
+
1202
+ timestamp = datetime.now().strftime("%H:%M:%S")
1203
+
1204
+ # Extract baseline
1205
+ baseline = data.get("baseline")
1206
+ baseline_score = None
1207
+ if isinstance(baseline, dict):
1208
+ baseline_score = baseline.get("accuracy") or baseline.get("score")
1209
+ elif isinstance(baseline, int | float):
1210
+ baseline_score = baseline
1211
+
1212
+ # Extract results
1213
+ results = data.get("results", [])
1214
+ if not isinstance(results, list):
1215
+ results = []
1216
+
1217
+ # Display validation summary
1218
+ self._write_log(f"[{timestamp}] Validation Summary:")
1219
+
1220
+ # Show baseline if available
1221
+ if baseline_score is not None:
1222
+ self._write_log(f" Baseline: {baseline_score:.4f}")
1223
+
1224
+ # Show N (number of candidates)
1225
+ n_candidates = len(results)
1226
+ if n_candidates > 0:
1227
+ self._write_log(f" N={n_candidates}")
1228
+
1229
+ # Display validation results
1230
+ if results:
1231
+ for i, result in enumerate(results[:10]): # Show top 10
1232
+ if isinstance(result, dict):
1233
+ accuracy = result.get("accuracy") or result.get("score")
1234
+ if accuracy is not None:
1235
+ self._write_log(f" Candidate {i+1}: {accuracy:.4f}")
1236
+
1237
+ def _handle_progress(self, event_data: dict[str, Any]) -> None:
1238
+ """Handle GEPA progress events with detailed rollout and transformation tracking.
1239
+
1240
+ Displays comprehensive progress information including:
1241
+ - Overall completion percentage
1242
+ - Rollout progress (completed/total with percentage)
1243
+ - Transformation progress (tried/planned with percentage)
1244
+ - Token usage (used/budget in millions)
1245
+ - Elapsed time and ETA
1246
+
1247
+ Formats progress in a human-readable format similar to CLI progress bars.
1248
+
1249
+ Args:
1250
+ event_data: Event data dictionary containing:
1251
+ - data.rollouts_completed: Number of rollouts completed
1252
+ - data.rollouts_total: Total rollouts planned
1253
+ - data.transformations_tried: Number of transformations tried
1254
+ - data.transformations_planned: Total transformations planned
1255
+ - data.rollout_tokens_used: Tokens consumed
1256
+ - data.rollout_tokens_budget: Token budget
1257
+ - data.elapsed_seconds: Time elapsed
1258
+ - data.eta_seconds: Estimated time remaining
1259
+ - data.percent_overall: Overall completion percentage
1260
+ """
1261
+ data = event_data.get("data", {})
1262
+ if not isinstance(data, dict):
1263
+ return
1264
+
1265
+ timestamp = datetime.now().strftime("%H:%M:%S")
1266
+
1267
+ # Extract rollout progress
1268
+ rollouts_completed = data.get("rollouts_completed")
1269
+ rollouts_total = data.get("rollouts_total")
1270
+ percent_rollouts = data.get("percent_rollouts")
1271
+
1272
+ # Extract transformation progress
1273
+ transformations_tried = data.get("transformations_tried")
1274
+ transformations_planned = data.get("transformations_planned")
1275
+ percent_transformations = data.get("percent_transformations")
1276
+
1277
+ # Extract overall progress
1278
+ percent_overall = data.get("percent_overall")
1279
+
1280
+ # Extract timing
1281
+ elapsed_seconds = data.get("elapsed_seconds")
1282
+ eta_seconds = data.get("eta_seconds")
1283
+
1284
+ # Extract token usage
1285
+ rollout_tokens_used = data.get("rollout_tokens_used")
1286
+ rollout_tokens_budget = data.get("rollout_tokens_budget")
1287
+
1288
+ # Build progress message
1289
+ parts = []
1290
+
1291
+ # Overall percentage
1292
+ if percent_overall is not None:
1293
+ parts.append(f"{int(percent_overall * 100)}% complete")
1294
+
1295
+ # Rollout progress
1296
+ if rollouts_completed is not None and rollouts_total is not None:
1297
+ parts.append(f"rollouts={rollouts_completed}/{rollouts_total}")
1298
+ if percent_rollouts is not None:
1299
+ parts.append(f"({int(percent_rollouts * 100)}%)")
1300
+ elif rollouts_completed is not None:
1301
+ parts.append(f"rollouts={rollouts_completed}")
1302
+
1303
+ # Transformation progress
1304
+ if transformations_tried is not None and transformations_planned is not None:
1305
+ parts.append(f"transformations={transformations_tried}/{transformations_planned}")
1306
+ if percent_transformations is not None:
1307
+ parts.append(f"({int(percent_transformations * 100)}%)")
1308
+ elif transformations_tried is not None:
1309
+ parts.append(f"transformations={transformations_tried}")
1310
+
1311
+ # Token usage
1312
+ if rollout_tokens_used is not None:
1313
+ tokens_millions = rollout_tokens_used / 1_000_000.0
1314
+ if rollout_tokens_budget is not None:
1315
+ budget_millions = rollout_tokens_budget / 1_000_000.0
1316
+ parts.append(f"tokens={tokens_millions:.2f}M/{budget_millions:.2f}M")
1317
+ else:
1318
+ parts.append(f"tokens={tokens_millions:.2f}M")
1319
+
1320
+ # Timing
1321
+ if elapsed_seconds is not None:
1322
+ if elapsed_seconds >= 60:
1323
+ elapsed_str = f"{elapsed_seconds / 60:.1f}min"
1324
+ else:
1325
+ elapsed_str = f"{int(elapsed_seconds)}s"
1326
+ parts.append(f"elapsed={elapsed_str}")
1327
+
1328
+ if eta_seconds is not None:
1329
+ eta_str = f"{eta_seconds / 60:.1f}min" if eta_seconds >= 60 else f"{int(eta_seconds)}s"
1330
+ parts.append(f"eta={eta_str}")
1331
+
1332
+ # Fallback to simple step/total_steps if no detailed info
1333
+ if not parts:
1334
+ step = data.get("step") or data.get("current_step")
1335
+ total_steps = data.get("total_steps") or data.get("max_steps")
1336
+ if step is not None and total_steps is not None:
1337
+ parts.append(f"{step}/{total_steps} ({100 * step / total_steps:.1f}%)")
1338
+
1339
+ if parts:
1340
+ progress_msg = " ".join(parts)
1341
+ self._write_log(f"[{timestamp}] Progress: {progress_msg}")
1342
+
1343
+ def _handle_rollouts_start(self, event_data: dict[str, Any]) -> None:
1344
+ """Handle GEPA rollouts start event.
1345
+
1346
+ Displays when rollouts begin, showing the number of training seeds
1347
+ that will be evaluated. This marks the start of the main optimization
1348
+ phase for GEPA.
1349
+
1350
+ Args:
1351
+ event_data: Event data dictionary containing:
1352
+ - data.train_seeds: List of training seed values
1353
+ """
1354
+ data = event_data.get("data", {})
1355
+ if not isinstance(data, dict):
1356
+ return
1357
+
1358
+ timestamp = datetime.now().strftime("%H:%M:%S")
1359
+ train_seeds = data.get("train_seeds", [])
1360
+
1361
+ if isinstance(train_seeds, list) and train_seeds:
1362
+ num_seeds = len(train_seeds)
1363
+ self._write_log(f"[{timestamp}] Starting rollouts: {num_seeds} seeds")
1364
+ else:
1365
+ self._write_log(f"[{timestamp}] Starting rollouts")
1366
+
1367
+ def _handle_gepa_new_best(self, event_data: dict[str, Any]) -> None:
1368
+ """Handle GEPA new best candidate event.
1369
+
1370
+ Displays when a new best candidate is found during optimization,
1371
+ showing the improvement over the previous best.
1372
+
1373
+ Args:
1374
+ event_data: Event data dictionary containing:
1375
+ - data.accuracy: New best accuracy score
1376
+ - data.previous_best_score: Previous best score
1377
+ - data.improvement: Absolute improvement
1378
+ - data.version_id: ID of the new best candidate
1379
+ """
1380
+ data = event_data.get("data", {})
1381
+ if not isinstance(data, dict):
1382
+ return
1383
+
1384
+ timestamp = datetime.now().strftime("%H:%M:%S")
1385
+ accuracy = data.get("accuracy")
1386
+ previous = data.get("previous_best_score")
1387
+ improvement = data.get("improvement")
1388
+
1389
+ if accuracy is not None:
1390
+ msg = f"[{timestamp}] \u2728 New best: {accuracy:.4f}"
1391
+ if previous is not None and improvement is not None:
1392
+ msg += f" (+{improvement:.4f} from {previous:.4f})"
1393
+ elif previous is not None:
1394
+ msg += f" (was {previous:.4f})"
1395
+ self._write_log(msg)
1396
+
1397
+ def _handle_phase_changed(self, event_data: dict[str, Any]) -> None:
1398
+ """Handle phase transition event.
1399
+
1400
+ Displays when the optimization transitions between phases
1401
+ (e.g., bootstrap -> optimization -> validation -> complete).
1402
+
1403
+ Args:
1404
+ event_data: Event data dictionary containing:
1405
+ - data.from_phase: Previous phase name
1406
+ - data.to_phase: New phase name
1407
+ - data.phase_summary: Optional summary of completed phase
1408
+ """
1409
+ data = event_data.get("data", {})
1410
+ if not isinstance(data, dict):
1411
+ return
1412
+
1413
+ timestamp = datetime.now().strftime("%H:%M:%S")
1414
+ from_phase = data.get("from_phase") or "start"
1415
+ to_phase = data.get("to_phase")
1416
+
1417
+ if to_phase:
1418
+ self._write_log(f"[{timestamp}] Phase: {from_phase} \u2192 {to_phase}")
1419
+
1420
+ def _handle_stream_connected(self, event_data: dict[str, Any]) -> None:
1421
+ """Handle SSE stream connection event.
1422
+
1423
+ Displays connection confirmation with cursor position for debugging.
1424
+
1425
+ Args:
1426
+ event_data: Event data dictionary containing:
1427
+ - data.cursor: Current sequence cursor position
1428
+ - data.heartbeat_interval_seconds: Heartbeat interval
1429
+ """
1430
+ data = event_data.get("data", {})
1431
+ if not isinstance(data, dict):
1432
+ return
1433
+
1434
+ timestamp = datetime.now().strftime("%H:%M:%S")
1435
+ cursor = data.get("cursor", 0)
1436
+ self._write_log(f"[{timestamp}] Stream connected (cursor={cursor})")
1437
+
1438
+ def _handle_mipro_job_started(self, event_data: dict[str, Any]) -> None:
1439
+ """Handle MIPRO job start event and extract configuration.
1440
+
1441
+ Captures initial MIPRO configuration from the job start event to enable
1442
+ progress tracking. Extracts num_iterations and num_trials_per_iteration
1443
+ to estimate total trials and rollouts.
1444
+
1445
+ Args:
1446
+ event_data: Event data dictionary containing:
1447
+ - data.num_iterations: Total number of optimization iterations
1448
+ - data.num_trials_per_iteration: Trials per iteration
1449
+ """
1450
+ data = event_data.get("data", {})
1451
+ if not isinstance(data, dict):
1452
+ return
1453
+
1454
+ # Extract config values to estimate max rollouts
1455
+ num_iterations = data.get("num_iterations")
1456
+ num_trials_per_iteration = data.get("num_trials_per_iteration")
1457
+
1458
+ if num_iterations is not None:
1459
+ self.mipro_num_iterations = num_iterations
1460
+ if num_trials_per_iteration is not None:
1461
+ self.mipro_trials_per_iteration = num_trials_per_iteration
1462
+
1463
def _handle_mipro_iteration_start(self, event_data: dict[str, Any]) -> None:
    """Handle a MIPRO iteration-start event and seed progress tracking.

    On iteration 0 this starts the elapsed-time clock (once) and captures the
    run configuration: iteration count, trials per iteration, batch size,
    total trials, a rollout cap and a time cap. Values that are already set
    on the handler for the rollout and time caps are never overridden by the
    event. For every event that carries an ``iteration`` the current
    iteration is updated.

    Args:
        event_data: Event dictionary whose ``"data"`` dict may contain:
            - iteration: Current iteration number (0 triggers initialisation)
            - num_iterations: Total iterations
            - num_trials_per_iteration: Trials per iteration
            - batch_size: Minibatch size
            - max_trials: Trial limit, used as a rollout-cap fallback
            - max_rollouts: Rollout limit
            - max_time_seconds / max_wall_clock_seconds: Time limit
    """
    import time

    data = event_data.get("data", {})
    if not isinstance(data, dict):
        return

    iteration = data.get("iteration")
    if iteration == 0:
        # Start the wall clock only on the first iteration-0 event seen.
        if self.mipro_start_time is None:
            self.mipro_start_time = time.time()

        # Truthy event values win; otherwise keep what is already known.
        self.mipro_num_iterations = data.get("num_iterations") or self.mipro_num_iterations
        self.mipro_trials_per_iteration = (
            data.get("num_trials_per_iteration") or self.mipro_trials_per_iteration
        )
        batch_size = data.get("batch_size")
        if batch_size is not None:
            self.mipro_batch_size = batch_size

        if self.mipro_num_iterations and self.mipro_trials_per_iteration:
            self.mipro_total_trials = self.mipro_num_iterations * self.mipro_trials_per_iteration

        # Rollout cap: a value already present on the handler takes
        # precedence; otherwise fall back through event value, max_trials
        # (approximation), then iterations * trials * batch_size.
        if self.mipro_max_rollouts is None:
            max_rollouts_from_event = data.get("max_rollouts")
            max_trials = data.get("max_trials")
            if max_rollouts_from_event is not None:
                self.mipro_max_rollouts = max_rollouts_from_event
            elif max_trials is not None:
                self.mipro_max_rollouts = max_trials
            elif self.mipro_num_iterations and self.mipro_trials_per_iteration and self.mipro_batch_size:
                self.mipro_max_rollouts = (
                    self.mipro_num_iterations * self.mipro_trials_per_iteration * self.mipro_batch_size
                )

        max_time_seconds = data.get("max_time_seconds") or data.get("max_wall_clock_seconds")
        if max_time_seconds is not None and self.mipro_max_time_seconds is None:
            try:
                self.mipro_max_time_seconds = float(max_time_seconds)
            except (TypeError, ValueError):
                # A malformed limit in the event must not abort event
                # handling; simply leave the time cap unset.
                pass

    if iteration is not None:
        self.mipro_current_iteration = iteration
1527
+
1528
def _handle_mipro_iteration_complete(self, event_data: dict[str, Any]) -> None:
    """Update counters when a MIPRO iteration finishes, then report progress.

    Stores the cumulative trial count and the completed iteration number from
    the event (each only when present) and calls ``_emit_mipro_progress``.

    Args:
        event_data: Event dictionary whose ``"data"`` dict may contain:
            - cumulative: Cumulative trial count across all iterations
            - iteration: Completed iteration number
    """
    payload = event_data.get("data", {})
    if not isinstance(payload, dict):
        return

    # Payload key -> handler attribute; absent/None keys keep the old value.
    for key, attribute in (
        ("cumulative", "mipro_completed_trials"),
        ("iteration", "mipro_current_iteration"),
    ):
        value = payload.get(key)
        if value is not None:
            setattr(self, attribute, value)

    self._emit_mipro_progress()
1558
+
1559
def _handle_mipro_trial_complete(self, event_data: dict[str, Any]) -> None:
    """Handle a MIPRO minibatch trial-completion event.

    Increments the completed-trial counter, adds the trial's seed count to
    the rollout counter, optionally logs the minibatch score as
    ``[Trial X] Score: Y (Best: Z) N=W`` and finally calls
    ``_emit_mipro_progress`` (which throttles itself).

    Args:
        event_data: Event dictionary whose ``"data"`` dict may contain:
            - minibatch_score: Score from the minibatch evaluation
            - iteration: Current iteration number
            - trial: Trial number within the iteration
            - num_seeds / num_instances: Number of seeds evaluated
    """
    payload = event_data.get("data", {})
    if not isinstance(payload, dict):
        return

    self.mipro_completed_trials += 1

    # num_instances is the fallback key for the minibatch size.
    seed_count = payload.get("num_seeds") or payload.get("num_instances", 0)
    if seed_count:
        self.mipro_rollouts_completed += seed_count

    if self.show_trial_results:
        stamp = datetime.now().strftime("%H:%M:%S")
        raw_score = payload.get("minibatch_score")
        iteration = payload.get("iteration")
        trial = payload.get("trial")

        if raw_score is not None:
            try:
                score_value = float(raw_score)
                # Without full position info fall back to the running count.
                if iteration is None or trial is None or not self.mipro_trials_per_iteration:
                    display_number = self.mipro_completed_trials
                else:
                    display_number = (iteration * self.mipro_trials_per_iteration) + (trial + 1)

                size_suffix = f" N={seed_count}" if seed_count else ""
                best_suffix = ""
                if self.mipro_best_score > 0:
                    best_suffix = f" (Best: {self.mipro_best_score:.4f})"

                self._write_log(
                    f"[{stamp}] [Trial {display_number}] Score: {score_value:.4f}{best_suffix}{size_suffix}"
                )
            except (ValueError, TypeError):
                # Unusable score/position values: skip the log line only.
                pass

    self._emit_mipro_progress()
1617
+
1618
def _handle_mipro_fulleval_complete(self, event_data: dict[str, Any]) -> None:
    """Handle a MIPRO full-evaluation completion event.

    Adds the evaluation's seed count to the rollout counter, initialises the
    baseline from the first score seen when no baseline is set, and updates
    the best score. A result line is logged only when the score is
    "promising": better than the current best, or at least 5% above the
    baseline.

    The improvement over baseline is printed with an explicit sign, so a
    score below the baseline reads ``-12.3%`` rather than ``+-12.3%``. The
    ``N`` shown in the log line is the same seed count that was added to the
    rollout counter.

    Args:
        event_data: Event dictionary whose ``"data"`` dict may contain:
            - score: Full evaluation score
            - iteration: Current iteration number
            - trial: Trial number within the iteration
            - num_seeds: Number of seeds evaluated
            - seeds: List of seed values (alternative to num_seeds)
    """
    data = event_data.get("data", {})
    if not isinstance(data, dict):
        return

    # Seed count: explicit num_seeds, else the length of the seeds list.
    num_seeds = data.get("num_seeds") or data.get("seeds", 0)
    if isinstance(num_seeds, list):
        num_seeds = len(num_seeds)
    if num_seeds:
        self.mipro_rollouts_completed += num_seeds

    score = data.get("score")
    if score is None:
        return
    try:
        score_float = float(score)
    except (ValueError, TypeError):
        return

    # The first full-eval score doubles as the baseline when none is set.
    if self.mipro_baseline_score is None:
        self.mipro_baseline_score = score_float
    baseline = self.mipro_baseline_score

    # Relative change vs. baseline; undefined for a non-positive baseline,
    # in which case it is reported as 0.
    improvement_pct = ((score_float - baseline) / baseline * 100) if baseline > 0 else 0

    is_promising = False
    if score_float > self.mipro_best_score:
        self.mipro_best_score = score_float
        is_promising = True
    elif improvement_pct >= 5.0:
        is_promising = True

    if not is_promising:
        return

    timestamp = datetime.now().strftime("%H:%M:%S")
    iteration = data.get("iteration")
    trial = data.get("trial")

    iter_str = f" iter={iteration}" if iteration is not None else ""
    trial_str = f" trial={trial}" if trial is not None else ""
    n_str = f" N={num_seeds}" if num_seeds else ""
    # "+" in the format spec emits the sign for both positive and negative
    # values, avoiding the "+-" artefact of a hard-coded plus.
    baseline_str = f" (Baseline: {baseline:.4f}, {improvement_pct:+.1f}%)"

    self._write_log(
        f"[{timestamp}] Full eval: Score={score_float:.4f} (Best: {self.mipro_best_score:.4f}){n_str}{baseline_str}{iter_str}{trial_str}"
    )
1697
+
1698
def _handle_mipro_new_incumbent(self, event_data: dict[str, Any]) -> None:
    """Handle a MIPRO new-incumbent event (a new best candidate was found).

    Updates ``best_score_so_far``, appends a point to ``optimization_curve``
    when the event carries a trial number, echoes a
    ``[Trial X] Score: Y (Best: Z) N=W`` line via ``click.echo`` and calls
    ``_emit_mipro_progress``. Events without a usable ``minibatch_score`` are
    ignored entirely.

    A ``best_score`` that cannot be converted to ``float`` is treated as
    absent (the minibatch score is used instead) rather than raising.

    Args:
        event_data: Event dictionary whose ``"data"`` dict may contain:
            - minibatch_score: Minibatch score of the new incumbent
            - best_score: Overall best score
            - iteration: Current iteration number
            - trial: Trial number within the iteration
            - cumulative_trials: Cumulative trial count across iterations
            - num_seeds: Minibatch size (N)
    """
    data = event_data.get("data", {})
    if not isinstance(data, dict):
        return

    minibatch_score = data.get("minibatch_score")
    if minibatch_score is None:
        return
    try:
        score_float = float(minibatch_score)
    except (ValueError, TypeError):
        return

    # Prefer the event's overall best; fall back to this minibatch score
    # when best_score is missing or malformed.
    best_float = None
    best_score = data.get("best_score")
    if best_score is not None:
        try:
            best_float = float(best_score)
        except (ValueError, TypeError):
            best_float = None
    candidate = score_float if best_float is None else best_float
    if candidate > self.best_score_so_far:
        self.best_score_so_far = candidate

    iteration = data.get("iteration")
    trial = data.get("trial")

    # Optimization curve: x is the cumulative trial number when the event
    # supplies it, otherwise an estimate from iteration/trial, otherwise
    # the next value of the local counter.
    if trial is not None:
        cumulative_trials = data.get("cumulative_trials")
        if cumulative_trials is not None:
            trial_num = cumulative_trials
        elif iteration is not None and self.mipro_trials_per_iteration:
            trial_num = (iteration * self.mipro_trials_per_iteration) + (trial + 1)
        else:
            trial_num = self.trial_counter + 1

        self.optimization_curve.append((trial_num, self.best_score_so_far))
        self.trial_counter = trial_num

    if self.trial_counter > 0:
        trial_num_display = self.trial_counter
    elif trial is not None:
        trial_num_display = trial + 1
    else:
        trial_num_display = 1

    num_seeds = data.get("num_seeds")  # N for minibatch
    n_str = f" N={num_seeds}" if num_seeds is not None else ""

    timestamp = datetime.now().strftime("%H:%M:%S")
    click.echo(
        f"[{timestamp}] [Trial {trial_num_display}] Score: {score_float:.4f} (Best: {self.best_score_so_far:.4f}){n_str}"
    )

    # Progress emission throttles itself.
    self._emit_mipro_progress()
1767
+
1768
def _handle_mipro_budget_update(self, event_data: dict[str, Any]) -> None:
    """Record token/cost usage and limits from a MIPRO budget event.

    Each recognised key that is present (not ``None``) in the payload
    overwrites the matching handler attribute; afterwards
    ``_emit_mipro_progress`` is called, which throttles itself.

    Args:
        event_data: Event dictionary whose ``"data"`` dict may contain:
            - total_tokens: Total tokens consumed
            - policy_tokens: Tokens used for rollouts (policy only)
            - total_cost_usd: Total cost in USD
            - max_token_limit: Maximum token budget
            - max_spend_usd: Maximum cost budget
    """
    payload = event_data.get("data", {})
    if not isinstance(payload, dict):
        return

    # Payload key -> handler attribute that mirrors it.
    budget_fields = (
        ("total_tokens", "mipro_total_tokens"),
        ("policy_tokens", "mipro_policy_tokens"),
        ("total_cost_usd", "mipro_total_cost"),
        ("max_token_limit", "mipro_max_tokens"),
        ("max_spend_usd", "mipro_max_cost"),
    )
    for key, attribute in budget_fields:
        value = payload.get(key)
        if value is not None:
            setattr(self, attribute, value)

    self._emit_mipro_progress()
1817
+
1818
def _emit_mipro_progress(self) -> None:
    """Write one throttled, single-line MIPRO progress summary.

    Nothing is written until ``mipro_start_time`` is set, nor when less than
    ``_progress_emit_interval`` seconds have passed since the previous
    emission. Otherwise a line ``[HH:MM:SS] Progress: ...`` is passed to
    ``_write_log`` containing, where the data is available:

    - overall completion percentage and trial counts (done/total, remaining)
    - iteration position (current/total)
    - rollouts completed against the known or estimated maximum
    - policy tokens used against the token budget (in millions)
    - elapsed time against the time limit
    - an ETA extrapolated from the trial completion rate
    """
    import time

    started_at = self.mipro_start_time
    if started_at is None:
        return

    # Throttle: drop this update if the previous one was too recent.
    now = time.time()
    last_emit = self._last_progress_emit_time
    if last_emit is not None and now - last_emit < self._progress_emit_interval:
        return
    self._last_progress_emit_time = now

    stamp = datetime.now().strftime("%H:%M:%S")
    elapsed = now - started_at
    total = self.mipro_total_trials
    done = self.mipro_completed_trials
    segments = []

    # Overall percentage plus trial counts.
    if total and done is not None:
        whole_pct = int((done / total) * 100)
        segments.append(f"{whole_pct}% complete")
        segments.append(f"trials={done}/{total}")
        left = total - done
        if left > 0:
            segments.append(f"rem={left}")
        segments.append(f"({whole_pct}%)")
    elif done is not None:
        segments.append(f"trials={done}")

    # Iteration position (stored 0-based, shown 1-based).
    iterations = self.mipro_num_iterations
    current = self.mipro_current_iteration
    if iterations and current is not None:
        segments.append(f"iter={current + 1}/{iterations}")

    # Rollouts, against the configured cap or an estimate from trials * batch.
    rollouts = self.mipro_rollouts_completed
    if rollouts > 0:
        rollout_cap = self.mipro_max_rollouts
        if rollout_cap is None and total and self.mipro_batch_size:
            rollout_cap = total * self.mipro_batch_size
        if rollout_cap:
            rollout_pct = int((rollouts / rollout_cap) * 100)
            segments.append(f"rollouts={rollouts}/{rollout_cap} ({rollout_pct}%)")
        else:
            segments.append(f"rollouts={rollouts}")

    # Policy tokens in millions, against the token budget when known.
    policy_tokens = self.mipro_policy_tokens
    if policy_tokens > 0:
        used_millions = policy_tokens / 1_000_000.0
        token_cap = self.mipro_max_tokens
        if token_cap:
            cap_millions = token_cap / 1_000_000.0
            tokens_pct = (policy_tokens / token_cap * 100) if token_cap > 0 else 0
            segments.append(f"tokens={used_millions:.2f}M/{cap_millions:.2f}M ({int(tokens_pct)}%)")
        else:
            segments.append(f"tokens={used_millions:.2f}M")

    # Elapsed time: minutes once past 60 s, with the limit when one is set.
    whole_seconds = int(elapsed)
    past_a_minute = whole_seconds >= 60
    time_cap = self.mipro_max_time_seconds
    if time_cap:
        time_pct = int((elapsed / time_cap * 100) if time_cap > 0 else 0)
        if past_a_minute:
            elapsed_text = f"{whole_seconds / 60:.1f}min/{time_cap / 60.0:.1f}min ({time_pct}%)"
        else:
            elapsed_text = f"{whole_seconds}s/{int(time_cap)}s ({time_pct}%)"
    elif past_a_minute:
        elapsed_text = f"{whole_seconds / 60:.1f}min"
    else:
        elapsed_text = f"{whole_seconds}s"
    segments.append(f"elapsed={elapsed_text}")

    # ETA from the observed trials-per-second rate.
    eta = None
    if done is not None and done > 0 and elapsed > 0:
        rate = done / elapsed
        if rate > 0:
            if total:
                left = total - done
                if left > 0:
                    eta = left / rate
            elif iterations and current is not None:
                # No trial total: extrapolate from the iterations still to run.
                iterations_left = iterations - (current + 1)
                if iterations_left > 0 and self.mipro_trials_per_iteration:
                    eta = iterations_left * self.mipro_trials_per_iteration / rate

    if eta is not None and eta > 0:
        eta_text = f"{eta / 60:.1f}min" if eta >= 60 else f"{int(eta)}s"
        segments.append(f"eta={eta_text}")

    if segments:
        self._write_log(f"[{stamp}] Progress: {' '.join(segments)}")
1948
+
1949
def flush(self) -> None:
    """Write the closing footer to the log file and close it.

    Does nothing when no log file is open. The footer (a rule, an
    ``Ended: <timestamp>`` line and another rule) is written on a
    best-effort basis: failures are swallowed so that shutdown never raises.
    The file handle is closed even when writing the footer fails, and
    ``_log_file_handle`` is reset to ``None`` in every case so a second call
    is a no-op.
    """
    handle = self._log_file_handle
    if not handle:
        return

    try:
        from datetime import datetime

        handle.write("\n" + "=" * 80 + "\n")
        handle.write(f"Ended: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        handle.write("=" * 80 + "\n")
        handle.flush()
    except Exception:
        # Deliberately best-effort: a failed footer must not break shutdown.
        pass
    finally:
        # Close regardless of whether the footer was written, so a write
        # error does not leak the open file.
        try:
            handle.close()
        except Exception:
            pass
        self._log_file_handle = None
1963
+
1964
def _handle_proposal_scored(self, event_data: dict[str, Any]) -> None:
    """Echo the score of a GEPA proposal (transformation).

    Prints ``[HH:MM:SS] Proposal scored: <score>`` via ``click.echo`` with
    the score formatted to four decimals. Events whose ``"data"`` is not a
    dict, or whose score is missing or cannot be converted to ``float``, are
    ignored instead of raising a formatting error.

    Args:
        event_data: Event dictionary whose ``"data"`` dict may contain:
            - score: Score assigned to the transformation/proposal
    """
    data = event_data.get("data", {})
    if not isinstance(data, dict):
        return

    score = data.get("score")
    if score is None:
        return
    try:
        # Coerce first: the ".4f" format spec raises on strings and other
        # non-numeric payload values.
        score_float = float(score)
    except (TypeError, ValueError):
        return

    timestamp = datetime.now().strftime("%H:%M:%S")
    click.echo(f"[{timestamp}] Proposal scored: {score_float:.4f}")
1984
+
1985
+
1986
# Names exported by ``from <module> import *``.
__all__ = [
    "GraphGenHandler",
    "BufferedHandler",
    "CallbackHandler",
    "CLIHandler",
    "PromptLearningHandler",
    "JSONHandler",
    "IntegrationTestHandler",
    "LossCurveHandler",
    "RichHandler",
    "StreamHandler",
]