synth-ai 0.2.9.dev11__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.

Potentially problematic release.


This version of synth-ai might be problematic.

Files changed (909)
  1. synth_ai/__init__.py +44 -45
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +104 -78
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/baseline/__init__.py +12 -0
  25. synth_ai/cli/commands/baseline/core.py +636 -0
  26. synth_ai/cli/commands/baseline/list.py +94 -0
  27. synth_ai/cli/commands/demo/__init__.py +3 -0
  28. synth_ai/cli/commands/demo/core.py +153 -0
  29. synth_ai/cli/commands/eval/__init__.py +19 -0
  30. synth_ai/cli/commands/eval/core.py +1113 -0
  31. synth_ai/cli/commands/eval/errors.py +81 -0
  32. synth_ai/cli/commands/eval/validation.py +133 -0
  33. synth_ai/cli/commands/filter/__init__.py +12 -0
  34. synth_ai/cli/commands/filter/core.py +424 -0
  35. synth_ai/cli/commands/filter/errors.py +55 -0
  36. synth_ai/cli/commands/filter/validation.py +77 -0
  37. synth_ai/cli/commands/help/__init__.py +185 -0
  38. synth_ai/cli/commands/help/core.py +72 -0
  39. synth_ai/cli/commands/scan/__init__.py +19 -0
  40. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  41. synth_ai/cli/commands/scan/core.py +344 -0
  42. synth_ai/cli/commands/scan/health_checker.py +242 -0
  43. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  44. synth_ai/cli/commands/scan/models.py +83 -0
  45. synth_ai/cli/commands/smoke/__init__.py +7 -0
  46. synth_ai/cli/commands/smoke/core.py +1438 -0
  47. synth_ai/cli/commands/status/__init__.py +66 -0
  48. synth_ai/cli/commands/status/client.py +192 -0
  49. synth_ai/cli/commands/status/config.py +92 -0
  50. synth_ai/cli/commands/status/errors.py +20 -0
  51. synth_ai/cli/commands/status/formatters.py +164 -0
  52. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  53. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  54. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  55. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  56. synth_ai/cli/commands/status/subcommands/pricing.py +23 -0
  57. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  58. synth_ai/cli/commands/status/subcommands/session.py +182 -0
  59. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  60. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  61. synth_ai/cli/commands/status/utils.py +114 -0
  62. synth_ai/cli/commands/train/__init__.py +53 -0
  63. synth_ai/cli/commands/train/core.py +22 -0
  64. synth_ai/cli/commands/train/errors.py +117 -0
  65. synth_ai/cli/commands/train/judge_schemas.py +201 -0
  66. synth_ai/cli/commands/train/judge_validation.py +305 -0
  67. synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
  68. synth_ai/cli/commands/train/validation.py +392 -0
  69. synth_ai/cli/demo_apps/__init__.py +10 -0
  70. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  71. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  72. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  73. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  74. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  75. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  76. synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  79. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +742 -0
  80. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  81. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  82. synth_ai/cli/demo_apps/math/_common.py +16 -0
  83. synth_ai/cli/demo_apps/math/app.py +38 -0
  84. synth_ai/cli/demo_apps/math/config.toml +76 -0
  85. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  86. synth_ai/cli/demo_apps/math/modal_task_app.py +702 -0
  87. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  88. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  89. synth_ai/cli/demo_apps/mipro/task_app.py +933 -0
  90. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  91. synth_ai/cli/demos/__init__.py +12 -0
  92. synth_ai/cli/demos/demo.py +32 -0
  93. synth_ai/cli/demos/rl_demo.py +254 -0
  94. synth_ai/cli/deploy.py +216 -0
  95. synth_ai/cli/infra/__init__.py +14 -0
  96. synth_ai/cli/infra/balance.py +216 -0
  97. synth_ai/cli/infra/mcp.py +35 -0
  98. synth_ai/cli/infra/modal_app.py +36 -0
  99. synth_ai/cli/infra/setup.py +69 -0
  100. synth_ai/cli/infra/status.py +16 -0
  101. synth_ai/cli/infra/turso.py +77 -0
  102. synth_ai/cli/lib/__init__.py +10 -0
  103. synth_ai/cli/lib/agents.py +76 -0
  104. synth_ai/cli/lib/apps/modal_app.py +101 -0
  105. synth_ai/cli/lib/apps/task_app.py +643 -0
  106. synth_ai/cli/lib/bin.py +39 -0
  107. synth_ai/cli/lib/env.py +375 -0
  108. synth_ai/cli/lib/errors.py +85 -0
  109. synth_ai/cli/lib/modal.py +315 -0
  110. synth_ai/cli/lib/plotting.py +126 -0
  111. synth_ai/cli/lib/prompt_args.py +39 -0
  112. synth_ai/cli/lib/prompts.py +284 -0
  113. synth_ai/cli/lib/sqld.py +122 -0
  114. synth_ai/cli/lib/task_app_discovery.py +884 -0
  115. synth_ai/cli/lib/task_app_env.py +295 -0
  116. synth_ai/cli/lib/train_cfgs.py +300 -0
  117. synth_ai/cli/lib/tunnel_records.py +207 -0
  118. synth_ai/cli/local/__init__.py +14 -0
  119. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  120. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  121. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  122. synth_ai/cli/local/experiment_queue/config.py +128 -0
  123. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  124. synth_ai/cli/local/experiment_queue/database.py +175 -0
  125. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  126. synth_ai/cli/local/experiment_queue/models.py +231 -0
  127. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  128. synth_ai/cli/local/experiment_queue/results.py +373 -0
  129. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  130. synth_ai/cli/local/experiment_queue/service.py +344 -0
  131. synth_ai/cli/local/experiment_queue/status.py +372 -0
  132. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  133. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  134. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  135. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  136. synth_ai/cli/local/session/__init__.py +92 -0
  137. synth_ai/cli/local/session/client.py +383 -0
  138. synth_ai/cli/local/session/constants.py +63 -0
  139. synth_ai/cli/local/session/exceptions.py +105 -0
  140. synth_ai/cli/local/session/manager.py +139 -0
  141. synth_ai/cli/local/session/models.py +89 -0
  142. synth_ai/cli/local/session/query.py +110 -0
  143. synth_ai/cli/root.py +30 -103
  144. synth_ai/cli/task_apps/__init__.py +26 -0
  145. synth_ai/cli/task_apps/commands.py +3153 -0
  146. synth_ai/cli/task_apps/deploy.py +7 -0
  147. synth_ai/cli/task_apps/list.py +26 -0
  148. synth_ai/cli/task_apps/main.py +36 -0
  149. synth_ai/cli/task_apps/modal_serve.py +11 -0
  150. synth_ai/cli/task_apps/serve.py +11 -0
  151. synth_ai/cli/training/__init__.py +8 -0
  152. synth_ai/cli/training/train.py +5 -0
  153. synth_ai/cli/training/train_cfg.py +34 -0
  154. synth_ai/cli/training/watch.py +506 -0
  155. synth_ai/cli/turso.py +34 -55
  156. synth_ai/cli/usage.py +159 -0
  157. synth_ai/cli/utils/__init__.py +8 -0
  158. synth_ai/cli/utils/experiments.py +235 -0
  159. synth_ai/cli/utils/queue.py +504 -0
  160. synth_ai/cli/utils/recent.py +133 -0
  161. synth_ai/cli/utils/traces.py +164 -0
  162. synth_ai/contracts/__init__.py +67 -0
  163. synth_ai/core/__init__.py +100 -0
  164. synth_ai/core/_utils/__init__.py +54 -0
  165. synth_ai/core/_utils/base_url.py +10 -0
  166. synth_ai/core/_utils/http.py +10 -0
  167. synth_ai/core/_utils/prompts.py +14 -0
  168. synth_ai/core/_utils/task_app_state.py +12 -0
  169. synth_ai/core/_utils/user_config.py +10 -0
  170. synth_ai/core/apps/common.py +116 -0
  171. synth_ai/core/auth.py +95 -0
  172. synth_ai/core/cfgs.py +240 -0
  173. synth_ai/core/config/__init__.py +16 -0
  174. synth_ai/core/config/base.py +168 -0
  175. synth_ai/core/config/resolver.py +89 -0
  176. synth_ai/core/env.py +220 -0
  177. synth_ai/core/errors.py +126 -0
  178. synth_ai/core/http.py +230 -0
  179. synth_ai/core/integrations/__init__.py +11 -0
  180. synth_ai/core/integrations/cloudflare.py +1710 -0
  181. synth_ai/core/integrations/mcp/__init__.py +6 -0
  182. synth_ai/core/integrations/mcp/__main__.py +8 -0
  183. synth_ai/core/integrations/mcp/claude.py +36 -0
  184. synth_ai/core/integrations/mcp/main.py +254 -0
  185. synth_ai/core/integrations/mcp/setup.py +100 -0
  186. synth_ai/core/integrations/modal.py +277 -0
  187. synth_ai/core/json.py +72 -0
  188. synth_ai/core/log_filter.py +99 -0
  189. synth_ai/core/logging.py +82 -0
  190. synth_ai/core/paths.py +107 -0
  191. synth_ai/core/pricing.py +109 -0
  192. synth_ai/core/process.py +233 -0
  193. synth_ai/core/ssl.py +25 -0
  194. synth_ai/core/storage/__init__.py +71 -0
  195. synth_ai/core/task_app_state.py +318 -0
  196. synth_ai/core/telemetry.py +282 -0
  197. synth_ai/core/tracing_v3/__init__.py +99 -0
  198. synth_ai/core/tracing_v3/config.py +229 -0
  199. synth_ai/core/tracing_v3/constants.py +21 -0
  200. synth_ai/core/tracing_v3/db_config.py +182 -0
  201. synth_ai/core/tracing_v3/decorators.py +401 -0
  202. synth_ai/core/tracing_v3/examples/basic_usage.py +194 -0
  203. synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
  204. synth_ai/core/tracing_v3/migration_helper.py +119 -0
  205. synth_ai/core/tracing_v3/replica_sync.py +262 -0
  206. synth_ai/core/tracing_v3/serialization.py +130 -0
  207. synth_ai/core/tracing_v3/session_tracer.py +542 -0
  208. synth_ai/core/tracing_v3/storage/base.py +211 -0
  209. synth_ai/core/tracing_v3/storage/config.py +109 -0
  210. synth_ai/core/tracing_v3/storage/factory.py +39 -0
  211. synth_ai/core/tracing_v3/storage/utils.py +206 -0
  212. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  213. synth_ai/core/tracing_v3/turso/__init__.py +12 -0
  214. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  215. synth_ai/core/tracing_v3/turso/models.py +470 -0
  216. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  217. synth_ai/core/tracing_v3/utils.py +108 -0
  218. synth_ai/core/urls.py +18 -0
  219. synth_ai/core/user_config.py +137 -0
  220. synth_ai/core/uvicorn.py +222 -0
  221. synth_ai/data/__init__.py +110 -0
  222. synth_ai/data/enums.py +141 -0
  223. synth_ai/data/rewards.py +152 -0
  224. synth_ai/data/specs.py +36 -0
  225. synth_ai/data/traces.py +35 -0
  226. synth_ai/products/__init__.py +6 -0
  227. synth_ai/products/graph_evolve/__init__.py +46 -0
  228. synth_ai/products/graph_evolve/client.py +226 -0
  229. synth_ai/products/graph_evolve/config.py +591 -0
  230. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  231. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  232. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  233. synth_ai/products/graph_evolve/run.py +222 -0
  234. synth_ai/sdk/__init__.py +119 -0
  235. synth_ai/sdk/api/__init__.py +1 -0
  236. synth_ai/sdk/api/models/supported.py +514 -0
  237. synth_ai/sdk/api/research_agent/__init__.py +86 -0
  238. synth_ai/sdk/api/research_agent/cli.py +428 -0
  239. synth_ai/sdk/api/research_agent/config.py +357 -0
  240. synth_ai/sdk/api/research_agent/job.py +717 -0
  241. synth_ai/sdk/api/train/__init__.py +85 -0
  242. synth_ai/sdk/api/train/builders.py +895 -0
  243. synth_ai/sdk/api/train/cli.py +2188 -0
  244. synth_ai/sdk/api/train/config_finder.py +267 -0
  245. synth_ai/sdk/api/train/configs/__init__.py +65 -0
  246. synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
  247. synth_ai/sdk/api/train/configs/rl.py +188 -0
  248. synth_ai/sdk/api/train/configs/sft.py +99 -0
  249. synth_ai/sdk/api/train/configs/shared.py +81 -0
  250. synth_ai/sdk/api/train/context_learning.py +312 -0
  251. synth_ai/sdk/api/train/env_resolver.py +418 -0
  252. synth_ai/sdk/api/train/graph_validators.py +216 -0
  253. synth_ai/sdk/api/train/graphgen.py +984 -0
  254. synth_ai/sdk/api/train/graphgen_models.py +823 -0
  255. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  256. synth_ai/sdk/api/train/pollers.py +124 -0
  257. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  258. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  259. synth_ai/sdk/api/train/progress/events.py +326 -0
  260. synth_ai/sdk/api/train/progress/results.py +428 -0
  261. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  262. synth_ai/sdk/api/train/prompt_learning.py +470 -0
  263. synth_ai/sdk/api/train/rl.py +442 -0
  264. synth_ai/sdk/api/train/sft.py +396 -0
  265. synth_ai/sdk/api/train/summary.py +522 -0
  266. synth_ai/sdk/api/train/supported_algos.py +147 -0
  267. synth_ai/sdk/api/train/task_app.py +331 -0
  268. synth_ai/sdk/api/train/utils.py +279 -0
  269. synth_ai/sdk/api/train/validators.py +2424 -0
  270. synth_ai/sdk/baseline/__init__.py +25 -0
  271. synth_ai/sdk/baseline/config.py +209 -0
  272. synth_ai/sdk/baseline/discovery.py +216 -0
  273. synth_ai/sdk/baseline/execution.py +154 -0
  274. synth_ai/sdk/graphs/__init__.py +15 -0
  275. synth_ai/sdk/graphs/completions.py +570 -0
  276. synth_ai/sdk/inference/__init__.py +6 -0
  277. synth_ai/sdk/inference/client.py +128 -0
  278. synth_ai/sdk/jobs/__init__.py +16 -0
  279. synth_ai/sdk/jobs/client.py +371 -0
  280. synth_ai/sdk/judging/__init__.py +15 -0
  281. synth_ai/sdk/judging/base.py +24 -0
  282. synth_ai/sdk/judging/client.py +191 -0
  283. synth_ai/sdk/judging/schemas.py +222 -0
  284. synth_ai/sdk/judging/types.py +42 -0
  285. synth_ai/sdk/learning/__init__.py +69 -0
  286. synth_ai/sdk/learning/client.py +240 -0
  287. synth_ai/sdk/learning/ft_client.py +7 -0
  288. synth_ai/sdk/learning/health.py +49 -0
  289. synth_ai/sdk/learning/jobs.py +202 -0
  290. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  291. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  292. synth_ai/sdk/learning/prompt_learning_types.py +185 -0
  293. synth_ai/sdk/learning/rl/client.py +268 -0
  294. synth_ai/sdk/learning/rl/contracts.py +27 -0
  295. synth_ai/sdk/learning/rl/env_keys.py +166 -0
  296. synth_ai/sdk/learning/rl/secrets.py +13 -0
  297. synth_ai/sdk/learning/sft/client.py +95 -0
  298. synth_ai/sdk/learning/sft/config.py +270 -0
  299. synth_ai/sdk/learning/sft/data.py +698 -0
  300. synth_ai/sdk/learning/validators.py +52 -0
  301. synth_ai/sdk/research_agent/__init__.py +34 -0
  302. synth_ai/sdk/research_agent/container_builder.py +328 -0
  303. synth_ai/sdk/research_agent/container_spec.py +198 -0
  304. synth_ai/sdk/research_agent/defaults.py +34 -0
  305. synth_ai/sdk/research_agent/results_collector.py +69 -0
  306. synth_ai/sdk/specs/__init__.py +46 -0
  307. synth_ai/sdk/specs/dataclasses.py +149 -0
  308. synth_ai/sdk/specs/loader.py +144 -0
  309. synth_ai/sdk/specs/serializer.py +199 -0
  310. synth_ai/sdk/specs/validation.py +250 -0
  311. synth_ai/sdk/streaming/__init__.py +35 -0
  312. synth_ai/sdk/streaming/config.py +94 -0
  313. synth_ai/sdk/streaming/handlers.py +1997 -0
  314. synth_ai/sdk/streaming/streamer.py +704 -0
  315. synth_ai/sdk/streaming/types.py +112 -0
  316. synth_ai/sdk/task/__init__.py +151 -0
  317. synth_ai/sdk/task/apps/__init__.py +133 -0
  318. synth_ai/sdk/task/config.py +261 -0
  319. synth_ai/sdk/task/contracts.py +298 -0
  320. synth_ai/sdk/task/datasets.py +108 -0
  321. synth_ai/sdk/task/in_process.py +1190 -0
  322. synth_ai/sdk/task/in_process_runner.py +309 -0
  323. synth_ai/sdk/task/inference_api.py +299 -0
  324. synth_ai/sdk/task/proxy.py +287 -0
  325. synth_ai/sdk/task/rubrics/__init__.py +55 -0
  326. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  327. synth_ai/sdk/task/rubrics/models.py +57 -0
  328. synth_ai/sdk/task/rubrics/scoring.py +116 -0
  329. synth_ai/sdk/task/rubrics/strict.py +149 -0
  330. synth_ai/sdk/task/server.py +580 -0
  331. synth_ai/sdk/task/trace_correlation_helpers.py +506 -0
  332. synth_ai/sdk/task/tracing_utils.py +95 -0
  333. synth_ai/sdk/task/validators.py +456 -0
  334. synth_ai/sdk/tracing/__init__.py +39 -0
  335. synth_ai/sdk/training/__init__.py +102 -0
  336. synth_ai/sdk/usage/__init__.py +37 -0
  337. synth_ai/sdk/usage/client.py +171 -0
  338. synth_ai/sdk/usage/models.py +261 -0
  339. synth_ai/utils/__init__.py +213 -0
  340. synth_ai-0.4.1.dist-info/METADATA +195 -0
  341. synth_ai-0.4.1.dist-info/RECORD +379 -0
  342. synth_ai-0.4.1.dist-info/entry_points.txt +2 -0
  343. synth_ai-0.4.1.dist-info/top_level.txt +1 -0
  344. examples/__init__.py +0 -16
  345. examples/analyze_semantic_words.sh +0 -17
  346. examples/crafter_debug_render.py +0 -186
  347. examples/qwen_coder/README.md +0 -102
  348. examples/qwen_coder/_shared.py +0 -113
  349. examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
  350. examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
  351. examples/qwen_coder/configs/coder_lora_small.toml +0 -58
  352. examples/qwen_coder/generate_dataset.py +0 -98
  353. examples/qwen_coder/infer_ft_smoke.py +0 -64
  354. examples/qwen_coder/infer_prod_proxy.py +0 -73
  355. examples/qwen_coder/infer_via_synth.py +0 -87
  356. examples/qwen_coder/scripts/infer_coder.sh +0 -18
  357. examples/qwen_coder/scripts/train_coder_30b.sh +0 -21
  358. examples/qwen_coder/sft_full_17b.py +0 -103
  359. examples/qwen_coder/sft_lora_30b.py +0 -110
  360. examples/qwen_coder/subset_jsonl.py +0 -38
  361. examples/qwen_coder/validate_jsonl.py +0 -59
  362. examples/rl/README.md +0 -169
  363. examples/rl/configs/eval_base_qwen.toml +0 -15
  364. examples/rl/configs/eval_rl_qwen.toml +0 -11
  365. examples/rl/configs/rl_from_base_qwen.toml +0 -35
  366. examples/rl/configs/rl_from_base_qwen17.toml +0 -74
  367. examples/rl/configs/rl_from_ft_qwen.toml +0 -35
  368. examples/rl/download_dataset.py +0 -80
  369. examples/rl/run_eval.py +0 -436
  370. examples/rl/run_rl_and_save.py +0 -111
  371. examples/rl/task_app/README.md +0 -22
  372. examples/rl/task_app/math_single_step.py +0 -991
  373. examples/rl/task_app/math_task_app.py +0 -115
  374. examples/run_crafter_demo.sh +0 -10
  375. examples/sft/README.md +0 -139
  376. examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
  377. examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
  378. examples/sft/evaluate.py +0 -117
  379. examples/sft/export_dataset.py +0 -117
  380. examples/sft/generate_traces.py +0 -162
  381. examples/swe/__init__.py +0 -12
  382. examples/swe/task_app/README.md +0 -105
  383. examples/swe/task_app/__init__.py +0 -2
  384. examples/swe/task_app/grpo_swe_mini.py +0 -571
  385. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
  386. examples/swe/task_app/hosted/README.md +0 -173
  387. examples/swe/task_app/hosted/__init__.py +0 -5
  388. examples/swe/task_app/hosted/branching.py +0 -143
  389. examples/swe/task_app/hosted/environment_routes.py +0 -1289
  390. examples/swe/task_app/hosted/envs/__init__.py +0 -1
  391. examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
  392. examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
  393. examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
  394. examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
  395. examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
  396. examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
  397. examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
  398. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
  399. examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
  400. examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
  401. examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
  402. examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
  403. examples/swe/task_app/hosted/hosted_app.py +0 -204
  404. examples/swe/task_app/hosted/inference/__init__.py +0 -5
  405. examples/swe/task_app/hosted/inference/openai_client.py +0 -618
  406. examples/swe/task_app/hosted/main.py +0 -100
  407. examples/swe/task_app/hosted/policy_routes.py +0 -1079
  408. examples/swe/task_app/hosted/registry.py +0 -195
  409. examples/swe/task_app/hosted/rollout.py +0 -1869
  410. examples/swe/task_app/hosted/storage/__init__.py +0 -5
  411. examples/swe/task_app/hosted/storage/volume.py +0 -211
  412. examples/swe/task_app/hosted/test_agents.py +0 -161
  413. examples/swe/task_app/hosted/test_service.py +0 -137
  414. examples/swe/task_app/hosted/utils.py +0 -62
  415. examples/vlm/README.md +0 -68
  416. examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
  417. examples/vlm/crafter_image_only_agent.py +0 -207
  418. examples/vlm/crafter_openai_vlm_agent.py +0 -277
  419. examples/vlm/filter_image_rows.py +0 -63
  420. examples/vlm/run_crafter_vlm_benchmark.py +0 -316
  421. examples/warming_up_to_rl/analyze_trace_db.py +0 -422
  422. examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
  423. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
  424. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
  425. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
  426. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
  427. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
  428. examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
  429. examples/warming_up_to_rl/export_trace_sft.py +0 -723
  430. examples/warming_up_to_rl/groq_test.py +0 -95
  431. examples/warming_up_to_rl/manage_secrets.py +0 -131
  432. examples/warming_up_to_rl/readme.md +0 -179
  433. examples/warming_up_to_rl/run_eval.py +0 -510
  434. examples/warming_up_to_rl/run_fft_and_save.py +0 -380
  435. examples/warming_up_to_rl/run_local_rollout.py +0 -237
  436. examples/warming_up_to_rl/run_local_rollout_modal.py +0 -246
  437. examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -403
  438. examples/warming_up_to_rl/run_local_rollout_traced.py +0 -475
  439. examples/warming_up_to_rl/run_rl_and_save.py +0 -124
  440. examples/warming_up_to_rl/run_rollout_remote.py +0 -154
  441. examples/warming_up_to_rl/task_app/README.md +0 -42
  442. examples/warming_up_to_rl/task_app/grpo_crafter.py +0 -700
  443. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +0 -146
  444. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +0 -173
  445. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +0 -5
  446. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +0 -143
  447. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +0 -1226
  448. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +0 -1
  449. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
  450. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
  451. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -522
  452. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -478
  453. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -108
  454. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
  455. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
  456. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +0 -204
  457. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +0 -5
  458. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +0 -618
  459. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -100
  460. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +0 -1083
  461. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +0 -195
  462. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +0 -1869
  463. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +0 -5
  464. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +0 -211
  465. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +0 -161
  466. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +0 -137
  467. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  468. synth/__init__.py +0 -14
  469. synth_ai/api/models/supported.py +0 -376
  470. synth_ai/api/train/__init__.py +0 -5
  471. synth_ai/api/train/builders.py +0 -296
  472. synth_ai/api/train/cli.py +0 -606
  473. synth_ai/api/train/config_finder.py +0 -228
  474. synth_ai/api/train/env_resolver.py +0 -347
  475. synth_ai/api/train/pollers.py +0 -75
  476. synth_ai/api/train/supported_algos.py +0 -139
  477. synth_ai/api/train/task_app.py +0 -195
  478. synth_ai/api/train/utils.py +0 -217
  479. synth_ai/cli/_modal_wrapper.py +0 -28
  480. synth_ai/cli/_typer_patch.py +0 -49
  481. synth_ai/cli/balance.py +0 -203
  482. synth_ai/cli/calc.py +0 -69
  483. synth_ai/cli/demo.py +0 -159
  484. synth_ai/cli/legacy_root_backup.py +0 -470
  485. synth_ai/cli/man.py +0 -106
  486. synth_ai/cli/recent.py +0 -127
  487. synth_ai/cli/rl_demo.py +0 -274
  488. synth_ai/cli/status.py +0 -133
  489. synth_ai/cli/task_apps.py +0 -2782
  490. synth_ai/cli/traces.py +0 -163
  491. synth_ai/cli/watch.py +0 -505
  492. synth_ai/config/base_url.py +0 -107
  493. synth_ai/core/experiment.py +0 -13
  494. synth_ai/core/system.py +0 -15
  495. synth_ai/demo_registry.py +0 -295
  496. synth_ai/demos/core/__init__.py +0 -1
  497. synth_ai/demos/core/cli.py +0 -1756
  498. synth_ai/demos/demo_task_apps/core.py +0 -440
  499. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -172
  500. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  501. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
  502. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
  503. synth_ai/environments/__init__.py +0 -31
  504. synth_ai/environments/environment/__init__.py +0 -1
  505. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  506. synth_ai/environments/environment/artifacts/base.py +0 -52
  507. synth_ai/environments/environment/core.py +0 -67
  508. synth_ai/environments/environment/db/__init__.py +0 -1
  509. synth_ai/environments/environment/db/sqlite.py +0 -45
  510. synth_ai/environments/environment/registry.py +0 -233
  511. synth_ai/environments/environment/resources/sqlite.py +0 -45
  512. synth_ai/environments/environment/results.py +0 -1
  513. synth_ai/environments/environment/rewards/__init__.py +0 -1
  514. synth_ai/environments/environment/rewards/core.py +0 -29
  515. synth_ai/environments/environment/shared_engine.py +0 -26
  516. synth_ai/environments/environment/tools/__init__.py +0 -200
  517. synth_ai/environments/examples/__init__.py +0 -1
  518. synth_ai/environments/examples/bandit/__init__.py +0 -33
  519. synth_ai/environments/examples/bandit/engine.py +0 -302
  520. synth_ai/environments/examples/bandit/environment.py +0 -194
  521. synth_ai/environments/examples/bandit/taskset.py +0 -200
  522. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  523. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  524. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  525. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  526. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  527. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  528. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  529. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  530. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  531. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  532. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  533. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  534. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  535. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  536. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  537. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  538. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  539. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  540. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  541. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  542. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  543. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  544. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  545. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  546. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  547. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  548. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  549. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  550. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  551. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  552. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  553. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  554. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  555. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  556. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  557. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  558. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  559. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  560. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  561. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  562. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  563. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  564. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  565. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  566. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  567. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  568. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  569. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  570. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  571. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  572. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  573. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  574. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  575. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  576. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  577. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  578. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  579. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  580. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  581. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  582. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  583. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  584. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  585. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  586. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  587. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  588. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  589. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  590. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  591. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  592. synth_ai/environments/examples/crafter_classic/environment.py +0 -479
  593. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  594. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  595. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  596. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  597. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  598. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  599. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  600. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  601. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  602. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  603. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  604. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  605. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  606. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  607. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  608. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  609. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  610. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  611. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  612. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  613. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  614. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  615. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  616. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  617. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  618. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  619. synth_ai/environments/examples/enron/engine.py +0 -295
  620. synth_ai/environments/examples/enron/environment.py +0 -166
  621. synth_ai/environments/examples/enron/taskset.py +0 -112
  622. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  623. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  624. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  625. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  626. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  627. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  628. synth_ai/environments/examples/minigrid/engine.py +0 -589
  629. synth_ai/environments/examples/minigrid/environment.py +0 -274
  630. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  631. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  632. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  633. synth_ai/environments/examples/nethack/__init__.py +0 -7
  634. synth_ai/environments/examples/nethack/achievements.py +0 -337
  635. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  636. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  637. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  638. synth_ai/environments/examples/nethack/engine.py +0 -739
  639. synth_ai/environments/examples/nethack/environment.py +0 -256
  640. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  641. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  642. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  643. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  644. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  645. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  646. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  647. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  648. synth_ai/environments/examples/nethack/taskset.py +0 -323
  649. synth_ai/environments/examples/red/__init__.py +0 -7
  650. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  651. synth_ai/environments/examples/red/config_logging.py +0 -110
  652. synth_ai/environments/examples/red/engine.py +0 -694
  653. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  654. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -28
  655. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  656. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  657. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  658. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  659. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  660. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  661. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  662. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  663. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  664. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  665. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  666. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  667. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  668. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  669. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -140
  670. synth_ai/environments/examples/red/environment.py +0 -238
  671. synth_ai/environments/examples/red/taskset.py +0 -79
  672. synth_ai/environments/examples/red/units/__init__.py +0 -1
  673. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  674. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  675. synth_ai/environments/examples/sokoban/engine.py +0 -678
  676. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  677. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  678. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  679. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  680. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  681. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  682. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  683. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  684. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  685. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  686. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  687. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  688. synth_ai/environments/examples/sokoban/environment.py +0 -229
  689. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  690. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  691. synth_ai/environments/examples/sokoban/taskset.py +0 -428
  692. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  693. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  694. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  695. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  696. synth_ai/environments/examples/verilog/__init__.py +0 -10
  697. synth_ai/environments/examples/verilog/engine.py +0 -329
  698. synth_ai/environments/examples/verilog/environment.py +0 -350
  699. synth_ai/environments/examples/verilog/taskset.py +0 -420
  700. synth_ai/environments/examples/wordle/__init__.py +0 -29
  701. synth_ai/environments/examples/wordle/engine.py +0 -398
  702. synth_ai/environments/examples/wordle/environment.py +0 -159
  703. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  704. synth_ai/environments/examples/wordle/taskset.py +0 -230
  705. synth_ai/environments/reproducibility/core.py +0 -42
  706. synth_ai/environments/reproducibility/helpers.py +0 -0
  707. synth_ai/environments/reproducibility/tree.py +0 -363
  708. synth_ai/environments/service/app.py +0 -97
  709. synth_ai/environments/service/core_routes.py +0 -1021
  710. synth_ai/environments/service/external_registry.py +0 -56
  711. synth_ai/environments/service/registry.py +0 -9
  712. synth_ai/environments/stateful/__init__.py +0 -1
  713. synth_ai/environments/stateful/core.py +0 -163
  714. synth_ai/environments/stateful/engine.py +0 -21
  715. synth_ai/environments/stateful/state.py +0 -7
  716. synth_ai/environments/tasks/api.py +0 -19
  717. synth_ai/environments/tasks/core.py +0 -81
  718. synth_ai/environments/tasks/filters.py +0 -40
  719. synth_ai/environments/tasks/utils.py +0 -90
  720. synth_ai/environments/v0_observability/history.py +0 -3
  721. synth_ai/environments/v0_observability/log.py +0 -2
  722. synth_ai/evals/base.py +0 -13
  723. synth_ai/handshake.py +0 -109
  724. synth_ai/http.py +0 -26
  725. synth_ai/http_client.py +0 -136
  726. synth_ai/inference/__init__.py +0 -5
  727. synth_ai/inference/client.py +0 -34
  728. synth_ai/jobs/client.py +0 -271
  729. synth_ai/learning/__init__.py +0 -59
  730. synth_ai/learning/client.py +0 -241
  731. synth_ai/learning/ft_client.py +0 -7
  732. synth_ai/learning/health.py +0 -49
  733. synth_ai/learning/jobs.py +0 -201
  734. synth_ai/learning/rl/client.py +0 -267
  735. synth_ai/learning/rl/contracts.py +0 -27
  736. synth_ai/learning/rl/env_keys.py +0 -166
  737. synth_ai/learning/rl/secrets.py +0 -13
  738. synth_ai/learning/sft/client.py +0 -68
  739. synth_ai/learning/sft/config.py +0 -270
  740. synth_ai/learning/sft/data.py +0 -295
  741. synth_ai/learning/validators.py +0 -49
  742. synth_ai/lm/__init__.py +0 -25
  743. synth_ai/main.py +0 -6
  744. synth_ai/task/__init__.py +0 -102
  745. synth_ai/task/apps/__init__.py +0 -128
  746. synth_ai/task/contracts.py +0 -137
  747. synth_ai/task/datasets.py +0 -108
  748. synth_ai/task/proxy.py +0 -259
  749. synth_ai/task/server.py +0 -424
  750. synth_ai/task/tracing_utils.py +0 -84
  751. synth_ai/task/validators.py +0 -11
  752. synth_ai/tracing_v3/__init__.py +0 -97
  753. synth_ai/tracing_v3/config.py +0 -84
  754. synth_ai/tracing_v3/db_config.py +0 -194
  755. synth_ai/tracing_v3/decorators.py +0 -369
  756. synth_ai/tracing_v3/examples/basic_usage.py +0 -189
  757. synth_ai/tracing_v3/llm_call_record_helpers.py +0 -337
  758. synth_ai/tracing_v3/migration_helper.py +0 -120
  759. synth_ai/tracing_v3/replica_sync.py +0 -258
  760. synth_ai/tracing_v3/session_tracer.py +0 -530
  761. synth_ai/tracing_v3/storage/base.py +0 -210
  762. synth_ai/tracing_v3/storage/config.py +0 -75
  763. synth_ai/tracing_v3/storage/factory.py +0 -39
  764. synth_ai/tracing_v3/storage/utils.py +0 -204
  765. synth_ai/tracing_v3/turso/daemon.py +0 -149
  766. synth_ai/tracing_v3/turso/models.py +0 -469
  767. synth_ai/tracing_v3/turso/native_manager.py +0 -1173
  768. synth_ai/tracing_v3/utils.py +0 -108
  769. synth_ai/v0/api/__init__.py +0 -8
  770. synth_ai/v0/api/models/__init__.py +0 -8
  771. synth_ai/v0/api/models/supported.py +0 -8
  772. synth_ai/v0/config/__init__.py +0 -15
  773. synth_ai/v0/config/base_url.py +0 -12
  774. synth_ai/v0/lm/__init__.py +0 -51
  775. synth_ai/v0/lm/caching/constants.py +0 -6
  776. synth_ai/v0/lm/caching/dbs.py +0 -0
  777. synth_ai/v0/lm/caching/ephemeral.py +0 -100
  778. synth_ai/v0/lm/caching/handler.py +0 -137
  779. synth_ai/v0/lm/caching/initialize.py +0 -11
  780. synth_ai/v0/lm/caching/persistent.py +0 -114
  781. synth_ai/v0/lm/config.py +0 -115
  782. synth_ai/v0/lm/constants.py +0 -32
  783. synth_ai/v0/lm/core/__init__.py +0 -8
  784. synth_ai/v0/lm/core/all.py +0 -73
  785. synth_ai/v0/lm/core/exceptions.py +0 -5
  786. synth_ai/v0/lm/core/main.py +0 -331
  787. synth_ai/v0/lm/core/main_v3.py +0 -594
  788. synth_ai/v0/lm/core/synth_models.py +0 -35
  789. synth_ai/v0/lm/core/vendor_clients.py +0 -190
  790. synth_ai/v0/lm/cost/__init__.py +0 -0
  791. synth_ai/v0/lm/cost/monitor.py +0 -1
  792. synth_ai/v0/lm/cost/statefulness.py +0 -1
  793. synth_ai/v0/lm/injection.py +0 -80
  794. synth_ai/v0/lm/overrides.py +0 -206
  795. synth_ai/v0/lm/provider_support/__init__.py +0 -8
  796. synth_ai/v0/lm/provider_support/anthropic.py +0 -972
  797. synth_ai/v0/lm/provider_support/openai.py +0 -1139
  798. synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
  799. synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
  800. synth_ai/v0/lm/structured_outputs/handler.py +0 -440
  801. synth_ai/v0/lm/structured_outputs/inject.py +0 -297
  802. synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
  803. synth_ai/v0/lm/tools/__init__.py +0 -3
  804. synth_ai/v0/lm/tools/base.py +0 -172
  805. synth_ai/v0/lm/unified_interface.py +0 -202
  806. synth_ai/v0/lm/vendors/__init__.py +0 -0
  807. synth_ai/v0/lm/vendors/base.py +0 -81
  808. synth_ai/v0/lm/vendors/core/__init__.py +0 -0
  809. synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
  810. synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
  811. synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
  812. synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
  813. synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
  814. synth_ai/v0/lm/vendors/local/__init__.py +0 -0
  815. synth_ai/v0/lm/vendors/local/ollama.py +0 -0
  816. synth_ai/v0/lm/vendors/openai_standard.py +0 -782
  817. synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
  818. synth_ai/v0/lm/vendors/retries.py +0 -22
  819. synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
  820. synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
  821. synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
  822. synth_ai/v0/lm/vendors/supported/grok.py +0 -75
  823. synth_ai/v0/lm/vendors/supported/groq.py +0 -16
  824. synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
  825. synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
  826. synth_ai/v0/lm/vendors/supported/together.py +0 -11
  827. synth_ai/v0/lm/vendors/synth_client.py +0 -835
  828. synth_ai/v0/lm/warmup.py +0 -186
  829. synth_ai/v0/tracing/__init__.py +0 -0
  830. synth_ai/v0/tracing/abstractions.py +0 -224
  831. synth_ai/v0/tracing/base_client.py +0 -91
  832. synth_ai/v0/tracing/client_manager.py +0 -131
  833. synth_ai/v0/tracing/config.py +0 -142
  834. synth_ai/v0/tracing/context.py +0 -146
  835. synth_ai/v0/tracing/decorators.py +0 -682
  836. synth_ai/v0/tracing/events/__init__.py +0 -0
  837. synth_ai/v0/tracing/events/manage.py +0 -147
  838. synth_ai/v0/tracing/events/scope.py +0 -86
  839. synth_ai/v0/tracing/events/store.py +0 -228
  840. synth_ai/v0/tracing/immediate_client.py +0 -151
  841. synth_ai/v0/tracing/local.py +0 -18
  842. synth_ai/v0/tracing/log_client_base.py +0 -73
  843. synth_ai/v0/tracing/retry_queue.py +0 -186
  844. synth_ai/v0/tracing/trackers.py +0 -515
  845. synth_ai/v0/tracing/upload.py +0 -409
  846. synth_ai/v0/tracing/utils.py +0 -9
  847. synth_ai/v0/tracing_v1/__init__.py +0 -16
  848. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  849. synth_ai/v0/tracing_v1/base_client.py +0 -91
  850. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  851. synth_ai/v0/tracing_v1/config.py +0 -142
  852. synth_ai/v0/tracing_v1/context.py +0 -146
  853. synth_ai/v0/tracing_v1/decorators.py +0 -703
  854. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  855. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  856. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  857. synth_ai/v0/tracing_v1/events/store.py +0 -228
  858. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  859. synth_ai/v0/tracing_v1/local.py +0 -18
  860. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  861. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  862. synth_ai/v0/tracing_v1/trackers.py +0 -515
  863. synth_ai/v0/tracing_v1/upload.py +0 -527
  864. synth_ai/v0/tracing_v1/utils.py +0 -9
  865. synth_ai/v0/tracing_v3/__init__.py +0 -10
  866. synth_ai/v0/tracing_v3/abstractions.py +0 -3
  867. synth_ai/v0/tracing_v3/decorators.py +0 -3
  868. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
  869. synth_ai/v0/tracing_v3/session_tracer.py +0 -3
  870. synth_ai-0.2.9.dev11.dist-info/METADATA +0 -191
  871. synth_ai-0.2.9.dev11.dist-info/RECORD +0 -571
  872. synth_ai-0.2.9.dev11.dist-info/entry_points.txt +0 -3
  873. synth_ai-0.2.9.dev11.dist-info/top_level.txt +0 -3
  874. /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
  875. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
  876. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
  877. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
  878. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  879. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
  880. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
  881. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/config.toml +0 -0
  882. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
  883. /synth_ai/{v0/lm/caching → core/apps}/__init__.py +0 -0
  884. /synth_ai/{tracing_v3 → core/tracing_v3}/abstractions.py +0 -0
  885. /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
  886. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  887. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  888. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  889. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  890. /synth_ai/{compound/cais.py → py.typed} +0 -0
  891. /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
  892. /synth_ai/{learning → sdk/learning}/config.py +0 -0
  893. /synth_ai/{learning → sdk/learning}/constants.py +0 -0
  894. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  895. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  896. /synth_ai/{learning → sdk/learning}/rl/__init__.py +0 -0
  897. /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
  898. /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
  899. /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
  900. /synth_ai/{learning → sdk/learning}/sse.py +0 -0
  901. /synth_ai/{task → sdk/task}/auth.py +0 -0
  902. /synth_ai/{task → sdk/task}/client.py +0 -0
  903. /synth_ai/{task → sdk/task}/errors.py +0 -0
  904. /synth_ai/{task → sdk/task}/health.py +0 -0
  905. /synth_ai/{task → sdk/task}/json.py +0 -0
  906. /synth_ai/{task → sdk/task}/rubrics.py +0 -0
  907. /synth_ai/{task → sdk/task}/vendors.py +0 -0
  908. {synth_ai-0.2.9.dev11.dist-info → synth_ai-0.4.1.dist-info}/WHEEL +0 -0
  909. {synth_ai-0.2.9.dev11.dist-info → synth_ai-0.4.1.dist-info}/licenses/LICENSE +0 -0
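The move entries at the end of the listing amount to a package-level restructuring: synth_ai.learning and synth_ai.task now live under synth_ai.sdk, and tracing_v3 moves under synth_ai.core. Below is a minimal before/after sketch of the import paths, assuming 0.4.1 leaves no compatibility re-exports at the old locations (this listing alone does not show whether it does):

# synth-ai 0.2.9.dev11 (old layout); these paths no longer exist in 0.4.1
# import synth_ai.learning.sft.config
# import synth_ai.task.server
# import synth_ai.tracing_v3.session_tracer

# synth-ai 0.4.1 (new layout), per the moves listed above
import synth_ai.sdk.learning.sft.config
import synth_ai.sdk.task.server
import synth_ai.core.tracing_v3.session_tracer

The same pattern applies to the other relocated subpackages, e.g. synth_ai/api/train moving to synth_ai/sdk/api/train and synth_ai/demos moving to synth_ai/cli/demo_apps.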
synth_ai/sdk/streaming/handlers.py
@@ -0,0 +1,1997 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import json
5
+ import re
6
+ import time
7
+ from abc import ABC, abstractmethod
8
+ from collections import deque
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import Any, Callable
12
+
13
+ import click
14
+
15
+ from .types import StreamMessage, StreamType
16
+
17
+
18
+ def _mask_sensitive_urls(text: str) -> str:
19
+ """Mask S3/Wasabi URLs and sensitive paths in log messages.
20
+
21
+ Replaces full S3/Wasabi URLs with masked versions to prevent leaking
22
+ bucket names, paths, and infrastructure details in public SDK logs.
23
+
24
+ Examples:
25
+ s3://synth-artifacts/models/... -> s3://***/***/[masked]
26
+ Wasabi s3://bucket/path/file.tar.gz -> Wasabi s3://***/***/[masked]
27
+ """
28
+ if not text:
29
+ return text
30
+
31
+ # Pattern matches:
32
+ # - Optional "Wasabi " prefix
33
+ # - s3:// or http(s):// scheme
34
+ # - Any bucket/host
35
+ # - Any path
36
+ # - Common model file extensions
37
+ pattern = r'(Wasabi\s+)?((s3|https?)://[^\s]+\.(tar\.gz|zip|pt|pth|safetensors|ckpt|bin))'
38
+
39
+ def replace_url(match: re.Match) -> str:
40
+ prefix = match.group(1) or "" # "Wasabi " or empty
41
+ url = match.group(2)
42
+ # Extract just the filename
43
+ filename = url.split("/")[-1] if "/" in url else "file"
44
+ return f'{prefix}s3://***/***/[{filename}]'
45
+
46
+ return re.sub(pattern, replace_url, text, flags=re.IGNORECASE)
47
+
48
+
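A quick sanity check of the masking behaviour, as a hedged sketch (the log line and bucket path below are invented for illustration):

```python
# Illustrative only: the input line is a made-up example.
line = "Uploading checkpoint to Wasabi s3://my-bucket/models/run42/model.safetensors"
print(_mask_sensitive_urls(line))
# -> Uploading checkpoint to Wasabi s3://***/***/[model.safetensors]
```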
49
+ class StreamHandler(ABC):
50
+ """Base class for log handlers that consume ``StreamMessage`` objects."""
51
+
52
+ @abstractmethod
53
+ def handle(self, message: StreamMessage) -> None:
54
+ """Process a message produced by the streamer."""
55
+
56
+ def should_handle(self, message: StreamMessage) -> bool: # pragma: no cover - trivial
57
+ """Predicate allowing handlers to filter messages before processing."""
58
+ return True
59
+
60
+ def flush(self) -> None: # pragma: no cover - optional
61
+ """Flush buffered output."""
62
+ return None
63
+
64
+
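For readers extending the streamer, a minimal sketch of a custom handler, assuming only the `stream_type` and `data` attributes used throughout this module:

```python
# Minimal custom handler sketch: count messages per stream type.
class CountingHandler(StreamHandler):
    def __init__(self) -> None:
        self.counts: dict[str, int] = {}

    def handle(self, message: StreamMessage) -> None:
        key = message.stream_type.name
        self.counts[key] = self.counts.get(key, 0) + 1
```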
65
+ class CLIHandler(StreamHandler):
66
+ """Simple CLI output mirroring current poller behaviour."""
67
+
68
+ def __init__(
69
+ self,
70
+ *,
71
+ hidden_event_types: set[str] | None = None,
72
+ hidden_event_substrings: set[str] | None = None,
73
+ ) -> None:
74
+ self._hidden_event_types = set(hidden_event_types or set())
75
+ self._hidden_event_substrings = {s.lower() for s in (hidden_event_substrings or set())}
76
+
77
+ def handle(self, message: StreamMessage) -> None:
78
+ if not self.should_handle(message):
79
+ return
80
+
81
+ timestamp = datetime.now().strftime("%H:%M:%S")
82
+ if message.stream_type is StreamType.STATUS:
83
+ status = str(message.data.get("status") or message.data.get("state") or "unknown")
84
+ click.echo(f"[{timestamp}] status={status}")
85
+ return
86
+
87
+ if message.stream_type is StreamType.EVENTS:
88
+ event_type = message.data.get("type", "event")
89
+ if event_type in self._hidden_event_types:
90
+ return
91
+ level = message.data.get("level")
92
+ msg = message.data.get("message") or ""
93
+ # Evaluate substring filters against lower-cased concatenated text
94
+ if self._hidden_event_substrings:
95
+ blob = " ".join(
96
+ [
97
+ event_type or "",
98
+ str(msg),
99
+ json.dumps(message.data.get("data", "")),
100
+ ]
101
+ ).lower()
102
+ if any(sub in blob for sub in self._hidden_event_substrings):
103
+ return
104
+ prefix = f"[{timestamp}] [{message.seq}] {event_type}"
105
+ if level:
106
+ prefix += f" ({level})"
107
+ # Mask sensitive URLs before displaying
108
+ sanitized_msg = _mask_sensitive_urls(msg)
109
+
110
+ # For error events, show full details including underlying errors
111
+ if level == "error" or event_type.endswith(".failed"):
112
+ click.echo(f"{prefix}: {sanitized_msg}")
113
+ # Show error details from data field if available
114
+ data = message.data.get("data", {})
115
+ if isinstance(data, dict):
116
+ error_detail = data.get("detail") or data.get("error") or data.get("error_detail")
117
+ if error_detail and str(error_detail) != sanitized_msg:
118
+ # Show underlying error if different from main message
119
+ click.echo(f" Error details: {error_detail}")
120
+ # Show traceback or stack if available
121
+ traceback_info = data.get("traceback") or data.get("stack")
122
+ if traceback_info:
123
+ lines = str(traceback_info).split("\n")
124
+ # Show last few lines of traceback (most relevant)
125
+ for line in lines[-5:]:
126
+ if line.strip():
127
+ click.echo(f" {line}")
128
+ else:
129
+ click.echo(f"{prefix}: {sanitized_msg}".rstrip(": "))
130
+
131
+ data = message.data.get("data") if isinstance(message.data.get("data"), dict) else {}
132
+ if event_type == "prompt.learning.mipro.complete" and data:
133
+ best_prompt = data.get("best_prompt")
134
+ if isinstance(best_prompt, dict):
135
+ sections = best_prompt.get("sections")
136
+ if isinstance(sections, list) and sections:
137
+ click.echo(" --- BEST PROMPT ---")
138
+ for section in sections:
139
+ if not isinstance(section, dict):
140
+ continue
141
+ role = section.get("role", "unknown").upper()
142
+ name = section.get("name")
143
+ header = f" [{role}]"
144
+ if name:
145
+ header += f" {name}"
146
+ click.echo(header)
147
+ content = section.get("content", "")
148
+ if isinstance(content, str) and content:
149
+ click.echo(f" {content}")
150
+ click.echo(" -------------------")
151
+
152
+ if event_type == "mipro.topk.evaluated" and data:
153
+ rank = data.get("rank")
154
+ train_score = data.get("train_score")
155
+ test_score = data.get("test_score")
156
+ instruction_text = data.get("instruction_text", "")
157
+ demo_indices = data.get("demo_indices", [])
158
+ lift_abs = data.get("lift_absolute")
159
+ lift_pct = data.get("lift_percent")
160
+ stage_payloads = data.get("stage_payloads", {})
161
+ details: list[str] = []
162
+ if rank is not None:
163
+ details.append(f"Rank {rank}")
164
+ if isinstance(train_score, int | float):
165
+ train_score_float = float(train_score)
166
+ details.append(f"train={train_score_float:.3f} ({train_score_float*100:.1f}%)")
167
+ if isinstance(test_score, int | float):
168
+ test_score_float = float(test_score)
169
+ details.append(f"test={test_score_float:.3f} ({test_score_float*100:.1f}%)")
170
+ if isinstance(lift_abs, int | float) and isinstance(lift_pct, int | float):
171
+ details.append(f"lift={lift_abs:+.3f} ({lift_pct:+.1f}%)")
172
+ if details:
173
+ click.echo(" --- TOP-K CANDIDATE ---")
174
+ click.echo(f" {' | '.join(details)}")
175
+ if isinstance(instruction_text, str) and instruction_text.strip():
176
+ snippet = instruction_text.strip()
177
+ click.echo(f" Instruction: {snippet}")
178
+ if isinstance(demo_indices, list) and demo_indices:
179
+ click.echo(f" Demo indices: {demo_indices}")
180
+
181
+ # Display per-stage information if available
182
+ if isinstance(stage_payloads, dict) and stage_payloads:
183
+ click.echo(" Per-stage breakdown:")
184
+ for stage_id, payload in stage_payloads.items():
185
+ if isinstance(payload, dict):
186
+ module_id = payload.get("module_id", stage_id)
187
+ instr_ids = payload.get("instruction_indices", [])
188
+ demo_ids = payload.get("demo_indices", [])
189
+ click.echo(f" [{module_id}/{stage_id}] instr_ids={instr_ids} demo_ids={demo_ids}")
190
+
191
+ seed_scores = data.get("test_seed_scores")
192
+ if isinstance(seed_scores, list) and seed_scores:
193
+ formatted_scores = ", ".join(
194
+ f"{item.get('seed')}: {item.get('score'):.2f}"
195
+ for item in seed_scores
196
+ if isinstance(item, dict) and isinstance(item.get("seed"), int) and isinstance(item.get("score"), int | float)
197
+ )
198
+ if formatted_scores:
199
+ click.echo(f" Test per-seed: {formatted_scores}")
200
+ click.echo(" ----------------------")
201
+ return
202
+
203
+ if message.stream_type is StreamType.METRICS:
204
+ name = message.data.get("name")
205
+ value = message.data.get("value")
206
+ step = message.data.get("step")
207
+ data = message.data.get("data", {})
208
+
209
+ # Format metric display
210
+ metric_str = f"[{timestamp}] [metric] {name}={value:.4f}" if isinstance(value, int | float) else f"[{timestamp}] [metric] {name}={value}"
211
+ if step is not None:
212
+ metric_str += f" (step={step})"
213
+
214
+ # Add any additional context from data field
215
+ if isinstance(data, dict):
216
+ n = data.get("n")
217
+ if n is not None:
218
+ metric_str += f" n={n}"
219
+
220
+ click.echo(metric_str)
221
+ return
222
+
223
+ if message.stream_type is StreamType.TIMELINE:
224
+ phase = message.data.get("phase", "phase")
225
+ click.echo(f"[{timestamp}] timeline={phase}")
226
+
227
+
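A hedged usage sketch for the constructor options above; the substring filter is a placeholder:

```python
# Hide a noisy event type and anything mentioning "heartbeat" (placeholder filters).
handler = CLIHandler(
    hidden_event_types={"mipro.tpe.update"},
    hidden_event_substrings={"heartbeat"},
)
# A poller/streamer would then call handler.handle(message) for each StreamMessage.
```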
228
+ class JSONHandler(StreamHandler):
229
+ """Emit messages as JSON lines suitable for machine parsing."""
230
+
231
+ def __init__(self, output_file: str | None = None, *, indent: int | None = None) -> None:
232
+ self.output_file = Path(output_file).expanduser() if output_file else None
233
+ self._indent = indent
234
+
235
+ def handle(self, message: StreamMessage) -> None:
236
+ if not self.should_handle(message):
237
+ return
238
+
239
+ payload: dict[str, Any] = {
240
+ "stream_type": message.stream_type.name,
241
+ "timestamp": message.timestamp,
242
+ "job_id": message.job_id,
243
+ "data": message.data,
244
+ }
245
+ if message.seq is not None:
246
+ payload["seq"] = message.seq
247
+ if message.step is not None:
248
+ payload["step"] = message.step
249
+ if message.phase is not None:
250
+ payload["phase"] = message.phase
251
+
252
+ line = json.dumps(payload, indent=self._indent)
253
+ if self.output_file:
254
+ with self.output_file.open("a", encoding="utf-8") as fh:
255
+ fh.write(line)
256
+ if self._indent is None:
257
+ fh.write("\n")
258
+ else:
259
+ click.echo(line)
260
+
261
+ def flush(self) -> None:
262
+ return None
263
+
264
+
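A short sketch of the two output modes, assuming the target path is writable:

```python
# Append one JSON object per line to a file (JSONL), suitable for later parsing.
file_handler = JSONHandler(output_file="~/.synth/stream.jsonl")

# Or pretty-print each payload to stdout instead.
stdout_handler = JSONHandler(indent=2)
```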
265
+ class CallbackHandler(StreamHandler):
266
+ """Invoke user-provided callbacks for specific stream types."""
267
+
268
+ def __init__(
269
+ self,
270
+ *,
271
+ on_status: Callable[[dict[str, Any]], None] | None = None,
272
+ on_event: Callable[[dict[str, Any]], None] | None = None,
273
+ on_metric: Callable[[dict[str, Any]], None] | None = None,
274
+ on_timeline: Callable[[dict[str, Any]], None] | None = None,
275
+ ) -> None:
276
+ self._on_status = on_status
277
+ self._on_event = on_event
278
+ self._on_metric = on_metric
279
+ self._on_timeline = on_timeline
280
+
281
+ def handle(self, message: StreamMessage) -> None:
282
+ if not self.should_handle(message):
283
+ return
284
+
285
+ if message.stream_type is StreamType.STATUS and self._on_status:
286
+ self._on_status(message.data)
287
+ elif message.stream_type is StreamType.EVENTS and self._on_event:
288
+ self._on_event(message.data)
289
+ elif message.stream_type is StreamType.METRICS and self._on_metric:
290
+ self._on_metric(message.data)
291
+ elif message.stream_type is StreamType.TIMELINE and self._on_timeline:
292
+ self._on_timeline(message.data)
293
+
294
+
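A hedged sketch showing how callbacks can collect metrics without any console output:

```python
# Collect metric payloads in memory; print status changes as they arrive.
metrics: list[dict[str, Any]] = []
callbacks = CallbackHandler(
    on_metric=metrics.append,
    on_status=lambda data: print("status:", data.get("status")),
)
```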
295
+ class BufferedHandler(StreamHandler):
296
+ """Collect messages and emit them in batches."""
297
+
298
+ def __init__(self, *, flush_interval: float = 5.0, max_buffer_size: int = 100) -> None:
299
+ self.flush_interval = flush_interval
300
+ self.max_buffer_size = max_buffer_size
301
+ self._buffer: list[StreamMessage] = []
302
+ self._last_flush = time.time()
303
+
304
+ def handle(self, message: StreamMessage) -> None:
305
+ if not self.should_handle(message):
306
+ return
307
+
308
+ self._buffer.append(message)
309
+ now = time.time()
310
+ if len(self._buffer) >= self.max_buffer_size or now - self._last_flush >= self.flush_interval:
311
+ self.flush()
312
+
313
+ def flush(self) -> None:
314
+ if not self._buffer:
315
+ return
316
+ self.process_batch(self._buffer)
317
+ self._buffer.clear()
318
+ self._last_flush = time.time()
319
+
320
+ def process_batch(self, messages: list[StreamMessage]) -> None: # pragma: no cover - abstract
321
+ """Override to define how buffered messages should be processed."""
322
+
323
+
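Since `process_batch` is the extension point, a minimal subclass sketch:

```python
# Flush batches to the console every 100 messages or 5 seconds (the defaults above).
class BatchPrintHandler(BufferedHandler):
    def process_batch(self, messages: list[StreamMessage]) -> None:
        print(f"flushing {len(messages)} buffered messages")
```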
324
+ class IntegrationTestHandler(StreamHandler):
325
+ """Collect messages for integration tests or programmatic assertions."""
326
+
327
+ def __init__(self) -> None:
328
+ self.messages: list[StreamMessage] = []
329
+
330
+ def handle(self, message: StreamMessage) -> None:
331
+ self.messages.append(message)
332
+
333
+ def clear(self) -> None:
334
+ self.messages.clear()
335
+
336
+
337
+ class GraphGenHandler(StreamHandler):
338
+ """Handler for ADAS jobs that delegate child job streams to an underlying handler.
339
+
340
+ ADAS jobs emit events from child jobs (GEPA, MIPRO, RL, SFT, etc.). This handler
341
+ provides light ADAS-aware filtering and routing while keeping child job output
342
+ intact via a delegate handler. The delegate can be supplied directly or created
343
+ via a factory; by default we choose a prompt-learning handler for GEPA/MIPRO and
344
+ a basic CLI handler for other job types.
345
+ """
346
+
347
+ def __init__(
348
+ self,
349
+ *,
350
+ child_handler: StreamHandler | None = None,
351
+ child_handler_factory: Callable[[str | None], StreamHandler | None] | None = None,
352
+ show_trial_results: bool = True,
353
+ show_transformations: bool = False,
354
+ show_validation: bool = True,
355
+ filter_verbose_events: bool = True,
356
+ wrap_child_events: bool = True,
357
+ ) -> None:
358
+ # User-supplied delegate or factory; both are optional.
359
+ self.child_handler = child_handler
360
+ self._child_handler_factory = child_handler_factory
361
+
362
+ # Options for the default prompt-learning delegate
363
+ self._pl_show_trial_results = show_trial_results
364
+ self._pl_show_transformations = show_transformations
365
+ self._pl_show_validation = show_validation
366
+
367
+ self.filter_verbose_events = filter_verbose_events
368
+ # If False, skip ADAS-specific filtering/transformations and just pass through.
369
+ self.wrap_child_events = wrap_child_events
370
+
371
+ # Detected child job type (gepa/mipro/rl/sft/etc.)
372
+ self.child_job_type: str | None = None
373
+ # Track whether we created the delegate automatically (so we can swap if needed)
374
+ self._delegate_auto_created = False
375
+
376
+ def handle(self, message: StreamMessage) -> None:
377
+ if not self.should_handle(message):
378
+ return
379
+
380
+ if message.stream_type is StreamType.EVENTS:
381
+ self._detect_child_job_type(message)
382
+ self._maybe_reset_delegate_for_child_type()
383
+
384
+ if self.wrap_child_events and self.filter_verbose_events:
385
+ if self._should_filter_event(message):
386
+ return
387
+
388
+ if self.wrap_child_events:
389
+ message = self._transform_event_message(message)
390
+
391
+ delegate = self._get_child_handler()
392
+ if delegate:
393
+ delegate.handle(message)
394
+
395
+ def _get_child_handler(self) -> StreamHandler:
396
+ """Return or create the delegate handler used for child job events."""
397
+ if self.child_handler:
398
+ return self.child_handler
399
+
400
+ handler: StreamHandler | None = None
401
+ if self._child_handler_factory:
402
+ handler = self._child_handler_factory(self.child_job_type)
403
+
404
+ if handler is None:
405
+ # Choose a sensible default based on detected child job type
406
+ if self._is_prompt_learning_type(self.child_job_type):
407
+ handler = PromptLearningHandler(
408
+ show_trial_results=self._pl_show_trial_results,
409
+ show_transformations=self._pl_show_transformations,
410
+ show_validation=self._pl_show_validation,
411
+ )
412
+ else:
413
+ handler = CLIHandler()
414
+
415
+ self.child_handler = handler
416
+ self._delegate_auto_created = self._child_handler_factory is None and self.child_handler is not None
417
+ return handler
418
+
419
+ def _detect_child_job_type(self, message: StreamMessage) -> None:
420
+ """Infer the child job type from event types."""
421
+ if self.child_job_type:
422
+ return
423
+
424
+ event_type = str(message.data.get("type") or "").lower()
425
+ if not event_type:
426
+ return
427
+
428
+ if event_type.startswith("graph_evolve."):
429
+ self.child_job_type = "graph_evolve"
430
+ elif "mipro" in event_type:
431
+ self.child_job_type = "mipro"
432
+ elif "gepa" in event_type or event_type.startswith("prompt.learning"):
433
+ self.child_job_type = "prompt_learning"
434
+ elif event_type.startswith("rl.") or ".rl." in event_type:
435
+ self.child_job_type = "rl"
436
+ elif event_type.startswith("sft.") or ".sft." in event_type:
437
+ self.child_job_type = "sft"
438
+ else:
439
+ # Fall back to the first segment as a hint (e.g., "adas.child_type")
440
+ parts = event_type.split(".")
441
+ if parts:
442
+ self.child_job_type = parts[0]
443
+
444
+ def _maybe_reset_delegate_for_child_type(self) -> None:
445
+ """Swap out auto-created delegates when we later detect a different child type."""
446
+ if not self.child_handler or not self._delegate_auto_created:
447
+ return
448
+
449
+ # If the detected type does not match the current delegate choice, rebuild.
450
+ wants_prompt_learning = self._is_prompt_learning_type(self.child_job_type)
451
+ has_prompt_learning_handler = isinstance(self.child_handler, PromptLearningHandler)
452
+
453
+ if wants_prompt_learning and not has_prompt_learning_handler:
454
+ self.child_handler = None
455
+ self._delegate_auto_created = False
456
+ elif not wants_prompt_learning and has_prompt_learning_handler:
457
+ self.child_handler = None
458
+ self._delegate_auto_created = False
459
+
460
+ def _should_filter_event(self, message: StreamMessage) -> bool:
461
+ """Determine if an event should be hidden from output."""
462
+ event_type = message.data.get("type", "") or ""
463
+ event_type_lower = event_type.lower()
464
+
465
+ # Never filter graph_evolve events - they're important for GraphGen jobs
466
+ if event_type.startswith("graph_evolve."):
467
+ return False
468
+
469
+ # Only filter prompt-learning style events; leave other job types untouched.
470
+ if not any(key in event_type_lower for key in ("prompt.learning", "gepa", "mipro")):
471
+ return False
472
+
473
+ important_events = {
474
+ "prompt.learning.created",
475
+ "prompt.learning.gepa.start",
476
+ "prompt.learning.gepa.complete",
477
+ "prompt.learning.mipro.job.started",
478
+ "prompt.learning.mipro.optimization.exhausted",
479
+ "prompt.learning.trial.results",
480
+ "prompt.learning.progress",
481
+ "prompt.learning.gepa.new_best",
482
+ "prompt.learning.validation.summary",
483
+ "prompt.learning.candidate.evaluated",
484
+ "prompt.learning.candidate.evaluation.started",
485
+ # GraphGen/graph_evolve important events
486
+ "graph_evolve.job_started",
487
+ "graph_evolve.generation_started",
488
+ "graph_evolve.generation_completed",
489
+ "graph_evolve.candidate_evaluated",
490
+ "graph_evolve.archive_updated",
491
+ "graph_evolve.job_completed",
492
+ "graph_evolve.job_failed",
493
+ }
494
+ if event_type in important_events:
495
+ return False
496
+
497
+ verbose_patterns = [
498
+ "gepa.transformation.proposed",
499
+ "gepa.proposal.scored",
500
+ "prompt.learning.proposal.scored",
501
+ "mipro.tpe.update",
502
+ "prompt.learning.stream.connected",
503
+ ]
504
+ return any(pattern in event_type_lower for pattern in verbose_patterns)
505
+
506
+ def _transform_event_message(self, message: StreamMessage) -> StreamMessage:
507
+ """Transform event messages for ADAS context (currently passthrough)."""
508
+ return message
509
+
510
+ def flush(self) -> None:
511
+ # Ensure delegate flushes buffered output if needed.
512
+ if self.child_handler and hasattr(self.child_handler, "flush"):
513
+ with contextlib.suppress(Exception):
514
+ self.child_handler.flush()
515
+
516
+ @staticmethod
517
+ def _is_prompt_learning_type(job_type: str | None) -> bool:
518
+ """Return True if the child job type should use prompt-learning formatting."""
519
+ return job_type in {"gepa", "mipro", "prompt_learning", "prompt-learning", None}
520
+
521
+
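A hedged sketch of supplying a factory so child jobs get a type-specific delegate; the routing below is an assumption about how a caller might want to branch on type:

```python
# Route RL child jobs to the plain CLI handler; return None to fall back to the
# GraphGenHandler defaults (prompt-learning handler for GEPA/MIPRO, CLI otherwise).
def child_factory(child_type: str | None) -> StreamHandler | None:
    if child_type == "rl":
        return CLIHandler()
    return None

adas_handler = GraphGenHandler(child_handler_factory=child_factory)
```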
522
+ class LossCurveHandler(StreamHandler):
523
+ """Render a live-updating loss chart inside a fixed Rich panel."""
524
+
525
+ def __init__(
526
+ self,
527
+ *,
528
+ metric_name: str = "train.loss",
529
+ max_points: int = 200,
530
+ width: int = 60,
531
+ console: Any | None = None,
532
+ live: Any | None = None,
533
+ ) -> None:
534
+ try:
535
+ from rich.console import Console
536
+ from rich.live import Live
537
+ from rich.panel import Panel
538
+ from rich.text import Text
539
+ except ImportError as exc: # pragma: no cover - optional dependency guard
540
+ raise RuntimeError(
541
+ "LossCurveHandler requires the 'rich' package. Install synth-ai[all] or rich>=13."
542
+ ) from exc
543
+
544
+ self.metric_name = metric_name
545
+ self.max_points = max_points
546
+ self.width = width
547
+
548
+ self._console_class = Console
549
+ self._panel_class = Panel
550
+ self._text_class = Text
551
+
552
+ self._console = console or Console()
553
+ self._live = live or Live(console=self._console, transient=False, refresh_per_second=8)
554
+ self._started = False
555
+
556
+ self._steps: list[int] = []
557
+ self._values: list[float] = []
558
+ self._status = "waiting"
559
+ self._last_event: str | None = None
560
+
561
+ def handle(self, message: StreamMessage) -> None:
562
+ updated = False
563
+
564
+ if message.stream_type is StreamType.STATUS:
565
+ status = str(message.data.get("status") or message.data.get("state") or "unknown")
566
+ if status != self._status:
567
+ self._status = status
568
+ updated = True
569
+
570
+ elif message.stream_type is StreamType.EVENTS:
571
+ event_type = message.data.get("type", "")
572
+ msg = message.data.get("message") or ""
573
+ level = message.data.get("level")
574
+ summary = f"{event_type}".strip()
575
+ if level:
576
+ summary += f" ({level})"
577
+ if msg:
578
+ summary += f": {msg}"
579
+ if summary != self._last_event:
580
+ self._last_event = summary
581
+ updated = True
582
+
583
+ elif message.stream_type is StreamType.METRICS:
584
+ if message.data.get("name") != self.metric_name:
585
+ return
586
+ value = message.data.get("value")
587
+ step = message.data.get("step")
588
+ if not isinstance(value, int | float) or not isinstance(step, int):
589
+ return
590
+ self._values.append(float(value))
591
+ self._steps.append(step)
592
+ if len(self._values) > self.max_points:
593
+ self._values = self._values[-self.max_points :]
594
+ self._steps = self._steps[-self.max_points :]
595
+ updated = True
596
+
597
+ elif message.stream_type is StreamType.TIMELINE:
598
+ phase = message.data.get("phase")
599
+ if phase:
600
+ self._status = str(phase)
601
+ updated = True
602
+
603
+ if updated:
604
+ self._refresh()
605
+
606
+ def flush(self) -> None:
607
+ if self._started:
608
+ with contextlib.suppress(Exception):
609
+ self._live.stop()
610
+ self._started = False
611
+
612
+ def _ensure_live(self) -> None:
613
+ if not self._started:
614
+ with contextlib.suppress(Exception):
615
+ self._live.start()
616
+ self._started = True
617
+
618
+ def _refresh(self) -> None:
619
+ self._ensure_live()
620
+ body = self._build_body()
621
+ title = f"{self.metric_name} | status={self._status}"
622
+ self._live.update(self._panel_class(body, title=title, border_style="cyan"))
623
+
624
+ def _build_body(self) -> Any:
625
+ if not self._values:
626
+ return self._text_class("Waiting for metrics…", style="yellow")
627
+
628
+ chart = self._render_sparkline()
629
+ last_value = self._values[-1]
630
+ lines = [
631
+ chart,
632
+ f"latest: {last_value:.4f} (step {self._steps[-1]})",
633
+ ]
634
+ if self._last_event:
635
+ lines.append(f"event: {self._last_event}")
636
+ return "\n".join(lines)
637
+
638
+ def _render_sparkline(self) -> str:
639
+ blocks = "▁▂▃▄▅▆▇█"
640
+ tail_len = min(self.width, len(self._values))
641
+ tail = self._values[-tail_len:]
642
+ minimum = min(tail)
643
+ maximum = max(tail)
644
+ if maximum == minimum:
645
+ level = blocks[0]
646
+ return f"{minimum:.2f} {level * tail_len} {maximum:.2f}"
647
+ scale = (len(blocks) - 1) / (maximum - minimum)
648
+ chars = "".join(blocks[int((v - minimum) * scale + 0.5)] for v in tail)
649
+ return f"{minimum:.2f} {chars} {maximum:.2f}"
650
+
651
+ def __del__(self) -> None: # pragma: no cover - defensive cleanup
652
+ with contextlib.suppress(Exception):
653
+ self.flush()
654
+
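A hedged usage sketch; this requires the optional `rich` dependency, and the metric name must match whatever the stream emits:

```python
# Track eval loss with a wider sparkline; flush() stops the live panel when done.
panel = LossCurveHandler(metric_name="eval.loss", width=80)
# ... feed StreamMessage objects via panel.handle(message) ...
panel.flush()
```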
655
+ class RichHandler(StreamHandler):
656
+ """Rich powered handler with live progress and metrics table."""
657
+
658
+ def __init__(
659
+ self,
660
+ *,
661
+ event_log_size: int = 20,
662
+ console: Any | None = None,
663
+ ) -> None:
664
+ try:
665
+ from rich.console import Console
666
+ from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn
667
+ from rich.table import Table
668
+ except ImportError as exc: # pragma: no cover - requires optional dependency
669
+ raise RuntimeError(
670
+ "RichHandler requires the 'rich' package. Install synth-ai[all] or rich>=13."
671
+ ) from exc
672
+
673
+ self._console_class = Console
674
+ self._progress_class = Progress
675
+ self._spinner_column = SpinnerColumn
676
+ self._text_column = TextColumn
677
+ self._bar_column = BarColumn
678
+ self._table_class = Table
679
+
680
+ self._console = console or Console()
681
+ self._progress = Progress(
682
+ SpinnerColumn(),
683
+ TextColumn("[progress.description]{task.description}"),
684
+ BarColumn(),
685
+ TextColumn("{task.completed}/{task.total}" if console else ""),
686
+ TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
687
+ transient=False,
688
+ console=self._console,
689
+ )
690
+ self._task_id: int | None = None
691
+ self._current_status = "unknown"
692
+ self._latest_metrics: dict[str, Any] = {}
693
+ self._event_log: deque[str] = deque(maxlen=event_log_size)
694
+ self._progress_started = False
695
+
696
+ def handle(self, message: StreamMessage) -> None:
697
+ if not self.should_handle(message):
698
+ return
699
+
700
+ if message.stream_type is StreamType.STATUS:
701
+ self._current_status = str(message.data.get("status") or message.data.get("state"))
702
+ self._ensure_progress_started()
703
+ if self._task_id is not None:
704
+ description = f"Status: {self._current_status}"
705
+ self._progress.update(self._task_id, description=description) # type: ignore[arg-type]
706
+ self._render_summary()
707
+ return
708
+
709
+ if message.stream_type is StreamType.EVENTS:
710
+ event_type = message.data.get("type", "event")
711
+ summary = message.data.get("message") or ""
712
+ level = message.data.get("level")
713
+ # Mask sensitive URLs before displaying
714
+ sanitized_summary = _mask_sensitive_urls(summary)
715
+ formatted = f"[{event_type}] {sanitized_summary}".strip()
716
+ if level:
717
+ formatted = f"{formatted} ({level})"
718
+ self._event_log.append(formatted)
719
+ data = message.data.get("data") or {}
720
+ step = data.get("step") or data.get("current_step")
721
+ total_steps = data.get("total_steps") or data.get("max_steps")
722
+ if step and total_steps:
723
+ self._ensure_progress_started(total_steps)
724
+ if self._task_id is not None:
725
+ self._progress.update(self._task_id, completed=int(step), total=int(total_steps)) # type: ignore[arg-type]
726
+ self._render_summary()
727
+ return
728
+
729
+ if message.stream_type is StreamType.METRICS:
730
+ name = message.data.get("name", "")
731
+ value = message.data.get("value")
732
+ if name:
733
+ self._latest_metrics[name] = value
734
+ self._render_summary()
735
+ return
736
+
737
+ if message.stream_type is StreamType.TIMELINE:
738
+ phase = message.data.get("phase", "")
739
+ if phase and phase.lower() not in {"training", "running"}:
740
+ self._event_log.append(f"[timeline] {phase}")
741
+ self._render_summary()
742
+
743
+ def flush(self) -> None:
744
+ if self._progress_started:
745
+ self._progress.stop()
746
+ self._progress_started = False
747
+ self._render_summary(force=True)
748
+
749
+ def _ensure_progress_started(self, total: int | float | None = None) -> None:
750
+ if not self._progress_started:
751
+ self._progress.start()
752
+ self._progress_started = True
753
+ if self._task_id is None:
754
+ self._task_id = self._progress.add_task(
755
+ f"Status: {self._current_status}", total=total or 100
756
+ )
757
+ elif total is not None and self._task_id is not None:
758
+ self._progress.update(self._task_id, total=total) # type: ignore[arg-type]
759
+
760
+ def _render_summary(self, force: bool = False) -> None:
761
+ if force and self._progress_started:
762
+ self._progress.refresh()
763
+
764
+ table = self._table_class(title="Latest Metrics")
765
+ table.add_column("Metric")
766
+ table.add_column("Value")
767
+
768
+ if not self._latest_metrics:
769
+ table.add_row("—", "—")
770
+ else:
771
+ for name, value in sorted(self._latest_metrics.items()):
772
+ table.add_row(str(name), str(value))
773
+
774
+ if self._progress_started:
775
+ self._progress.console.print(table)
776
+ else:
777
+ self._console.print(table)
778
+
779
+ if self._event_log:
780
+ self._console.print("\nRecent events:")
781
+ for entry in list(self._event_log):
782
+ self._console.print(f" • {entry}")
783
+
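A hedged sketch of fanning messages out to both a Rich console view and a JSONL sink; the fan-out loop is the caller's responsibility:

```python
# Requires `rich`; messages come from whatever streamer drives these handlers.
handlers: list[StreamHandler] = [
    RichHandler(event_log_size=50),
    JSONHandler(output_file="run.jsonl"),
]
# for message in stream:
#     for h in handlers:
#         h.handle(message)
```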
784
+ class ContextLearningHandler(StreamHandler):
785
+ """CLI-friendly handler for Context Learning jobs.
786
+
787
+ Emits high-signal progress output similar to other infra job handlers,
788
+ specialized for generation-based bash context optimization.
789
+ """
790
+
791
+ def __init__(self) -> None:
792
+ self.best_score_so_far = 0.0
793
+ self.current_generation = 0
794
+
795
+ def handle(self, message: StreamMessage) -> None:
796
+ if not self.should_handle(message):
797
+ return
798
+
799
+ timestamp = datetime.now().strftime("%H:%M:%S")
800
+
801
+ if message.stream_type is StreamType.STATUS:
802
+ status = str(message.data.get("status") or message.data.get("state") or "unknown")
803
+ click.echo(f"[{timestamp}] status={status}")
804
+ return
805
+
806
+ if message.stream_type is StreamType.METRICS:
807
+ name = message.data.get("name")
808
+ value = message.data.get("value")
809
+ step = message.data.get("step")
810
+ if isinstance(value, int | float):
811
+ try:
812
+ val_f = float(value)
813
+ if val_f > self.best_score_so_far:
814
+ self.best_score_so_far = val_f
815
+ if isinstance(step, int):
816
+ self.current_generation = max(self.current_generation, step)
817
+ click.echo(f"[{timestamp}] gen={step} best={val_f:.3f}")
818
+ return
819
+ except Exception:
820
+ pass
821
+ click.echo(f"[{timestamp}] metric {name}={value}")
822
+ return
823
+
824
+ if message.stream_type is StreamType.EVENTS:
825
+ event_type = str(message.data.get("type") or "")
826
+ msg = message.data.get("message") or ""
827
+ data = message.data.get("data") or {}
828
+
829
+ if event_type == "context.learning.generation.completed":
830
+ gen = data.get("generation") or data.get("gen") or self.current_generation
831
+ score = data.get("best_score") or data.get("score") or self.best_score_so_far
832
+ try:
833
+ score_f = float(score)
834
+ if score_f > self.best_score_so_far:
835
+ self.best_score_so_far = score_f
836
+ click.echo(f"[{timestamp}] generation {gen} best={score_f:.3f}")
837
+ except Exception:
838
+ click.echo(f"[{timestamp}] generation {gen} completed")
839
+ return
840
+
841
+ if event_type.endswith(".failed"):
842
+ click.echo(f"[{timestamp}] {event_type}: {msg}")
843
+ return
844
+
845
+ if msg:
846
+ click.echo(f"[{timestamp}] {event_type}: {msg}")
847
+ else:
848
+ click.echo(f"[{timestamp}] {event_type}")
849
+
850
+
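For reference, a hedged sketch of the event payload shape this handler keys on; the values are invented and only the keys mirror the lookups above:

```python
payload = {
    "type": "context.learning.generation.completed",
    "data": {"generation": 3, "best_score": 0.72},
}
# Wrapped in a StreamMessage with StreamType.EVENTS, this would print roughly:
# [12:00:00] generation 3 best=0.720
```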
851
+ class PromptLearningHandler(StreamHandler):
852
+ """Enhanced handler for GEPA/MIPRO prompt optimization jobs with rich formatting and metrics tracking.
853
+
854
+ This handler processes streaming events from both GEPA (Genetic Evolutionary Prompt
855
+ Algorithm) and MIPRO (Meta-Instruction PROposer) optimization jobs. It provides:
856
+
857
+ - **Real-time progress tracking**: Shows trial results, rollouts, iterations, and budget usage
858
+ - **Optimization curve tracking**: Maintains a history of best scores over time
859
+ - **GEPA-specific features**: Tracks transformations, rollouts, and validation results
860
+ - **MIPRO-specific features**: Tracks iterations, trials, minibatch/full evaluations, and budget
861
+ - **Dual output**: Writes to both console (via click.echo) and optional log file
862
+
863
+ The handler filters verbose events (like TPE updates, proposed instructions) to keep
864
+ output readable while preserving important progress information. It formats output
865
+ consistently between GEPA and MIPRO for easier comparison.
866
+
867
+ Example:
868
+ >>> handler = PromptLearningHandler(
869
+ ... show_trial_results=True,
870
+ ... max_tokens=1_000_000,
871
+ ... log_file=Path("optimization.log")
872
+ ... )
873
+ >>> # Handler is used by JobStreamer to process events
874
+ """
875
+
876
+ def __init__(
877
+ self,
878
+ *,
879
+ show_trial_results: bool = True,
880
+ show_transformations: bool = False,
881
+ show_validation: bool = True,
882
+ max_tokens: int | None = None,
883
+ max_time_seconds: float | None = None,
884
+ max_rollouts: int | None = None,
885
+ log_file: Path | None = None,
886
+ ):
887
+ """Initialize the prompt learning handler.
888
+
889
+ Args:
890
+ show_trial_results: Whether to display individual trial scores (default: True).
891
+ When True, shows each trial's score and best score so far.
892
+ show_transformations: Whether to display transformation/proposal details
893
+ (default: False). When True, shows verbose transformation events.
894
+ show_validation: Whether to display validation summaries (default: True).
895
+ Shows validation results comparing candidates against baseline.
896
+ max_tokens: Maximum token budget for MIPRO (from TOML termination_config).
897
+ Used to track progress and enforce limits.
898
+ max_time_seconds: Maximum time budget in seconds (from TOML termination_config).
899
+ Used to track elapsed time and ETA.
900
+ max_rollouts: Maximum rollouts budget (from TOML termination_config).
901
+ Used to track rollout progress for both GEPA and MIPRO.
902
+ log_file: Optional path to log file for persistent logging. If provided,
903
+ all output is written to both console and file. File is opened in
904
+ append mode and remains open for streaming.
905
+ """
906
+ self.show_trial_results = show_trial_results
907
+ self.show_transformations = show_transformations
908
+ self.show_validation = show_validation
909
+ self.optimization_curve: list[tuple[int, float]] = []
910
+ self.trial_counter = 0
911
+ self.best_score_so_far = 0.0
912
+
913
+ # MIPRO progress tracking
914
+ self.mipro_start_time: float | None = None
915
+ self.mipro_total_trials: int | None = None
916
+ self.mipro_completed_trials: int = 0
917
+ self.mipro_total_tokens: int = 0
918
+ self.mipro_policy_tokens: int = 0 # Rollout tokens (policy only)
919
+ self.mipro_max_tokens: int | None = max_tokens # From TOML termination_config
920
+ self.mipro_total_cost: float = 0.0
921
+ self.mipro_max_cost: float | None = None
922
+ self.mipro_current_iteration: int = 0
923
+ self.mipro_num_iterations: int | None = None
924
+ self.mipro_trials_per_iteration: int | None = None
925
+ self.mipro_best_score: float = 0.0 # Track best full eval score
926
+ self.mipro_baseline_score: float | None = None # Track baseline for comparison
927
+ self.mipro_batch_size: int | None = None # Track minibatch size (N for minibatch scores)
928
+ self.mipro_rollouts_completed: int = 0 # Total rollouts completed
929
+ self.mipro_max_rollouts: int | None = max_rollouts # From TOML termination_config
930
+ self.mipro_max_time_seconds: float | None = max_time_seconds # From TOML termination_config
931
+ self._last_progress_emit_time: float | None = None # Throttle progress updates
932
+ self._progress_emit_interval: float = 5.0 # Emit progress at most every 5 seconds
933
+
934
+ # Log file for real-time streaming
935
+ self.log_file: Path | None = log_file
936
+ self._log_file_handle = None
937
+ if self.log_file:
938
+ try:
939
+ # Create parent directory if needed
940
+ self.log_file.parent.mkdir(parents=True, exist_ok=True)
941
+ # Open file in append mode for live streaming
942
+ # Note: File must remain open for streaming, so we can't use a context manager
943
+ from datetime import datetime
944
+ self._log_file_handle = open(self.log_file, "a", encoding="utf-8") # noqa: SIM115
945
+ # Write header
946
+ self._log_file_handle.write("=" * 80 + "\n")
947
+ self._log_file_handle.write("PROMPT LEARNING VERBOSE LOG\n")
948
+ self._log_file_handle.write(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
949
+ self._log_file_handle.write("=" * 80 + "\n\n")
950
+ self._log_file_handle.flush()
951
+ except Exception as e:
952
+ # If we can't open the log file, continue without it
953
+ click.echo(f"⚠️ Could not open log file {log_file}: {e}", err=True)
954
+ self.log_file = None
955
+ self._log_file_handle = None
956
+
957
+ def _write_log(self, text: str) -> None:
958
+ """Write text to both console and log file."""
959
+ click.echo(text)
960
+ if self._log_file_handle:
961
+ try:
962
+ self._log_file_handle.write(text + "\n")
963
+ self._log_file_handle.flush()
964
+ except Exception:
965
+ # If write fails, close handle and continue without logging
966
+ from contextlib import suppress
967
+ with suppress(Exception):
968
+ self._log_file_handle.close()
969
+ self._log_file_handle = None
970
+
971
+ def handle(self, message: StreamMessage) -> None:
972
+ """Handle a stream message from the prompt learning job.
973
+
974
+ Routes messages to appropriate handlers based on stream type:
975
+ - STATUS: Job status updates (queued, running, completed, etc.)
976
+ - EVENTS: Algorithm-specific events (trials, iterations, transformations)
977
+ - METRICS: Performance metrics (scores, accuracies, costs)
978
+ - TIMELINE: Phase transitions
979
+
980
+ Filters verbose events (TPE updates, proposed instructions) to keep output
981
+ readable. MIPRO and GEPA events are handled by specialized methods.
982
+
983
+ Args:
984
+ message: StreamMessage containing event data from the backend
985
+ """
986
+ if not self.should_handle(message):
987
+ return
988
+
989
+ timestamp = datetime.now().strftime("%H:%M:%S")
990
+
991
+ if message.stream_type is StreamType.STATUS:
992
+ status = str(message.data.get("status") or message.data.get("state") or "unknown")
993
+ self._write_log(f"[{timestamp}] status={status}")
994
+ return
995
+
996
+ if message.stream_type is StreamType.EVENTS:
997
+ event_type = message.data.get("type", "event")
998
+ level = message.data.get("level")
999
+ msg = message.data.get("message") or ""
1000
+
1001
+ # Handle MIPRO-specific events for progress tracking (before skipping hidden events)
1002
+ if event_type == "mipro.job.started":
1003
+ self._handle_mipro_job_started(message.data)
1004
+ # Continue to default display
1005
+
1006
+ if event_type == "mipro.budget.update":
1007
+ self._handle_mipro_budget_update(message.data)
1008
+ # Continue to default display
1009
+
1010
+ if event_type == "mipro.trial.complete":
1011
+ self._handle_mipro_trial_complete(message.data)
1012
+ # Continue to default display
1013
+
1014
+ # Show more MIPRO events - only hide the most verbose ones
1015
+ _hidden_mipro_events = {
1016
+ # Keep only the most verbose TPE updates hidden
1017
+ "mipro.tpe.update", # Very frequent, low value
1018
+ }
1019
+ if event_type in _hidden_mipro_events:
1020
+ return
1021
+
1022
+ # Show GEPA transformation proposals - they're useful for debugging
1023
+ # if event_type == "gepa.transformation.proposed":
1024
+ # return
1025
+
1026
+ # Handle trial results for optimization curve tracking
1027
+ if event_type == "prompt.learning.trial.results":
1028
+ self._handle_trial_results(message.data)
1029
+ # Continue to default display
1030
+
1031
+ # Handle validation summary
1032
+ if event_type == "prompt.learning.validation.summary":
1033
+ if self.show_validation:
1034
+ self._handle_validation_summary(message.data)
1035
+ # Continue to default display
1036
+
1037
+ # Handle progress events
1038
+ if event_type == "prompt.learning.progress":
1039
+ self._handle_progress(message.data)
1040
+ # Continue to default display
1041
+
1042
+ # Handle MIPRO-specific events for progress tracking
1043
+ if event_type == "mipro.iteration.start":
1044
+ self._handle_mipro_iteration_start(message.data)
1045
+ # Continue to default display
1046
+
1047
+ if event_type == "mipro.iteration.complete":
1048
+ self._handle_mipro_iteration_complete(message.data)
1049
+ # Continue to default display
1050
+
1051
+ if event_type == "mipro.fulleval.complete":
1052
+ self._handle_mipro_fulleval_complete(message.data)
1053
+ # Continue to default display
1054
+
1055
+ if event_type == "mipro.optimization.exhausted":
1056
+ # Graceful conclusion - show final progress
1057
+ self._emit_mipro_progress()
1058
+ # Continue to default display
1059
+
1060
+ if event_type == "mipro.new_incumbent":
1061
+ self._handle_mipro_new_incumbent(message.data)
1062
+ # Continue to default display
1063
+
1064
+ # Handle rollouts start event
1065
+ if event_type == "prompt.learning.rollouts.start":
1066
+ self._handle_rollouts_start(message.data)
1067
+ # Continue to default display
1068
+
1069
+ # Handle GEPA new best event
1070
+ if event_type == "prompt.learning.gepa.new_best":
1071
+ self._handle_gepa_new_best(message.data)
1072
+ # Continue to default display
1073
+
1074
+ # Handle phase changed event
1075
+ if event_type == "prompt.learning.phase.changed":
1076
+ self._handle_phase_changed(message.data)
1077
+ # Continue to default display
1078
+
1079
+ # Handle stream connected event (connection lifecycle)
1080
+ if event_type == "prompt.learning.stream.connected":
1081
+ self._handle_stream_connected(message.data)
1082
+ # Continue to default display
1083
+
1084
+ # Handle proposal scored events (transformations) - show by default
1085
+ if event_type == "prompt.learning.proposal.scored":
1086
+ self._handle_proposal_scored(message.data)
1087
+ # Continue to default display
1088
+
1089
+ # Show verbose transformation events by default - they're useful
1090
+ # Only skip if explicitly disabled via show_transformations=False
1091
+ # verbose_event_types = [
1092
+ # "prompt.learning.proposal.scored",
1093
+ # "prompt.learning.eval.summary",
1094
+ # "prompt.learning.validation.scored",
1095
+ # "prompt.learning.final.results",
1096
+ # ]
1097
+ # if event_type in verbose_event_types and not self.show_transformations:
1098
+ # return
1099
+
1100
+ # Default event display - show more details
1101
+ prefix = f"[{timestamp}] {event_type}"
1102
+ if level:
1103
+ prefix += f" ({level})"
1104
+ sanitized_msg = _mask_sensitive_urls(msg)
1105
+
1106
+ # Include key data fields if message is empty or short
1107
+ if not sanitized_msg or len(sanitized_msg) < 50:
1108
+ data = message.data.get("data", {})
1109
+ if isinstance(data, dict):
1110
+ # Show useful fields
1111
+ useful_fields = []
1112
+ for key in ["score", "accuracy", "mean", "step", "iteration", "trial", "completed", "total", "version_id"]:
1113
+ if key in data:
1114
+ value = data[key]
1115
+ if isinstance(value, (int, float)):
1116
+ useful_fields.append(f"{key}={value:.4f}" if isinstance(value, float) else f"{key}={value}")
1117
+ else:
1118
+ useful_fields.append(f"{key}={value}")
1119
+ if useful_fields:
1120
+ sanitized_msg = sanitized_msg + (" " if sanitized_msg else "") + " ".join(useful_fields[:5]) # Limit to 5 fields
1121
+
1122
+ self._write_log(f"{prefix}: {sanitized_msg}".rstrip(": "))
1123
+ return
1124
+
1125
+ if message.stream_type is StreamType.METRICS:
1126
+ name = message.data.get("name")
1127
+ value = message.data.get("value")
1128
+ step = message.data.get("step")
1129
+ data = message.data.get("data", {})
1130
+
1131
+ metric_str = f"[{timestamp}] [metric] {name}={value:.4f}" if isinstance(value, int | float) else f"[{timestamp}] [metric] {name}={value}"
1132
+ if step is not None:
1133
+ metric_str += f" (step={step})"
1134
+
1135
+ if isinstance(data, dict):
1136
+ n = data.get("n")
1137
+ if n is not None:
1138
+ metric_str += f" n={n}"
1139
+
1140
+ self._write_log(metric_str)
1141
+ return
1142
+
1143
+ if message.stream_type is StreamType.TIMELINE:
1144
+ phase = message.data.get("phase", "phase")
1145
+ self._write_log(f"[{timestamp}] timeline={phase}")
1146
+
1147
+ def _handle_trial_results(self, event_data: dict[str, Any]) -> None:
1148
+ """Handle GEPA trial results events and track optimization curve.
1149
+
1150
+ Processes trial completion events from GEPA optimization, tracking:
1151
+ - Mean score for the trial
1152
+ - Best score achieved so far
1153
+ - Number of rollouts completed (N)
1154
+ - Optimization curve data points
1155
+
1156
+ Updates the optimization curve with (trial_number, best_score) tuples
1157
+ for visualization. Displays trial results if show_trial_results is True.
1158
+
1159
+ Args:
1160
+ event_data: Event data dictionary containing:
1161
+ - data.mean: Mean score for this trial
1162
+ - data.completed: Number of rollouts completed
1163
+ - data.total: Total rollouts planned
1164
+ """
1165
+ data = event_data.get("data", {})
1166
+ if not isinstance(data, dict):
1167
+ return
1168
+
1169
+ mean_score = data.get("mean")
1170
+ if mean_score is not None:
1171
+ self.trial_counter += 1
1172
+ self.best_score_so_far = max(self.best_score_so_far, float(mean_score))
1173
+ self.optimization_curve.append((self.trial_counter, self.best_score_so_far))
1174
+
1175
+ if self.show_trial_results:
1176
+ timestamp = datetime.now().strftime("%H:%M:%S")
1177
+
1178
+ # Extract N (number of rollouts)
1179
+ completed = data.get("completed")
1180
+ total = data.get("total")
1181
+
1182
+ n_str = f" N={completed}/{total}" if completed is not None and total is not None else (f" N={completed}" if completed is not None else "")
1183
+
1184
+ self._write_log(f"[{timestamp}] [Trial {self.trial_counter}] Score: {mean_score:.4f} (Best: {self.best_score_so_far:.4f}){n_str}")
1185
+
1186
+ def _handle_validation_summary(self, event_data: dict[str, Any]) -> None:
1187
+ """Handle validation summary events showing candidate performance.
1188
+
1189
+ Displays validation results comparing optimized prompts against a baseline.
1190
+ Shows baseline score, number of candidates evaluated (N), and top candidate
1191
+ scores. Only displayed if show_validation is True.
1192
+
1193
+ Args:
1194
+ event_data: Event data dictionary containing:
1195
+ - data.baseline: Baseline score (dict with accuracy/score or number)
1196
+ - data.results: List of candidate results with accuracy/score fields
1197
+ """
1198
+ data = event_data.get("data", {})
1199
+ if not isinstance(data, dict):
1200
+ return
1201
+
1202
+ timestamp = datetime.now().strftime("%H:%M:%S")
1203
+
1204
+ # Extract baseline
1205
+ baseline = data.get("baseline")
1206
+ baseline_score = None
1207
+ if isinstance(baseline, dict):
1208
+ baseline_score = baseline.get("accuracy") or baseline.get("score")
1209
+ elif isinstance(baseline, int | float):
1210
+ baseline_score = baseline
1211
+
1212
+ # Extract results
1213
+ results = data.get("results", [])
1214
+ if not isinstance(results, list):
1215
+ results = []
1216
+
1217
+ # Display validation summary
1218
+ self._write_log(f"[{timestamp}] Validation Summary:")
1219
+
1220
+ # Show baseline if available
1221
+ if baseline_score is not None:
1222
+ self._write_log(f" Baseline: {baseline_score:.4f}")
1223
+
1224
+ # Show N (number of candidates)
1225
+ n_candidates = len(results)
1226
+ if n_candidates > 0:
1227
+ self._write_log(f" N={n_candidates}")
1228
+
1229
+ # Display validation results
1230
+ if results:
1231
+ for i, result in enumerate(results[:10]): # Show top 10
1232
+ if isinstance(result, dict):
1233
+ accuracy = result.get("accuracy") or result.get("score")
1234
+ if accuracy is not None:
1235
+ self._write_log(f" Candidate {i+1}: {accuracy:.4f}")
1236
+
1237
+ def _handle_progress(self, event_data: dict[str, Any]) -> None:
1238
+ """Handle GEPA progress events with detailed rollout and transformation tracking.
1239
+
1240
+ Displays comprehensive progress information including:
1241
+ - Overall completion percentage
1242
+ - Rollout progress (completed/total with percentage)
1243
+ - Transformation progress (tried/planned with percentage)
1244
+ - Token usage (used/budget in millions)
1245
+ - Elapsed time and ETA
1246
+
1247
+ Formats progress in a human-readable format similar to CLI progress bars.
1248
+
1249
+ Args:
1250
+ event_data: Event data dictionary containing:
1251
+ - data.rollouts_completed: Number of rollouts completed
1252
+ - data.rollouts_total: Total rollouts planned
1253
+ - data.transformations_tried: Number of transformations tried
1254
+ - data.transformations_planned: Total transformations planned
1255
+ - data.rollout_tokens_used: Tokens consumed
1256
+ - data.rollout_tokens_budget: Token budget
1257
+ - data.elapsed_seconds: Time elapsed
1258
+ - data.eta_seconds: Estimated time remaining
1259
+ - data.percent_overall: Overall completion percentage
1260
+ """
1261
+ data = event_data.get("data", {})
1262
+ if not isinstance(data, dict):
1263
+ return
1264
+
1265
+ timestamp = datetime.now().strftime("%H:%M:%S")
1266
+
1267
+ # Extract rollout progress
1268
+ rollouts_completed = data.get("rollouts_completed")
1269
+ rollouts_total = data.get("rollouts_total")
1270
+ percent_rollouts = data.get("percent_rollouts")
1271
+
1272
+ # Extract transformation progress
1273
+ transformations_tried = data.get("transformations_tried")
1274
+ transformations_planned = data.get("transformations_planned")
1275
+ percent_transformations = data.get("percent_transformations")
1276
+
1277
+ # Extract overall progress
1278
+ percent_overall = data.get("percent_overall")
1279
+
1280
+ # Extract timing
1281
+ elapsed_seconds = data.get("elapsed_seconds")
1282
+ eta_seconds = data.get("eta_seconds")
1283
+
1284
+ # Extract token usage
1285
+ rollout_tokens_used = data.get("rollout_tokens_used")
1286
+ rollout_tokens_budget = data.get("rollout_tokens_budget")
1287
+
1288
+ # Build progress message
1289
+ parts = []
1290
+
1291
+ # Overall percentage
1292
+ if percent_overall is not None:
1293
+ parts.append(f"{int(percent_overall * 100)}% complete")
1294
+
1295
+ # Rollout progress
1296
+ if rollouts_completed is not None and rollouts_total is not None:
1297
+ parts.append(f"rollouts={rollouts_completed}/{rollouts_total}")
1298
+ if percent_rollouts is not None:
1299
+ parts.append(f"({int(percent_rollouts * 100)}%)")
1300
+ elif rollouts_completed is not None:
1301
+ parts.append(f"rollouts={rollouts_completed}")
1302
+
1303
+ # Transformation progress
1304
+ if transformations_tried is not None and transformations_planned is not None:
1305
+ parts.append(f"transformations={transformations_tried}/{transformations_planned}")
1306
+ if percent_transformations is not None:
1307
+ parts.append(f"({int(percent_transformations * 100)}%)")
1308
+ elif transformations_tried is not None:
1309
+ parts.append(f"transformations={transformations_tried}")
1310
+
1311
+ # Token usage
1312
+ if rollout_tokens_used is not None:
1313
+ tokens_millions = rollout_tokens_used / 1_000_000.0
1314
+ if rollout_tokens_budget is not None:
1315
+ budget_millions = rollout_tokens_budget / 1_000_000.0
1316
+ parts.append(f"tokens={tokens_millions:.2f}M/{budget_millions:.2f}M")
1317
+ else:
1318
+ parts.append(f"tokens={tokens_millions:.2f}M")
1319
+
1320
+ # Timing
1321
+ if elapsed_seconds is not None:
1322
+ if elapsed_seconds >= 60:
1323
+ elapsed_str = f"{elapsed_seconds / 60:.1f}min"
1324
+ else:
1325
+ elapsed_str = f"{int(elapsed_seconds)}s"
1326
+ parts.append(f"elapsed={elapsed_str}")
1327
+
1328
+ if eta_seconds is not None:
1329
+ eta_str = f"{eta_seconds / 60:.1f}min" if eta_seconds >= 60 else f"{int(eta_seconds)}s"
1330
+ parts.append(f"eta={eta_str}")
1331
+
1332
+ # Fallback to simple step/total_steps if no detailed info
1333
+ if not parts:
1334
+ step = data.get("step") or data.get("current_step")
1335
+ total_steps = data.get("total_steps") or data.get("max_steps")
1336
+ if step is not None and total_steps is not None:
1337
+ parts.append(f"{step}/{total_steps} ({100 * step / total_steps:.1f}%)")
1338
+
1339
+ if parts:
1340
+ progress_msg = " ".join(parts)
1341
+ self._write_log(f"[{timestamp}] Progress: {progress_msg}")
1342
+
1343
+ def _handle_rollouts_start(self, event_data: dict[str, Any]) -> None:
1344
+ """Handle GEPA rollouts start event.
1345
+
1346
+ Displays when rollouts begin, showing the number of training seeds
1347
+ that will be evaluated. This marks the start of the main optimization
1348
+ phase for GEPA.
1349
+
1350
+ Args:
1351
+ event_data: Event data dictionary containing:
1352
+ - data.train_seeds: List of training seed values
1353
+ """
1354
+ data = event_data.get("data", {})
1355
+ if not isinstance(data, dict):
1356
+ return
1357
+
1358
+ timestamp = datetime.now().strftime("%H:%M:%S")
1359
+ train_seeds = data.get("train_seeds", [])
1360
+
1361
+ if isinstance(train_seeds, list) and train_seeds:
1362
+ num_seeds = len(train_seeds)
1363
+ self._write_log(f"[{timestamp}] Starting rollouts: {num_seeds} seeds")
1364
+ else:
1365
+ self._write_log(f"[{timestamp}] Starting rollouts")
1366
+
1367
+ def _handle_gepa_new_best(self, event_data: dict[str, Any]) -> None:
1368
+ """Handle GEPA new best candidate event.
1369
+
1370
+ Displays when a new best candidate is found during optimization,
1371
+ showing the improvement over the previous best.
1372
+
1373
+ Args:
1374
+ event_data: Event data dictionary containing:
1375
+ - data.accuracy: New best accuracy score
1376
+ - data.previous_best_score: Previous best score
1377
+ - data.improvement: Absolute improvement
1378
+ - data.version_id: ID of the new best candidate
1379
+ """
1380
+ data = event_data.get("data", {})
1381
+ if not isinstance(data, dict):
1382
+ return
1383
+
1384
+ timestamp = datetime.now().strftime("%H:%M:%S")
1385
+ accuracy = data.get("accuracy")
1386
+ previous = data.get("previous_best_score")
1387
+ improvement = data.get("improvement")
1388
+
1389
+ if accuracy is not None:
1390
+ msg = f"[{timestamp}] \u2728 New best: {accuracy:.4f}"
1391
+ if previous is not None and improvement is not None:
1392
+ msg += f" (+{improvement:.4f} from {previous:.4f})"
1393
+ elif previous is not None:
1394
+ msg += f" (was {previous:.4f})"
1395
+ self._write_log(msg)
1396
+
1397
+ def _handle_phase_changed(self, event_data: dict[str, Any]) -> None:
1398
+ """Handle phase transition event.
1399
+
1400
+ Displays when the optimization transitions between phases
1401
+ (e.g., bootstrap -> optimization -> validation -> complete).
1402
+
1403
+ Args:
1404
+ event_data: Event data dictionary containing:
1405
+ - data.from_phase: Previous phase name
1406
+ - data.to_phase: New phase name
1407
+ - data.phase_summary: Optional summary of completed phase
1408
+ """
1409
+ data = event_data.get("data", {})
1410
+ if not isinstance(data, dict):
1411
+ return
1412
+
1413
+ timestamp = datetime.now().strftime("%H:%M:%S")
1414
+ from_phase = data.get("from_phase") or "start"
1415
+ to_phase = data.get("to_phase")
1416
+
1417
+ if to_phase:
1418
+ self._write_log(f"[{timestamp}] Phase: {from_phase} \u2192 {to_phase}")
1419
+
1420
+ def _handle_stream_connected(self, event_data: dict[str, Any]) -> None:
1421
+ """Handle SSE stream connection event.
1422
+
1423
+ Displays connection confirmation with cursor position for debugging.
1424
+
1425
+ Args:
1426
+ event_data: Event data dictionary containing:
1427
+ - data.cursor: Current sequence cursor position
1428
+ - data.heartbeat_interval_seconds: Heartbeat interval
1429
+ """
1430
+ data = event_data.get("data", {})
1431
+ if not isinstance(data, dict):
1432
+ return
1433
+
1434
+ timestamp = datetime.now().strftime("%H:%M:%S")
1435
+ cursor = data.get("cursor", 0)
1436
+ self._write_log(f"[{timestamp}] Stream connected (cursor={cursor})")
1437
+
1438
+ def _handle_mipro_job_started(self, event_data: dict[str, Any]) -> None:
1439
+ """Handle MIPRO job start event and extract configuration.
1440
+
1441
+ Captures initial MIPRO configuration from the job start event to enable
1442
+ progress tracking. Extracts num_iterations and num_trials_per_iteration
1443
+ to estimate total trials and rollouts.
1444
+
1445
+ Args:
1446
+ event_data: Event data dictionary containing:
1447
+ - data.num_iterations: Total number of optimization iterations
1448
+ - data.num_trials_per_iteration: Trials per iteration
1449
+ """
1450
+ data = event_data.get("data", {})
1451
+ if not isinstance(data, dict):
1452
+ return
1453
+
1454
+ # Extract config values to estimate max rollouts
1455
+ num_iterations = data.get("num_iterations")
1456
+ num_trials_per_iteration = data.get("num_trials_per_iteration")
1457
+
1458
+ if num_iterations is not None:
1459
+ self.mipro_num_iterations = num_iterations
1460
+ if num_trials_per_iteration is not None:
1461
+ self.mipro_trials_per_iteration = num_trials_per_iteration
1462
+
1463
+    def _handle_mipro_iteration_start(self, event_data: dict[str, Any]) -> None:
+        """Handle MIPRO iteration start event and initialize progress tracking.
+
+        Called at the start of each MIPRO iteration. On the first iteration (0),
+        initializes all progress tracking variables including:
+        - Total iterations and trials per iteration
+        - Batch size (for minibatch evaluations)
+        - Max rollouts estimate (iterations * trials * batch_size)
+        - Time and token budgets
+
+        Sets the start time for elapsed time tracking.
+
+        Args:
+            event_data: Event data dictionary containing:
+                - data.iteration: Current iteration number (0-indexed)
+                - data.num_iterations: Total iterations
+                - data.num_trials_per_iteration: Trials per iteration
+                - data.batch_size: Minibatch size (N for minibatch scores)
+                - data.max_trials: Maximum trials limit (optional)
+                - data.max_rollouts: Maximum rollouts limit (optional)
+                - data.max_time_seconds: Maximum time limit (optional)
+        """
+        import time
+
+        data = event_data.get("data", {})
+        if not isinstance(data, dict):
+            return
+
+        iteration = data.get("iteration")
+        if iteration == 0 and self.mipro_start_time is None:
+            self.mipro_start_time = time.time()
+
+        # Extract total iterations and trials per iteration from first iteration
+        if iteration == 0:
+            self.mipro_num_iterations = data.get("num_iterations") or self.mipro_num_iterations
+            self.mipro_trials_per_iteration = data.get("num_trials_per_iteration") or self.mipro_trials_per_iteration
+            batch_size = data.get("batch_size")
+            if batch_size is not None:
+                self.mipro_batch_size = batch_size
+
+            if self.mipro_num_iterations and self.mipro_trials_per_iteration:
+                self.mipro_total_trials = self.mipro_num_iterations * self.mipro_trials_per_iteration
+
+            # Extract max limits if available (from events, but TOML value takes precedence)
+            # Only override if TOML value wasn't set
+            max_trials = data.get("max_trials")
+            max_rollouts_from_event = data.get("max_rollouts")
+            if self.mipro_max_rollouts is None:
+                if max_rollouts_from_event is not None:
+                    # Use event value if TOML value wasn't set
+                    self.mipro_max_rollouts = max_rollouts_from_event
+                elif max_trials is not None:
+                    # Fallback: If max_trials is set, use it as max rollouts (approximation)
+                    self.mipro_max_rollouts = max_trials
+                elif self.mipro_num_iterations and self.mipro_trials_per_iteration and self.mipro_batch_size:
+                    # Estimate max rollouts: iterations * trials_per_iteration * batch_size
+                    self.mipro_max_rollouts = self.mipro_num_iterations * self.mipro_trials_per_iteration * self.mipro_batch_size
+
+            max_time_seconds = data.get("max_time_seconds") or data.get("max_wall_clock_seconds")
+            if max_time_seconds is not None and self.mipro_max_time_seconds is None:
+                # Use event value only if TOML value wasn't set
+                self.mipro_max_time_seconds = float(max_time_seconds)
+
+        self.mipro_current_iteration = iteration if iteration is not None else self.mipro_current_iteration
+
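A standalone sketch of the max-rollouts precedence described above; the helper name and numbers are illustrative assumptions, while the order (TOML value, then the event's max_rollouts, then max_trials, then the iterations * trials * batch_size estimate) mirrors the handler:

    def estimate_max_rollouts(toml_value, event_max_rollouts, max_trials,
                              num_iterations, trials_per_iteration, batch_size):
        # Illustrative helper; mirrors the precedence order in _handle_mipro_iteration_start.
        if toml_value is not None:
            return toml_value
        if event_max_rollouts is not None:
            return event_max_rollouts
        if max_trials is not None:
            return max_trials
        if num_iterations and trials_per_iteration and batch_size:
            return num_iterations * trials_per_iteration * batch_size
        return None

    # With no TOML/event limits, 5 iterations * 4 trials * 6-seed minibatches -> 120 rollouts.
    print(estimate_max_rollouts(None, None, None, 5, 4, 6))  # 120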
+    def _handle_mipro_iteration_complete(self, event_data: dict[str, Any]) -> None:
+        """Handle MIPRO iteration completion event.
+
+        Updates progress tracking when an iteration completes, including:
+        - Cumulative trial count
+        - Current iteration number
+
+        Emits a progress update showing overall progress, trials completed,
+        iterations, rollouts, tokens, and time.
+
+        Args:
+            event_data: Event data dictionary containing:
+                - data.iteration: Completed iteration number
+                - data.cumulative: Cumulative trial count across all iterations
+        """
+        data = event_data.get("data", {})
+        if not isinstance(data, dict):
+            return
+
+        cumulative = data.get("cumulative")
+        if cumulative is not None:
+            self.mipro_completed_trials = cumulative
+
+        # Update current iteration
+        iteration = data.get("iteration")
+        if iteration is not None:
+            self.mipro_current_iteration = iteration
+
+        # Emit progress update
+        self._emit_mipro_progress()
+
+    def _handle_mipro_trial_complete(self, event_data: dict[str, Any]) -> None:
+        """Handle MIPRO trial completion event (minibatch evaluation).
+
+        Processes minibatch trial completion events, which occur frequently during
+        MIPRO optimization. Tracks:
+        - Completed trial count
+        - Rollouts completed (from num_seeds)
+        - Minibatch scores (displayed if show_trial_results is True)
+
+        Displays trial results in GEPA-like format: [Trial X] Score: Y (Best: Z) N=W
+        where N is the minibatch size. Emits throttled progress updates.
+
+        Args:
+            event_data: Event data dictionary containing:
+                - data.minibatch_score: Score from minibatch evaluation
+                - data.iteration: Current iteration number
+                - data.trial: Trial number within iteration
+                - data.num_seeds: Number of seeds evaluated (minibatch size N)
+        """
+        data = event_data.get("data", {})
+        if not isinstance(data, dict):
+            return
+
+        # Increment completed trials counter
+        self.mipro_completed_trials += 1
+
+        # Count rollouts from trial events
+        num_seeds = data.get("num_seeds") or data.get("num_instances", 0)
+        if num_seeds:
+            self.mipro_rollouts_completed += num_seeds
+
+        # Show trial score (minibatch) - like GEPA trial format
+        if self.show_trial_results:
+            timestamp = datetime.now().strftime("%H:%M:%S")
+            minibatch_score = data.get("minibatch_score")
+            iteration = data.get("iteration")
+            trial = data.get("trial")
+
+            if minibatch_score is not None:
+                try:
+                    score_float = float(minibatch_score)
+                    # Calculate trial number for display
+                    if iteration is not None and trial is not None and self.mipro_trials_per_iteration:
+                        trial_num_display = (iteration * self.mipro_trials_per_iteration) + (trial + 1)
+                    else:
+                        trial_num_display = self.mipro_completed_trials
+
+                    n_str = f" N={num_seeds}" if num_seeds else ""
+                    best_str = f" (Best: {self.mipro_best_score:.4f})" if self.mipro_best_score > 0 else ""
+
+                    self._write_log(
+                        f"[{timestamp}] [Trial {trial_num_display}] Score: {score_float:.4f}{best_str}{n_str}"
+                    )
+                except (ValueError, TypeError):
+                    pass
+
+        # Emit progress update after each trial (throttled internally)
+        self._emit_mipro_progress()
+
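The display trial number used in the log line above is plain arithmetic; a worked sketch with hypothetical values:

    # Trial numbering for display: iterations are 0-indexed and trials restart each iteration.
    iteration, trial, trials_per_iteration = 2, 1, 4
    trial_num_display = (iteration * trials_per_iteration) + (trial + 1)
    print(trial_num_display)  # 10 -> logged as "[Trial 10] Score: ... (Best: ...) N=..."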
+    def _handle_mipro_fulleval_complete(self, event_data: dict[str, Any]) -> None:
+        """Handle MIPRO full evaluation completion event.
+
+        Processes full evaluation events, which occur less frequently than minibatch
+        trials. Full evaluations use the full validation set and are more expensive.
+        Only displays results if the score is "promising":
+        - Better than current best score, OR
+        - At least 5% improvement over baseline
+
+        Tracks rollouts from full evaluations and updates best score. Displays
+        results with baseline comparison and improvement percentage.
+
+        Args:
+            event_data: Event data dictionary containing:
+                - data.score: Full evaluation score
+                - data.iteration: Current iteration number
+                - data.trial: Trial number within iteration
+                - data.num_seeds: Number of seeds evaluated (full eval size)
+                - data.seeds: List of seed values (alternative to num_seeds)
+        """
+        data = event_data.get("data", {})
+        if not isinstance(data, dict):
+            return
+
+        # Count rollouts from full eval
+        num_seeds = data.get("num_seeds") or data.get("seeds", 0)
+        if isinstance(num_seeds, list):
+            num_seeds = len(num_seeds)
+        if num_seeds:
+            self.mipro_rollouts_completed += num_seeds
+
+        score = data.get("score")
+        if score is None:
+            return
+
+        try:
+            score_float = float(score)
+        except (ValueError, TypeError):
+            return
+
+        # Initialize baseline if not set (use first score as baseline)
+        if self.mipro_baseline_score is None:
+            self.mipro_baseline_score = score_float
+
+        # Only show if score is promising:
+        # - Better than current best, OR
+        # - At least 5% improvement over baseline
+        is_promising = False
+        if score_float > self.mipro_best_score:
+            self.mipro_best_score = score_float
+            is_promising = True
+        elif self.mipro_baseline_score is not None:
+            improvement = score_float - self.mipro_baseline_score
+            improvement_pct = (improvement / self.mipro_baseline_score * 100) if self.mipro_baseline_score > 0 else 0
+            if improvement_pct >= 5.0:  # At least 5% improvement over baseline
+                is_promising = True
+
+        if is_promising:
+            timestamp = datetime.now().strftime("%H:%M:%S")
+            iteration = data.get("iteration")
+            trial = data.get("trial")
+            seeds = data.get("seeds") or data.get("num_seeds", 0)
+            if isinstance(seeds, list):
+                seeds = len(seeds)
+
+            # Format similar to GEPA trial results with N displayed
+            iter_str = f" iter={iteration}" if iteration is not None else ""
+            trial_str = f" trial={trial}" if trial is not None else ""
+            n_str = f" N={seeds}" if seeds else ""
+
+            baseline_str = ""
+            if self.mipro_baseline_score is not None:
+                improvement = score_float - self.mipro_baseline_score
+                improvement_pct = (improvement / self.mipro_baseline_score * 100) if self.mipro_baseline_score > 0 else 0
+                baseline_str = f" (Baseline: {self.mipro_baseline_score:.4f}, +{improvement_pct:.1f}%)"
+
+            self._write_log(
+                f"[{timestamp}] Full eval: Score={score_float:.4f} (Best: {self.mipro_best_score:.4f}){n_str}{baseline_str}{iter_str}{trial_str}"
+            )
+
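A numeric sketch (hypothetical scores) of the "promising" filter described above, which keeps the log quiet unless a full eval beats the running best or improves on the baseline by at least 5%:

    # Baseline is the first full-eval score; 0.54 does not beat the best (0.58)
    # but is 8% above baseline (0.50), so it would still be logged.
    baseline, best = 0.50, 0.58
    score = 0.54
    improvement_pct = (score - baseline) / baseline * 100 if baseline > 0 else 0
    is_promising = score > best or improvement_pct >= 5.0
    print(round(improvement_pct, 1), is_promising)  # 8.0 True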
+    def _handle_mipro_new_incumbent(self, event_data: dict[str, Any]) -> None:
+        """Handle MIPRO new incumbent event (best candidate found).
+
+        Processes events when MIPRO finds a new best candidate (incumbent).
+        Updates the optimization curve and displays the result in GEPA-like format
+        for consistency. Tracks cumulative trial count for curve visualization.
+
+        Args:
+            event_data: Event data dictionary containing:
+                - data.minibatch_score: Minibatch score of the new incumbent
+                - data.best_score: Overall best score
+                - data.iteration: Current iteration number
+                - data.trial: Trial number within iteration
+                - data.cumulative_trials: Cumulative trial count across iterations
+                - data.num_seeds: Minibatch size (N)
+        """
+        data = event_data.get("data", {})
+        if not isinstance(data, dict):
+            return
+
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        minibatch_score = data.get("minibatch_score")
+        best_score = data.get("best_score")
+        iteration = data.get("iteration")
+        trial = data.get("trial")
+        num_seeds = data.get("num_seeds")  # N for minibatch
+
+        if minibatch_score is None:
+            return
+
+        try:
+            score_float = float(minibatch_score)
+        except (ValueError, TypeError):
+            return
+
+        # Update best score if this is better
+        if best_score is not None:
+            best_float = float(best_score)
+            if best_float > self.best_score_so_far:
+                self.best_score_so_far = best_float
+        elif score_float > self.best_score_so_far:
+            self.best_score_so_far = score_float
+
+        # Track optimization curve
+        if trial is not None:
+            # Use cumulative trial count for x-axis
+            cumulative_trials = data.get("cumulative_trials")
+            if cumulative_trials is not None:
+                trial_num = cumulative_trials
+            else:
+                # Estimate: (iteration * trials_per_iteration) + trial
+                if iteration is not None and self.mipro_trials_per_iteration:
+                    trial_num = (iteration * self.mipro_trials_per_iteration) + (trial + 1)
+                else:
+                    trial_num = self.trial_counter + 1
+
+            self.optimization_curve.append((trial_num, self.best_score_so_far))
+            self.trial_counter = trial_num
+
+        # Format like GEPA: [Trial X] Score: X (Best: Y) N=Z
+        trial_num_display = self.trial_counter if self.trial_counter > 0 else (trial + 1 if trial is not None else 1)
+        n_str = f" N={num_seeds}" if num_seeds is not None else ""
+
+        click.echo(
+            f"[{timestamp}] [Trial {trial_num_display}] Score: {score_float:.4f} (Best: {self.best_score_so_far:.4f}){n_str}"
+        )
+
+        # Emit progress update after each trial (throttled internally)
+        self._emit_mipro_progress()
+
+    def _handle_mipro_budget_update(self, event_data: dict[str, Any]) -> None:
+        """Handle MIPRO budget update events.
+
+        Tracks token usage and cost accumulation during optimization. Updates:
+        - Total tokens consumed (all operations)
+        - Policy tokens (rollout tokens only)
+        - Total cost in USD
+        - Max token and cost limits (if provided in event)
+
+        Emits throttled progress updates to show budget consumption.
+
+        Args:
+            event_data: Event data dictionary containing:
+                - data.total_tokens: Total tokens consumed
+                - data.policy_tokens: Tokens used for rollouts (policy only)
+                - data.total_cost_usd: Total cost in USD
+                - data.max_token_limit: Maximum token budget (optional)
+                - data.max_spend_usd: Maximum cost budget (optional)
+        """
+        data = event_data.get("data", {})
+        if not isinstance(data, dict):
+            return
+
+        # Update token tracking
+        total_tokens = data.get("total_tokens")
+        if total_tokens is not None:
+            self.mipro_total_tokens = total_tokens
+
+        # Track policy tokens separately (rollout tokens)
+        policy_tokens = data.get("policy_tokens")
+        if policy_tokens is not None:
+            self.mipro_policy_tokens = policy_tokens
+
+        # Update cost tracking
+        total_cost = data.get("total_cost_usd")
+        if total_cost is not None:
+            self.mipro_total_cost = total_cost
+
+        # Extract max limits if available in event data
+        max_token_limit = data.get("max_token_limit")
+        if max_token_limit is not None:
+            self.mipro_max_tokens = max_token_limit
+
+        max_spend_usd = data.get("max_spend_usd")
+        if max_spend_usd is not None:
+            self.mipro_max_cost = max_spend_usd
+
+        # Emit progress update periodically (throttled)
+        self._emit_mipro_progress()
+
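An illustrative budget payload (hypothetical numbers; field names follow the docstring above) and the token fragment it would contribute to the progress line:

    # Hypothetical budget event; policy_tokens counts rollout tokens only.
    event = {"data": {"total_tokens": 1_250_000, "policy_tokens": 900_000,
                      "total_cost_usd": 3.75, "max_token_limit": 5_000_000}}
    data = event["data"]
    used_pct = data["policy_tokens"] / data["max_token_limit"] * 100
    # Feeds the progress line as e.g. "tokens=0.90M/5.00M (18%)"
    print(f"tokens={data['policy_tokens'] / 1_000_000:.2f}M/"
          f"{data['max_token_limit'] / 1_000_000:.2f}M ({int(used_pct)}%)")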
+    def _emit_mipro_progress(self) -> None:
+        """Emit a comprehensive progress update for MIPRO (throttled).
+
+        Formats and displays MIPRO progress in a format similar to GEPA for consistency.
+        Shows:
+        - Overall completion percentage
+        - Trial progress (completed/total with remaining)
+        - Iteration progress (current/total)
+        - Rollout progress (completed/max)
+        - Token usage (used/budget in millions)
+        - Cost (USD)
+        - Elapsed time and ETA
+
+        Progress updates are throttled to emit at most every 5 seconds to avoid
+        overwhelming the console. This method is called after significant events
+        (trial completion, iteration completion, budget updates).
+
+        Note:
+            Only emits if start_time is set (job has started) and sufficient time
+            has passed since the last update.
+        """
+        import time
+
+        if self.mipro_start_time is None:
+            return
+
+        # Throttle progress updates - only emit every N seconds
+        now = time.time()
+        if self._last_progress_emit_time is not None:
+            time_since_last = now - self._last_progress_emit_time
+            if time_since_last < self._progress_emit_interval:
+                return  # Skip this update
+
+        self._last_progress_emit_time = now
+
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        elapsed = now - self.mipro_start_time
+
+        parts = []
+
+        # Overall progress percentage
+        percent_overall = None
+        if self.mipro_total_trials and self.mipro_completed_trials is not None:
+            percent_overall = (self.mipro_completed_trials / self.mipro_total_trials) * 100
+            parts.append(f"{int(percent_overall)}% complete")
+
+        # Trial progress (like rollouts in GEPA)
+        if self.mipro_total_trials and self.mipro_completed_trials is not None:
+            parts.append(f"trials={self.mipro_completed_trials}/{self.mipro_total_trials}")
+            # Calculate remaining trials
+            remaining_trials = self.mipro_total_trials - self.mipro_completed_trials
+            if remaining_trials > 0:
+                parts.append(f"rem={remaining_trials}")
+            # Show percentage
+            if percent_overall is not None:
+                parts.append(f"({int(percent_overall)}%)")
+        elif self.mipro_completed_trials is not None:
+            parts.append(f"trials={self.mipro_completed_trials}")
+
+        # Iteration progress
+        if self.mipro_num_iterations and self.mipro_current_iteration is not None:
+            parts.append(f"iter={self.mipro_current_iteration + 1}/{self.mipro_num_iterations}")
+
+        # Rollouts completed vs max (like GEPA) - always show if we have any rollouts
+        if self.mipro_rollouts_completed > 0:
+            # Always try to show max if available (from TOML, event, or estimate)
+            max_rollouts_to_show = self.mipro_max_rollouts
+            if max_rollouts_to_show is None and self.mipro_total_trials and self.mipro_batch_size:
+                # Estimate max rollouts from total trials if available
+                max_rollouts_to_show = self.mipro_total_trials * self.mipro_batch_size
+
+            if max_rollouts_to_show:
+                rollouts_pct = (self.mipro_rollouts_completed / max_rollouts_to_show) * 100
+                parts.append(f"rollouts={self.mipro_rollouts_completed}/{max_rollouts_to_show} ({int(rollouts_pct)}%)")
+            else:
+                parts.append(f"rollouts={self.mipro_rollouts_completed}")
+
+        # Tokens (policy tokens only, like GEPA rollout_tokens) - always show max if available
+        if self.mipro_policy_tokens > 0:
+            rollout_tokens_millions = self.mipro_policy_tokens / 1_000_000.0
+            if self.mipro_max_tokens:
+                # Use max_tokens as budget for rollout tokens (approximation)
+                budget_millions = self.mipro_max_tokens / 1_000_000.0
+                tokens_pct = (self.mipro_policy_tokens / self.mipro_max_tokens * 100) if self.mipro_max_tokens > 0 else 0
+                parts.append(f"tokens={rollout_tokens_millions:.2f}M/{budget_millions:.2f}M ({int(tokens_pct)}%)")
+            else:
+                parts.append(f"tokens={rollout_tokens_millions:.2f}M")
+
+        # Timing (elapsed out of max, like GEPA)
+        elapsed_seconds = int(elapsed)
+        if self.mipro_max_time_seconds:
+            elapsed_pct = (elapsed / self.mipro_max_time_seconds * 100) if self.mipro_max_time_seconds > 0 else 0
+            max_time_minutes = self.mipro_max_time_seconds / 60.0
+            if elapsed_seconds >= 60:
+                elapsed_str = f"{elapsed_seconds / 60:.1f}min/{max_time_minutes:.1f}min ({int(elapsed_pct)}%)"
+            else:
+                elapsed_str = f"{elapsed_seconds}s/{int(self.mipro_max_time_seconds)}s ({int(elapsed_pct)}%)"
+        else:
+            if elapsed_seconds >= 60:
+                elapsed_str = f"{elapsed_seconds / 60:.1f}min"
+            else:
+                elapsed_str = f"{elapsed_seconds}s"
+        parts.append(f"elapsed={elapsed_str}")
+
+        # ETA calculation (similar to GEPA) - always show if we have progress
+        eta_seconds = None
+        if self.mipro_completed_trials is not None and self.mipro_completed_trials > 0 and elapsed > 0:
+            rate = self.mipro_completed_trials / elapsed
+            if rate > 0:
+                if self.mipro_total_trials:
+                    # Calculate ETA based on remaining trials
+                    remaining = self.mipro_total_trials - self.mipro_completed_trials
+                    if remaining > 0:
+                        eta_seconds = remaining / rate
+                else:
+                    # Estimate based on iterations if we don't have total trials
+                    if self.mipro_num_iterations and self.mipro_current_iteration is not None:
+                        remaining_iterations = self.mipro_num_iterations - (self.mipro_current_iteration + 1)
+                        if remaining_iterations > 0 and self.mipro_trials_per_iteration:
+                            # Estimate: assume same rate for remaining iterations
+                            remaining_trials_estimate = remaining_iterations * self.mipro_trials_per_iteration
+                            eta_seconds = remaining_trials_estimate / rate
+
+        if eta_seconds is not None and eta_seconds > 0:
+            eta_str = f"{eta_seconds / 60:.1f}min" if eta_seconds >= 60 else f"{int(eta_seconds)}s"
+            parts.append(f"eta={eta_str}")
+
+        if parts:
+            progress_msg = " ".join(parts)
+            self._write_log(f"[{timestamp}] Progress: {progress_msg}")
+
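The ETA in the progress line is a simple rate extrapolation; a self-contained sketch with hypothetical progress numbers:

    # ETA = remaining trials / (completed trials per second), as in _emit_mipro_progress.
    completed_trials, total_trials, elapsed_seconds = 8, 20, 240.0
    rate = completed_trials / elapsed_seconds          # trials per second
    eta_seconds = (total_trials - completed_trials) / rate
    eta_str = f"{eta_seconds / 60:.1f}min" if eta_seconds >= 60 else f"{int(eta_seconds)}s"
    print(eta_str)  # 6.0min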
+    def flush(self) -> None:
+        """Flush buffered output and close log file."""
+        if self._log_file_handle:
+            try:
+                from datetime import datetime
+                self._log_file_handle.write("\n" + "=" * 80 + "\n")
+                self._log_file_handle.write(f"Ended: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
+                self._log_file_handle.write("=" * 80 + "\n")
+                self._log_file_handle.flush()
+                self._log_file_handle.close()
+            except Exception:
+                pass
+            finally:
+                self._log_file_handle = None
+
+    def _handle_proposal_scored(self, event_data: dict[str, Any]) -> None:
+        """Handle GEPA proposal scored events (transformations).
+
+        Displays transformation/proposal scoring events from GEPA optimization.
+        Only called if show_transformations is True (default: False) to avoid
+        verbose output. Shows the score assigned to each proposed transformation.
+
+        Args:
+            event_data: Event data dictionary containing:
+                - data.score: Score assigned to the transformation/proposal
+        """
+        # Only called if show_transformations=True
+        data = event_data.get("data", {})
+        if not isinstance(data, dict):
+            return
+
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        score = data.get("score")
+        if score is not None:
+            click.echo(f"[{timestamp}] Proposal scored: {score:.4f}")
+
+
+__all__ = [
+    "GraphGenHandler",
+    "BufferedHandler",
+    "CallbackHandler",
+    "CLIHandler",
+    "PromptLearningHandler",
+    "JSONHandler",
+    "IntegrationTestHandler",
+    "LossCurveHandler",
+    "RichHandler",
+    "StreamHandler",
+]