synth-ai 0.2.6.dev1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (738)
  1. synth_ai/__init__.py +44 -24
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +103 -48
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/demo/__init__.py +3 -0
  25. synth_ai/cli/commands/demo/core.py +153 -0
  26. synth_ai/cli/commands/eval/__init__.py +10 -0
  27. synth_ai/cli/commands/eval/config.py +338 -0
  28. synth_ai/cli/commands/eval/core.py +256 -0
  29. synth_ai/cli/commands/eval/runner.py +704 -0
  30. synth_ai/cli/commands/eval/validation.py +60 -0
  31. synth_ai/cli/commands/filter/__init__.py +12 -0
  32. synth_ai/cli/commands/filter/core.py +424 -0
  33. synth_ai/cli/commands/filter/errors.py +55 -0
  34. synth_ai/cli/commands/filter/validation.py +77 -0
  35. synth_ai/cli/commands/help/__init__.py +185 -0
  36. synth_ai/cli/commands/help/core.py +72 -0
  37. synth_ai/cli/commands/scan/__init__.py +19 -0
  38. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  39. synth_ai/cli/commands/scan/core.py +344 -0
  40. synth_ai/cli/commands/scan/health_checker.py +242 -0
  41. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  42. synth_ai/cli/commands/scan/models.py +83 -0
  43. synth_ai/cli/commands/smoke/__init__.py +7 -0
  44. synth_ai/cli/commands/smoke/core.py +1428 -0
  45. synth_ai/cli/commands/status/__init__.py +3 -0
  46. synth_ai/cli/commands/status/client.py +91 -0
  47. synth_ai/cli/commands/status/config.py +12 -0
  48. synth_ai/cli/commands/status/errors.py +11 -0
  49. synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
  50. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  51. synth_ai/cli/commands/status/subcommands/files.py +34 -0
  52. synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
  53. synth_ai/cli/commands/status/subcommands/models.py +35 -0
  54. synth_ai/cli/commands/status/subcommands/runs.py +34 -0
  55. synth_ai/cli/commands/status/subcommands/session.py +77 -0
  56. synth_ai/cli/commands/status/subcommands/summary.py +39 -0
  57. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  58. synth_ai/cli/commands/status/utils.py +23 -0
  59. synth_ai/cli/commands/train/__init__.py +53 -0
  60. synth_ai/cli/commands/train/core.py +22 -0
  61. synth_ai/cli/commands/train/errors.py +117 -0
  62. synth_ai/cli/commands/train/judge_schemas.py +201 -0
  63. synth_ai/cli/commands/train/judge_validation.py +305 -0
  64. synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
  65. synth_ai/cli/commands/train/validation.py +392 -0
  66. synth_ai/cli/demo_apps/__init__.py +10 -0
  67. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  68. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  69. synth_ai/cli/demo_apps/crafter/__init__.py +1 -0
  70. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  71. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  72. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  73. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  74. synth_ai/cli/demo_apps/demo_task_apps/__init__.py +7 -0
  75. synth_ai/{demos → cli/demo_apps}/demo_task_apps/core.py +117 -51
  76. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  79. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  80. synth_ai/cli/demo_apps/demo_task_apps/math/_common.py +16 -0
  81. synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +2 -1
  82. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
  83. synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +3 -6
  84. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
  85. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  86. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  87. synth_ai/cli/demo_apps/math/_common.py +16 -0
  88. synth_ai/cli/demo_apps/math/app.py +38 -0
  89. synth_ai/cli/demo_apps/math/config.toml +75 -0
  90. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  91. synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
  92. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  93. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  94. synth_ai/cli/demo_apps/mipro/task_app.py +922 -0
  95. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  96. synth_ai/cli/demos/__init__.py +12 -0
  97. synth_ai/cli/demos/demo.py +32 -0
  98. synth_ai/cli/demos/rl_demo.py +254 -0
  99. synth_ai/cli/deploy.py +216 -0
  100. synth_ai/cli/infra/__init__.py +14 -0
  101. synth_ai/cli/{balance.py → infra/balance.py} +21 -3
  102. synth_ai/cli/infra/mcp.py +35 -0
  103. synth_ai/cli/infra/modal_app.py +36 -0
  104. synth_ai/cli/infra/setup.py +69 -0
  105. synth_ai/cli/infra/status.py +16 -0
  106. synth_ai/cli/infra/turso.py +77 -0
  107. synth_ai/cli/lib/__init__.py +10 -0
  108. synth_ai/cli/lib/agents.py +76 -0
  109. synth_ai/cli/lib/apps/modal_app.py +101 -0
  110. synth_ai/cli/lib/apps/task_app.py +642 -0
  111. synth_ai/cli/lib/bin.py +39 -0
  112. synth_ai/cli/lib/env.py +375 -0
  113. synth_ai/cli/lib/errors.py +85 -0
  114. synth_ai/cli/lib/modal.py +315 -0
  115. synth_ai/cli/lib/plotting.py +126 -0
  116. synth_ai/cli/lib/prompt_args.py +39 -0
  117. synth_ai/cli/lib/prompts.py +284 -0
  118. synth_ai/cli/lib/sqld.py +122 -0
  119. synth_ai/cli/lib/task_app_discovery.py +884 -0
  120. synth_ai/cli/lib/task_app_env.py +295 -0
  121. synth_ai/cli/lib/train_cfgs.py +300 -0
  122. synth_ai/cli/lib/tunnel_records.py +207 -0
  123. synth_ai/cli/local/__init__.py +14 -0
  124. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  125. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  126. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  127. synth_ai/cli/local/experiment_queue/config.py +128 -0
  128. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  129. synth_ai/cli/local/experiment_queue/database.py +175 -0
  130. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  131. synth_ai/cli/local/experiment_queue/models.py +231 -0
  132. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  133. synth_ai/cli/local/experiment_queue/results.py +373 -0
  134. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  135. synth_ai/cli/local/experiment_queue/service.py +344 -0
  136. synth_ai/cli/local/experiment_queue/status.py +372 -0
  137. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  138. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  139. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  140. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  141. synth_ai/cli/local/session/__init__.py +92 -0
  142. synth_ai/cli/local/session/client.py +383 -0
  143. synth_ai/cli/local/session/constants.py +63 -0
  144. synth_ai/cli/local/session/exceptions.py +105 -0
  145. synth_ai/cli/local/session/manager.py +139 -0
  146. synth_ai/cli/local/session/models.py +89 -0
  147. synth_ai/cli/local/session/query.py +110 -0
  148. synth_ai/cli/root.py +150 -102
  149. synth_ai/cli/task_apps/__init__.py +37 -0
  150. synth_ai/cli/task_apps/commands.py +3145 -0
  151. synth_ai/cli/task_apps/deploy.py +7 -0
  152. synth_ai/cli/task_apps/list.py +26 -0
  153. synth_ai/cli/task_apps/main.py +36 -0
  154. synth_ai/cli/task_apps/modal_serve.py +11 -0
  155. synth_ai/cli/task_apps/serve.py +11 -0
  156. synth_ai/cli/training/__init__.py +8 -0
  157. synth_ai/cli/training/train.py +5 -0
  158. synth_ai/cli/training/train_cfg.py +34 -0
  159. synth_ai/cli/{watch.py → training/watch.py} +13 -18
  160. synth_ai/cli/turso.py +52 -0
  161. synth_ai/cli/utils/__init__.py +8 -0
  162. synth_ai/cli/utils/experiments.py +235 -0
  163. synth_ai/cli/utils/queue.py +504 -0
  164. synth_ai/cli/{recent.py → utils/recent.py} +13 -7
  165. synth_ai/cli/{traces.py → utils/traces.py} +9 -5
  166. synth_ai/contracts/__init__.py +67 -0
  167. synth_ai/core/__init__.py +100 -0
  168. synth_ai/core/_utils/__init__.py +54 -0
  169. synth_ai/core/_utils/base_url.py +10 -0
  170. synth_ai/core/_utils/http.py +10 -0
  171. synth_ai/core/_utils/prompts.py +14 -0
  172. synth_ai/core/_utils/task_app_state.py +12 -0
  173. synth_ai/core/_utils/user_config.py +10 -0
  174. synth_ai/core/apps/common.py +116 -0
  175. synth_ai/core/auth.py +95 -0
  176. synth_ai/core/cfgs.py +240 -0
  177. synth_ai/core/config/__init__.py +16 -0
  178. synth_ai/core/config/base.py +168 -0
  179. synth_ai/core/config/resolver.py +89 -0
  180. synth_ai/core/env.py +231 -0
  181. synth_ai/core/errors.py +126 -0
  182. synth_ai/core/http.py +230 -0
  183. synth_ai/core/integrations/__init__.py +11 -0
  184. synth_ai/core/integrations/cloudflare.py +1710 -0
  185. synth_ai/core/integrations/mcp/__init__.py +6 -0
  186. synth_ai/core/integrations/mcp/__main__.py +8 -0
  187. synth_ai/core/integrations/mcp/claude.py +36 -0
  188. synth_ai/core/integrations/mcp/main.py +254 -0
  189. synth_ai/core/integrations/mcp/setup.py +100 -0
  190. synth_ai/core/integrations/modal.py +277 -0
  191. synth_ai/core/json.py +72 -0
  192. synth_ai/core/log_filter.py +99 -0
  193. synth_ai/core/logging.py +82 -0
  194. synth_ai/core/paths.py +107 -0
  195. synth_ai/core/pricing.py +109 -0
  196. synth_ai/core/process.py +233 -0
  197. synth_ai/core/ssl.py +25 -0
  198. synth_ai/core/storage/__init__.py +71 -0
  199. synth_ai/core/task_app_state.py +318 -0
  200. synth_ai/core/telemetry.py +282 -0
  201. synth_ai/{tracing_v3 → core/tracing_v3}/__init__.py +5 -1
  202. synth_ai/{tracing_v3 → core/tracing_v3}/abstractions.py +21 -4
  203. synth_ai/core/tracing_v3/config.py +229 -0
  204. synth_ai/core/tracing_v3/constants.py +21 -0
  205. synth_ai/{tracing_v3 → core/tracing_v3}/db_config.py +42 -29
  206. synth_ai/{tracing_v3 → core/tracing_v3}/decorators.py +80 -45
  207. synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +15 -9
  208. synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +6 -4
  209. synth_ai/{tracing_v3 → core/tracing_v3}/llm_call_record_helpers.py +161 -61
  210. synth_ai/{tracing_v3 → core/tracing_v3}/migration_helper.py +1 -2
  211. synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +12 -7
  212. synth_ai/core/tracing_v3/serialization.py +130 -0
  213. synth_ai/{tracing_v3 → core/tracing_v3}/session_tracer.py +88 -21
  214. synth_ai/{tracing_v3 → core/tracing_v3}/storage/base.py +99 -12
  215. synth_ai/core/tracing_v3/storage/config.py +109 -0
  216. synth_ai/{tracing_v3 → core/tracing_v3}/storage/factory.py +11 -9
  217. synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +15 -11
  218. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  219. synth_ai/core/tracing_v3/turso/__init__.py +12 -0
  220. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  221. synth_ai/{tracing_v3 → core/tracing_v3}/turso/models.py +7 -3
  222. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  223. synth_ai/{tracing_v3 → core/tracing_v3}/utils.py +5 -4
  224. synth_ai/core/urls.py +18 -0
  225. synth_ai/core/user_config.py +137 -0
  226. synth_ai/core/uvicorn.py +222 -0
  227. synth_ai/data/__init__.py +83 -0
  228. synth_ai/data/enums.py +123 -0
  229. synth_ai/data/rewards.py +152 -0
  230. synth_ai/data/traces.py +35 -0
  231. synth_ai/products/__init__.py +6 -0
  232. synth_ai/products/graph_evolve/__init__.py +46 -0
  233. synth_ai/products/graph_evolve/client.py +226 -0
  234. synth_ai/products/graph_evolve/config.py +591 -0
  235. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  236. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  237. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  238. synth_ai/products/graph_evolve/run.py +222 -0
  239. synth_ai/products/graph_gepa/__init__.py +23 -0
  240. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  241. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  242. synth_ai/sdk/__init__.py +123 -0
  243. synth_ai/sdk/api/__init__.py +1 -0
  244. synth_ai/sdk/api/models/supported.py +514 -0
  245. synth_ai/sdk/api/research_agent/__init__.py +296 -0
  246. synth_ai/sdk/api/train/__init__.py +85 -0
  247. synth_ai/sdk/api/train/builders.py +895 -0
  248. synth_ai/sdk/api/train/cli.py +2199 -0
  249. synth_ai/sdk/api/train/config_finder.py +267 -0
  250. synth_ai/sdk/api/train/configs/__init__.py +65 -0
  251. synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
  252. synth_ai/sdk/api/train/configs/rl.py +187 -0
  253. synth_ai/sdk/api/train/configs/sft.py +99 -0
  254. synth_ai/sdk/api/train/configs/shared.py +81 -0
  255. synth_ai/sdk/api/train/context_learning.py +312 -0
  256. synth_ai/sdk/api/train/env_resolver.py +418 -0
  257. synth_ai/sdk/api/train/graph_validators.py +216 -0
  258. synth_ai/sdk/api/train/graphgen.py +984 -0
  259. synth_ai/sdk/api/train/graphgen_models.py +823 -0
  260. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  261. synth_ai/sdk/api/train/local_api.py +10 -0
  262. synth_ai/sdk/api/train/pollers.py +124 -0
  263. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  264. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  265. synth_ai/sdk/api/train/progress/events.py +326 -0
  266. synth_ai/sdk/api/train/progress/results.py +428 -0
  267. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  268. synth_ai/sdk/api/train/prompt_learning.py +469 -0
  269. synth_ai/sdk/api/train/rl.py +441 -0
  270. synth_ai/sdk/api/train/sft.py +396 -0
  271. synth_ai/sdk/api/train/summary.py +522 -0
  272. synth_ai/sdk/api/train/supported_algos.py +147 -0
  273. synth_ai/sdk/api/train/task_app.py +351 -0
  274. synth_ai/sdk/api/train/utils.py +279 -0
  275. synth_ai/sdk/api/train/validators.py +2424 -0
  276. synth_ai/sdk/graphs/__init__.py +15 -0
  277. synth_ai/sdk/graphs/completions.py +570 -0
  278. synth_ai/{inference → sdk/inference}/__init__.py +0 -1
  279. synth_ai/sdk/inference/client.py +128 -0
  280. synth_ai/sdk/jobs/__init__.py +16 -0
  281. synth_ai/sdk/jobs/client.py +371 -0
  282. synth_ai/sdk/judging/__init__.py +14 -0
  283. synth_ai/sdk/judging/base.py +24 -0
  284. synth_ai/sdk/judging/client.py +40 -0
  285. synth_ai/sdk/judging/schemas.py +222 -0
  286. synth_ai/sdk/judging/types.py +42 -0
  287. synth_ai/sdk/learning/__init__.py +99 -0
  288. synth_ai/sdk/learning/algorithms.py +14 -0
  289. synth_ai/{learning → sdk/learning}/client.py +121 -30
  290. synth_ai/sdk/learning/config.py +5 -0
  291. synth_ai/{learning → sdk/learning}/constants.py +0 -2
  292. synth_ai/sdk/learning/context_learning_client.py +531 -0
  293. synth_ai/sdk/learning/context_learning_types.py +292 -0
  294. synth_ai/sdk/learning/ft_client.py +7 -0
  295. synth_ai/{learning → sdk/learning}/health.py +15 -9
  296. synth_ai/{learning → sdk/learning}/jobs.py +44 -47
  297. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  298. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  299. synth_ai/sdk/learning/prompt_learning_types.py +186 -0
  300. synth_ai/{rl → sdk/learning/rl}/__init__.py +13 -8
  301. synth_ai/{learning/rl_client.py → sdk/learning/rl/client.py} +89 -77
  302. synth_ai/sdk/learning/rl/config.py +31 -0
  303. synth_ai/{rl → sdk/learning/rl}/contracts.py +5 -14
  304. synth_ai/{rl → sdk/learning/rl}/env_keys.py +45 -16
  305. synth_ai/sdk/learning/rl/secrets.py +13 -0
  306. synth_ai/sdk/learning/rl_client.py +5 -0
  307. synth_ai/sdk/learning/sft/__init__.py +29 -0
  308. synth_ai/sdk/learning/sft/client.py +95 -0
  309. synth_ai/sdk/learning/sft/config.py +270 -0
  310. synth_ai/sdk/learning/sft/data.py +698 -0
  311. synth_ai/sdk/learning/sse.py +57 -0
  312. synth_ai/sdk/learning/validators.py +52 -0
  313. synth_ai/sdk/localapi/__init__.py +40 -0
  314. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  315. synth_ai/sdk/localapi/client.py +10 -0
  316. synth_ai/sdk/localapi/contracts.py +10 -0
  317. synth_ai/sdk/localapi/helpers.py +519 -0
  318. synth_ai/sdk/localapi/rollouts.py +87 -0
  319. synth_ai/sdk/localapi/server.py +29 -0
  320. synth_ai/sdk/localapi/template.py +70 -0
  321. synth_ai/sdk/streaming/__init__.py +35 -0
  322. synth_ai/sdk/streaming/config.py +94 -0
  323. synth_ai/sdk/streaming/handlers.py +1997 -0
  324. synth_ai/sdk/streaming/streamer.py +713 -0
  325. synth_ai/sdk/streaming/types.py +112 -0
  326. synth_ai/sdk/task/__init__.py +164 -0
  327. synth_ai/sdk/task/apps/__init__.py +169 -0
  328. synth_ai/sdk/task/auth.py +165 -0
  329. synth_ai/sdk/task/client.py +175 -0
  330. synth_ai/sdk/task/config.py +257 -0
  331. synth_ai/sdk/task/contracts.py +219 -0
  332. synth_ai/sdk/task/datasets.py +108 -0
  333. synth_ai/sdk/task/errors.py +50 -0
  334. synth_ai/sdk/task/health.py +34 -0
  335. synth_ai/sdk/task/in_process.py +1190 -0
  336. synth_ai/sdk/task/in_process_runner.py +314 -0
  337. synth_ai/sdk/task/inference_api.py +299 -0
  338. synth_ai/sdk/task/json.py +111 -0
  339. synth_ai/sdk/task/proxy.py +287 -0
  340. synth_ai/sdk/task/rubrics/__init__.py +55 -0
  341. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  342. synth_ai/sdk/task/rubrics/models.py +57 -0
  343. synth_ai/sdk/task/rubrics/scoring.py +116 -0
  344. synth_ai/sdk/task/rubrics/strict.py +149 -0
  345. synth_ai/sdk/task/rubrics.py +219 -0
  346. synth_ai/sdk/task/server.py +631 -0
  347. synth_ai/sdk/task/trace_correlation_helpers.py +539 -0
  348. synth_ai/sdk/task/tracing_utils.py +95 -0
  349. synth_ai/sdk/task/validators.py +441 -0
  350. synth_ai/sdk/task/vendors.py +59 -0
  351. synth_ai/sdk/training/__init__.py +102 -0
  352. synth_ai/sdk/tunnels/__init__.py +83 -0
  353. synth_ai/sdk/tunnels/cleanup.py +83 -0
  354. synth_ai/sdk/tunnels/ports.py +120 -0
  355. synth_ai/utils/__init__.py +213 -0
  356. synth_ai-0.4.3.dist-info/METADATA +262 -0
  357. synth_ai-0.4.3.dist-info/RECORD +370 -0
  358. {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/entry_points.txt +0 -1
  359. synth_ai/cli/calc.py +0 -69
  360. synth_ai/cli/demo.py +0 -131
  361. synth_ai/cli/legacy_root_backup.py +0 -470
  362. synth_ai/cli/man.py +0 -106
  363. synth_ai/cli/rl_demo.py +0 -137
  364. synth_ai/cli/status.py +0 -133
  365. synth_ai/config/base_url.py +0 -98
  366. synth_ai/core/experiment.py +0 -15
  367. synth_ai/core/system.py +0 -15
  368. synth_ai/demos/core/__init__.py +0 -1
  369. synth_ai/demos/core/cli.py +0 -685
  370. synth_ai/demos/demo_task_apps/__init__.py +0 -1
  371. synth_ai/demos/demo_task_apps/math/config.toml +0 -44
  372. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  373. synth_ai/environments/__init__.py +0 -31
  374. synth_ai/environments/environment/__init__.py +0 -1
  375. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  376. synth_ai/environments/environment/artifacts/base.py +0 -52
  377. synth_ai/environments/environment/core.py +0 -67
  378. synth_ai/environments/environment/db/__init__.py +0 -1
  379. synth_ai/environments/environment/db/sqlite.py +0 -45
  380. synth_ai/environments/environment/registry.py +0 -233
  381. synth_ai/environments/environment/resources/sqlite.py +0 -45
  382. synth_ai/environments/environment/results.py +0 -1
  383. synth_ai/environments/environment/rewards/__init__.py +0 -1
  384. synth_ai/environments/environment/rewards/core.py +0 -29
  385. synth_ai/environments/environment/shared_engine.py +0 -26
  386. synth_ai/environments/environment/tools/__init__.py +0 -200
  387. synth_ai/environments/examples/__init__.py +0 -1
  388. synth_ai/environments/examples/bandit/__init__.py +0 -33
  389. synth_ai/environments/examples/bandit/engine.py +0 -294
  390. synth_ai/environments/examples/bandit/environment.py +0 -194
  391. synth_ai/environments/examples/bandit/taskset.py +0 -200
  392. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  393. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  394. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  395. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  396. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  397. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  398. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  399. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  400. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -724
  401. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  402. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  403. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  404. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  405. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  406. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  407. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  408. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  409. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  410. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  411. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  412. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  413. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  414. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  415. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  416. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  417. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  418. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  419. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  420. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  421. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  422. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  423. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  424. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  425. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  426. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  427. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  428. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  429. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  430. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  431. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  432. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  433. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  434. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  435. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  436. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  437. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  438. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  439. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  440. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  441. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  442. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  443. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  444. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  445. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  446. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  447. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  448. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  449. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  450. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  451. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  452. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  453. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  454. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  455. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  456. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  457. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  458. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  459. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  460. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  461. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  462. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  463. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  464. synth_ai/environments/examples/crafter_classic/environment.py +0 -404
  465. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  466. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  467. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  468. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  469. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  470. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  471. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  472. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  473. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  474. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  475. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  476. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  477. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  478. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  479. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  480. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  481. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  482. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  483. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  484. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  485. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  486. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  487. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  488. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  489. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  490. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  491. synth_ai/environments/examples/enron/engine.py +0 -295
  492. synth_ai/environments/examples/enron/environment.py +0 -166
  493. synth_ai/environments/examples/enron/taskset.py +0 -112
  494. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  495. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  496. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  497. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  498. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  499. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  500. synth_ai/environments/examples/minigrid/engine.py +0 -589
  501. synth_ai/environments/examples/minigrid/environment.py +0 -274
  502. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  503. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  504. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  505. synth_ai/environments/examples/nethack/__init__.py +0 -7
  506. synth_ai/environments/examples/nethack/achievements.py +0 -337
  507. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  508. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  509. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  510. synth_ai/environments/examples/nethack/engine.py +0 -739
  511. synth_ai/environments/examples/nethack/environment.py +0 -256
  512. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  513. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  514. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  515. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  516. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  517. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  518. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  519. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  520. synth_ai/environments/examples/nethack/taskset.py +0 -323
  521. synth_ai/environments/examples/red/__init__.py +0 -7
  522. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  523. synth_ai/environments/examples/red/config_logging.py +0 -110
  524. synth_ai/environments/examples/red/engine.py +0 -694
  525. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  526. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -28
  527. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  528. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  529. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  530. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  531. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  532. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  533. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  534. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  535. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  536. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  537. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  538. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  539. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  540. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  541. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -140
  542. synth_ai/environments/examples/red/environment.py +0 -238
  543. synth_ai/environments/examples/red/taskset.py +0 -79
  544. synth_ai/environments/examples/red/units/__init__.py +0 -1
  545. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  546. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  547. synth_ai/environments/examples/sokoban/engine.py +0 -678
  548. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  549. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  550. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  551. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  552. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  553. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  554. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  555. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  556. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  557. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  558. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  559. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  560. synth_ai/environments/examples/sokoban/environment.py +0 -229
  561. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  562. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  563. synth_ai/environments/examples/sokoban/taskset.py +0 -428
  564. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  565. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  566. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  567. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  568. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  569. synth_ai/environments/examples/verilog/__init__.py +0 -10
  570. synth_ai/environments/examples/verilog/engine.py +0 -329
  571. synth_ai/environments/examples/verilog/environment.py +0 -350
  572. synth_ai/environments/examples/verilog/taskset.py +0 -420
  573. synth_ai/environments/examples/wordle/__init__.py +0 -29
  574. synth_ai/environments/examples/wordle/engine.py +0 -398
  575. synth_ai/environments/examples/wordle/environment.py +0 -159
  576. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  577. synth_ai/environments/examples/wordle/taskset.py +0 -230
  578. synth_ai/environments/reproducibility/core.py +0 -42
  579. synth_ai/environments/reproducibility/helpers.py +0 -0
  580. synth_ai/environments/reproducibility/tree.py +0 -364
  581. synth_ai/environments/service/app.py +0 -91
  582. synth_ai/environments/service/core_routes.py +0 -1020
  583. synth_ai/environments/service/external_registry.py +0 -56
  584. synth_ai/environments/service/registry.py +0 -9
  585. synth_ai/environments/stateful/__init__.py +0 -1
  586. synth_ai/environments/stateful/core.py +0 -163
  587. synth_ai/environments/stateful/engine.py +0 -21
  588. synth_ai/environments/stateful/state.py +0 -7
  589. synth_ai/environments/tasks/api.py +0 -19
  590. synth_ai/environments/tasks/core.py +0 -80
  591. synth_ai/environments/tasks/filters.py +0 -41
  592. synth_ai/environments/tasks/utils.py +0 -91
  593. synth_ai/environments/v0_observability/history.py +0 -3
  594. synth_ai/environments/v0_observability/log.py +0 -2
  595. synth_ai/evals/base.py +0 -15
  596. synth_ai/experimental/synth_oss.py +0 -446
  597. synth_ai/http.py +0 -102
  598. synth_ai/inference/client.py +0 -20
  599. synth_ai/install_sqld.sh +0 -40
  600. synth_ai/jobs/client.py +0 -246
  601. synth_ai/learning/__init__.py +0 -24
  602. synth_ai/learning/config.py +0 -43
  603. synth_ai/learning/filtering.py +0 -0
  604. synth_ai/learning/ft_client.py +0 -59
  605. synth_ai/learning/offline/dpo.py +0 -0
  606. synth_ai/learning/offline/providers.py +0 -7
  607. synth_ai/learning/offline/sft.py +0 -0
  608. synth_ai/learning/offline/shared.py +0 -0
  609. synth_ai/learning/online/grpo.py +0 -0
  610. synth_ai/learning/online/irft.py +0 -0
  611. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  612. synth_ai/learning/prompts/gepa.py +0 -0
  613. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  614. synth_ai/learning/prompts/mipro.py +0 -289
  615. synth_ai/learning/prompts/random_search.py +0 -246
  616. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  617. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  618. synth_ai/learning/sse.py +0 -58
  619. synth_ai/learning/validators.py +0 -48
  620. synth_ai/lm/__init__.py +0 -51
  621. synth_ai/lm/caching/constants.py +0 -6
  622. synth_ai/lm/caching/dbs.py +0 -0
  623. synth_ai/lm/caching/ephemeral.py +0 -102
  624. synth_ai/lm/caching/handler.py +0 -137
  625. synth_ai/lm/caching/initialize.py +0 -11
  626. synth_ai/lm/caching/persistent.py +0 -114
  627. synth_ai/lm/config.py +0 -110
  628. synth_ai/lm/constants.py +0 -32
  629. synth_ai/lm/core/__init__.py +0 -8
  630. synth_ai/lm/core/all.py +0 -73
  631. synth_ai/lm/core/exceptions.py +0 -7
  632. synth_ai/lm/core/main.py +0 -319
  633. synth_ai/lm/core/main_v3.py +0 -594
  634. synth_ai/lm/core/synth_models.py +0 -48
  635. synth_ai/lm/core/vendor_clients.py +0 -188
  636. synth_ai/lm/cost/__init__.py +0 -0
  637. synth_ai/lm/cost/monitor.py +0 -1
  638. synth_ai/lm/cost/statefulness.py +0 -1
  639. synth_ai/lm/injection.py +0 -80
  640. synth_ai/lm/overrides.py +0 -206
  641. synth_ai/lm/provider_support/__init__.py +0 -8
  642. synth_ai/lm/provider_support/anthropic.py +0 -972
  643. synth_ai/lm/provider_support/openai.py +0 -1139
  644. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  645. synth_ai/lm/structured_outputs/__init__.py +0 -0
  646. synth_ai/lm/structured_outputs/handler.py +0 -440
  647. synth_ai/lm/structured_outputs/inject.py +0 -297
  648. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  649. synth_ai/lm/tools/__init__.py +0 -3
  650. synth_ai/lm/tools/base.py +0 -172
  651. synth_ai/lm/unified_interface.py +0 -202
  652. synth_ai/lm/vendors/__init__.py +0 -0
  653. synth_ai/lm/vendors/base.py +0 -81
  654. synth_ai/lm/vendors/core/__init__.py +0 -0
  655. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  656. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  657. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  658. synth_ai/lm/vendors/core/openai_api.py +0 -220
  659. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  660. synth_ai/lm/vendors/local/__init__.py +0 -0
  661. synth_ai/lm/vendors/local/ollama.py +0 -0
  662. synth_ai/lm/vendors/openai_standard.py +0 -780
  663. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  664. synth_ai/lm/vendors/retries.py +0 -22
  665. synth_ai/lm/vendors/supported/__init__.py +0 -0
  666. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  667. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  668. synth_ai/lm/vendors/supported/grok.py +0 -75
  669. synth_ai/lm/vendors/supported/groq.py +0 -16
  670. synth_ai/lm/vendors/supported/ollama.py +0 -15
  671. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  672. synth_ai/lm/vendors/supported/together.py +0 -11
  673. synth_ai/lm/vendors/synth_client.py +0 -808
  674. synth_ai/lm/warmup.py +0 -186
  675. synth_ai/rl/secrets.py +0 -19
  676. synth_ai/scripts/verify_rewards.py +0 -100
  677. synth_ai/task/__init__.py +0 -10
  678. synth_ai/task/contracts.py +0 -120
  679. synth_ai/task/health.py +0 -28
  680. synth_ai/task/validators.py +0 -12
  681. synth_ai/tracing/__init__.py +0 -30
  682. synth_ai/tracing_v1/__init__.py +0 -33
  683. synth_ai/tracing_v3/config.py +0 -84
  684. synth_ai/tracing_v3/storage/config.py +0 -62
  685. synth_ai/tracing_v3/turso/__init__.py +0 -25
  686. synth_ai/tracing_v3/turso/daemon.py +0 -144
  687. synth_ai/tracing_v3/turso/manager.py +0 -760
  688. synth_ai/v0/tracing/__init__.py +0 -0
  689. synth_ai/v0/tracing/abstractions.py +0 -224
  690. synth_ai/v0/tracing/base_client.py +0 -91
  691. synth_ai/v0/tracing/client_manager.py +0 -131
  692. synth_ai/v0/tracing/config.py +0 -140
  693. synth_ai/v0/tracing/context.py +0 -146
  694. synth_ai/v0/tracing/decorators.py +0 -680
  695. synth_ai/v0/tracing/events/__init__.py +0 -0
  696. synth_ai/v0/tracing/events/manage.py +0 -147
  697. synth_ai/v0/tracing/events/scope.py +0 -86
  698. synth_ai/v0/tracing/events/store.py +0 -228
  699. synth_ai/v0/tracing/immediate_client.py +0 -151
  700. synth_ai/v0/tracing/local.py +0 -18
  701. synth_ai/v0/tracing/log_client_base.py +0 -73
  702. synth_ai/v0/tracing/retry_queue.py +0 -186
  703. synth_ai/v0/tracing/trackers.py +0 -515
  704. synth_ai/v0/tracing/upload.py +0 -510
  705. synth_ai/v0/tracing/utils.py +0 -9
  706. synth_ai/v0/tracing_v1/__init__.py +0 -16
  707. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  708. synth_ai/v0/tracing_v1/base_client.py +0 -91
  709. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  710. synth_ai/v0/tracing_v1/config.py +0 -140
  711. synth_ai/v0/tracing_v1/context.py +0 -146
  712. synth_ai/v0/tracing_v1/decorators.py +0 -701
  713. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  714. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  715. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  716. synth_ai/v0/tracing_v1/events/store.py +0 -228
  717. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  718. synth_ai/v0/tracing_v1/local.py +0 -18
  719. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  720. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  721. synth_ai/v0/tracing_v1/trackers.py +0 -515
  722. synth_ai/v0/tracing_v1/upload.py +0 -525
  723. synth_ai/v0/tracing_v1/utils.py +0 -9
  724. synth_ai/zyk/__init__.py +0 -30
  725. synth_ai-0.2.6.dev1.dist-info/METADATA +0 -106
  726. synth_ai-0.2.6.dev1.dist-info/RECORD +0 -416
  727. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  728. /synth_ai/{lm/caching → core/apps}/__init__.py +0 -0
  729. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  730. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  731. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  732. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  733. /synth_ai/{compound/cais.py → py.typed} +0 -0
  734. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  735. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  736. {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/WHEEL +0 -0
  737. {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/licenses/LICENSE +0 -0
  738. {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/top_level.txt +0 -0
synth_ai/sdk/streaming/handlers.py
@@ -0,0 +1,1997 @@
+ from __future__ import annotations
+
+ import contextlib
+ import json
+ import re
+ import time
+ from abc import ABC, abstractmethod
+ from collections import deque
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Any, Callable
+
+ import click
+
+ from .types import StreamMessage, StreamType
+
+
+ def _mask_sensitive_urls(text: str) -> str:
+     """Mask S3/Wasabi URLs and sensitive paths in log messages.
+
+     Replaces full S3/Wasabi URLs with masked versions to prevent leaking
+     bucket names, paths, and infrastructure details in public SDK logs.
+
+     Examples:
+         s3://synth-artifacts/models/... -> s3://***/***/[masked]
+         Wasabi s3://bucket/path/file.tar.gz -> Wasabi s3://***/***/[masked]
+     """
+     if not text:
+         return text
+
+     # Pattern matches:
+     # - Optional "Wasabi " prefix
+     # - s3:// or http(s):// scheme
+     # - Any bucket/host
+     # - Any path
+     # - Common model file extensions
+     pattern = r'(Wasabi\s+)?((s3|https?)://[^\s]+\.(tar\.gz|zip|pt|pth|safetensors|ckpt|bin))'
+
+     def replace_url(match: re.Match) -> str:
+         prefix = match.group(1) or ""  # "Wasabi " or empty
+         url = match.group(2)
+         # Extract just the filename
+         filename = url.split("/")[-1] if "/" in url else "file"
+         return f'{prefix}s3://***/***/[{filename}]'
+
+     return re.sub(pattern, replace_url, text, flags=re.IGNORECASE)
+
+
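For orientation, a minimal usage sketch of the masking helper above; the checkpoint path is made up and only the behaviour documented in the docstring is shown:

    from synth_ai.sdk.streaming.handlers import _mask_sensitive_urls

    print(_mask_sensitive_urls("Uploading Wasabi s3://my-bucket/runs/42/model.tar.gz"))
    # -> "Uploading Wasabi s3://***/***/[model.tar.gz]"

    print(_mask_sensitive_urls("see https://example.com/docs"))
    # -> unchanged: no recognised model-file extension, so the pattern does not match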
+ class StreamHandler(ABC):
+     """Base class for log handlers that consume ``StreamMessage`` objects."""
+
+     @abstractmethod
+     def handle(self, message: StreamMessage) -> None:
+         """Process a message produced by the streamer."""
+
+     def should_handle(self, message: StreamMessage) -> bool:  # pragma: no cover - trivial
+         """Predicate allowing handlers to filter messages before processing."""
+         return True
+
+     def flush(self) -> None:  # pragma: no cover - optional
+         """Flush buffered output."""
+         return None
+
+
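A concrete handler only needs handle(); should_handle() and flush() are optional overrides. A minimal sketch reusing the module's imports (the subclass name and filtering rule are illustrative, not part of the package):

    class ErrorOnlyHandler(StreamHandler):
        """Print only error-level event messages."""

        def should_handle(self, message: StreamMessage) -> bool:
            return message.stream_type is StreamType.EVENTS

        def handle(self, message: StreamMessage) -> None:
            if not self.should_handle(message):
                return
            if message.data.get("level") == "error":
                click.echo(message.data.get("message") or "")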
+ class CLIHandler(StreamHandler):
+     """Simple CLI output mirroring current poller behaviour."""
+
+     def __init__(
+         self,
+         *,
+         hidden_event_types: set[str] | None = None,
+         hidden_event_substrings: set[str] | None = None,
+     ) -> None:
+         self._hidden_event_types = set(hidden_event_types or set())
+         self._hidden_event_substrings = {s.lower() for s in (hidden_event_substrings or set())}
+
+     def handle(self, message: StreamMessage) -> None:
+         if not self.should_handle(message):
+             return
+
+         timestamp = datetime.now().strftime("%H:%M:%S")
+         if message.stream_type is StreamType.STATUS:
+             status = str(message.data.get("status") or message.data.get("state") or "unknown")
+             click.echo(f"[{timestamp}] status={status}")
+             return
+
+         if message.stream_type is StreamType.EVENTS:
+             event_type = message.data.get("type", "event")
+             if event_type in self._hidden_event_types:
+                 return
+             level = message.data.get("level")
+             msg = message.data.get("message") or ""
+             # Evaluate substring filters against lower-cased concatenated text
+             if self._hidden_event_substrings:
+                 blob = " ".join(
+                     [
+                         event_type or "",
+                         str(msg),
+                         json.dumps(message.data.get("data", "")),
+                     ]
+                 ).lower()
+                 if any(sub in blob for sub in self._hidden_event_substrings):
+                     return
+             prefix = f"[{timestamp}] [{message.seq}] {event_type}"
+             if level:
+                 prefix += f" ({level})"
+             # Mask sensitive URLs before displaying
+             sanitized_msg = _mask_sensitive_urls(msg)
+
+             # For error events, show full details including underlying errors
+             if level == "error" or event_type.endswith(".failed"):
+                 click.echo(f"{prefix}: {sanitized_msg}")
+                 # Show error details from data field if available
+                 data = message.data.get("data", {})
+                 if isinstance(data, dict):
+                     error_detail = data.get("detail") or data.get("error") or data.get("error_detail")
+                     if error_detail and str(error_detail) != sanitized_msg:
+                         # Show underlying error if different from main message
+                         click.echo(f" Error details: {error_detail}")
+                     # Show traceback or stack if available
+                     traceback_info = data.get("traceback") or data.get("stack")
+                     if traceback_info:
+                         lines = str(traceback_info).split("\n")
+                         # Show last few lines of traceback (most relevant)
+                         for line in lines[-5:]:
+                             if line.strip():
+                                 click.echo(f" {line}")
+             else:
+                 click.echo(f"{prefix}: {sanitized_msg}".rstrip(": "))
+
+             data = message.data.get("data") if isinstance(message.data.get("data"), dict) else {}
+             if event_type == "prompt.learning.mipro.complete" and data:
+                 best_prompt = data.get("best_prompt")
+                 if isinstance(best_prompt, dict):
+                     sections = best_prompt.get("sections")
+                     if isinstance(sections, list) and sections:
+                         click.echo(" --- BEST PROMPT ---")
+                         for section in sections:
+                             if not isinstance(section, dict):
+                                 continue
+                             role = section.get("role", "unknown").upper()
+                             name = section.get("name")
+                             header = f" [{role}]"
+                             if name:
+                                 header += f" {name}"
+                             click.echo(header)
+                             content = section.get("content", "")
+                             if isinstance(content, str) and content:
+                                 click.echo(f" {content}")
+                         click.echo(" -------------------")
+
+             if event_type == "mipro.topk.evaluated" and data:
+                 rank = data.get("rank")
+                 train_score = data.get("train_score")
+                 test_score = data.get("test_score")
+                 instruction_text = data.get("instruction_text", "")
+                 demo_indices = data.get("demo_indices", [])
+                 lift_abs = data.get("lift_absolute")
+                 lift_pct = data.get("lift_percent")
+                 stage_payloads = data.get("stage_payloads", {})
+                 details: list[str] = []
+                 if rank is not None:
+                     details.append(f"Rank {rank}")
+                 if isinstance(train_score, int | float):
+                     train_score_float = float(train_score)
+                     details.append(f"train={train_score_float:.3f} ({train_score_float*100:.1f}%)")
+                 if isinstance(test_score, int | float):
+                     test_score_float = float(test_score)
+                     details.append(f"test={test_score_float:.3f} ({test_score_float*100:.1f}%)")
+                 if isinstance(lift_abs, int | float) and isinstance(lift_pct, int | float):
+                     details.append(f"lift={lift_abs:+.3f} ({lift_pct:+.1f}%)")
+                 if details:
+                     click.echo(" --- TOP-K CANDIDATE ---")
+                     click.echo(f" {' | '.join(details)}")
+                     if isinstance(instruction_text, str) and instruction_text.strip():
+                         snippet = instruction_text.strip()
+                         click.echo(f" Instruction: {snippet}")
+                     if isinstance(demo_indices, list) and demo_indices:
+                         click.echo(f" Demo indices: {demo_indices}")
+
+                     # Display per-stage information if available
+                     if isinstance(stage_payloads, dict) and stage_payloads:
+                         click.echo(" Per-stage breakdown:")
+                         for stage_id, payload in stage_payloads.items():
+                             if isinstance(payload, dict):
+                                 module_id = payload.get("module_id", stage_id)
+                                 instr_ids = payload.get("instruction_indices", [])
+                                 demo_ids = payload.get("demo_indices", [])
+                                 click.echo(f" [{module_id}/{stage_id}] instr_ids={instr_ids} demo_ids={demo_ids}")
+
+                     seed_scores = data.get("test_seed_scores")
+                     if isinstance(seed_scores, list) and seed_scores:
+                         formatted_scores = ", ".join(
+                             f"{item.get('seed')}: {item.get('score'):.2f}"
+                             for item in seed_scores
+                             if isinstance(item, dict) and isinstance(item.get("seed"), int) and isinstance(item.get("score"), int | float)
+                         )
+                         if formatted_scores:
+                             click.echo(f" Test per-seed: {formatted_scores}")
+                     click.echo(" ----------------------")
+             return
+
+         if message.stream_type is StreamType.METRICS:
+             name = message.data.get("name")
+             value = message.data.get("value")
+             step = message.data.get("step")
+             data = message.data.get("data", {})
+
+             # Format metric display
+             metric_str = f"[{timestamp}] [metric] {name}={value:.4f}" if isinstance(value, int | float) else f"[{timestamp}] [metric] {name}={value}"
+             if step is not None:
+                 metric_str += f" (step={step})"
+
+             # Add any additional context from data field
+             if isinstance(data, dict):
+                 n = data.get("n")
+                 if n is not None:
+                     metric_str += f" n={n}"
+
+             click.echo(metric_str)
+             return
+
+         if message.stream_type is StreamType.TIMELINE:
+             phase = message.data.get("phase", "phase")
+             click.echo(f"[{timestamp}] timeline={phase}")
+
+
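A minimal construction sketch for the CLI handler above (the filter values are illustrative, not defaults):

    cli_handler = CLIHandler(
        hidden_event_types={"heartbeat"},
        hidden_event_substrings={"rate limit"},
    )
    # EVENTS messages are skipped when their type is in hidden_event_types, or when
    # the lower-cased type/message/data text contains any hidden substring.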
+ class JSONHandler(StreamHandler):
+     """Emit messages as JSON lines suitable for machine parsing."""
+
+     def __init__(self, output_file: str | None = None, *, indent: int | None = None) -> None:
+         self.output_file = Path(output_file).expanduser() if output_file else None
+         self._indent = indent
+
+     def handle(self, message: StreamMessage) -> None:
+         if not self.should_handle(message):
+             return
+
+         payload: dict[str, Any] = {
+             "stream_type": message.stream_type.name,
+             "timestamp": message.timestamp,
+             "job_id": message.job_id,
+             "data": message.data,
+         }
+         if message.seq is not None:
+             payload["seq"] = message.seq
+         if message.step is not None:
+             payload["step"] = message.step
+         if message.phase is not None:
+             payload["phase"] = message.phase
+
+         line = json.dumps(payload, indent=self._indent)
+         if self.output_file:
+             with self.output_file.open("a", encoding="utf-8") as fh:
+                 fh.write(line)
+                 if self._indent is None:
+                     fh.write("\n")
+         else:
+             click.echo(line)
+
+     def flush(self) -> None:
+         return None
+
+
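A minimal construction sketch for the JSON handler above (the output path is illustrative):

    jsonl_handler = JSONHandler("~/.synth_ai/job_stream.jsonl")
    # Each handled message is appended as one JSON object per line with keys
    # "stream_type", "timestamp", "job_id", "data", plus "seq"/"step"/"phase" when set.

    stdout_handler = JSONHandler(indent=2)  # no output_file: pretty-printed JSON to stdout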
+ class CallbackHandler(StreamHandler):
+     """Invoke user-provided callbacks for specific stream types."""
+
+     def __init__(
+         self,
+         *,
+         on_status: Callable[[dict[str, Any]], None] | None = None,
+         on_event: Callable[[dict[str, Any]], None] | None = None,
+         on_metric: Callable[[dict[str, Any]], None] | None = None,
+         on_timeline: Callable[[dict[str, Any]], None] | None = None,
+     ) -> None:
+         self._on_status = on_status
+         self._on_event = on_event
+         self._on_metric = on_metric
+         self._on_timeline = on_timeline
+
+     def handle(self, message: StreamMessage) -> None:
+         if not self.should_handle(message):
+             return
+
+         if message.stream_type is StreamType.STATUS and self._on_status:
+             self._on_status(message.data)
+         elif message.stream_type is StreamType.EVENTS and self._on_event:
+             self._on_event(message.data)
+         elif message.stream_type is StreamType.METRICS and self._on_metric:
+             self._on_metric(message.data)
+         elif message.stream_type is StreamType.TIMELINE and self._on_timeline:
+             self._on_timeline(message.data)
+
+
295
+ class BufferedHandler(StreamHandler):
296
+ """Collect messages and emit them in batches."""
297
+
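+ # Subclass sketch (hypothetical, not part of this diff): process_batch must be
+ # overridden, e.g. to log buffered messages whenever the size or time
+ # threshold triggers a flush.
+ #
+ #     class LoggingBatchHandler(BufferedHandler):
+ #         def process_batch(self, messages: list[StreamMessage]) -> None:
+ #             for m in messages:
+ #                 logging.info("%s %s", m.stream_type.name, m.data)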
298
+ def __init__(self, *, flush_interval: float = 5.0, max_buffer_size: int = 100) -> None:
299
+ self.flush_interval = flush_interval
300
+ self.max_buffer_size = max_buffer_size
301
+ self._buffer: list[StreamMessage] = []
302
+ self._last_flush = time.time()
303
+
304
+ def handle(self, message: StreamMessage) -> None:
305
+ if not self.should_handle(message):
306
+ return
307
+
308
+ self._buffer.append(message)
309
+ now = time.time()
310
+ if len(self._buffer) >= self.max_buffer_size or now - self._last_flush >= self.flush_interval:
311
+ self.flush()
312
+
313
+ def flush(self) -> None:
314
+ if not self._buffer:
315
+ return
316
+ self.process_batch(self._buffer)
317
+ self._buffer.clear()
318
+ self._last_flush = time.time()
319
+
320
+ def process_batch(self, messages: list[StreamMessage]) -> None: # pragma: no cover - abstract
321
+ """Override to define how buffered messages should be processed."""
322
+
323
+
324
+ class IntegrationTestHandler(StreamHandler):
325
+ """Collect messages for integration tests or programmatic assertions."""
326
+
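+ # Test sketch (hypothetical assertion, not part of this diff): collected
+ # messages can be inspected directly after feeding the handler.
+ #
+ #     handler = IntegrationTestHandler()
+ #     handler.handle(message)
+ #     assert handler.messages[-1].stream_type is StreamType.METRICS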
327
+ def __init__(self) -> None:
328
+ self.messages: list[StreamMessage] = []
329
+
330
+ def handle(self, message: StreamMessage) -> None:
331
+ self.messages.append(message)
332
+
333
+ def clear(self) -> None:
334
+ self.messages.clear()
335
+
336
+
337
+ class GraphGenHandler(StreamHandler):
338
+ """Handler for ADAS jobs that delegate child job streams to an underlying handler.
339
+
340
+ ADAS jobs emit events from child jobs (GEPA, MIPRO, RL, SFT, etc.). This handler
341
+ provides light ADAS-aware filtering and routing while keeping child job output
342
+ intact via a delegate handler. The delegate can be supplied directly or created
343
+ via a factory; by default we choose a prompt-learning handler for GEPA/MIPRO and
344
+ a basic CLI handler for other job types.
345
+ """
346
+
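+ # Construction sketch (illustrative only, not part of this diff): route
+ # GEPA/MIPRO children to PromptLearningHandler and everything else to
+ # CLIHandler via a factory; without a factory the same defaults are chosen
+ # automatically.
+ #
+ #     handler = GraphGenHandler(
+ #         child_handler_factory=lambda job_type: (
+ #             PromptLearningHandler() if job_type in {"gepa", "mipro"} else CLIHandler()
+ #         ),
+ #         filter_verbose_events=True,
+ #     )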
347
+ def __init__(
348
+ self,
349
+ *,
350
+ child_handler: StreamHandler | None = None,
351
+ child_handler_factory: Callable[[str | None], StreamHandler | None] | None = None,
352
+ show_trial_results: bool = True,
353
+ show_transformations: bool = False,
354
+ show_validation: bool = True,
355
+ filter_verbose_events: bool = True,
356
+ wrap_child_events: bool = True,
357
+ ) -> None:
358
+ # User-supplied delegate or factory; both are optional.
359
+ self.child_handler = child_handler
360
+ self._child_handler_factory = child_handler_factory
361
+
362
+ # Options for the default prompt-learning delegate
363
+ self._pl_show_trial_results = show_trial_results
364
+ self._pl_show_transformations = show_transformations
365
+ self._pl_show_validation = show_validation
366
+
367
+ self.filter_verbose_events = filter_verbose_events
368
+ # If False, skip ADAS-specific filtering/transformations and just pass through.
369
+ self.wrap_child_events = wrap_child_events
370
+
371
+ # Detected child job type (gepa/mipro/rl/sft/etc.)
372
+ self.child_job_type: str | None = None
373
+ # Track whether we created the delegate automatically (so we can swap if needed)
374
+ self._delegate_auto_created = False
375
+
376
+ def handle(self, message: StreamMessage) -> None:
377
+ if not self.should_handle(message):
378
+ return
379
+
380
+ if message.stream_type is StreamType.EVENTS:
381
+ self._detect_child_job_type(message)
382
+ self._maybe_reset_delegate_for_child_type()
383
+
384
+ if self.wrap_child_events and self.filter_verbose_events:
385
+ if self._should_filter_event(message):
386
+ return
387
+
388
+ if self.wrap_child_events:
389
+ message = self._transform_event_message(message)
390
+
391
+ delegate = self._get_child_handler()
392
+ if delegate:
393
+ delegate.handle(message)
394
+
395
+ def _get_child_handler(self) -> StreamHandler:
396
+ """Return or create the delegate handler used for child job events."""
397
+ if self.child_handler:
398
+ return self.child_handler
399
+
400
+ handler: StreamHandler | None = None
401
+ if self._child_handler_factory:
402
+ handler = self._child_handler_factory(self.child_job_type)
403
+
404
+ if handler is None:
405
+ # Choose a sensible default based on detected child job type
406
+ if self._is_prompt_learning_type(self.child_job_type):
407
+ handler = PromptLearningHandler(
408
+ show_trial_results=self._pl_show_trial_results,
409
+ show_transformations=self._pl_show_transformations,
410
+ show_validation=self._pl_show_validation,
411
+ )
412
+ else:
413
+ handler = CLIHandler()
414
+
415
+ self.child_handler = handler
416
+ self._delegate_auto_created = self._child_handler_factory is None and self.child_handler is not None
417
+ return handler
418
+
419
+ def _detect_child_job_type(self, message: StreamMessage) -> None:
420
+ """Infer the child job type from event types."""
421
+ if self.child_job_type:
422
+ return
423
+
424
+ event_type = str(message.data.get("type") or "").lower()
425
+ if not event_type:
426
+ return
427
+
428
+ if event_type.startswith("graph_evolve."):
429
+ self.child_job_type = "graph_evolve"
430
+ elif "mipro" in event_type:
431
+ self.child_job_type = "mipro"
432
+ elif "gepa" in event_type or event_type.startswith("prompt.learning"):
433
+ self.child_job_type = "prompt_learning"
434
+ elif event_type.startswith("rl.") or ".rl." in event_type:
435
+ self.child_job_type = "rl"
436
+ elif event_type.startswith("sft.") or ".sft." in event_type:
437
+ self.child_job_type = "sft"
438
+ else:
439
+ # Fall back to the first segment as a hint (e.g., "adas.child_type")
440
+ parts = event_type.split(".")
441
+ if parts:
442
+ self.child_job_type = parts[0]
443
+
444
+ def _maybe_reset_delegate_for_child_type(self) -> None:
445
+ """Swap out auto-created delegates when we later detect a different child type."""
446
+ if not self.child_handler or not self._delegate_auto_created:
447
+ return
448
+
449
+ # If the detected type does not match the current delegate choice, rebuild.
450
+ wants_prompt_learning = self._is_prompt_learning_type(self.child_job_type)
451
+ has_prompt_learning_handler = isinstance(self.child_handler, PromptLearningHandler)
452
+
453
+ if wants_prompt_learning and not has_prompt_learning_handler:
454
+ self.child_handler = None
455
+ self._delegate_auto_created = False
456
+ elif not wants_prompt_learning and has_prompt_learning_handler:
457
+ self.child_handler = None
458
+ self._delegate_auto_created = False
459
+
460
+ def _should_filter_event(self, message: StreamMessage) -> bool:
461
+ """Determine if an event should be hidden from output."""
462
+ event_type = message.data.get("type", "") or ""
463
+ event_type_lower = event_type.lower()
464
+
465
+ # Never filter graph_evolve events - they're important for GraphGen jobs
466
+ if event_type.startswith("graph_evolve."):
467
+ return False
468
+
469
+ # Only filter prompt-learning style events; leave other job types untouched.
470
+ if not any(key in event_type_lower for key in ("prompt.learning", "gepa", "mipro")):
471
+ return False
472
+
473
+ important_events = {
474
+ "prompt.learning.created",
475
+ "prompt.learning.gepa.start",
476
+ "prompt.learning.gepa.complete",
477
+ "prompt.learning.mipro.job.started",
478
+ "prompt.learning.mipro.optimization.exhausted",
479
+ "prompt.learning.trial.results",
480
+ "prompt.learning.progress",
481
+ "prompt.learning.gepa.new_best",
482
+ "prompt.learning.validation.summary",
483
+ "prompt.learning.candidate.evaluated",
484
+ "prompt.learning.candidate.evaluation.started",
485
+ # GraphGen/graph_evolve important events
486
+ "graph_evolve.job_started",
487
+ "graph_evolve.generation_started",
488
+ "graph_evolve.generation_completed",
489
+ "graph_evolve.candidate_evaluated",
490
+ "graph_evolve.archive_updated",
491
+ "graph_evolve.job_completed",
492
+ "graph_evolve.job_failed",
493
+ }
494
+ if event_type in important_events:
495
+ return False
496
+
497
+ verbose_patterns = [
498
+ "gepa.transformation.proposed",
499
+ "gepa.proposal.scored",
500
+ "prompt.learning.proposal.scored",
501
+ "mipro.tpe.update",
502
+ "prompt.learning.stream.connected",
503
+ ]
504
+ return any(pattern in event_type_lower for pattern in verbose_patterns)
505
+
506
+ def _transform_event_message(self, message: StreamMessage) -> StreamMessage:
507
+ """Transform event messages for ADAS context (currently passthrough)."""
508
+ return message
509
+
510
+ def flush(self) -> None:
511
+ # Ensure delegate flushes buffered output if needed.
512
+ if self.child_handler and hasattr(self.child_handler, "flush"):
513
+ with contextlib.suppress(Exception):
514
+ self.child_handler.flush()
515
+
516
+ @staticmethod
517
+ def _is_prompt_learning_type(job_type: str | None) -> bool:
518
+ """Return True if the child job type should use prompt-learning formatting."""
519
+ return job_type in {"gepa", "mipro", "prompt_learning", "prompt-learning", None}
520
+
521
+
522
+ class LossCurveHandler(StreamHandler):
523
+ """Render a live-updating loss chart inside a fixed Rich panel."""
524
+
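+ # Usage sketch (illustrative only; requires the optional 'rich' dependency):
+ #
+ #     handler = LossCurveHandler(metric_name="train.loss", width=40)
+ #     handler.handle(metric_message)  # updates the live sparkline panel
+ #     handler.flush()                 # stops the Rich Live display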
525
+ def __init__(
526
+ self,
527
+ *,
528
+ metric_name: str = "train.loss",
529
+ max_points: int = 200,
530
+ width: int = 60,
531
+ console: Any | None = None,
532
+ live: Any | None = None,
533
+ ) -> None:
534
+ try:
535
+ from rich.console import Console
536
+ from rich.live import Live
537
+ from rich.panel import Panel
538
+ from rich.text import Text
539
+ except ImportError as exc: # pragma: no cover - optional dependency guard
540
+ raise RuntimeError(
541
+ "LossCurveHandler requires the 'rich' package. Install synth-ai[all] or rich>=13."
542
+ ) from exc
543
+
544
+ self.metric_name = metric_name
545
+ self.max_points = max_points
546
+ self.width = width
547
+
548
+ self._console_class = Console
549
+ self._panel_class = Panel
550
+ self._text_class = Text
551
+
552
+ self._console = console or Console()
553
+ self._live = live or Live(console=self._console, transient=False, refresh_per_second=8)
554
+ self._started = False
555
+
556
+ self._steps: list[int] = []
557
+ self._values: list[float] = []
558
+ self._status = "waiting"
559
+ self._last_event: str | None = None
560
+
561
+ def handle(self, message: StreamMessage) -> None:
562
+ updated = False
563
+
564
+ if message.stream_type is StreamType.STATUS:
565
+ status = str(message.data.get("status") or message.data.get("state") or "unknown")
566
+ if status != self._status:
567
+ self._status = status
568
+ updated = True
569
+
570
+ elif message.stream_type is StreamType.EVENTS:
571
+ event_type = message.data.get("type", "")
572
+ msg = message.data.get("message") or ""
573
+ level = message.data.get("level")
574
+ summary = f"{event_type}".strip()
575
+ if level:
576
+ summary += f" ({level})"
577
+ if msg:
578
+ summary += f": {msg}"
579
+ if summary != self._last_event:
580
+ self._last_event = summary
581
+ updated = True
582
+
583
+ elif message.stream_type is StreamType.METRICS:
584
+ if message.data.get("name") != self.metric_name:
585
+ return
586
+ value = message.data.get("value")
587
+ step = message.data.get("step")
588
+ if not isinstance(value, int | float) or not isinstance(step, int):
589
+ return
590
+ self._values.append(float(value))
591
+ self._steps.append(step)
592
+ if len(self._values) > self.max_points:
593
+ self._values = self._values[-self.max_points :]
594
+ self._steps = self._steps[-self.max_points :]
595
+ updated = True
596
+
597
+ elif message.stream_type is StreamType.TIMELINE:
598
+ phase = message.data.get("phase")
599
+ if phase:
600
+ self._status = str(phase)
601
+ updated = True
602
+
603
+ if updated:
604
+ self._refresh()
605
+
606
+ def flush(self) -> None:
607
+ if self._started:
608
+ with contextlib.suppress(Exception):
609
+ self._live.stop()
610
+ self._started = False
611
+
612
+ def _ensure_live(self) -> None:
613
+ if not self._started:
614
+ with contextlib.suppress(Exception):
615
+ self._live.start()
616
+ self._started = True
617
+
618
+ def _refresh(self) -> None:
619
+ self._ensure_live()
620
+ body = self._build_body()
621
+ title = f"{self.metric_name} | status={self._status}"
622
+ self._live.update(self._panel_class(body, title=title, border_style="cyan"))
623
+
624
+ def _build_body(self) -> Any:
625
+ if not self._values:
626
+ return self._text_class("Waiting for metrics…", style="yellow")
627
+
628
+ chart = self._render_sparkline()
629
+ last_value = self._values[-1]
630
+ lines = [
631
+ chart,
632
+ f"latest: {last_value:.4f} (step {self._steps[-1]})",
633
+ ]
634
+ if self._last_event:
635
+ lines.append(f"event: {self._last_event}")
636
+ return "\n".join(lines)
637
+
638
+ def _render_sparkline(self) -> str:
639
+ blocks = "▁▂▃▄▅▆▇█"
640
+ tail_len = min(self.width, len(self._values))
641
+ tail = self._values[-tail_len:]
642
+ minimum = min(tail)
643
+ maximum = max(tail)
644
+ if maximum == minimum:
645
+ level = blocks[0]
646
+ return f"{minimum:.2f} {level * tail_len} {maximum:.2f}"
647
+ scale = (len(blocks) - 1) / (maximum - minimum)
648
+ chars = "".join(blocks[int((v - minimum) * scale + 0.5)] for v in tail)
649
+ return f"{minimum:.2f} {chars} {maximum:.2f}"
650
+
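+ # Worked example (illustrative values): for a tail of [1.0, 0.5, 0.0] the
+ # scale is (8 - 1) / (1.0 - 0.0) = 7, mapping 1.0 -> '█', 0.5 -> '▅' and
+ # 0.0 -> '▁', so the rendered line reads "0.00 █▅▁ 1.00".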
651
+ def __del__(self) -> None: # pragma: no cover - defensive cleanup
652
+ with contextlib.suppress(Exception):
653
+ self.flush()
654
+
655
+ class RichHandler(StreamHandler):
656
+ """Rich powered handler with live progress and metrics table."""
657
+
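+ # Usage sketch (illustrative only; requires the optional 'rich' dependency):
+ #
+ #     handler = RichHandler(event_log_size=10)
+ #     handler.handle(status_message)  # starts the spinner/progress bar
+ #     handler.flush()                 # stops progress and prints the metrics table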
658
+ def __init__(
659
+ self,
660
+ *,
661
+ event_log_size: int = 20,
662
+ console: Any | None = None,
663
+ ) -> None:
664
+ try:
665
+ from rich.console import Console
666
+ from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn
667
+ from rich.table import Table
668
+ except ImportError as exc: # pragma: no cover - requires optional dependency
669
+ raise RuntimeError(
670
+ "RichHandler requires the 'rich' package. Install synth-ai[all] or rich>=13."
671
+ ) from exc
672
+
673
+ self._console_class = Console
674
+ self._progress_class = Progress
675
+ self._spinner_column = SpinnerColumn
676
+ self._text_column = TextColumn
677
+ self._bar_column = BarColumn
678
+ self._table_class = Table
679
+
680
+ self._console = console or Console()
681
+ self._progress = Progress(
682
+ SpinnerColumn(),
683
+ TextColumn("[progress.description]{task.description}"),
684
+ BarColumn(),
685
+ TextColumn("{task.completed}/{task.total}" if console else ""),
686
+ TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
687
+ transient=False,
688
+ console=self._console,
689
+ )
690
+ self._task_id: int | None = None
691
+ self._current_status = "unknown"
692
+ self._latest_metrics: dict[str, Any] = {}
693
+ self._event_log: deque[str] = deque(maxlen=event_log_size)
694
+ self._progress_started = False
695
+
696
+ def handle(self, message: StreamMessage) -> None:
697
+ if not self.should_handle(message):
698
+ return
699
+
700
+ if message.stream_type is StreamType.STATUS:
701
+ self._current_status = str(message.data.get("status") or message.data.get("state"))
702
+ self._ensure_progress_started()
703
+ if self._task_id is not None:
704
+ description = f"Status: {self._current_status}"
705
+ self._progress.update(self._task_id, description=description) # type: ignore[arg-type]
706
+ self._render_summary()
707
+ return
708
+
709
+ if message.stream_type is StreamType.EVENTS:
710
+ event_type = message.data.get("type", "event")
711
+ summary = message.data.get("message") or ""
712
+ level = message.data.get("level")
713
+ # Mask sensitive URLs before displaying
714
+ sanitized_summary = _mask_sensitive_urls(summary)
715
+ formatted = f"[{event_type}] {sanitized_summary}".strip()
716
+ if level:
717
+ formatted = f"{formatted} ({level})"
718
+ self._event_log.append(formatted)
719
+ data = message.data.get("data") or {}
720
+ step = data.get("step") or data.get("current_step")
721
+ total_steps = data.get("total_steps") or data.get("max_steps")
722
+ if step and total_steps:
723
+ self._ensure_progress_started(total_steps)
724
+ if self._task_id is not None:
725
+ self._progress.update(self._task_id, completed=int(step), total=int(total_steps)) # type: ignore[arg-type]
726
+ self._render_summary()
727
+ return
728
+
729
+ if message.stream_type is StreamType.METRICS:
730
+ name = message.data.get("name", "")
731
+ value = message.data.get("value")
732
+ if name:
733
+ self._latest_metrics[name] = value
734
+ self._render_summary()
735
+ return
736
+
737
+ if message.stream_type is StreamType.TIMELINE:
738
+ phase = message.data.get("phase", "")
739
+ if phase and phase.lower() not in {"training", "running"}:
740
+ self._event_log.append(f"[timeline] {phase}")
741
+ self._render_summary()
742
+
743
+ def flush(self) -> None:
744
+ if self._progress_started:
745
+ self._progress.stop()
746
+ self._progress_started = False
747
+ self._render_summary(force=True)
748
+
749
+ def _ensure_progress_started(self, total: int | float | None = None) -> None:
750
+ if not self._progress_started:
751
+ self._progress.start()
752
+ self._progress_started = True
753
+ if self._task_id is None:
754
+ self._task_id = self._progress.add_task(
755
+ f"Status: {self._current_status}", total=total or 100
756
+ )
757
+ elif total is not None and self._task_id is not None:
758
+ self._progress.update(self._task_id, total=total) # type: ignore[arg-type]
759
+
760
+ def _render_summary(self, force: bool = False) -> None:
761
+ if force and self._progress_started:
762
+ self._progress.refresh()
763
+
764
+ table = self._table_class(title="Latest Metrics")
765
+ table.add_column("Metric")
766
+ table.add_column("Value")
767
+
768
+ if not self._latest_metrics:
769
+ table.add_row("—", "—")
770
+ else:
771
+ for name, value in sorted(self._latest_metrics.items()):
772
+ table.add_row(str(name), str(value))
773
+
774
+ if self._progress_started:
775
+ self._progress.console.print(table)
776
+ else:
777
+ self._console.print(table)
778
+
779
+ if self._event_log:
780
+ self._console.print("\nRecent events:")
781
+ for entry in list(self._event_log):
782
+ self._console.print(f" • {entry}")
783
+
784
+ class ContextLearningHandler(StreamHandler):
785
+ """CLI-friendly handler for Context Learning jobs.
786
+
787
+ Emits high-signal progress similar to other infra job handlers,
788
+ specialized for generation-based bash context optimization.
789
+ """
790
+
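+ # Usage sketch (illustrative only, not part of this diff):
+ #
+ #     handler = ContextLearningHandler()
+ #     handler.handle(message)  # prints lines like "[12:00:00] gen=3 best=0.412"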
791
+ def __init__(self) -> None:
792
+ self.best_score_so_far = 0.0
793
+ self.current_generation = 0
794
+
795
+ def handle(self, message: StreamMessage) -> None:
796
+ if not self.should_handle(message):
797
+ return
798
+
799
+ timestamp = datetime.now().strftime("%H:%M:%S")
800
+
801
+ if message.stream_type is StreamType.STATUS:
802
+ status = str(message.data.get("status") or message.data.get("state") or "unknown")
803
+ click.echo(f"[{timestamp}] status={status}")
804
+ return
805
+
806
+ if message.stream_type is StreamType.METRICS:
807
+ name = message.data.get("name")
808
+ value = message.data.get("value")
809
+ step = message.data.get("step")
810
+ if isinstance(value, int | float):
811
+ try:
812
+ val_f = float(value)
813
+ if val_f > self.best_score_so_far:
814
+ self.best_score_so_far = val_f
815
+ if isinstance(step, int):
816
+ self.current_generation = max(self.current_generation, step)
817
+ click.echo(f"[{timestamp}] gen={step} best={val_f:.3f}")
818
+ return
819
+ except Exception:
820
+ pass
821
+ click.echo(f"[{timestamp}] metric {name}={value}")
822
+ return
823
+
824
+ if message.stream_type is StreamType.EVENTS:
825
+ event_type = str(message.data.get("type") or "")
826
+ msg = message.data.get("message") or ""
827
+ data = message.data.get("data") or {}
828
+
829
+ if event_type == "context.learning.generation.completed":
830
+ gen = data.get("generation") or data.get("gen") or self.current_generation
831
+ score = data.get("best_score") or data.get("score") or self.best_score_so_far
832
+ try:
833
+ score_f = float(score)
834
+ if score_f > self.best_score_so_far:
835
+ self.best_score_so_far = score_f
836
+ click.echo(f"[{timestamp}] generation {gen} best={score_f:.3f}")
837
+ except Exception:
838
+ click.echo(f"[{timestamp}] generation {gen} completed")
839
+ return
840
+
841
+ if event_type.endswith(".failed"):
842
+ click.echo(f"[{timestamp}] {event_type}: {msg}")
843
+ return
844
+
845
+ if msg:
846
+ click.echo(f"[{timestamp}] {event_type}: {msg}")
847
+ else:
848
+ click.echo(f"[{timestamp}] {event_type}")
849
+
850
+
851
+ class PromptLearningHandler(StreamHandler):
852
+ """Enhanced handler for GEPA/MIPRO prompt optimization jobs with rich formatting and metrics tracking.
853
+
854
+ This handler processes streaming events from both GEPA (Genetic Evolutionary Prompt
855
+ Algorithm) and MIPRO (Meta-Instruction PROposer) optimization jobs. It provides:
856
+
857
+ - **Real-time progress tracking**: Shows trial results, rollouts, iterations, and budget usage
858
+ - **Optimization curve tracking**: Maintains a history of best scores over time
859
+ - **GEPA-specific features**: Tracks transformations, rollouts, and validation results
860
+ - **MIPRO-specific features**: Tracks iterations, trials, minibatch/full evaluations, and budget
861
+ - **Dual output**: Writes to both console (via click.echo) and optional log file
862
+
863
+ The handler filters verbose events (like TPE updates, proposed instructions) to keep
864
+ output readable while preserving important progress information. It formats output
865
+ consistently between GEPA and MIPRO for easier comparison.
866
+
867
+ Example:
868
+ >>> handler = PromptLearningHandler(
869
+ ... show_trial_results=True,
870
+ ... max_tokens=1_000_000,
871
+ ... log_file=Path("optimization.log")
872
+ ... )
873
+ >>> # Handler is used by JobStreamer to process events
874
+ """
875
+
876
+ def __init__(
877
+ self,
878
+ *,
879
+ show_trial_results: bool = True,
880
+ show_transformations: bool = False,
881
+ show_validation: bool = True,
882
+ max_tokens: int | None = None,
883
+ max_time_seconds: float | None = None,
884
+ max_rollouts: int | None = None,
885
+ log_file: Path | None = None,
886
+ ):
887
+ """Initialize the prompt learning handler.
888
+
889
+ Args:
890
+ show_trial_results: Whether to display individual trial scores (default: True).
891
+ When True, shows each trial's score and best score so far.
892
+ show_transformations: Whether to display transformation/proposal details
893
+ (default: False). When True, shows verbose transformation events.
894
+ show_validation: Whether to display validation summaries (default: True).
895
+ Shows validation results comparing candidates against baseline.
896
+ max_tokens: Maximum token budget for MIPRO (from TOML termination_config).
897
+ Used to track progress and enforce limits.
898
+ max_time_seconds: Maximum time budget in seconds (from TOML termination_config).
899
+ Used to track elapsed time and ETA.
900
+ max_rollouts: Maximum rollouts budget (from TOML termination_config).
901
+ Used to track rollout progress for both GEPA and MIPRO.
902
+ log_file: Optional path to log file for persistent logging. If provided,
903
+ all output is written to both console and file. File is opened in
904
+ append mode and remains open for streaming.
905
+ """
906
+ self.show_trial_results = show_trial_results
907
+ self.show_transformations = show_transformations
908
+ self.show_validation = show_validation
909
+ self.optimization_curve: list[tuple[int, float]] = []
910
+ self.trial_counter = 0
911
+ self.best_score_so_far = 0.0
912
+
913
+ # MIPRO progress tracking
914
+ self.mipro_start_time: float | None = None
915
+ self.mipro_total_trials: int | None = None
916
+ self.mipro_completed_trials: int = 0
917
+ self.mipro_total_tokens: int = 0
918
+ self.mipro_policy_tokens: int = 0 # Rollout tokens (policy only)
919
+ self.mipro_max_tokens: int | None = max_tokens # From TOML termination_config
920
+ self.mipro_total_cost: float = 0.0
921
+ self.mipro_max_cost: float | None = None
922
+ self.mipro_current_iteration: int = 0
923
+ self.mipro_num_iterations: int | None = None
924
+ self.mipro_trials_per_iteration: int | None = None
925
+ self.mipro_best_score: float = 0.0 # Track best full eval score
926
+ self.mipro_baseline_score: float | None = None # Track baseline for comparison
927
+ self.mipro_batch_size: int | None = None # Track minibatch size (N for minibatch scores)
928
+ self.mipro_rollouts_completed: int = 0 # Total rollouts completed
929
+ self.mipro_max_rollouts: int | None = max_rollouts # From TOML termination_config
930
+ self.mipro_max_time_seconds: float | None = max_time_seconds # From TOML termination_config
931
+ self._last_progress_emit_time: float | None = None # Throttle progress updates
932
+ self._progress_emit_interval: float = 5.0 # Emit progress at most every 5 seconds
933
+
934
+ # Log file for real-time streaming
935
+ self.log_file: Path | None = log_file
936
+ self._log_file_handle = None
937
+ if self.log_file:
938
+ try:
939
+ # Create parent directory if needed
940
+ self.log_file.parent.mkdir(parents=True, exist_ok=True)
941
+ # Open file in append mode for live streaming
942
+ # Note: File must remain open for streaming, so we can't use context manager
943
+ from datetime import datetime
944
+ self._log_file_handle = open(self.log_file, "a", encoding="utf-8") # noqa: SIM115
945
+ # Write header
946
+ self._log_file_handle.write("=" * 80 + "\n")
947
+ self._log_file_handle.write("PROMPT LEARNING VERBOSE LOG\n")
948
+ self._log_file_handle.write(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
949
+ self._log_file_handle.write("=" * 80 + "\n\n")
950
+ self._log_file_handle.flush()
951
+ except Exception as e:
952
+ # If we can't open the log file, continue without it
953
+ click.echo(f"⚠️ Could not open log file {log_file}: {e}", err=True)
954
+ self.log_file = None
955
+ self._log_file_handle = None
956
+
957
+ def _write_log(self, text: str) -> None:
958
+ """Write text to both console and log file."""
959
+ click.echo(text)
960
+ if self._log_file_handle:
961
+ try:
962
+ self._log_file_handle.write(text + "\n")
963
+ self._log_file_handle.flush()
964
+ except Exception:
965
+ # If write fails, close handle and continue without logging
966
+ from contextlib import suppress
967
+ with suppress(Exception):
968
+ self._log_file_handle.close()
969
+ self._log_file_handle = None
970
+
971
+ def handle(self, message: StreamMessage) -> None:
972
+ """Handle a stream message from the prompt learning job.
973
+
974
+ Routes messages to appropriate handlers based on stream type:
975
+ - STATUS: Job status updates (queued, running, completed, etc.)
976
+ - EVENTS: Algorithm-specific events (trials, iterations, transformations)
977
+ - METRICS: Performance metrics (scores, accuracies, costs)
978
+ - TIMELINE: Phase transitions
979
+
980
+ Filters verbose events (TPE updates, proposed instructions) to keep output
981
+ readable. MIPRO and GEPA events are handled by specialized methods.
982
+
983
+ Args:
984
+ message: StreamMessage containing event data from the backend
985
+ """
986
+ if not self.should_handle(message):
987
+ return
988
+
989
+ timestamp = datetime.now().strftime("%H:%M:%S")
990
+
991
+ if message.stream_type is StreamType.STATUS:
992
+ status = str(message.data.get("status") or message.data.get("state") or "unknown")
993
+ self._write_log(f"[{timestamp}] status={status}")
994
+ return
995
+
996
+ if message.stream_type is StreamType.EVENTS:
997
+ event_type = message.data.get("type", "event")
998
+ level = message.data.get("level")
999
+ msg = message.data.get("message") or ""
1000
+
1001
+ # Handle MIPRO-specific events for progress tracking (before skipping hidden events)
1002
+ if event_type == "mipro.job.started":
1003
+ self._handle_mipro_job_started(message.data)
1004
+ # Continue to default display
1005
+
1006
+ if event_type == "mipro.budget.update":
1007
+ self._handle_mipro_budget_update(message.data)
1008
+ # Continue to default display
1009
+
1010
+ if event_type == "mipro.trial.complete":
1011
+ self._handle_mipro_trial_complete(message.data)
1012
+ # Continue to default display
1013
+
1014
+ # Show more MIPRO events - only hide the most verbose ones
1015
+ _hidden_mipro_events = {
1016
+ # Keep only the most verbose TPE updates hidden
1017
+ "mipro.tpe.update", # Very frequent, low value
1018
+ }
1019
+ if event_type in _hidden_mipro_events:
1020
+ return
1021
+
1022
+ # Show GEPA transformation proposals - they're useful for debugging
1023
+ # if event_type == "gepa.transformation.proposed":
1024
+ # return
1025
+
1026
+ # Handle trial results for optimization curve tracking
1027
+ if event_type == "prompt.learning.trial.results":
1028
+ self._handle_trial_results(message.data)
1029
+ # Continue to default display
1030
+
1031
+ # Handle validation summary
1032
+ if event_type == "prompt.learning.validation.summary":
1033
+ if self.show_validation:
1034
+ self._handle_validation_summary(message.data)
1035
+ # Continue to default display
1036
+
1037
+ # Handle progress events
1038
+ if event_type == "prompt.learning.progress":
1039
+ self._handle_progress(message.data)
1040
+ # Continue to default display
1041
+
1042
+ # Handle MIPRO-specific events for progress tracking
1043
+ if event_type == "mipro.iteration.start":
1044
+ self._handle_mipro_iteration_start(message.data)
1045
+ # Continue to default display
1046
+
1047
+ if event_type == "mipro.iteration.complete":
1048
+ self._handle_mipro_iteration_complete(message.data)
1049
+ # Continue to default display
1050
+
1051
+ if event_type == "mipro.fulleval.complete":
1052
+ self._handle_mipro_fulleval_complete(message.data)
1053
+ # Continue to default display
1054
+
1055
+ if event_type == "mipro.optimization.exhausted":
1056
+ # Graceful conclusion - show final progress
1057
+ self._emit_mipro_progress()
1058
+ # Continue to default display
1059
+
1060
+ if event_type == "mipro.new_incumbent":
1061
+ self._handle_mipro_new_incumbent(message.data)
1062
+ # Continue to default display
1063
+
1064
+ # Handle rollouts start event
1065
+ if event_type == "prompt.learning.rollouts.start":
1066
+ self._handle_rollouts_start(message.data)
1067
+ # Continue to default display
1068
+
1069
+ # Handle GEPA new best event
1070
+ if event_type == "prompt.learning.gepa.new_best":
1071
+ self._handle_gepa_new_best(message.data)
1072
+ # Continue to default display
1073
+
1074
+ # Handle phase changed event
1075
+ if event_type == "prompt.learning.phase.changed":
1076
+ self._handle_phase_changed(message.data)
1077
+ # Continue to default display
1078
+
1079
+ # Handle stream connected event (connection lifecycle)
1080
+ if event_type == "prompt.learning.stream.connected":
1081
+ self._handle_stream_connected(message.data)
1082
+ # Continue to default display
1083
+
1084
+ # Handle proposal scored events (transformations) - show by default
1085
+ if event_type == "prompt.learning.proposal.scored":
1086
+ self._handle_proposal_scored(message.data)
1087
+ # Continue to default display
1088
+
1089
+ # Show verbose transformation events by default - they're useful
1090
+ # Only skip if explicitly disabled via show_transformations=False
1091
+ # verbose_event_types = [
1092
+ # "prompt.learning.proposal.scored",
1093
+ # "prompt.learning.eval.summary",
1094
+ # "prompt.learning.validation.scored",
1095
+ # "prompt.learning.final.results",
1096
+ # ]
1097
+ # if event_type in verbose_event_types and not self.show_transformations:
1098
+ # return
1099
+
1100
+ # Default event display - show more details
1101
+ prefix = f"[{timestamp}] {event_type}"
1102
+ if level:
1103
+ prefix += f" ({level})"
1104
+ sanitized_msg = _mask_sensitive_urls(msg)
1105
+
1106
+ # Include key data fields if message is empty or short
1107
+ if not sanitized_msg or len(sanitized_msg) < 50:
1108
+ data = message.data.get("data", {})
1109
+ if isinstance(data, dict):
1110
+ # Show useful fields
1111
+ useful_fields = []
1112
+ for key in ["score", "accuracy", "mean", "step", "iteration", "trial", "completed", "total", "version_id"]:
1113
+ if key in data:
1114
+ value = data[key]
1115
+ if isinstance(value, (int, float)):
1116
+ useful_fields.append(f"{key}={value:.4f}" if isinstance(value, float) else f"{key}={value}")
1117
+ else:
1118
+ useful_fields.append(f"{key}={value}")
1119
+ if useful_fields:
1120
+ sanitized_msg = sanitized_msg + (" " if sanitized_msg else "") + " ".join(useful_fields[:5]) # Limit to 5 fields
1121
+
1122
+ self._write_log(f"{prefix}: {sanitized_msg}".rstrip(": "))
1123
+ return
1124
+
1125
+ if message.stream_type is StreamType.METRICS:
1126
+ name = message.data.get("name")
1127
+ value = message.data.get("value")
1128
+ step = message.data.get("step")
1129
+ data = message.data.get("data", {})
1130
+
1131
+ metric_str = f"[{timestamp}] [metric] {name}={value:.4f}" if isinstance(value, int | float) else f"[{timestamp}] [metric] {name}={value}"
1132
+ if step is not None:
1133
+ metric_str += f" (step={step})"
1134
+
1135
+ if isinstance(data, dict):
1136
+ n = data.get("n")
1137
+ if n is not None:
1138
+ metric_str += f" n={n}"
1139
+
1140
+ self._write_log(metric_str)
1141
+ return
1142
+
1143
+ if message.stream_type is StreamType.TIMELINE:
1144
+ phase = message.data.get("phase", "phase")
1145
+ self._write_log(f"[{timestamp}] timeline={phase}")
1146
+
1147
+ def _handle_trial_results(self, event_data: dict[str, Any]) -> None:
1148
+ """Handle GEPA trial results events and track optimization curve.
1149
+
1150
+ Processes trial completion events from GEPA optimization, tracking:
1151
+ - Mean score for the trial
1152
+ - Best score achieved so far
1153
+ - Number of rollouts completed (N)
1154
+ - Optimization curve data points
1155
+
1156
+ Updates the optimization curve with (trial_number, best_score) tuples
1157
+ for visualization. Displays trial results if show_trial_results is True.
1158
+
1159
+ Args:
1160
+ event_data: Event data dictionary containing:
1161
+ - data.mean: Mean score for this trial
1162
+ - data.completed: Number of rollouts completed
1163
+ - data.total: Total rollouts planned
1164
+ """
1165
+ data = event_data.get("data", {})
1166
+ if not isinstance(data, dict):
1167
+ return
1168
+
1169
+ mean_score = data.get("mean")
1170
+ if mean_score is not None:
1171
+ self.trial_counter += 1
1172
+ self.best_score_so_far = max(self.best_score_so_far, float(mean_score))
1173
+ self.optimization_curve.append((self.trial_counter, self.best_score_so_far))
1174
+
1175
+ if self.show_trial_results:
1176
+ timestamp = datetime.now().strftime("%H:%M:%S")
1177
+
1178
+ # Extract N (number of rollouts)
1179
+ completed = data.get("completed")
1180
+ total = data.get("total")
1181
+
1182
+ n_str = f" N={completed}/{total}" if completed is not None and total is not None else (f" N={completed}" if completed is not None else "")
1183
+
1184
+ self._write_log(f"[{timestamp}] [Trial {self.trial_counter}] Score: {mean_score:.4f} (Best: {self.best_score_so_far:.4f}){n_str}")
1185
+
1186
+ def _handle_validation_summary(self, event_data: dict[str, Any]) -> None:
1187
+ """Handle validation summary events showing candidate performance.
1188
+
1189
+ Displays validation results comparing optimized prompts against a baseline.
1190
+ Shows baseline score, number of candidates evaluated (N), and top candidate
1191
+ scores. Only displayed if show_validation is True.
1192
+
1193
+ Args:
1194
+ event_data: Event data dictionary containing:
1195
+ - data.baseline: Baseline score (dict with accuracy/score or number)
1196
+ - data.results: List of candidate results with accuracy/score fields
1197
+ """
1198
+ data = event_data.get("data", {})
1199
+ if not isinstance(data, dict):
1200
+ return
1201
+
1202
+ timestamp = datetime.now().strftime("%H:%M:%S")
1203
+
1204
+ # Extract baseline
1205
+ baseline = data.get("baseline")
1206
+ baseline_score = None
1207
+ if isinstance(baseline, dict):
1208
+ baseline_score = baseline.get("accuracy") or baseline.get("score")
1209
+ elif isinstance(baseline, int | float):
1210
+ baseline_score = baseline
1211
+
1212
+ # Extract results
1213
+ results = data.get("results", [])
1214
+ if not isinstance(results, list):
1215
+ results = []
1216
+
1217
+ # Display validation summary
1218
+ self._write_log(f"[{timestamp}] Validation Summary:")
1219
+
1220
+ # Show baseline if available
1221
+ if baseline_score is not None:
1222
+ self._write_log(f" Baseline: {baseline_score:.4f}")
1223
+
1224
+ # Show N (number of candidates)
1225
+ n_candidates = len(results)
1226
+ if n_candidates > 0:
1227
+ self._write_log(f" N={n_candidates}")
1228
+
1229
+ # Display validation results
1230
+ if results:
1231
+ for i, result in enumerate(results[:10]): # Show top 10
1232
+ if isinstance(result, dict):
1233
+ accuracy = result.get("accuracy") or result.get("score")
1234
+ if accuracy is not None:
1235
+ self._write_log(f" Candidate {i+1}: {accuracy:.4f}")
1236
+
1237
+ def _handle_progress(self, event_data: dict[str, Any]) -> None:
1238
+ """Handle GEPA progress events with detailed rollout and transformation tracking.
1239
+
1240
+ Displays comprehensive progress information including:
1241
+ - Overall completion percentage
1242
+ - Rollout progress (completed/total with percentage)
1243
+ - Transformation progress (tried/planned with percentage)
1244
+ - Token usage (used/budget in millions)
1245
+ - Elapsed time and ETA
1246
+
1247
+ Formats progress in a human-readable format similar to CLI progress bars.
1248
+
1249
+ Args:
1250
+ event_data: Event data dictionary containing:
1251
+ - data.rollouts_completed: Number of rollouts completed
1252
+ - data.rollouts_total: Total rollouts planned
1253
+ - data.transformations_tried: Number of transformations tried
1254
+ - data.transformations_planned: Total transformations planned
1255
+ - data.rollout_tokens_used: Tokens consumed
1256
+ - data.rollout_tokens_budget: Token budget
1257
+ - data.elapsed_seconds: Time elapsed
1258
+ - data.eta_seconds: Estimated time remaining
1259
+ - data.percent_overall: Overall completion percentage
1260
+ """
1261
+ data = event_data.get("data", {})
1262
+ if not isinstance(data, dict):
1263
+ return
1264
+
1265
+ timestamp = datetime.now().strftime("%H:%M:%S")
1266
+
1267
+ # Extract rollout progress
1268
+ rollouts_completed = data.get("rollouts_completed")
1269
+ rollouts_total = data.get("rollouts_total")
1270
+ percent_rollouts = data.get("percent_rollouts")
1271
+
1272
+ # Extract transformation progress
1273
+ transformations_tried = data.get("transformations_tried")
1274
+ transformations_planned = data.get("transformations_planned")
1275
+ percent_transformations = data.get("percent_transformations")
1276
+
1277
+ # Extract overall progress
1278
+ percent_overall = data.get("percent_overall")
1279
+
1280
+ # Extract timing
1281
+ elapsed_seconds = data.get("elapsed_seconds")
1282
+ eta_seconds = data.get("eta_seconds")
1283
+
1284
+ # Extract token usage
1285
+ rollout_tokens_used = data.get("rollout_tokens_used")
1286
+ rollout_tokens_budget = data.get("rollout_tokens_budget")
1287
+
1288
+ # Build progress message
1289
+ parts = []
1290
+
1291
+ # Overall percentage
1292
+ if percent_overall is not None:
1293
+ parts.append(f"{int(percent_overall * 100)}% complete")
1294
+
1295
+ # Rollout progress
1296
+ if rollouts_completed is not None and rollouts_total is not None:
1297
+ parts.append(f"rollouts={rollouts_completed}/{rollouts_total}")
1298
+ if percent_rollouts is not None:
1299
+ parts.append(f"({int(percent_rollouts * 100)}%)")
1300
+ elif rollouts_completed is not None:
1301
+ parts.append(f"rollouts={rollouts_completed}")
1302
+
1303
+ # Transformation progress
1304
+ if transformations_tried is not None and transformations_planned is not None:
1305
+ parts.append(f"transformations={transformations_tried}/{transformations_planned}")
1306
+ if percent_transformations is not None:
1307
+ parts.append(f"({int(percent_transformations * 100)}%)")
1308
+ elif transformations_tried is not None:
1309
+ parts.append(f"transformations={transformations_tried}")
1310
+
1311
+ # Token usage
1312
+ if rollout_tokens_used is not None:
1313
+ tokens_millions = rollout_tokens_used / 1_000_000.0
1314
+ if rollout_tokens_budget is not None:
1315
+ budget_millions = rollout_tokens_budget / 1_000_000.0
1316
+ parts.append(f"tokens={tokens_millions:.2f}M/{budget_millions:.2f}M")
1317
+ else:
1318
+ parts.append(f"tokens={tokens_millions:.2f}M")
1319
+
1320
+ # Timing
1321
+ if elapsed_seconds is not None:
1322
+ if elapsed_seconds >= 60:
1323
+ elapsed_str = f"{elapsed_seconds / 60:.1f}min"
1324
+ else:
1325
+ elapsed_str = f"{int(elapsed_seconds)}s"
1326
+ parts.append(f"elapsed={elapsed_str}")
1327
+
1328
+ if eta_seconds is not None:
1329
+ eta_str = f"{eta_seconds / 60:.1f}min" if eta_seconds >= 60 else f"{int(eta_seconds)}s"
1330
+ parts.append(f"eta={eta_str}")
1331
+
1332
+ # Fallback to simple step/total_steps if no detailed info
1333
+ if not parts:
1334
+ step = data.get("step") or data.get("current_step")
1335
+ total_steps = data.get("total_steps") or data.get("max_steps")
1336
+ if step is not None and total_steps is not None:
1337
+ parts.append(f"{step}/{total_steps} ({100 * step / total_steps:.1f}%)")
1338
+
1339
+ if parts:
1340
+ progress_msg = " ".join(parts)
1341
+ self._write_log(f"[{timestamp}] Progress: {progress_msg}")
1342
+
1343
+ def _handle_rollouts_start(self, event_data: dict[str, Any]) -> None:
1344
+ """Handle GEPA rollouts start event.
1345
+
1346
+ Displays when rollouts begin, showing the number of training seeds
1347
+ that will be evaluated. This marks the start of the main optimization
1348
+ phase for GEPA.
1349
+
1350
+ Args:
1351
+ event_data: Event data dictionary containing:
1352
+ - data.train_seeds: List of training seed values
1353
+ """
1354
+ data = event_data.get("data", {})
1355
+ if not isinstance(data, dict):
1356
+ return
1357
+
1358
+ timestamp = datetime.now().strftime("%H:%M:%S")
1359
+ train_seeds = data.get("train_seeds", [])
1360
+
1361
+ if isinstance(train_seeds, list) and train_seeds:
1362
+ num_seeds = len(train_seeds)
1363
+ self._write_log(f"[{timestamp}] Starting rollouts: {num_seeds} seeds")
1364
+ else:
1365
+ self._write_log(f"[{timestamp}] Starting rollouts")
1366
+
1367
+ def _handle_gepa_new_best(self, event_data: dict[str, Any]) -> None:
1368
+ """Handle GEPA new best candidate event.
1369
+
1370
+ Displays when a new best candidate is found during optimization,
1371
+ showing the improvement over the previous best.
1372
+
1373
+ Args:
1374
+ event_data: Event data dictionary containing:
1375
+ - data.accuracy: New best accuracy score
1376
+ - data.previous_best_score: Previous best score
1377
+ - data.improvement: Absolute improvement
1378
+ - data.version_id: ID of the new best candidate
1379
+ """
1380
+ data = event_data.get("data", {})
1381
+ if not isinstance(data, dict):
1382
+ return
1383
+
1384
+ timestamp = datetime.now().strftime("%H:%M:%S")
1385
+ accuracy = data.get("accuracy")
1386
+ previous = data.get("previous_best_score")
1387
+ improvement = data.get("improvement")
1388
+
1389
+ if accuracy is not None:
1390
+ msg = f"[{timestamp}] \u2728 New best: {accuracy:.4f}"
1391
+ if previous is not None and improvement is not None:
1392
+ msg += f" (+{improvement:.4f} from {previous:.4f})"
1393
+ elif previous is not None:
1394
+ msg += f" (was {previous:.4f})"
1395
+ self._write_log(msg)
1396
+
1397
+ def _handle_phase_changed(self, event_data: dict[str, Any]) -> None:
1398
+ """Handle phase transition event.
1399
+
1400
+ Displays when the optimization transitions between phases
1401
+ (e.g., bootstrap -> optimization -> validation -> complete).
1402
+
1403
+ Args:
1404
+ event_data: Event data dictionary containing:
1405
+ - data.from_phase: Previous phase name
1406
+ - data.to_phase: New phase name
1407
+ - data.phase_summary: Optional summary of completed phase
1408
+ """
1409
+ data = event_data.get("data", {})
1410
+ if not isinstance(data, dict):
1411
+ return
1412
+
1413
+ timestamp = datetime.now().strftime("%H:%M:%S")
1414
+ from_phase = data.get("from_phase") or "start"
1415
+ to_phase = data.get("to_phase")
1416
+
1417
+ if to_phase:
1418
+ self._write_log(f"[{timestamp}] Phase: {from_phase} \u2192 {to_phase}")
1419
+
1420
+ def _handle_stream_connected(self, event_data: dict[str, Any]) -> None:
1421
+ """Handle SSE stream connection event.
1422
+
1423
+ Displays connection confirmation with cursor position for debugging.
1424
+
1425
+ Args:
1426
+ event_data: Event data dictionary containing:
1427
+ - data.cursor: Current sequence cursor position
1428
+ - data.heartbeat_interval_seconds: Heartbeat interval
1429
+ """
1430
+ data = event_data.get("data", {})
1431
+ if not isinstance(data, dict):
1432
+ return
1433
+
1434
+ timestamp = datetime.now().strftime("%H:%M:%S")
1435
+ cursor = data.get("cursor", 0)
1436
+ self._write_log(f"[{timestamp}] Stream connected (cursor={cursor})")
1437
+
1438
+ def _handle_mipro_job_started(self, event_data: dict[str, Any]) -> None:
1439
+ """Handle MIPRO job start event and extract configuration.
1440
+
1441
+ Captures initial MIPRO configuration from the job start event to enable
1442
+ progress tracking. Extracts num_iterations and num_trials_per_iteration
1443
+ to estimate total trials and rollouts.
1444
+
1445
+ Args:
1446
+ event_data: Event data dictionary containing:
1447
+ - data.num_iterations: Total number of optimization iterations
1448
+ - data.num_trials_per_iteration: Trials per iteration
1449
+ """
1450
+ data = event_data.get("data", {})
1451
+ if not isinstance(data, dict):
1452
+ return
1453
+
1454
+ # Extract config values to estimate max rollouts
1455
+ num_iterations = data.get("num_iterations")
1456
+ num_trials_per_iteration = data.get("num_trials_per_iteration")
1457
+
1458
+ if num_iterations is not None:
1459
+ self.mipro_num_iterations = num_iterations
1460
+ if num_trials_per_iteration is not None:
1461
+ self.mipro_trials_per_iteration = num_trials_per_iteration
1462
+
1463
+ def _handle_mipro_iteration_start(self, event_data: dict[str, Any]) -> None:
1464
+ """Handle MIPRO iteration start event and initialize progress tracking.
1465
+
1466
+ Called at the start of each MIPRO iteration. On the first iteration (0),
1467
+ initializes all progress tracking variables including:
1468
+ - Total iterations and trials per iteration
1469
+ - Batch size (for minibatch evaluations)
1470
+ - Max rollouts estimate (iterations * trials * batch_size)
1471
+ - Time and token budgets
1472
+
1473
+ Sets the start time for elapsed time tracking.
1474
+
1475
+ Args:
1476
+ event_data: Event data dictionary containing:
1477
+ - data.iteration: Current iteration number (0-indexed)
1478
+ - data.num_iterations: Total iterations
1479
+ - data.num_trials_per_iteration: Trials per iteration
1480
+ - data.batch_size: Minibatch size (N for minibatch scores)
1481
+ - data.max_trials: Maximum trials limit (optional)
1482
+ - data.max_rollouts: Maximum rollouts limit (optional)
1483
+ - data.max_time_seconds: Maximum time limit (optional)
1484
+ """
1485
+ import time
1486
+
1487
+ data = event_data.get("data", {})
1488
+ if not isinstance(data, dict):
1489
+ return
1490
+
1491
+ iteration = data.get("iteration")
1492
+ if iteration == 0 and self.mipro_start_time is None:
1493
+ self.mipro_start_time = time.time()
1494
+
1495
+ # Extract total iterations and trials per iteration from first iteration
1496
+ if iteration == 0:
1497
+ self.mipro_num_iterations = data.get("num_iterations") or self.mipro_num_iterations
1498
+ self.mipro_trials_per_iteration = data.get("num_trials_per_iteration") or self.mipro_trials_per_iteration
1499
+ batch_size = data.get("batch_size")
1500
+ if batch_size is not None:
1501
+ self.mipro_batch_size = batch_size
1502
+
1503
+ if self.mipro_num_iterations and self.mipro_trials_per_iteration:
1504
+ self.mipro_total_trials = self.mipro_num_iterations * self.mipro_trials_per_iteration
1505
+
1506
+ # Extract max limits if available (from events, but TOML value takes precedence)
1507
+ # Only override if TOML value wasn't set
1508
+ max_trials = data.get("max_trials")
1509
+ max_rollouts_from_event = data.get("max_rollouts")
1510
+ if self.mipro_max_rollouts is None:
1511
+ if max_rollouts_from_event is not None:
1512
+ # Use event value if TOML value wasn't set
1513
+ self.mipro_max_rollouts = max_rollouts_from_event
1514
+ elif max_trials is not None:
1515
+ # Fallback: If max_trials is set, use it as max rollouts (approximation)
1516
+ self.mipro_max_rollouts = max_trials
1517
+ elif self.mipro_num_iterations and self.mipro_trials_per_iteration and self.mipro_batch_size:
1518
+ # Estimate max rollouts: iterations * trials_per_iteration * batch_size
1519
+ self.mipro_max_rollouts = self.mipro_num_iterations * self.mipro_trials_per_iteration * self.mipro_batch_size
1520
+
1521
+ max_time_seconds = data.get("max_time_seconds") or data.get("max_wall_clock_seconds")
1522
+ if max_time_seconds is not None and self.mipro_max_time_seconds is None:
1523
+ # Use event value only if TOML value wasn't set
1524
+ self.mipro_max_time_seconds = float(max_time_seconds)
1525
+
1526
+ self.mipro_current_iteration = iteration if iteration is not None else self.mipro_current_iteration
1527
+
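+ # Worked example (illustrative numbers): with num_iterations=10,
+ # num_trials_per_iteration=6 and batch_size=8, and no max_rollouts/max_trials
+ # supplied, the estimated rollout budget is 10 * 6 * 8 = 480 rollouts.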
1528
+ def _handle_mipro_iteration_complete(self, event_data: dict[str, Any]) -> None:
1529
+ """Handle MIPRO iteration completion event.
1530
+
1531
+ Updates progress tracking when an iteration completes, including:
1532
+ - Cumulative trial count
1533
+ - Current iteration number
1534
+
1535
+ Emits a progress update showing overall progress, trials completed,
1536
+ iterations, rollouts, tokens, and time.
1537
+
1538
+ Args:
1539
+ event_data: Event data dictionary containing:
1540
+ - data.iteration: Completed iteration number
1541
+ - data.cumulative: Cumulative trial count across all iterations
1542
+ """
1543
+ data = event_data.get("data", {})
1544
+ if not isinstance(data, dict):
1545
+ return
1546
+
1547
+ cumulative = data.get("cumulative")
1548
+ if cumulative is not None:
1549
+ self.mipro_completed_trials = cumulative
1550
+
1551
+ # Update current iteration
1552
+ iteration = data.get("iteration")
1553
+ if iteration is not None:
1554
+ self.mipro_current_iteration = iteration
1555
+
1556
+ # Emit progress update
1557
+ self._emit_mipro_progress()
1558
+
1559
+ def _handle_mipro_trial_complete(self, event_data: dict[str, Any]) -> None:
1560
+ """Handle MIPRO trial completion event (minibatch evaluation).
1561
+
1562
+ Processes minibatch trial completion events, which occur frequently during
1563
+ MIPRO optimization. Tracks:
1564
+ - Completed trial count
1565
+ - Rollouts completed (from num_seeds)
1566
+ - Minibatch scores (displayed if show_trial_results is True)
1567
+
1568
+ Displays trial results in GEPA-like format: [Trial X] Score: Y (Best: Z) N=W
1569
+ where N is the minibatch size. Emits throttled progress updates.
1570
+
1571
+ Args:
1572
+ event_data: Event data dictionary containing:
1573
+ - data.minibatch_score: Score from minibatch evaluation
1574
+ - data.iteration: Current iteration number
1575
+ - data.trial: Trial number within iteration
1576
+ - data.num_seeds: Number of seeds evaluated (minibatch size N)
1577
+ """
1578
+ data = event_data.get("data", {})
1579
+ if not isinstance(data, dict):
1580
+ return
1581
+
1582
+ # Increment completed trials counter
1583
+ self.mipro_completed_trials += 1
1584
+
1585
+ # Count rollouts from trial events
1586
+ num_seeds = data.get("num_seeds") or data.get("num_instances", 0)
1587
+ if num_seeds:
1588
+ self.mipro_rollouts_completed += num_seeds
1589
+
1590
+ # Show trial score (minibatch) - like GEPA trial format
1591
+ if self.show_trial_results:
1592
+ timestamp = datetime.now().strftime("%H:%M:%S")
1593
+ minibatch_score = data.get("minibatch_score")
1594
+ iteration = data.get("iteration")
1595
+ trial = data.get("trial")
1596
+
1597
+ if minibatch_score is not None:
1598
+ try:
1599
+ score_float = float(minibatch_score)
1600
+ # Calculate trial number for display
1601
+ if iteration is not None and trial is not None and self.mipro_trials_per_iteration:
1602
+ trial_num_display = (iteration * self.mipro_trials_per_iteration) + (trial + 1)
1603
+ else:
1604
+ trial_num_display = self.mipro_completed_trials
1605
+
1606
+ n_str = f" N={num_seeds}" if num_seeds else ""
1607
+ best_str = f" (Best: {self.mipro_best_score:.4f})" if self.mipro_best_score > 0 else ""
1608
+
1609
+ self._write_log(
1610
+ f"[{timestamp}] [Trial {trial_num_display}] Score: {score_float:.4f}{best_str}{n_str}"
1611
+ )
1612
+ except (ValueError, TypeError):
1613
+ pass
1614
+
1615
+ # Emit progress update after each trial (throttled internally)
1616
+ self._emit_mipro_progress()
1617
+
1618
+ def _handle_mipro_fulleval_complete(self, event_data: dict[str, Any]) -> None:
1619
+ """Handle MIPRO full evaluation completion event.
1620
+
1621
+ Processes full evaluation events, which occur less frequently than minibatch
1622
+ trials. Full evaluations use the full validation set and are more expensive.
1623
+ Only displays results if the score is "promising":
1624
+ - Better than current best score, OR
1625
+ - At least 5% improvement over baseline
1626
+
1627
+ Tracks rollouts from full evaluations and updates best score. Displays
1628
+ results with baseline comparison and improvement percentage.
1629
+
1630
+ Args:
1631
+ event_data: Event data dictionary containing:
1632
+ - data.score: Full evaluation score
1633
+ - data.iteration: Current iteration number
1634
+ - data.trial: Trial number within iteration
1635
+ - data.num_seeds: Number of seeds evaluated (full eval size)
1636
+ - data.seeds: List of seed values (alternative to num_seeds)
1637
+ """
1638
+ data = event_data.get("data", {})
1639
+ if not isinstance(data, dict):
1640
+ return
1641
+
1642
+ # Count rollouts from full eval
1643
+ num_seeds = data.get("num_seeds") or data.get("seeds", 0)
1644
+ if isinstance(num_seeds, list):
1645
+ num_seeds = len(num_seeds)
1646
+ if num_seeds:
1647
+ self.mipro_rollouts_completed += num_seeds
1648
+
1649
+ score = data.get("score")
1650
+ if score is None:
1651
+ return
1652
+
1653
+ try:
1654
+ score_float = float(score)
1655
+ except (ValueError, TypeError):
1656
+ return
1657
+
1658
+ # Initialize baseline if not set (use first score as baseline)
1659
+ if self.mipro_baseline_score is None:
1660
+ self.mipro_baseline_score = score_float
1661
+
1662
+ # Only show if score is promising:
1663
+ # - Better than current best, OR
1664
+ # - At least 5% improvement over baseline
1665
+ is_promising = False
1666
+ if score_float > self.mipro_best_score:
1667
+ self.mipro_best_score = score_float
1668
+ is_promising = True
1669
+ elif self.mipro_baseline_score is not None:
1670
+ improvement = score_float - self.mipro_baseline_score
1671
+ improvement_pct = (improvement / self.mipro_baseline_score * 100) if self.mipro_baseline_score > 0 else 0
1672
+ if improvement_pct >= 5.0: # At least 5% improvement over baseline
1673
+ is_promising = True
1674
+
1675
+ if is_promising:
1676
+ timestamp = datetime.now().strftime("%H:%M:%S")
1677
+ iteration = data.get("iteration")
1678
+ trial = data.get("trial")
1679
+ seeds = data.get("seeds") or data.get("num_seeds", 0)
1680
+ if isinstance(seeds, list):
1681
+ seeds = len(seeds)
1682
+
1683
+ # Format similar to GEPA trial results with N displayed
1684
+ iter_str = f" iter={iteration}" if iteration is not None else ""
1685
+ trial_str = f" trial={trial}" if trial is not None else ""
1686
+ n_str = f" N={seeds}" if seeds else ""
1687
+
1688
+ baseline_str = ""
1689
+ if self.mipro_baseline_score is not None:
1690
+ improvement = score_float - self.mipro_baseline_score
1691
+ improvement_pct = (improvement / self.mipro_baseline_score * 100) if self.mipro_baseline_score > 0 else 0
1692
+ baseline_str = f" (Baseline: {self.mipro_baseline_score:.4f}, +{improvement_pct:.1f}%)"
1693
+
1694
+ self._write_log(
1695
+ f"[{timestamp}] Full eval: Score={score_float:.4f} (Best: {self.mipro_best_score:.4f}){n_str}{baseline_str}{iter_str}{trial_str}"
1696
+ )
1697
+
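+ # Worked example of the "promising" rule (illustrative numbers): with a
+ # baseline of 0.60, a full-eval score of 0.64 is shown because
+ # (0.64 - 0.60) / 0.60 ≈ 6.7% >= 5%, whereas 0.61 (≈ 1.7%) is suppressed
+ # unless it also beats the current best score.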
+    def _handle_mipro_new_incumbent(self, event_data: dict[str, Any]) -> None:
+        """Handle MIPRO new incumbent event (best candidate found).
+
+        Processes events when MIPRO finds a new best candidate (incumbent).
+        Updates the optimization curve and displays the result in GEPA-like format
+        for consistency. Tracks cumulative trial count for curve visualization.
+
+        Args:
+            event_data: Event data dictionary containing:
+                - data.minibatch_score: Minibatch score of the new incumbent
+                - data.best_score: Overall best score
+                - data.iteration: Current iteration number
+                - data.trial: Trial number within iteration
+                - data.cumulative_trials: Cumulative trial count across iterations
+                - data.num_seeds: Minibatch size (N)
+        """
+        data = event_data.get("data", {})
+        if not isinstance(data, dict):
+            return
+
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        minibatch_score = data.get("minibatch_score")
+        best_score = data.get("best_score")
+        iteration = data.get("iteration")
+        trial = data.get("trial")
+        num_seeds = data.get("num_seeds")  # N for minibatch
+
+        if minibatch_score is None:
+            return
+
+        try:
+            score_float = float(minibatch_score)
+        except (ValueError, TypeError):
+            return
+
+        # Update best score if this is better
+        if best_score is not None:
+            best_float = float(best_score)
+            if best_float > self.best_score_so_far:
+                self.best_score_so_far = best_float
+        elif score_float > self.best_score_so_far:
+            self.best_score_so_far = score_float
+
+        # Track optimization curve
+        if trial is not None:
+            # Use cumulative trial count for x-axis
+            cumulative_trials = data.get("cumulative_trials")
+            if cumulative_trials is not None:
+                trial_num = cumulative_trials
+            else:
+                # Estimate: (iteration * trials_per_iteration) + trial
+                if iteration is not None and self.mipro_trials_per_iteration:
+                    trial_num = (iteration * self.mipro_trials_per_iteration) + (trial + 1)
+                else:
+                    trial_num = self.trial_counter + 1
+
+            self.optimization_curve.append((trial_num, self.best_score_so_far))
+            self.trial_counter = trial_num
+
+        # Format like GEPA: [Trial X] Score: X (Best: Y) N=Z
+        trial_num_display = self.trial_counter if self.trial_counter > 0 else (trial + 1 if trial is not None else 1)
+        n_str = f" N={num_seeds}" if num_seeds is not None else ""
+
+        click.echo(
+            f"[{timestamp}] [Trial {trial_num_display}] Score: {score_float:.4f} (Best: {self.best_score_so_far:.4f}){n_str}"
+        )
+
+        # Emit progress update after each trial (throttled internally)
+        self._emit_mipro_progress()
+
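A short worked sketch of the fallback trial numbering above, assuming the event omits data.cumulative_trials and that trials_per_iteration was already captured from an earlier config event; all numbers are invented.

# Fallback x-axis estimate when data.cumulative_trials is absent:
#   trial_num = iteration * trials_per_iteration + (trial + 1)
iteration = 3               # 0-based iteration from the event
trial = 4                   # 0-based trial index within the iteration
trials_per_iteration = 10   # assumed value of self.mipro_trials_per_iteration
trial_num = iteration * trials_per_iteration + (trial + 1)  # -> 35
# (35, best_score_so_far) is appended to optimization_curve and trial_counter becomes 35.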
+    def _handle_mipro_budget_update(self, event_data: dict[str, Any]) -> None:
+        """Handle MIPRO budget update events.
+
+        Tracks token usage and cost accumulation during optimization. Updates:
+        - Total tokens consumed (all operations)
+        - Policy tokens (rollout tokens only)
+        - Total cost in USD
+        - Max token and cost limits (if provided in event)
+
+        Emits throttled progress updates to show budget consumption.
+
+        Args:
+            event_data: Event data dictionary containing:
+                - data.total_tokens: Total tokens consumed
+                - data.policy_tokens: Tokens used for rollouts (policy only)
+                - data.total_cost_usd: Total cost in USD
+                - data.max_token_limit: Maximum token budget (optional)
+                - data.max_spend_usd: Maximum cost budget (optional)
+        """
+        data = event_data.get("data", {})
+        if not isinstance(data, dict):
+            return
+
+        # Update token tracking
+        total_tokens = data.get("total_tokens")
+        if total_tokens is not None:
+            self.mipro_total_tokens = total_tokens
+
+        # Track policy tokens separately (rollout tokens)
+        policy_tokens = data.get("policy_tokens")
+        if policy_tokens is not None:
+            self.mipro_policy_tokens = policy_tokens
+
+        # Update cost tracking
+        total_cost = data.get("total_cost_usd")
+        if total_cost is not None:
+            self.mipro_total_cost = total_cost
+
+        # Extract max limits if available in event data
+        max_token_limit = data.get("max_token_limit")
+        if max_token_limit is not None:
+            self.mipro_max_tokens = max_token_limit
+
+        max_spend_usd = data.get("max_spend_usd")
+        if max_spend_usd is not None:
+            self.mipro_max_cost = max_spend_usd
+
+        # Emit progress update periodically (throttled)
+        self._emit_mipro_progress()
+
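A hedged example of a budget event this handler would consume; field names mirror the Args block above and the figures are invented.

event = {
    "data": {
        "total_tokens": 1_250_000,     # -> mipro_total_tokens
        "policy_tokens": 900_000,      # rollout-only tokens -> mipro_policy_tokens
        "total_cost_usd": 3.75,        # -> mipro_total_cost
        "max_token_limit": 5_000_000,  # optional ceilings, stored only when present
        "max_spend_usd": 20.0,
    }
}
# Each field is copied only if present, so partial updates (tokens without cost, etc.) are safe.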
+    def _emit_mipro_progress(self) -> None:
+        """Emit a comprehensive progress update for MIPRO (throttled).
+
+        Formats and displays MIPRO progress in a format similar to GEPA for consistency.
+        Shows:
+        - Overall completion percentage
+        - Trial progress (completed/total with remaining)
+        - Iteration progress (current/total)
+        - Rollout progress (completed/max)
+        - Token usage (used/budget in millions)
+        - Cost (USD)
+        - Elapsed time and ETA
+
+        Progress updates are throttled to emit at most every 5 seconds to avoid
+        overwhelming the console. This method is called after significant events
+        (trial completion, iteration completion, budget updates).
+
+        Note:
+            Only emits if start_time is set (job has started) and sufficient time
+            has passed since the last update.
+        """
+        import time
+
+        if self.mipro_start_time is None:
+            return
+
+        # Throttle progress updates - only emit every N seconds
+        now = time.time()
+        if self._last_progress_emit_time is not None:
+            time_since_last = now - self._last_progress_emit_time
+            if time_since_last < self._progress_emit_interval:
+                return  # Skip this update
+
+        self._last_progress_emit_time = now
+
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        elapsed = now - self.mipro_start_time
+
+        parts = []
+
+        # Overall progress percentage
+        percent_overall = None
+        if self.mipro_total_trials and self.mipro_completed_trials is not None:
+            percent_overall = (self.mipro_completed_trials / self.mipro_total_trials) * 100
+            parts.append(f"{int(percent_overall)}% complete")
+
+        # Trial progress (like rollouts in GEPA)
+        if self.mipro_total_trials and self.mipro_completed_trials is not None:
+            parts.append(f"trials={self.mipro_completed_trials}/{self.mipro_total_trials}")
+            # Calculate remaining trials
+            remaining_trials = self.mipro_total_trials - self.mipro_completed_trials
+            if remaining_trials > 0:
+                parts.append(f"rem={remaining_trials}")
+            # Show percentage
+            if percent_overall is not None:
+                parts.append(f"({int(percent_overall)}%)")
+        elif self.mipro_completed_trials is not None:
+            parts.append(f"trials={self.mipro_completed_trials}")
+
+        # Iteration progress
+        if self.mipro_num_iterations and self.mipro_current_iteration is not None:
+            parts.append(f"iter={self.mipro_current_iteration + 1}/{self.mipro_num_iterations}")
+
+        # Rollouts completed vs max (like GEPA) - always show if we have any rollouts
+        if self.mipro_rollouts_completed > 0:
+            # Always try to show max if available (from TOML, event, or estimate)
+            max_rollouts_to_show = self.mipro_max_rollouts
+            if max_rollouts_to_show is None and self.mipro_total_trials and self.mipro_batch_size:
+                # Estimate max rollouts from total trials if available
+                max_rollouts_to_show = self.mipro_total_trials * self.mipro_batch_size
+
+            if max_rollouts_to_show:
+                rollouts_pct = (self.mipro_rollouts_completed / max_rollouts_to_show) * 100
+                parts.append(f"rollouts={self.mipro_rollouts_completed}/{max_rollouts_to_show} ({int(rollouts_pct)}%)")
+            else:
+                parts.append(f"rollouts={self.mipro_rollouts_completed}")
+
+        # Tokens (policy tokens only, like GEPA rollout_tokens) - always show max if available
+        if self.mipro_policy_tokens > 0:
+            rollout_tokens_millions = self.mipro_policy_tokens / 1_000_000.0
+            if self.mipro_max_tokens:
+                # Use max_tokens as budget for rollout tokens (approximation)
+                budget_millions = self.mipro_max_tokens / 1_000_000.0
+                tokens_pct = (self.mipro_policy_tokens / self.mipro_max_tokens * 100) if self.mipro_max_tokens > 0 else 0
+                parts.append(f"tokens={rollout_tokens_millions:.2f}M/{budget_millions:.2f}M ({int(tokens_pct)}%)")
+            else:
+                parts.append(f"tokens={rollout_tokens_millions:.2f}M")
+
+        # Timing (elapsed out of max, like GEPA)
+        elapsed_seconds = int(elapsed)
+        if self.mipro_max_time_seconds:
+            elapsed_pct = (elapsed / self.mipro_max_time_seconds * 100) if self.mipro_max_time_seconds > 0 else 0
+            max_time_minutes = self.mipro_max_time_seconds / 60.0
+            if elapsed_seconds >= 60:
+                elapsed_str = f"{elapsed_seconds / 60:.1f}min/{max_time_minutes:.1f}min ({int(elapsed_pct)}%)"
+            else:
+                elapsed_str = f"{elapsed_seconds}s/{int(self.mipro_max_time_seconds)}s ({int(elapsed_pct)}%)"
+        else:
+            if elapsed_seconds >= 60:
+                elapsed_str = f"{elapsed_seconds / 60:.1f}min"
+            else:
+                elapsed_str = f"{elapsed_seconds}s"
+        parts.append(f"elapsed={elapsed_str}")
+
+        # ETA calculation (similar to GEPA) - always show if we have progress
+        eta_seconds = None
+        if self.mipro_completed_trials is not None and self.mipro_completed_trials > 0 and elapsed > 0:
+            rate = self.mipro_completed_trials / elapsed
+            if rate > 0:
+                if self.mipro_total_trials:
+                    # Calculate ETA based on remaining trials
+                    remaining = self.mipro_total_trials - self.mipro_completed_trials
+                    if remaining > 0:
+                        eta_seconds = remaining / rate
+                else:
+                    # Estimate based on iterations if we don't have total trials
+                    if self.mipro_num_iterations and self.mipro_current_iteration is not None:
+                        remaining_iterations = self.mipro_num_iterations - (self.mipro_current_iteration + 1)
+                        if remaining_iterations > 0 and self.mipro_trials_per_iteration:
+                            # Estimate: assume same rate for remaining iterations
+                            remaining_trials_estimate = remaining_iterations * self.mipro_trials_per_iteration
+                            eta_seconds = remaining_trials_estimate / rate
+
+        if eta_seconds is not None and eta_seconds > 0:
+            eta_str = f"{eta_seconds / 60:.1f}min" if eta_seconds >= 60 else f"{int(eta_seconds)}s"
+            parts.append(f"eta={eta_str}")
+
+        if parts:
+            progress_msg = " ".join(parts)
+            self._write_log(f"[{timestamp}] Progress: {progress_msg}")
+
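For reference, a sketch of the throttle and the kind of line this method writes; the counts and timings below are invented, and the 5-second figure corresponds to _progress_emit_interval as described in the docstring.

# At most one progress line per _progress_emit_interval seconds.
# With 12/40 trials done, iteration 2 of 5, 600 of an estimated 2000 rollouts,
# 1.20M of 5.00M policy tokens, and 4.5 of 30 allowed minutes elapsed,
# the emitted line would look roughly like:
# [12:41:03] Progress: 30% complete trials=12/40 rem=28 (30%) iter=2/5 rollouts=600/2000 (30%) tokens=1.20M/5.00M (24%) elapsed=4.5min/30.0min (15%) eta=10.5min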
+    def flush(self) -> None:
+        """Flush buffered output and close log file."""
+        if self._log_file_handle:
+            try:
+                from datetime import datetime
+                self._log_file_handle.write("\n" + "=" * 80 + "\n")
+                self._log_file_handle.write(f"Ended: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
+                self._log_file_handle.write("=" * 80 + "\n")
+                self._log_file_handle.flush()
+                self._log_file_handle.close()
+            except Exception:
+                pass
+            finally:
+                self._log_file_handle = None
+
+    def _handle_proposal_scored(self, event_data: dict[str, Any]) -> None:
+        """Handle GEPA proposal scored events (transformations).
+
+        Displays transformation/proposal scoring events from GEPA optimization.
+        Only called if show_transformations is True (default: False) to avoid
+        verbose output. Shows the score assigned to each proposed transformation.
+
+        Args:
+            event_data: Event data dictionary containing:
+                - data.score: Score assigned to the transformation/proposal
+        """
+        # Only called if show_transformations=True
+        data = event_data.get("data", {})
+        if not isinstance(data, dict):
+            return
+
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        score = data.get("score")
+        if score is not None:
+            click.echo(f"[{timestamp}] Proposal scored: {score:.4f}")
+
+
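A small sketch of the output produced by _handle_proposal_scored above, with an invented score; it only appears when show_transformations is enabled.

# Given {"data": {"score": 0.7321}} and show_transformations=True,
# the handler echoes a line of the form:
# [12:34:56] Proposal scored: 0.7321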
+__all__ = [
+    "GraphGenHandler",
+    "BufferedHandler",
+    "CallbackHandler",
+    "CLIHandler",
+    "PromptLearningHandler",
+    "JSONHandler",
+    "IntegrationTestHandler",
+    "LossCurveHandler",
+    "RichHandler",
+    "StreamHandler",
+]