synth-ai 0.2.14__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (1086)
  1. synth_ai/__init__.py +25 -46
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +98 -72
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/demo/__init__.py +3 -0
  25. synth_ai/cli/commands/demo/core.py +153 -0
  26. synth_ai/cli/commands/eval/__init__.py +10 -0
  27. synth_ai/cli/commands/eval/config.py +338 -0
  28. synth_ai/cli/commands/eval/core.py +258 -0
  29. synth_ai/cli/commands/eval/runner.py +704 -0
  30. synth_ai/cli/commands/eval/validation.py +60 -0
  31. synth_ai/cli/commands/filter/__init__.py +12 -0
  32. synth_ai/cli/commands/filter/core.py +424 -0
  33. synth_ai/cli/commands/filter/errors.py +55 -0
  34. synth_ai/cli/commands/filter/validation.py +77 -0
  35. synth_ai/cli/commands/help/__init__.py +185 -0
  36. synth_ai/cli/commands/help/core.py +72 -0
  37. synth_ai/cli/commands/scan/__init__.py +19 -0
  38. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  39. synth_ai/cli/commands/scan/core.py +344 -0
  40. synth_ai/cli/commands/scan/health_checker.py +242 -0
  41. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  42. synth_ai/cli/commands/scan/models.py +83 -0
  43. synth_ai/cli/commands/smoke/__init__.py +7 -0
  44. synth_ai/cli/commands/smoke/core.py +1428 -0
  45. synth_ai/cli/commands/status/__init__.py +3 -0
  46. synth_ai/cli/commands/status/client.py +91 -0
  47. synth_ai/cli/commands/status/config.py +12 -0
  48. synth_ai/cli/commands/status/errors.py +11 -0
  49. synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
  50. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  51. synth_ai/cli/commands/status/subcommands/files.py +34 -0
  52. synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
  53. synth_ai/cli/commands/status/subcommands/models.py +35 -0
  54. synth_ai/cli/commands/status/subcommands/runs.py +34 -0
  55. synth_ai/cli/commands/status/subcommands/session.py +77 -0
  56. synth_ai/cli/commands/status/subcommands/summary.py +39 -0
  57. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  58. synth_ai/cli/commands/status/utils.py +23 -0
  59. synth_ai/cli/commands/train/__init__.py +51 -0
  60. synth_ai/cli/commands/train/core.py +22 -0
  61. synth_ai/cli/commands/train/errors.py +117 -0
  62. synth_ai/cli/commands/train/prompt_learning_validation.py +632 -0
  63. synth_ai/cli/commands/train/validation.py +392 -0
  64. synth_ai/cli/commands/train/verifier_schemas.py +200 -0
  65. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  66. synth_ai/cli/demo_apps/__init__.py +10 -0
  67. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  68. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  69. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  70. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  71. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  72. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  73. synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
  74. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  75. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  76. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  79. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  80. synth_ai/cli/demo_apps/math/_common.py +16 -0
  81. synth_ai/cli/demo_apps/math/app.py +38 -0
  82. synth_ai/cli/demo_apps/math/config.toml +75 -0
  83. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  84. synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
  85. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  86. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  87. synth_ai/cli/demo_apps/mipro/task_app.py +911 -0
  88. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  89. synth_ai/cli/demos/__init__.py +12 -0
  90. synth_ai/cli/demos/demo.py +32 -0
  91. synth_ai/cli/demos/rl_demo.py +254 -0
  92. synth_ai/cli/deploy.py +216 -0
  93. synth_ai/cli/infra/__init__.py +14 -0
  94. synth_ai/cli/infra/balance.py +216 -0
  95. synth_ai/cli/infra/mcp.py +35 -0
  96. synth_ai/cli/infra/modal_app.py +36 -0
  97. synth_ai/cli/infra/setup.py +69 -0
  98. synth_ai/cli/infra/status.py +16 -0
  99. synth_ai/cli/infra/turso.py +77 -0
  100. synth_ai/cli/lib/__init__.py +10 -0
  101. synth_ai/cli/lib/agents.py +76 -0
  102. synth_ai/cli/lib/apps/modal_app.py +101 -0
  103. synth_ai/cli/lib/apps/task_app.py +642 -0
  104. synth_ai/cli/lib/bin.py +39 -0
  105. synth_ai/cli/lib/env.py +375 -0
  106. synth_ai/cli/lib/errors.py +85 -0
  107. synth_ai/cli/lib/modal.py +315 -0
  108. synth_ai/cli/lib/plotting.py +126 -0
  109. synth_ai/cli/lib/prompt_args.py +39 -0
  110. synth_ai/cli/lib/prompts.py +284 -0
  111. synth_ai/cli/lib/sqld.py +122 -0
  112. synth_ai/cli/lib/task_app_discovery.py +884 -0
  113. synth_ai/cli/lib/task_app_env.py +295 -0
  114. synth_ai/cli/lib/train_cfgs.py +300 -0
  115. synth_ai/cli/lib/tunnel_records.py +207 -0
  116. synth_ai/cli/local/__init__.py +14 -0
  117. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  118. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  119. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  120. synth_ai/cli/local/experiment_queue/config.py +128 -0
  121. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  122. synth_ai/cli/local/experiment_queue/database.py +175 -0
  123. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  124. synth_ai/cli/local/experiment_queue/models.py +231 -0
  125. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  126. synth_ai/cli/local/experiment_queue/results.py +373 -0
  127. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  128. synth_ai/cli/local/experiment_queue/service.py +344 -0
  129. synth_ai/cli/local/experiment_queue/status.py +372 -0
  130. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  131. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  132. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  133. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  134. synth_ai/cli/local/session/__init__.py +92 -0
  135. synth_ai/cli/local/session/client.py +383 -0
  136. synth_ai/cli/local/session/constants.py +63 -0
  137. synth_ai/cli/local/session/exceptions.py +105 -0
  138. synth_ai/cli/local/session/manager.py +139 -0
  139. synth_ai/cli/local/session/models.py +89 -0
  140. synth_ai/cli/local/session/query.py +110 -0
  141. synth_ai/cli/root.py +30 -6
  142. synth_ai/cli/task_apps/__init__.py +37 -0
  143. synth_ai/cli/task_apps/commands.py +3145 -0
  144. synth_ai/cli/task_apps/deploy.py +7 -0
  145. synth_ai/cli/task_apps/list.py +26 -0
  146. synth_ai/cli/task_apps/main.py +36 -0
  147. synth_ai/cli/task_apps/modal_serve.py +11 -0
  148. synth_ai/cli/task_apps/serve.py +11 -0
  149. synth_ai/cli/training/__init__.py +8 -0
  150. synth_ai/cli/training/train.py +5 -0
  151. synth_ai/cli/training/train_cfg.py +34 -0
  152. synth_ai/cli/training/watch.py +506 -0
  153. synth_ai/cli/turso.py +34 -55
  154. synth_ai/cli/utils/__init__.py +8 -0
  155. synth_ai/cli/utils/experiments.py +235 -0
  156. synth_ai/cli/utils/queue.py +504 -0
  157. synth_ai/cli/utils/recent.py +133 -0
  158. synth_ai/cli/utils/traces.py +164 -0
  159. synth_ai/contracts/__init__.py +67 -0
  160. synth_ai/core/__init__.py +100 -0
  161. synth_ai/core/_utils/__init__.py +54 -0
  162. synth_ai/core/_utils/base_url.py +10 -0
  163. synth_ai/core/_utils/http.py +10 -0
  164. synth_ai/core/_utils/prompts.py +14 -0
  165. synth_ai/core/_utils/task_app_state.py +12 -0
  166. synth_ai/core/_utils/user_config.py +10 -0
  167. synth_ai/core/apps/common.py +116 -0
  168. synth_ai/core/auth.py +95 -0
  169. synth_ai/core/cfgs.py +240 -0
  170. synth_ai/core/config/__init__.py +16 -0
  171. synth_ai/core/config/base.py +168 -0
  172. synth_ai/core/config/resolver.py +89 -0
  173. synth_ai/core/env.py +231 -0
  174. synth_ai/core/errors.py +125 -0
  175. synth_ai/core/http.py +230 -0
  176. synth_ai/core/integrations/__init__.py +11 -0
  177. synth_ai/core/integrations/cloudflare.py +1886 -0
  178. synth_ai/core/integrations/mcp/__init__.py +6 -0
  179. synth_ai/core/integrations/mcp/__main__.py +8 -0
  180. synth_ai/core/integrations/mcp/claude.py +36 -0
  181. synth_ai/core/integrations/mcp/main.py +254 -0
  182. synth_ai/core/integrations/mcp/setup.py +100 -0
  183. synth_ai/core/integrations/modal.py +277 -0
  184. synth_ai/core/json.py +72 -0
  185. synth_ai/core/log_filter.py +99 -0
  186. synth_ai/core/logging.py +82 -0
  187. synth_ai/core/paths.py +107 -0
  188. synth_ai/core/pricing.py +109 -0
  189. synth_ai/core/process.py +233 -0
  190. synth_ai/core/ssl.py +25 -0
  191. synth_ai/core/storage/__init__.py +71 -0
  192. synth_ai/core/task_app_state.py +318 -0
  193. synth_ai/core/telemetry.py +282 -0
  194. synth_ai/core/tracing_v3/__init__.py +99 -0
  195. synth_ai/core/tracing_v3/abstractions.py +348 -0
  196. synth_ai/core/tracing_v3/config.py +229 -0
  197. synth_ai/core/tracing_v3/constants.py +21 -0
  198. synth_ai/core/tracing_v3/db_config.py +182 -0
  199. synth_ai/core/tracing_v3/decorators.py +401 -0
  200. synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
  201. synth_ai/core/tracing_v3/migration_helper.py +119 -0
  202. synth_ai/core/tracing_v3/session_tracer.py +542 -0
  203. synth_ai/core/tracing_v3/storage/base.py +211 -0
  204. synth_ai/core/tracing_v3/storage/config.py +109 -0
  205. synth_ai/core/tracing_v3/storage/factory.py +39 -0
  206. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  207. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  208. synth_ai/core/tracing_v3/turso/models.py +470 -0
  209. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  210. synth_ai/core/tracing_v3/utils.py +108 -0
  211. synth_ai/core/urls.py +18 -0
  212. synth_ai/core/user_config.py +137 -0
  213. synth_ai/core/uvicorn.py +222 -0
  214. synth_ai/data/__init__.py +83 -0
  215. synth_ai/data/enums.py +122 -0
  216. synth_ai/data/rewards.py +249 -0
  217. synth_ai/data/traces.py +35 -0
  218. synth_ai/products/__init__.py +6 -0
  219. synth_ai/products/graph_evolve/__init__.py +45 -0
  220. synth_ai/products/graph_evolve/client.py +226 -0
  221. synth_ai/products/graph_evolve/config.py +591 -0
  222. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  223. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  224. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  225. synth_ai/products/graph_evolve/run.py +222 -0
  226. synth_ai/products/graph_gepa/__init__.py +23 -0
  227. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  228. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  229. synth_ai/sdk/__init__.py +129 -0
  230. synth_ai/sdk/api/__init__.py +1 -0
  231. synth_ai/sdk/api/eval/__init__.py +33 -0
  232. synth_ai/sdk/api/eval/job.py +732 -0
  233. synth_ai/sdk/api/models/supported.py +514 -0
  234. synth_ai/sdk/api/research_agent/__init__.py +296 -0
  235. synth_ai/sdk/api/train/__init__.py +85 -0
  236. synth_ai/sdk/api/train/builders.py +1076 -0
  237. synth_ai/sdk/api/train/cli.py +2196 -0
  238. synth_ai/sdk/api/train/config_finder.py +267 -0
  239. synth_ai/sdk/api/train/configs/__init__.py +67 -0
  240. synth_ai/sdk/api/train/configs/prompt_learning.py +1800 -0
  241. synth_ai/sdk/api/train/configs/rl.py +436 -0
  242. synth_ai/sdk/api/train/configs/sft.py +263 -0
  243. synth_ai/sdk/api/train/configs/shared.py +81 -0
  244. synth_ai/sdk/api/train/context_learning.py +312 -0
  245. synth_ai/sdk/api/train/env_resolver.py +418 -0
  246. synth_ai/sdk/api/train/graph_validators.py +216 -0
  247. synth_ai/sdk/api/train/graphgen.py +1102 -0
  248. synth_ai/sdk/api/train/graphgen_models.py +873 -0
  249. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  250. synth_ai/sdk/api/train/local_api.py +10 -0
  251. synth_ai/sdk/api/train/pollers.py +160 -0
  252. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  253. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  254. synth_ai/sdk/api/train/progress/events.py +326 -0
  255. synth_ai/sdk/api/train/progress/results.py +428 -0
  256. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  257. synth_ai/sdk/api/train/prompt_learning.py +800 -0
  258. synth_ai/sdk/api/train/rl.py +478 -0
  259. synth_ai/sdk/api/train/sft.py +398 -0
  260. synth_ai/sdk/api/train/summary.py +522 -0
  261. synth_ai/sdk/api/train/supported_algos.py +147 -0
  262. synth_ai/sdk/api/train/task_app.py +351 -0
  263. synth_ai/sdk/api/train/utils.py +279 -0
  264. synth_ai/sdk/api/train/validators.py +2424 -0
  265. synth_ai/sdk/graphs/__init__.py +15 -0
  266. synth_ai/sdk/graphs/completions.py +776 -0
  267. synth_ai/sdk/graphs/verifier_schemas.py +222 -0
  268. synth_ai/sdk/inference/__init__.py +6 -0
  269. synth_ai/sdk/inference/client.py +128 -0
  270. synth_ai/sdk/jobs/__init__.py +16 -0
  271. synth_ai/sdk/jobs/client.py +371 -0
  272. synth_ai/sdk/learning/__init__.py +99 -0
  273. synth_ai/sdk/learning/client.py +240 -0
  274. synth_ai/sdk/learning/context_learning_client.py +531 -0
  275. synth_ai/sdk/learning/context_learning_types.py +294 -0
  276. synth_ai/sdk/learning/ft_client.py +7 -0
  277. synth_ai/sdk/learning/health.py +49 -0
  278. synth_ai/sdk/learning/jobs.py +202 -0
  279. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  280. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  281. synth_ai/sdk/learning/prompt_learning_types.py +186 -0
  282. synth_ai/sdk/learning/rl/__init__.py +35 -0
  283. synth_ai/sdk/learning/rl/client.py +268 -0
  284. synth_ai/sdk/learning/rl/contracts.py +23 -0
  285. synth_ai/sdk/learning/rl/env_keys.py +166 -0
  286. synth_ai/sdk/learning/rl/secrets.py +13 -0
  287. synth_ai/sdk/learning/sft/client.py +95 -0
  288. synth_ai/sdk/learning/sft/config.py +270 -0
  289. synth_ai/sdk/learning/sft/data.py +698 -0
  290. synth_ai/sdk/learning/validators.py +52 -0
  291. synth_ai/sdk/localapi/__init__.py +40 -0
  292. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  293. synth_ai/sdk/localapi/client.py +10 -0
  294. synth_ai/sdk/localapi/contracts.py +10 -0
  295. synth_ai/sdk/localapi/helpers.py +519 -0
  296. synth_ai/sdk/localapi/rollouts.py +93 -0
  297. synth_ai/sdk/localapi/server.py +29 -0
  298. synth_ai/sdk/localapi/template.py +49 -0
  299. synth_ai/sdk/streaming/__init__.py +35 -0
  300. synth_ai/sdk/streaming/config.py +94 -0
  301. synth_ai/sdk/streaming/handlers.py +1997 -0
  302. synth_ai/sdk/streaming/streamer.py +708 -0
  303. synth_ai/sdk/streaming/types.py +112 -0
  304. synth_ai/sdk/task/__init__.py +164 -0
  305. synth_ai/sdk/task/apps/__init__.py +169 -0
  306. synth_ai/sdk/task/client.py +175 -0
  307. synth_ai/sdk/task/config.py +256 -0
  308. synth_ai/sdk/task/contracts.py +340 -0
  309. synth_ai/sdk/task/datasets.py +108 -0
  310. synth_ai/sdk/task/in_process.py +1200 -0
  311. synth_ai/sdk/task/in_process_runner.py +314 -0
  312. synth_ai/sdk/task/inference_api.py +299 -0
  313. synth_ai/sdk/task/proxy.py +287 -0
  314. synth_ai/sdk/task/rubrics/__init__.py +54 -0
  315. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  316. synth_ai/sdk/task/rubrics/strict.py +148 -0
  317. synth_ai/sdk/task/rubrics.py +219 -0
  318. synth_ai/sdk/task/server.py +640 -0
  319. synth_ai/sdk/task/trace_correlation_helpers.py +557 -0
  320. synth_ai/sdk/task/tracing_utils.py +95 -0
  321. synth_ai/sdk/task/validators.py +441 -0
  322. synth_ai/sdk/training/__init__.py +93 -0
  323. synth_ai/sdk/tunnels/__init__.py +118 -0
  324. synth_ai/sdk/tunnels/cleanup.py +83 -0
  325. synth_ai/sdk/tunnels/ports.py +120 -0
  326. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  327. synth_ai/utils/__init__.py +213 -0
  328. synth_ai-0.4.4.dist-info/METADATA +262 -0
  329. synth_ai-0.4.4.dist-info/RECORD +369 -0
  330. synth_ai-0.4.4.dist-info/top_level.txt +1 -0
  331. examples/__init__.py +0 -16
  332. examples/analyze_semantic_words.sh +0 -17
  333. examples/crafter_debug_render.py +0 -186
  334. examples/dev/qwen3_32b_qlora_4xh100.toml +0 -40
  335. examples/multi_step/configs/README_verilog_rl.md +0 -77
  336. examples/multi_step/configs/VERILOG_REWARDS.md +0 -90
  337. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +0 -183
  338. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +0 -35
  339. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +0 -36
  340. examples/multi_step/configs/crafter_rl_outcome.toml +0 -74
  341. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +0 -187
  342. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +0 -83
  343. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +0 -78
  344. examples/multi_step/configs/crafter_synth_backend.md +0 -40
  345. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +0 -31
  346. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +0 -33
  347. examples/multi_step/configs/verilog_rl_lora.toml +0 -190
  348. examples/multi_step/crafter_rl_lora.md +0 -70
  349. examples/multi_step/judges/crafter_backend_judge.py +0 -220
  350. examples/multi_step/judges/verilog_backend_judge.py +0 -234
  351. examples/multi_step/readme.md +0 -48
  352. examples/multi_step/sse_metrics_streaming_notes.md +0 -357
  353. examples/multi_step/task_app_config_notes.md +0 -494
  354. examples/multi_step/verilog_rl_lora.md +0 -218
  355. examples/qwen_coder/README.md +0 -102
  356. examples/qwen_coder/_shared.py +0 -113
  357. examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
  358. examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
  359. examples/qwen_coder/configs/coder_lora_small.toml +0 -58
  360. examples/qwen_coder/generate_dataset.py +0 -98
  361. examples/qwen_coder/infer_ft_smoke.py +0 -65
  362. examples/qwen_coder/infer_prod_proxy.py +0 -73
  363. examples/qwen_coder/infer_via_synth.py +0 -87
  364. examples/qwen_coder/scripts/infer_coder.sh +0 -19
  365. examples/qwen_coder/scripts/train_coder_30b.sh +0 -22
  366. examples/qwen_coder/sft_full_17b.py +0 -103
  367. examples/qwen_coder/sft_lora_30b.py +0 -110
  368. examples/qwen_coder/subset_jsonl.py +0 -39
  369. examples/qwen_coder/todos.md +0 -38
  370. examples/qwen_coder/validate_jsonl.py +0 -60
  371. examples/rl/README.md +0 -169
  372. examples/rl/download_dataset.py +0 -80
  373. examples/run_crafter_demo.sh +0 -10
  374. examples/sft/README.md +0 -139
  375. examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
  376. examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
  377. examples/sft/evaluate.py +0 -119
  378. examples/sft/export_dataset.py +0 -117
  379. examples/sft/generate_traces.py +0 -164
  380. examples/swe/__init__.py +0 -12
  381. examples/swe/task_app/README.md +0 -105
  382. examples/swe/task_app/__init__.py +0 -2
  383. examples/swe/task_app/grpo_swe_mini.py +0 -601
  384. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
  385. examples/swe/task_app/hosted/README.md +0 -173
  386. examples/swe/task_app/hosted/__init__.py +0 -5
  387. examples/swe/task_app/hosted/branching.py +0 -143
  388. examples/swe/task_app/hosted/environment_routes.py +0 -1289
  389. examples/swe/task_app/hosted/envs/__init__.py +0 -1
  390. examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
  391. examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
  392. examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
  393. examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
  394. examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
  395. examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
  396. examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
  397. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
  398. examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
  399. examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
  400. examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
  401. examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
  402. examples/swe/task_app/hosted/hosted_app.py +0 -204
  403. examples/swe/task_app/hosted/inference/__init__.py +0 -5
  404. examples/swe/task_app/hosted/inference/openai_client.py +0 -618
  405. examples/swe/task_app/hosted/main.py +0 -100
  406. examples/swe/task_app/hosted/policy_routes.py +0 -1079
  407. examples/swe/task_app/hosted/registry.py +0 -195
  408. examples/swe/task_app/hosted/rollout.py +0 -1911
  409. examples/swe/task_app/hosted/storage/__init__.py +0 -5
  410. examples/swe/task_app/hosted/storage/volume.py +0 -211
  411. examples/swe/task_app/hosted/test_agents.py +0 -161
  412. examples/swe/task_app/hosted/test_service.py +0 -136
  413. examples/swe/task_app/hosted/utils.py +0 -62
  414. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +0 -258
  415. examples/task_apps/TESTING.md +0 -275
  416. examples/task_apps/crafter/CREATE_SFT_DATASET.md +0 -273
  417. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +0 -152
  418. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +0 -174
  419. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +0 -268
  420. examples/task_apps/crafter/QUERY_EXAMPLES.md +0 -203
  421. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +0 -316
  422. examples/task_apps/crafter/__init__.py +0 -0
  423. examples/task_apps/crafter/eval_image_only_gpt4o.toml +0 -28
  424. examples/task_apps/crafter/eval_text_only_groq_llama.toml +0 -36
  425. examples/task_apps/crafter/filter_sft_dataset.toml +0 -16
  426. examples/task_apps/crafter/task_app/README.md +0 -42
  427. examples/task_apps/crafter/task_app/__init__.py +0 -5
  428. examples/task_apps/crafter/task_app/grpo_crafter.py +0 -973
  429. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +0 -146
  430. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +0 -173
  431. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +0 -5
  432. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +0 -143
  433. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +0 -1226
  434. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +0 -1
  435. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
  436. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
  437. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -532
  438. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -547
  439. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -123
  440. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
  441. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
  442. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +0 -204
  443. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +0 -5
  444. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +0 -704
  445. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +0 -100
  446. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +0 -1152
  447. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +0 -195
  448. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +0 -2160
  449. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +0 -5
  450. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +0 -211
  451. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +0 -161
  452. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +0 -136
  453. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +0 -218
  454. examples/task_apps/dev/pokemon_emerald/__init__.py +0 -2
  455. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +0 -811
  456. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +0 -120
  457. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +0 -160
  458. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +0 -155
  459. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +0 -69
  460. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +0 -96
  461. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +0 -1502
  462. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +0 -4
  463. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +0 -68
  464. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +0 -216
  465. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +0 -35
  466. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +0 -631
  467. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +0 -1544
  468. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +0 -1428
  469. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +0 -4848
  470. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +0 -41
  471. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +0 -298
  472. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +0 -95
  473. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +0 -204
  474. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  475. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +0 -2152
  476. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +0 -429
  477. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +0 -155
  478. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +0 -78
  479. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  480. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +0 -122
  481. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +0 -76
  482. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +0 -413
  483. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +0 -204
  484. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +0 -133
  485. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +0 -229
  486. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +0 -300
  487. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +0 -205
  488. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +0 -200
  489. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +0 -284
  490. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +0 -468
  491. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +0 -575
  492. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +0 -311
  493. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +0 -259
  494. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  495. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +0 -372
  496. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +0 -296
  497. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +0 -275
  498. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +0 -22
  499. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +0 -44
  500. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +0 -514
  501. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +0 -415
  502. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +0 -1763
  503. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +0 -33
  504. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +0 -106
  505. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +0 -334
  506. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +0 -1020
  507. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +0 -188
  508. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +0 -1481
  509. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +0 -862
  510. examples/task_apps/dev/pokemon_emerald/modal_app.py +0 -114
  511. examples/task_apps/dev/pokemon_emerald/task_app/README.md +0 -81
  512. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +0 -6
  513. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +0 -685
  514. examples/task_apps/enron/__init__.py +0 -1
  515. examples/task_apps/enron/eval_groq_qwen32.toml +0 -16
  516. examples/task_apps/enron/filter_sft.toml +0 -5
  517. examples/task_apps/enron/task_app/README.md +0 -14
  518. examples/task_apps/enron/task_app/__init__.py +0 -1
  519. examples/task_apps/enron/task_app/grpo_enron.py +0 -906
  520. examples/task_apps/enron/task_app/grpo_enron_task_app.py +0 -146
  521. examples/task_apps/enron/tests/__init__.py +0 -4
  522. examples/task_apps/enron/tests/conftest.py +0 -115
  523. examples/task_apps/enron/tests/integration/__init__.py +0 -4
  524. examples/task_apps/enron/tests/integration/test_enron_eval.py +0 -179
  525. examples/task_apps/enron/tests/integration/test_enron_rollout.py +0 -135
  526. examples/task_apps/enron/tests/unit/__init__.py +0 -4
  527. examples/task_apps/enron/tests/unit/test_enron_environment.py +0 -126
  528. examples/task_apps/math/README.md +0 -22
  529. examples/task_apps/math/__init__.py +0 -0
  530. examples/task_apps/math/math_single_step.py +0 -1000
  531. examples/task_apps/math/math_task_app.py +0 -115
  532. examples/task_apps/pokemon_battle/__init__.py +0 -2
  533. examples/task_apps/pokemon_battle/modal_app.py +0 -104
  534. examples/task_apps/pokemon_battle/task_app/README.md +0 -68
  535. examples/task_apps/pokemon_battle/task_app/__init__.py +0 -6
  536. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +0 -932
  537. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +0 -283
  538. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +0 -155
  539. examples/task_apps/pokemon_red/README.md +0 -357
  540. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +0 -415
  541. examples/task_apps/pokemon_red/__init__.py +0 -3
  542. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +0 -29
  543. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +0 -225
  544. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +0 -75
  545. examples/task_apps/pokemon_red/task_app.py +0 -799
  546. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +0 -193
  547. examples/task_apps/sokoban/README.md +0 -307
  548. examples/task_apps/sokoban/__init__.py +0 -3
  549. examples/task_apps/sokoban/eval_groq_qwen32.toml +0 -16
  550. examples/task_apps/sokoban/eval_openai_gpt5.toml +0 -16
  551. examples/task_apps/sokoban/filter_sft.toml +0 -5
  552. examples/task_apps/sokoban/task_app.py +0 -1058
  553. examples/task_apps/sokoban/tests/__init__.py +0 -4
  554. examples/task_apps/sokoban/tests/conftest.py +0 -113
  555. examples/task_apps/sokoban/tests/integration/__init__.py +0 -4
  556. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +0 -57
  557. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +0 -198
  558. examples/task_apps/sokoban/tests/unit/__init__.py +0 -4
  559. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +0 -114
  560. examples/task_apps/verilog/__init__.py +0 -1
  561. examples/task_apps/verilog/eval_groq_qwen32b.toml +0 -24
  562. examples/task_apps/verilog/filter_sft.toml +0 -5
  563. examples/task_apps/verilog/task_app/README.md +0 -12
  564. examples/task_apps/verilog/task_app/__init__.py +0 -1
  565. examples/task_apps/verilog/task_app/grpo_verilog.py +0 -1166
  566. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +0 -145
  567. examples/task_apps/verilog/tests/__init__.py +0 -4
  568. examples/task_apps/verilog/tests/conftest.py +0 -115
  569. examples/task_apps/verilog/tests/integration/__init__.py +0 -4
  570. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +0 -181
  571. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +0 -55
  572. examples/task_apps/verilog/tests/unit/__init__.py +0 -4
  573. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +0 -118
  574. examples/vlm/PROPOSAL.md +0 -53
  575. examples/vlm/README.md +0 -68
  576. examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
  577. examples/vlm/crafter_image_only_agent.py +0 -207
  578. examples/vlm/crafter_openai_vlm_agent.py +0 -277
  579. examples/vlm/filter_image_rows.py +0 -63
  580. examples/vlm/run_crafter_vlm_benchmark.py +0 -316
  581. examples/warming_up_to_rl/analyze_trace_db.py +0 -422
  582. examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
  583. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
  584. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
  585. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
  586. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
  587. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +0 -35
  588. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +0 -26
  589. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +0 -36
  590. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +0 -32
  591. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
  592. examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
  593. examples/warming_up_to_rl/export_trace_sft.py +0 -723
  594. examples/warming_up_to_rl/groq_test.py +0 -97
  595. examples/warming_up_to_rl/manage_secrets.py +0 -131
  596. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  597. examples/warming_up_to_rl/old/notes.md +0 -73
  598. examples/warming_up_to_rl/readme.md +0 -179
  599. examples/warming_up_to_rl/run_eval.py +0 -736
  600. examples/warming_up_to_rl/run_fft_and_save.py +0 -380
  601. examples/warming_up_to_rl/run_local_rollout.py +0 -239
  602. examples/warming_up_to_rl/run_local_rollout_modal.py +0 -248
  603. examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -405
  604. examples/warming_up_to_rl/run_local_rollout_traced.py +0 -477
  605. examples/warming_up_to_rl/run_rl_and_save.py +0 -124
  606. examples/warming_up_to_rl/run_rollout_remote.py +0 -156
  607. examples/workflows/__init__.py +0 -0
  608. examples/workflows/math_rl/__init__.py +0 -0
  609. examples/workflows/math_rl/configs/eval_base_qwen.toml +0 -15
  610. examples/workflows/math_rl/configs/eval_rl_qwen.toml +0 -11
  611. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +0 -35
  612. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +0 -74
  613. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +0 -35
  614. examples/workflows/math_rl/download_dataset.py +0 -80
  615. examples/workflows/math_rl/run_eval.py +0 -436
  616. examples/workflows/math_rl/run_rl_and_save.py +0 -111
  617. synth_ai/api/models/supported.py +0 -377
  618. synth_ai/api/train/__init__.py +0 -5
  619. synth_ai/api/train/builders.py +0 -351
  620. synth_ai/api/train/cli.py +0 -635
  621. synth_ai/api/train/config_finder.py +0 -228
  622. synth_ai/api/train/configs/__init__.py +0 -44
  623. synth_ai/api/train/configs/rl.py +0 -134
  624. synth_ai/api/train/configs/sft.py +0 -95
  625. synth_ai/api/train/configs/shared.py +0 -24
  626. synth_ai/api/train/env_resolver.py +0 -349
  627. synth_ai/api/train/pollers.py +0 -75
  628. synth_ai/api/train/supported_algos.py +0 -147
  629. synth_ai/api/train/task_app.py +0 -195
  630. synth_ai/api/train/utils.py +0 -225
  631. synth_ai/cli/_modal_wrapper.py +0 -29
  632. synth_ai/cli/_storage.py +0 -20
  633. synth_ai/cli/_typer_patch.py +0 -49
  634. synth_ai/cli/_validate_task_app.py +0 -11
  635. synth_ai/cli/balance.py +0 -216
  636. synth_ai/cli/calc.py +0 -84
  637. synth_ai/cli/demo.py +0 -165
  638. synth_ai/cli/legacy_root_backup.py +0 -468
  639. synth_ai/cli/man.py +0 -106
  640. synth_ai/cli/recent.py +0 -132
  641. synth_ai/cli/rl_demo.py +0 -254
  642. synth_ai/cli/status.py +0 -134
  643. synth_ai/cli/task_apps.py +0 -4523
  644. synth_ai/cli/traces.py +0 -164
  645. synth_ai/cli/tui.py +0 -57
  646. synth_ai/cli/watch.py +0 -506
  647. synth_ai/compound/cais.py +0 -0
  648. synth_ai/config/base_url.py +0 -107
  649. synth_ai/core/experiment.py +0 -13
  650. synth_ai/core/system.py +0 -15
  651. synth_ai/demo_registry.py +0 -295
  652. synth_ai/demos/core/__init__.py +0 -1
  653. synth_ai/demos/core/cli.py +0 -1718
  654. synth_ai/demos/demo_task_apps/core.py +0 -440
  655. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -184
  656. synth_ai/demos/demo_task_apps/math/config.toml +0 -74
  657. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  658. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
  659. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
  660. synth_ai/environments/__init__.py +0 -31
  661. synth_ai/environments/environment/__init__.py +0 -1
  662. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  663. synth_ai/environments/environment/artifacts/base.py +0 -52
  664. synth_ai/environments/environment/core.py +0 -67
  665. synth_ai/environments/environment/db/__init__.py +0 -1
  666. synth_ai/environments/environment/db/sqlite.py +0 -45
  667. synth_ai/environments/environment/registry.py +0 -233
  668. synth_ai/environments/environment/resources/sqlite.py +0 -45
  669. synth_ai/environments/environment/results.py +0 -1
  670. synth_ai/environments/environment/rewards/__init__.py +0 -1
  671. synth_ai/environments/environment/rewards/core.py +0 -29
  672. synth_ai/environments/environment/shared_engine.py +0 -26
  673. synth_ai/environments/environment/tools/__init__.py +0 -200
  674. synth_ai/environments/examples/__init__.py +0 -1
  675. synth_ai/environments/examples/bandit/__init__.py +0 -33
  676. synth_ai/environments/examples/bandit/engine.py +0 -302
  677. synth_ai/environments/examples/bandit/environment.py +0 -194
  678. synth_ai/environments/examples/bandit/taskset.py +0 -200
  679. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  680. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  681. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  682. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  683. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  684. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  685. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  686. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  687. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  688. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  689. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  690. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  691. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  692. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  693. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  694. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  695. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  696. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  697. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  698. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  699. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  700. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  701. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  702. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  703. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  704. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  705. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  706. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  707. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  708. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  709. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  710. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  711. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  712. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  713. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  714. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  715. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  716. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  717. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  718. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  719. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  720. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  721. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  722. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  723. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  724. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  725. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  726. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  727. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  728. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  729. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  730. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  731. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  732. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  733. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  734. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  735. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  736. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  737. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  738. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  739. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  740. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  741. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  742. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  743. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  744. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  745. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  746. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  747. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  748. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  749. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  750. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  751. synth_ai/environments/examples/crafter_classic/environment.py +0 -495
  752. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  753. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  754. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  755. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  756. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  757. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  758. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  759. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  760. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  761. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  762. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  763. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  764. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  765. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  766. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  767. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  768. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  769. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  770. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  771. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  772. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  773. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  774. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  775. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  776. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  777. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  778. synth_ai/environments/examples/enron/engine.py +0 -300
  779. synth_ai/environments/examples/enron/environment.py +0 -234
  780. synth_ai/environments/examples/enron/taskset.py +0 -112
  781. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  782. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  783. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  784. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  785. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  786. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  787. synth_ai/environments/examples/minigrid/engine.py +0 -589
  788. synth_ai/environments/examples/minigrid/environment.py +0 -274
  789. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  790. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  791. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  792. synth_ai/environments/examples/nethack/__init__.py +0 -7
  793. synth_ai/environments/examples/nethack/achievements.py +0 -337
  794. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  795. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  796. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  797. synth_ai/environments/examples/nethack/engine.py +0 -739
  798. synth_ai/environments/examples/nethack/environment.py +0 -256
  799. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  800. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  801. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  802. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  803. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  804. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  805. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  806. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  807. synth_ai/environments/examples/nethack/taskset.py +0 -323
  808. synth_ai/environments/examples/red/__init__.py +0 -7
  809. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  810. synth_ai/environments/examples/red/config_logging.py +0 -110
  811. synth_ai/environments/examples/red/engine.py +0 -721
  812. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  813. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -35
  814. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  815. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  816. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  817. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  818. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  819. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  820. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  821. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  822. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  823. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +0 -477
  824. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  825. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  826. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  827. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  828. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  829. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -172
  830. synth_ai/environments/examples/red/environment.py +0 -298
  831. synth_ai/environments/examples/red/taskset.py +0 -79
  832. synth_ai/environments/examples/red/units/__init__.py +0 -1
  833. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  834. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  835. synth_ai/environments/examples/sokoban/engine.py +0 -678
  836. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  837. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  838. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  839. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  840. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  841. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  842. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  843. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  844. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  845. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  846. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  847. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  848. synth_ai/environments/examples/sokoban/environment.py +0 -229
  849. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  850. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  851. synth_ai/environments/examples/sokoban/taskset.py +0 -544
  852. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  853. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  854. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  855. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  856. synth_ai/environments/examples/verilog/__init__.py +0 -10
  857. synth_ai/environments/examples/verilog/engine.py +0 -421
  858. synth_ai/environments/examples/verilog/environment.py +0 -350
  859. synth_ai/environments/examples/verilog/taskset.py +0 -420
  860. synth_ai/environments/examples/wordle/__init__.py +0 -29
  861. synth_ai/environments/examples/wordle/engine.py +0 -398
  862. synth_ai/environments/examples/wordle/environment.py +0 -159
  863. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  864. synth_ai/environments/examples/wordle/taskset.py +0 -230
  865. synth_ai/environments/reproducibility/core.py +0 -42
  866. synth_ai/environments/reproducibility/helpers.py +0 -0
  867. synth_ai/environments/reproducibility/tree.py +0 -363
  868. synth_ai/environments/service/app.py +0 -97
  869. synth_ai/environments/service/core_routes.py +0 -1021
  870. synth_ai/environments/service/external_registry.py +0 -56
  871. synth_ai/environments/service/registry.py +0 -9
  872. synth_ai/environments/stateful/__init__.py +0 -1
  873. synth_ai/environments/stateful/core.py +0 -163
  874. synth_ai/environments/stateful/engine.py +0 -21
  875. synth_ai/environments/stateful/state.py +0 -7
  876. synth_ai/environments/tasks/api.py +0 -19
  877. synth_ai/environments/tasks/core.py +0 -81
  878. synth_ai/environments/tasks/filters.py +0 -40
  879. synth_ai/environments/tasks/utils.py +0 -90
  880. synth_ai/environments/v0_observability/history.py +0 -3
  881. synth_ai/environments/v0_observability/log.py +0 -2
  882. synth_ai/evals/__init__.py +0 -15
  883. synth_ai/evals/base.py +0 -13
  884. synth_ai/evals/client.py +0 -82
  885. synth_ai/evals/types.py +0 -42
  886. synth_ai/handshake.py +0 -109
  887. synth_ai/http.py +0 -26
  888. synth_ai/http_client.py +0 -136
  889. synth_ai/inference/__init__.py +0 -5
  890. synth_ai/inference/client.py +0 -34
  891. synth_ai/jobs/client.py +0 -295
  892. synth_ai/judge_schemas.py +0 -127
  893. synth_ai/learning/__init__.py +0 -59
  894. synth_ai/learning/client.py +0 -241
  895. synth_ai/learning/ft_client.py +0 -7
  896. synth_ai/learning/health.py +0 -49
  897. synth_ai/learning/jobs.py +0 -201
  898. synth_ai/learning/rl/__init__.py +0 -39
  899. synth_ai/learning/rl/client.py +0 -267
  900. synth_ai/learning/rl/contracts.py +0 -27
  901. synth_ai/learning/rl/env_keys.py +0 -166
  902. synth_ai/learning/rl/secrets.py +0 -13
  903. synth_ai/learning/sft/client.py +0 -68
  904. synth_ai/learning/sft/config.py +0 -270
  905. synth_ai/learning/sft/data.py +0 -295
  906. synth_ai/learning/validators.py +0 -49
  907. synth_ai/lm/__init__.py +0 -25
  908. synth_ai/task/__init__.py +0 -121
  909. synth_ai/task/apps/__init__.py +0 -129
  910. synth_ai/task/client.py +0 -167
  911. synth_ai/task/config.py +0 -257
  912. synth_ai/task/contracts.py +0 -236
  913. synth_ai/task/datasets.py +0 -108
  914. synth_ai/task/proxy.py +0 -251
  915. synth_ai/task/rubrics/__init__.py +0 -56
  916. synth_ai/task/rubrics/loaders.py +0 -152
  917. synth_ai/task/rubrics/strict.py +0 -149
  918. synth_ai/task/server.py +0 -432
  919. synth_ai/task/trace_correlation_helpers.py +0 -315
  920. synth_ai/task/tracing_utils.py +0 -84
  921. synth_ai/task/validators.py +0 -418
  922. synth_ai/tracing_v3/__init__.py +0 -97
  923. synth_ai/tracing_v3/abstractions.py +0 -302
  924. synth_ai/tracing_v3/config.py +0 -84
  925. synth_ai/tracing_v3/db_config.py +0 -194
  926. synth_ai/tracing_v3/decorators.py +0 -398
  927. synth_ai/tracing_v3/llm_call_record_helpers.py +0 -391
  928. synth_ai/tracing_v3/migration_helper.py +0 -120
  929. synth_ai/tracing_v3/session_tracer.py +0 -540
  930. synth_ai/tracing_v3/storage/base.py +0 -210
  931. synth_ai/tracing_v3/storage/config.py +0 -75
  932. synth_ai/tracing_v3/storage/factory.py +0 -39
  933. synth_ai/tracing_v3/trace_utils.py +0 -317
  934. synth_ai/tracing_v3/turso/daemon.py +0 -151
  935. synth_ai/tracing_v3/turso/models.py +0 -469
  936. synth_ai/tracing_v3/turso/native_manager.py +0 -1209
  937. synth_ai/tracing_v3/utils.py +0 -108
  938. synth_ai/tui/__init__.py +0 -5
  939. synth_ai/tui/__main__.py +0 -13
  940. synth_ai/tui/cli/__init__.py +0 -1
  941. synth_ai/tui/cli/query_experiments.py +0 -164
  942. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  943. synth_ai/tui/dashboard.py +0 -906
  944. synth_ai/v0/api/__init__.py +0 -8
  945. synth_ai/v0/api/models/__init__.py +0 -8
  946. synth_ai/v0/api/models/supported.py +0 -8
  947. synth_ai/v0/config/__init__.py +0 -15
  948. synth_ai/v0/config/base_url.py +0 -12
  949. synth_ai/v0/lm/__init__.py +0 -51
  950. synth_ai/v0/lm/caching/__init__.py +0 -0
  951. synth_ai/v0/lm/caching/constants.py +0 -6
  952. synth_ai/v0/lm/caching/dbs.py +0 -0
  953. synth_ai/v0/lm/caching/ephemeral.py +0 -100
  954. synth_ai/v0/lm/caching/handler.py +0 -137
  955. synth_ai/v0/lm/caching/initialize.py +0 -11
  956. synth_ai/v0/lm/caching/persistent.py +0 -114
  957. synth_ai/v0/lm/config.py +0 -115
  958. synth_ai/v0/lm/constants.py +0 -32
  959. synth_ai/v0/lm/core/__init__.py +0 -8
  960. synth_ai/v0/lm/core/all.py +0 -73
  961. synth_ai/v0/lm/core/exceptions.py +0 -5
  962. synth_ai/v0/lm/core/main.py +0 -331
  963. synth_ai/v0/lm/core/main_v3.py +0 -594
  964. synth_ai/v0/lm/core/synth_models.py +0 -35
  965. synth_ai/v0/lm/core/vendor_clients.py +0 -190
  966. synth_ai/v0/lm/cost/__init__.py +0 -0
  967. synth_ai/v0/lm/cost/monitor.py +0 -1
  968. synth_ai/v0/lm/cost/statefulness.py +0 -1
  969. synth_ai/v0/lm/injection.py +0 -80
  970. synth_ai/v0/lm/overrides.py +0 -206
  971. synth_ai/v0/lm/provider_support/__init__.py +0 -8
  972. synth_ai/v0/lm/provider_support/anthropic.py +0 -972
  973. synth_ai/v0/lm/provider_support/openai.py +0 -1139
  974. synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
  975. synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
  976. synth_ai/v0/lm/structured_outputs/handler.py +0 -440
  977. synth_ai/v0/lm/structured_outputs/inject.py +0 -297
  978. synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
  979. synth_ai/v0/lm/tools/__init__.py +0 -3
  980. synth_ai/v0/lm/tools/base.py +0 -172
  981. synth_ai/v0/lm/unified_interface.py +0 -202
  982. synth_ai/v0/lm/vendors/__init__.py +0 -0
  983. synth_ai/v0/lm/vendors/base.py +0 -81
  984. synth_ai/v0/lm/vendors/core/__init__.py +0 -0
  985. synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
  986. synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
  987. synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
  988. synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
  989. synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
  990. synth_ai/v0/lm/vendors/local/__init__.py +0 -0
  991. synth_ai/v0/lm/vendors/local/ollama.py +0 -0
  992. synth_ai/v0/lm/vendors/openai_standard.py +0 -782
  993. synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
  994. synth_ai/v0/lm/vendors/retries.py +0 -22
  995. synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
  996. synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
  997. synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
  998. synth_ai/v0/lm/vendors/supported/grok.py +0 -75
  999. synth_ai/v0/lm/vendors/supported/groq.py +0 -16
  1000. synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
  1001. synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
  1002. synth_ai/v0/lm/vendors/supported/together.py +0 -11
  1003. synth_ai/v0/lm/vendors/synth_client.py +0 -835
  1004. synth_ai/v0/lm/warmup.py +0 -186
  1005. synth_ai/v0/tracing/__init__.py +0 -0
  1006. synth_ai/v0/tracing/abstractions.py +0 -224
  1007. synth_ai/v0/tracing/base_client.py +0 -91
  1008. synth_ai/v0/tracing/client_manager.py +0 -131
  1009. synth_ai/v0/tracing/config.py +0 -142
  1010. synth_ai/v0/tracing/context.py +0 -146
  1011. synth_ai/v0/tracing/decorators.py +0 -682
  1012. synth_ai/v0/tracing/events/__init__.py +0 -0
  1013. synth_ai/v0/tracing/events/manage.py +0 -147
  1014. synth_ai/v0/tracing/events/scope.py +0 -86
  1015. synth_ai/v0/tracing/events/store.py +0 -228
  1016. synth_ai/v0/tracing/immediate_client.py +0 -151
  1017. synth_ai/v0/tracing/local.py +0 -18
  1018. synth_ai/v0/tracing/log_client_base.py +0 -73
  1019. synth_ai/v0/tracing/retry_queue.py +0 -186
  1020. synth_ai/v0/tracing/trackers.py +0 -515
  1021. synth_ai/v0/tracing/upload.py +0 -409
  1022. synth_ai/v0/tracing/utils.py +0 -9
  1023. synth_ai/v0/tracing_v1/__init__.py +0 -16
  1024. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  1025. synth_ai/v0/tracing_v1/base_client.py +0 -91
  1026. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  1027. synth_ai/v0/tracing_v1/config.py +0 -142
  1028. synth_ai/v0/tracing_v1/context.py +0 -146
  1029. synth_ai/v0/tracing_v1/decorators.py +0 -703
  1030. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  1031. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  1032. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  1033. synth_ai/v0/tracing_v1/events/store.py +0 -228
  1034. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  1035. synth_ai/v0/tracing_v1/local.py +0 -18
  1036. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  1037. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  1038. synth_ai/v0/tracing_v1/trackers.py +0 -515
  1039. synth_ai/v0/tracing_v1/upload.py +0 -527
  1040. synth_ai/v0/tracing_v1/utils.py +0 -9
  1041. synth_ai/v0/tracing_v3/__init__.py +0 -10
  1042. synth_ai/v0/tracing_v3/abstractions.py +0 -3
  1043. synth_ai/v0/tracing_v3/decorators.py +0 -3
  1044. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
  1045. synth_ai/v0/tracing_v3/session_tracer.py +0 -3
  1046. synth_ai-0.2.14.dist-info/METADATA +0 -139
  1047. synth_ai-0.2.14.dist-info/RECORD +0 -762
  1048. synth_ai-0.2.14.dist-info/top_level.txt +0 -2
  1049. /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
  1050. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
  1051. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
  1052. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
  1053. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  1054. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
  1055. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
  1056. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
  1057. {examples/task_apps → synth_ai/core/apps}/__init__.py +0 -0
  1058. /synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +0 -0
  1059. /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
  1060. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  1061. /synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +0 -0
  1062. /synth_ai/{tracing_v3 → core/tracing_v3}/serialization.py +0 -0
  1063. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  1064. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  1065. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  1066. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +0 -0
  1067. /synth_ai/{tracing_v3 → core/tracing_v3}/turso/__init__.py +0 -0
  1068. /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
  1069. /synth_ai/{learning → sdk/learning}/config.py +0 -0
  1070. /synth_ai/{learning → sdk/learning}/constants.py +0 -0
  1071. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  1072. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  1073. /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
  1074. /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
  1075. /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
  1076. /synth_ai/{learning → sdk/learning}/sse.py +0 -0
  1077. /synth_ai/{task → sdk/task}/auth.py +0 -0
  1078. /synth_ai/{task → sdk/task}/errors.py +0 -0
  1079. /synth_ai/{task → sdk/task}/health.py +0 -0
  1080. /synth_ai/{task → sdk/task}/json.py +0 -0
  1081. /synth_ai/{task → sdk/task}/rubrics/models.py +0 -0
  1082. /synth_ai/{task → sdk/task}/rubrics/scoring.py +0 -0
  1083. /synth_ai/{task → sdk/task}/vendors.py +0 -0
  1084. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  1085. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  1086. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1800 @@
1
+ """Prompt Learning configuration models for MIPRO and GEPA.
2
+
3
+ This module defines the configuration schema for prompt optimization jobs using:
4
+ - **GEPA**: Genetic Evolution of Prompt Architectures - evolutionary optimization
5
+ - **MIPRO**: Meta-learning with bootstrap phase and TPE optimization
6
+
7
+ Example TOML configuration (GEPA):
8
+ ```toml
9
+ [prompt_learning]
10
+ algorithm = "gepa"
11
+ task_app_url = "https://your-tunnel.trycloudflare.com"
12
+ task_app_api_key = "$ENVIRONMENT_API_KEY"
13
+
14
+ [prompt_learning.policy]
15
+ model = "gpt-4o-mini"
16
+ provider = "openai"
17
+
18
+ [prompt_learning.gepa]
19
+ env_name = "banking77"
20
+ proposer_effort = "LOW"
21
+
22
+ [prompt_learning.gepa.rollout]
23
+ budget = 100
24
+ max_concurrent = 20
25
+
26
+ [prompt_learning.gepa.evaluation]
27
+ seeds = {start = 0, end = 50}
28
+
29
+ [prompt_learning.gepa.population]
30
+ num_generations = 10
31
+ children_per_generation = 5
32
+ ```
33
+
34
+ See Also:
35
+ - Training reference: /training/gepa, /training/mipro
36
+ - Quickstart: /quickstart/prompt-optimization-gepa
37
+ """
38
+ from __future__ import annotations
39
+
40
+ from collections.abc import Mapping, Sequence
41
+ from enum import Enum
42
+ from pathlib import Path
43
+ from typing import Any, Dict, Literal, Optional
44
+
45
+ from pydantic import Field, field_validator, model_validator
46
+
47
+ from ..utils import load_toml
48
+ from .shared import ExtraModel
49
+
50
+
51
class SeedRange(ExtraModel):
    """Half-open integer range used as shorthand for a seed list in TOML.

    A config can say ``seeds = {start = 0, end = 50}`` rather than spelling out
    ``seeds = [0, 1, 2, ..., 49]``. ``end`` is exclusive, as with ``range``.

    Examples:
        seeds = {start = 0, end = 10}             # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        seeds = {start = 0, end = 100, step = 2}  # [0, 2, 4, ..., 98]
    """

    start: int
    end: int
    step: int = 1

    def to_list(self) -> list[int]:
        """Expand the range into an explicit list of seeds."""
        return [*range(self.start, self.end, self.step)]
67
+
68
+
69
+ def _parse_seeds(value: Any) -> list[int] | None:
70
+ """Parse seed values that can be either a list or a range dict.
71
+
72
+ Args:
73
+ value: Either a list of ints or a dict with 'start', 'end', and optional 'step'.
74
+
75
+ Returns:
76
+ List of integers, or None if value is None.
77
+
78
+ Examples:
79
+ _parse_seeds([0, 1, 2, 3]) # [0, 1, 2, 3]
80
+ _parse_seeds({"start": 0, "end": 4}) # [0, 1, 2, 3]
81
+ _parse_seeds({"start": 0, "end": 10, "step": 2}) # [0, 2, 4, 6, 8]
82
+ """
83
+ if value is None:
84
+ return None
85
+ if isinstance(value, dict) and "start" in value and "end" in value:
86
+ seed_range = SeedRange.model_validate(value)
87
+ return seed_range.to_list()
88
+ if isinstance(value, list):
89
+ return list(value)
90
+ raise ValueError(f"Seeds must be a list or a range dict with 'start' and 'end' keys, got {type(value).__name__}")
91
+
92
+
93
class InferenceMode(str, Enum):
    """Allowed values for a policy's ``inference_mode`` setting."""

    synth_hosted = "synth_hosted"
95
+
96
+
97
class ProviderName(str, Enum):
    """Allowed values for a policy's ``provider`` setting."""

    openai = "openai"
    groq = "groq"
    google = "google"
101
+
102
+
103
class PromptLearningPolicyConfig(ExtraModel):
    """Policy settings for prompt learning: model, provider and sampling options."""

    model: str
    provider: ProviderName
    # Optional - trainer provides it in rollout requests (ignored if present)
    inference_url: str | None = None
    inference_mode: InferenceMode = InferenceMode.synth_hosted
    temperature: float = 0.0
    max_completion_tokens: int = 512
    policy_name: str | None = None

    @field_validator("inference_url", mode="before")
    @classmethod
    def _strip_inference_url(cls, v: str | None) -> str | None:
        """Trim ``inference_url`` and require an http(s) scheme.

        ``None`` and non-string inputs are returned unchanged. A string is
        stripped of surrounding whitespace and must then start with
        ``http://`` or ``https://``.

        Raises:
            ValueError: If the stripped string is empty or has another scheme.
        """
        if not isinstance(v, str):
            return v
        url = v.strip()
        # An empty string fails the prefix test as well, so blank values are
        # rejected with the same message.
        if not url.startswith(("http://", "https://")):
            raise ValueError("inference_url must start with http:// or https://")
        return url
128
+
129
+
130
class MessagePatternConfig(ExtraModel):
    """One message pattern: a role, its pattern text, and an ordering index."""

    role: str
    pattern: str
    order: int = 0
135
+
136
+
137
class PromptPatternConfig(ExtraModel):
    """Initial prompt pattern configuration.

    Attributes:
        id: Optional identifier for the pattern.
        name: Optional name for the pattern.
        messages: Message patterns that make up the prompt; empty by default.
        wildcards: String-to-string wildcard mapping; empty by default.
    """

    id: str | None = None
    name: str | None = None
    # Use a factory, like ``wildcards`` and the other list fields in this
    # module, instead of a shared mutable ``[]`` literal as the default.
    messages: list[MessagePatternConfig] = Field(default_factory=list)
    wildcards: dict[str, str] = Field(default_factory=dict)
143
+
144
+
145
class MIPROMetaConfig(ExtraModel):
    """DEPRECATED meta-model settings, retained for backwards compatibility only.

    The meta-model is now controlled by ``proposer_effort`` (LOW_CONTEXT, LOW,
    MEDIUM, HIGH) and ``proposer_output_tokens`` (RAPID, FAST, SLOW); use
    those instead of this class.
    """

    model: str | None = None
    provider: str | None = None
    inference_url: str | None = None
    temperature: float | None = None
    max_tokens: int | None = None
156
+
157
+
158
class MIPROStageConfig(ExtraModel):
    """Settings for one MIPRO stage within a module.

    Every stage carries its own policy configuration: ``policy`` has no
    default and must provide 'model' and 'provider'.
    """

    stage_id: str
    baseline_instruction: str
    baseline_messages: list[dict[str, str]] = Field(default_factory=list)
    max_instruction_slots: int | None = None
    max_demo_slots: int | None = None
    # ``...`` marks the field as required.
    policy: PromptLearningPolicyConfig | dict[str, Any] = Field(
        ...,
        description="Required per-stage policy configuration. Must include 'model' and 'provider' fields.",
    )
173
+
174
+
175
class MIPROModuleConfig(ExtraModel):
    """One module of a MIPRO pipeline: an id plus its list of stages."""

    module_id: str
    stages: list[MIPROStageConfig] = Field(default_factory=list)
179
+
180
+
181
class MIPROSeedConfig(ExtraModel):
    """Seed pools used across bootstrap, optimization, and evaluation.

    Each pool may be written as an explicit list or as a range dict
    (``{start = ..., end = ...}``); both forms are normalized to a list.
    """

    bootstrap: list[int] = Field(default_factory=list)
    online: list[int] = Field(default_factory=list)
    test: list[int] = Field(default_factory=list)
    reference: list[int] = Field(default_factory=list)

    @field_validator("bootstrap", "online", "test", "reference", mode="before")
    @classmethod
    def _parse_seed_pools(cls, v: Any) -> list[int]:
        """Expand a list-or-range seed spec; ``None`` and empty results become ``[]``."""
        parsed = _parse_seeds(v)
        return parsed if parsed else []
193
+
194
+
195
class PromptLearningVerifierConfig(ExtraModel):
    """Verifier settings shared by GEPA and MIPRO.

    Controls LLM-based evaluation of agent trajectories during prompt
    optimization, using either standard rubrics or registered Verifier Graphs.

    Attributes:
        enabled: Turn verifier-based scoring on or off.
        reward_source: Where the final optimization reward comes from.
            - "task_app": environment rewards from the task app only (default).
            - "verifier": verifier quality scores only.
            - "fused": weighted combination of environment and verifier rewards.
        backend_base: Base URL of the verifier service (e.g. "https://api.usesynth.ai").
        backend_api_key_env: Name of the env var holding the Synth API key
            (default: "SYNTH_API_KEY").
        backend_provider: Provider of the verifier model (e.g. "openai", "groq").
        backend_model: Model that executes the verifier rubric or graph
            (e.g. "gpt-4o-mini").
        verifier_graph_id: ID or name of a Verifier Graph registered on the backend.
        backend_event_enabled: Enable fine-grained event-level scoring.
        backend_outcome_enabled: Enable episode-level outcome scoring.
        weight_env: Weight of environment rewards in "fused" mode (default: 1.0).
        weight_event: Weight of verifier event rewards in "fused" mode (default: 0.0).
        weight_outcome: Weight of verifier outcome rewards in "fused" mode (default: 0.0).

    The remaining fields (``backend_options``, ``concurrency``, ``timeout``,
    ``spec_path``, ``spec_max_tokens``, ``spec_context``) are declared below
    without further description here.
    """

    enabled: bool = False
    reward_source: Literal["task_app", "verifier", "fused"] = "task_app"
    backend_base: str = ""
    backend_api_key_env: str = "SYNTH_API_KEY"
    backend_provider: str = ""
    backend_model: str = ""
    verifier_graph_id: str = ""
    backend_event_enabled: bool = True
    backend_outcome_enabled: bool = True
    backend_options: dict[str, Any] = Field(default_factory=dict)
    concurrency: int = 8
    timeout: float = 60.0
    weight_env: float = 1.0
    weight_event: float = 0.0
    weight_outcome: float = 0.0
    spec_path: str | None = None
    spec_max_tokens: int = 5000
    spec_context: str | None = None
236
+
237
+
238
class ProxyModelsConfig(ExtraModel):
    """Settings for proxy-model usage during policy evaluation.

    Most evaluations run on a cheap low-fidelity (LO) model, while a
    high-fidelity (HI) model is used for verification. Switching between them
    is dynamic and driven by calibration and correlation.

    The proxy system first evaluates examples with both HI and LO to fit a
    calibration regression. Once calibrated (R² >= r2_thresh) it uses only the
    LO model for most evaluations, and falls back to HI when reliability drops.

    Attributes:
        hi_provider: Provider of the high-fidelity model (e.g., "openai",
            "groq", "google"). This is the expensive model used for
            ground-truth evaluations.
        hi_model: High-fidelity model name (e.g., "gpt-4o", "gpt-oss-120b").
            Must be a supported model for the provider.
        lo_provider: Provider of the low-fidelity proxy model (e.g., "groq",
            "openai"). This is the cheaper model used for most evaluations
            after calibration.
        lo_model: Low-fidelity proxy model name (e.g., "gpt-oss-20b",
            "gpt-4o-mini"). Must be a supported model for the provider and
            should be cheaper than hi_model.
        n_min_hi: Minimum number of HI evaluations before proxy substitution
            is allowed. Default: 5. Ensures enough calibration data.
        r2_thresh: R² correlation threshold (0.0-1.0) required to enable
            proxying. Default: 0.5. Higher values demand stronger correlation.
        r2_stop: R² threshold (0.0-1.0) below which proxying is disabled.
            Default: 0.2. Below this, evaluation reverts to HI-only.
        sigma_max: Maximum residual variance (sigma²) allowed for proxy
            calibration. Default: 1e6. Higher values tolerate more variance.
        sigma_stop: Residual variance above which proxying stops.
            Default: 1e9. Above this, evaluation reverts to HI-only.
        verify_every: Re-verify calibration every N LO-only evaluations.
            Default: 0 (no periodic verification). Set to >0 to periodically
            run BOTH models and check the calibration is still valid.
        proxy_patience_usd: Stop proxying once cumulative net gain drops below
            this amount (USD). Default: -100.0. Negative values allow some
            loss before stopping; 0.0 stops as soon as the proxy becomes
            unprofitable.
    """

    hi_provider: str
    hi_model: str
    lo_provider: str
    lo_model: str
    n_min_hi: int = 5
    r2_thresh: float = 0.5
    r2_stop: float = 0.2
    sigma_max: float = 1e6
    sigma_stop: float = 1e9
    verify_every: int = 0
    proxy_patience_usd: float = -100.0
285
+
286
+
287
class AdaptiveCurriculumLevel(str, Enum):
    """Named preset levels for the adaptive pooling curriculum."""

    NONE = "NONE"
    LOW = "LOW"
    MODERATE = "MODERATE"
    HIGH = "HIGH"
293
+
294
+
295
class AdaptivePoolConfig(ExtraModel):
    """Settings for adaptive pooling (resizing the evaluation pool as optimization runs).

    The goal is lower evaluation cost: concentrate on the most informative
    examples while keeping optimization quality through informativeness-based
    selection.

    The pool starts large and is gradually reduced to a minimum size, choosing
    examples by informativeness (variance across prompts). Examples are split
    into anchors (always evaluated) and an exploration pool (selected by
    informativeness).

    Attributes:
        level: Preset level (NONE, LOW, MODERATE, HIGH). Default: LOW.
            NONE disables adaptive pooling. Higher levels use smaller pools
            and more aggressive annealing for greater cost savings.
        anchor_size: Number of anchor examples that are always evaluated.
            Default: 30. Anchors give a stable baseline for optimization.
            Must be <= pool_min_size.
        pool_init_size: Pool size at the start of optimization.
            Default: None (uses all available examples). Set it to limit the
            initial pool. Must be >= pool_min_size if both are set.
        pool_min_size: Target minimum pool size once annealing completes.
            Default: None (uses anchor_size). The pool anneals linearly from
            pool_init_size to pool_min_size between warmup_iters and
            anneal_stop_iter. Must be >= anchor_size.
        warmup_iters: Iterations to run before pool annealing starts.
            Default: 5. During warmup the pool stays at pool_init_size so
            informativeness data can be gathered.
        anneal_stop_iter: Iteration at which the pool reaches pool_min_size.
            Default: 20. Pool size decreases linearly from warmup_iters to
            this iteration. Must be > warmup_iters.
        pool_update_period: Refresh informativeness scores every N
            generations. Default: 3. Lower values adapt faster but need more
            computation.
        min_evals_per_example: Minimum evaluations an example needs before
            its informativeness is computed. Default: 3. Examples with fewer
            evals get info=0.0.
        k_info_prompts: Number of top-performing prompts used to compute
            informativeness. Default: 10. Only scores from these prompts feed
            the variance-based informativeness.
        info_buffer_factor: Buffer factor (0.0-1.0) for preserving
            informativeness during pool reduction. Default: 0.9. Higher values
            preserve more informativeness but allow less reduction; lower
            values allow more aggressive reduction but may lose
            informativeness.
        info_epsilon: Small epsilon added to avoid division by zero in
            informativeness calculations. Default: 1e-6.
        anchor_selection_method: How anchor examples are chosen.
            Default: "clustering". Options:
            - "random": Random selection
            - "clustering": Select diverse examples via clustering
        exploration_strategy: How exploration pool examples are chosen.
            Default: "diversity". Options:
            - "random": Random selection
            - "diversity": Select diverse examples based on informativeness
        heatup_reserve_pool: Optional list of seed IDs reserved for the
            heat-up phase. Default: None. If provided, these seeds are added
            back to the pool during heat-up phases to prevent overfitting to a
            small pool.
        heatup_trigger: When the heat-up phase (adding seeds back to the pool)
            is triggered. Default: "after_min_size". Options:
            - "after_min_size": Trigger after pool reaches min_size
            - "immediate": Trigger immediately
            - "every_N_trials_after_min": Trigger periodically after min_size
        heatup_size: Number of seeds added during the heat-up phase.
            Default: 20. Seeds are selected from heatup_reserve_pool or the
            reserve pool.
        heatup_cooldown_trials: Number of trials to wait after heat-up before
            cooling down (removing the heat-up seeds). Default: 50.
        heatup_schedule: Whether heat-up repeats or happens once.
            Default: "repeat". Options:
            - "once": Heat-up happens once
            - "repeat": Heat-up repeats after cooldown
    """

    level: AdaptiveCurriculumLevel = AdaptiveCurriculumLevel.LOW
    anchor_size: int = 30
    pool_init_size: int | None = None
    pool_min_size: int | None = None
    warmup_iters: int = 5
    anneal_stop_iter: int = 20
    pool_update_period: int = 3
    min_evals_per_example: int = 3
    k_info_prompts: int = 10
    info_buffer_factor: float = 0.9
    info_epsilon: float = 1e-6
    anchor_selection_method: Literal["random", "clustering"] = "clustering"
    exploration_strategy: Literal["random", "diversity"] = "diversity"
    heatup_reserve_pool: list[int] | None = None
    heatup_trigger: Literal["after_min_size", "immediate", "every_N_trials_after_min"] = "after_min_size"
    heatup_size: int = 20
    heatup_cooldown_trials: int = 50
    heatup_schedule: Literal["repeat", "once"] = "repeat"

    @property
    def enabled(self) -> bool:
        """True unless ``level`` is NONE, which switches adaptive pooling off."""
        disabled = self.level == AdaptiveCurriculumLevel.NONE
        return not disabled
388
+
389
+
390
class AdaptiveBatchLevel(str, Enum):
    """Named preset levels for the adaptive batch curriculum (GEPA only)."""

    NONE = "NONE"
    LOW = "LOW"
    MODERATE = "MODERATE"
    HIGH = "HIGH"
396
+
397
+
398
class GEPAAdaptiveBatchConfig(ExtraModel):
    """Settings for adaptive batch evaluation (GEPA only).

    Lowers evaluation cost by using smaller minibatches and by subsampling
    the validation set.
    """

    level: AdaptiveBatchLevel = AdaptiveBatchLevel.MODERATE
    # Train examples per reflection step.
    reflection_minibatch_size: int = 3
    # Threshold for accepting proposals.
    min_local_improvement: float = 0.0
    # Validation mode.
    val_evaluation_mode: Literal["full", "subsample"] = "subsample"
    # Subsample size when mode="subsample".
    val_subsample_size: int = 64
    candidate_selection_strategy: Literal["coverage", "random"] = "coverage"

    @property
    def enabled(self) -> bool:
        """True unless ``level`` is NONE, which switches adaptive batching off."""
        disabled = self.level == AdaptiveBatchLevel.NONE
        return not disabled
414
+
415
+
416
# Default presets for adaptive pool (mirrors monorepo structure).
# The heat-up settings are identical at every level, so they are declared once
# and spread into each preset as its trailing keys.
_ADAPTIVE_POOL_HEATUP_DEFAULTS: dict[str, Any] = {
    "heatup_reserve_pool": None,
    "heatup_trigger": "after_min_size",
    "heatup_size": 20,
    "heatup_cooldown_trials": 50,
    "heatup_schedule": "repeat",
}

_ADAPTIVE_POOL_DEFAULTS: dict[AdaptiveCurriculumLevel, dict[str, Any]] = {
    # NONE: no anchors, and iteration thresholds set far beyond any real run.
    AdaptiveCurriculumLevel.NONE: {
        "anchor_size": 0,
        "pool_init_size": None,
        "pool_min_size": None,
        "warmup_iters": 999_999,
        "anneal_stop_iter": 999_999,
        "pool_update_period": 999_999,
        "min_evals_per_example": 1,
        "k_info_prompts": 0,
        "info_buffer_factor": 1.0,
        "info_epsilon": 1e-6,
        "anchor_selection_method": "random",
        "exploration_strategy": "random",
        **_ADAPTIVE_POOL_HEATUP_DEFAULTS,
    },
    AdaptiveCurriculumLevel.LOW: {
        "anchor_size": 50,
        "pool_init_size": 150,
        "pool_min_size": 100,
        "warmup_iters": 10,
        "anneal_stop_iter": 30,
        "pool_update_period": 2,
        "min_evals_per_example": 5,
        "k_info_prompts": 15,
        "info_buffer_factor": 0.95,
        "info_epsilon": 1e-6,
        "anchor_selection_method": "clustering",
        "exploration_strategy": "diversity",
        **_ADAPTIVE_POOL_HEATUP_DEFAULTS,
    },
    AdaptiveCurriculumLevel.MODERATE: {
        "anchor_size": 30,
        "pool_init_size": 100,
        "pool_min_size": 50,
        "warmup_iters": 5,
        "anneal_stop_iter": 20,
        "pool_update_period": 3,
        "min_evals_per_example": 3,
        "k_info_prompts": 10,
        "info_buffer_factor": 0.9,
        "info_epsilon": 1e-6,
        "anchor_selection_method": "clustering",
        "exploration_strategy": "diversity",
        **_ADAPTIVE_POOL_HEATUP_DEFAULTS,
    },
    AdaptiveCurriculumLevel.HIGH: {
        "anchor_size": 20,
        "pool_init_size": 60,
        "pool_min_size": 30,
        "warmup_iters": 3,
        "anneal_stop_iter": 10,
        "pool_update_period": 5,
        "min_evals_per_example": 2,
        "k_info_prompts": 5,
        "info_buffer_factor": 0.8,
        "info_epsilon": 1e-6,
        "anchor_selection_method": "clustering",
        "exploration_strategy": "diversity",
        **_ADAPTIVE_POOL_HEATUP_DEFAULTS,
    },
}
495
+
496
# Default presets for adaptive batch (GEPA only), one dict of
# GEPAAdaptiveBatchConfig field values per level.
_ADAPTIVE_BATCH_DEFAULTS: dict[AdaptiveBatchLevel, dict[str, Any]] = {
    AdaptiveBatchLevel.NONE: dict(
        reflection_minibatch_size=8,
        min_local_improvement=0.0,
        val_evaluation_mode="full",
        val_subsample_size=64,
        candidate_selection_strategy="random",
    ),
    AdaptiveBatchLevel.LOW: dict(
        reflection_minibatch_size=5,
        min_local_improvement=0.0,
        val_evaluation_mode="subsample",
        val_subsample_size=80,
        candidate_selection_strategy="coverage",
    ),
    AdaptiveBatchLevel.MODERATE: dict(
        reflection_minibatch_size=3,
        min_local_improvement=0.0,
        val_evaluation_mode="subsample",
        val_subsample_size=64,
        candidate_selection_strategy="coverage",
    ),
    AdaptiveBatchLevel.HIGH: dict(
        reflection_minibatch_size=2,
        min_local_improvement=0.0,
        val_evaluation_mode="subsample",
        val_subsample_size=48,
        candidate_selection_strategy="coverage",
    ),
}
527
+
528
+
529
def resolve_adaptive_pool_config(
    *,
    level: AdaptiveCurriculumLevel | str | None = None,
    overrides: dict[str, Any] | None = None,
    dev_pool_size: int | None = None,
) -> AdaptivePoolConfig:
    """Resolve adaptive pool config from level preset and overrides.

    Args:
        level: Preset level (NONE, LOW, MODERATE, HIGH), as an enum member or a
            case-insensitive name. Defaults to LOW if None.
        overrides: Dict of field overrides applied on top of the level's
            defaults. Keys that are not config fields are ignored.
        dev_pool_size: Optional dev pool size. It fills in ``pool_init_size``
            and ``pool_min_size`` when they are None, and caps
            ``pool_init_size``.

    Returns:
        AdaptivePoolConfig with resolved values.

    Raises:
        ValueError: If ``level`` is a string that names no preset level.
    """
    # Normalize level
    if level is None:
        level = AdaptiveCurriculumLevel.LOW
    elif isinstance(level, str):
        try:
            level = AdaptiveCurriculumLevel[level.strip().upper()]
        except KeyError:
            valid_levels = ", ".join(level_item.name for level_item in AdaptiveCurriculumLevel)
            raise ValueError(f"Invalid adaptive pool level '{level}'. Must be one of: {valid_levels}") from None

    # Start from a copy of the preset so the module-level table is never mutated.
    settings = _ADAPTIVE_POOL_DEFAULTS[level].copy()
    if overrides:
        settings.update(overrides)

    def _choice(key: str, allowed: tuple[str, ...], fallback: str) -> str:
        """Return settings[key] if it is an allowed literal, else the fallback."""
        candidate = settings.get(key, fallback)
        return candidate if candidate in allowed else fallback

    # Unset pool sizes fall back to the dev pool size (which may itself be None).
    pool_init_size = settings.get("pool_init_size")
    pool_min_size = settings.get("pool_min_size")
    if pool_init_size is None:
        pool_init_size = dev_pool_size
    if pool_min_size is None:
        pool_min_size = dev_pool_size

    # Cap pool_init_size if dev_pool_size is provided.
    # NOTE(review): pool_min_size is not capped here, so it can end up larger
    # than pool_init_size for a small dev pool — confirm whether that is intended.
    if dev_pool_size is not None and pool_init_size is not None and pool_init_size > dev_pool_size:
        pool_init_size = dev_pool_size

    # heatup_reserve_pool may be a list/tuple, None, or a single value.
    heatup_reserve = settings.get("heatup_reserve_pool")
    if heatup_reserve is not None and not isinstance(heatup_reserve, (list, tuple)):
        # A lone integer is a seed id and must be kept even when it is 0;
        # a plain truthiness test would silently drop seed 0. Other falsy
        # scalars are still treated as "no reserve pool".
        is_seed_id = isinstance(heatup_reserve, int) and not isinstance(heatup_reserve, bool)
        heatup_reserve = [heatup_reserve] if (is_seed_id or heatup_reserve) else None

    return AdaptivePoolConfig(
        level=level,
        anchor_size=int(settings["anchor_size"]),
        pool_init_size=None if pool_init_size is None else int(pool_init_size),
        pool_min_size=None if pool_min_size is None else int(pool_min_size),
        warmup_iters=int(settings["warmup_iters"]),
        anneal_stop_iter=int(settings["anneal_stop_iter"]),
        pool_update_period=int(settings["pool_update_period"]),
        min_evals_per_example=int(settings["min_evals_per_example"]),
        k_info_prompts=int(settings["k_info_prompts"]),
        info_buffer_factor=float(settings["info_buffer_factor"]),
        info_epsilon=float(settings["info_epsilon"]),
        # Unrecognized literal values fall back to the standard choice.
        anchor_selection_method=_choice("anchor_selection_method", ("random", "clustering"), "clustering"),
        exploration_strategy=_choice("exploration_strategy", ("random", "diversity"), "diversity"),
        # An empty reserve collection is normalized to None.
        heatup_reserve_pool=list(heatup_reserve) if heatup_reserve else None,
        heatup_trigger=_choice(
            "heatup_trigger",
            ("after_min_size", "immediate", "every_N_trials_after_min"),
            "after_min_size",
        ),
        heatup_size=int(settings.get("heatup_size", 20)),
        heatup_cooldown_trials=int(settings.get("heatup_cooldown_trials", 50)),
        heatup_schedule=_choice("heatup_schedule", ("repeat", "once"), "repeat"),
    )
604
+
605
+
606
def resolve_adaptive_batch_config(
    *,
    level: AdaptiveBatchLevel | str | None = None,
    overrides: dict[str, Any] | None = None,
) -> GEPAAdaptiveBatchConfig:
    """Build a GEPAAdaptiveBatchConfig from a preset level plus overrides.

    Args:
        level: Preset level (NONE, LOW, MODERATE, HIGH), given as an enum
            member or a case-insensitive name. MODERATE is used when None.
        overrides: Field values that replace the preset's defaults.

    Returns:
        GEPAAdaptiveBatchConfig with resolved values.

    Raises:
        ValueError: If ``level`` is a string that names no preset level.
    """
    if level is None:
        resolved_level = AdaptiveBatchLevel.MODERATE
    elif isinstance(level, str):
        try:
            resolved_level = AdaptiveBatchLevel[level.strip().upper()]
        except KeyError:
            valid_levels = ", ".join(member.name for member in AdaptiveBatchLevel)
            raise ValueError(f"Invalid adaptive batch level '{level}'. Must be one of: {valid_levels}") from None
    else:
        resolved_level = level

    # Work on a copy so the module-level preset table is left untouched.
    settings = dict(_ADAPTIVE_BATCH_DEFAULTS[resolved_level])
    if overrides:
        settings.update(overrides)

    minibatch_size = int(settings["reflection_minibatch_size"])
    local_improvement = float(settings["min_local_improvement"])

    # Unrecognized literal values are replaced with a fixed fallback.
    val_mode = settings["val_evaluation_mode"]
    if val_mode not in ("full", "subsample"):
        val_mode = "full"

    subsample_size = int(settings["val_subsample_size"])

    selection = settings["candidate_selection_strategy"]
    if selection not in ("coverage", "random"):
        selection = "coverage"

    return GEPAAdaptiveBatchConfig(
        level=resolved_level,
        reflection_minibatch_size=minibatch_size,
        min_local_improvement=local_improvement,
        val_evaluation_mode=val_mode,
        val_subsample_size=subsample_size,
        candidate_selection_strategy=selection,
    )
646
+
647
+
648
class MIPROConfig(ExtraModel):
    """MIPRO-specific configuration.

    MIPROv2 uses meta-learning with bootstrap phase, TPE optimization, and mini-batch evaluation
    to efficiently optimize prompts with fewer evaluations than genetic algorithms.

    Attributes:
        proposer_effort: Effort level for proposer model selection. Controls which model
            is used for generating prompt proposals. Default: "LOW".
            Options:
            - "LOW_CONTEXT": Uses gpt-oss-120b (Groq) with minimal context. Fastest/cheapest.
              Required when proposer_output_tokens="RAPID".
            - "LOW": Uses smaller/faster models (e.g., gpt-4o-mini). Good balance.
            - "MEDIUM": Uses medium models (e.g., gpt-4o). Higher quality proposals.
            - "HIGH": Uses best models (e.g., gpt-5). Highest quality but expensive.
        proposer_output_tokens: Maximum output tokens allowed for proposer model.
            Default: "FAST". Controls proposal length and cost.
            Options:
            - "RAPID": 3000 tokens max. Fastest/cheapest. Requires proposer_effort="LOW_CONTEXT"
              and gpt-oss-120b model. Use for short, focused proposals.
            - "FAST": 10000 tokens max. Good balance. Works with any effort level.
            - "SLOW": 25000 tokens max. Allows longer proposals. Use for complex prompts.
        min_bootstrap_demos: Minimum number of qualified bootstrap demonstrations required.
            Default: 0 (no minimum). If set, bootstrap phase will fail early if fewer than
            this many demos pass the few_shot_score_threshold. Use with strict_bootstrap=True
            for fail-fast behavior.
        strict_bootstrap: If True, fail immediately when bootstrap doesn't produce enough
            qualified demos (< min_bootstrap_demos). Default: False. When False, optimization
            continues but may produce suboptimal results with insufficient demos.
            NOTE(review): no ``strict_bootstrap`` field is declared on this class; it is
            presumably accepted as an extra key via ExtraModel - confirm.
    """
    task_app_url: str | None = None
    task_app_api_key: str | None = None
    task_app_id: str | None = None
    num_iterations: int = 20
    num_evaluations_per_iteration: int = 5
    batch_size: int = 32
    max_concurrent: int = 20
    env_name: str = "banking77"
    env_config: dict[str, Any] | None = None
    few_shot_score_threshold: float = 0.8
    results_file: str | None = None
    max_wall_clock_seconds: float | None = None
    max_total_tokens: int | None = None
    policy_config: dict[str, Any] | None = None
    meta: MIPROMetaConfig | dict[str, Any] | None = None
    modules: list[MIPROModuleConfig] | list[dict[str, Any]] | None = None
    seeds: MIPROSeedConfig | dict[str, Any] | None = None

    # Proposer configuration
    proposer_effort: Literal["LOW_CONTEXT", "LOW", "MEDIUM", "HIGH"] = "LOW"
    proposer_output_tokens: Literal["RAPID", "FAST", "SLOW"] = "FAST"

    # Token and budget configuration (mirrors GEPA pattern)
    max_token_limit: int | None = None  # Total tokens across all rollouts (policy + proposer)
    max_spend_usd: float | None = None  # Maximum spend in USD
    token_counting_model: str = "gpt-4"  # Model for token estimation (tiktoken)
    enforce_token_limit: bool = True  # Halt optimization if limit exceeded

    # TPE configuration
    tpe: dict[str, Any] | None = None

    # Demo configuration
    demo: dict[str, Any] | None = None

    # Grounding configuration
    grounding: dict[str, Any] | None = None

    # Meta-update configuration
    meta_update: dict[str, Any] | None = None

    # Verifier configuration (shared with GEPA)
    verifier: PromptLearningVerifierConfig | dict[str, Any] | None = None

    # Proxy models configuration (optional, can also be at top-level)
    proxy_models: ProxyModelsConfig | dict[str, Any] | None = None

    # Adaptive pool configuration (optional)
    adaptive_pool: AdaptivePoolConfig | dict[str, Any] | None = None

    # System spec configuration
    spec_path: str | None = None  # Path to system spec JSON file
    spec_max_tokens: int = 5000  # Max tokens for spec context in meta-prompt
    spec_include_examples: bool = True  # Include examples from spec
    spec_priority_threshold: int | None = None  # Only include rules with priority >= threshold
    # Custom metaprompt (optional)
    metaprompt: str | None = None  # Custom metaprompt text to include in instruction generation prompts

    # Bootstrap seeds (for few-shot examples)
    bootstrap_train_seeds: list[int] | None = None

    # Online pool (for mini-batch evaluation)
    online_pool: list[int] | None = None

    # Test pool (held-out seeds)
    test_pool: list[int] | None = None

    # Reference pool (for dataset context in meta-prompt, must not overlap with train/test)
    reference_pool: list[int] | None = None

    # Strict bootstrap mode: minimum qualified demos required
    # If fewer demos qualify (score >= few_shot_score_threshold), job fails early with clear error
    # Default: 0 (no minimum - current behavior for backwards compatibility)
    min_bootstrap_demos: int = 0

    @model_validator(mode="before")
    @classmethod
    def _forbid_meta_model_config(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Forbid deprecated meta_model configuration fields.

        Meta-model selection is now controlled by proposer_effort and proposer_output_tokens.
        The backend automatically selects the model based on these settings.

        Non-dict input is returned untouched. A deprecated key only triggers an
        error when it is present with a non-None value.

        Raises:
            ValueError: If a deprecated top-level ``meta_model*`` key is set, or if
                a dict-shaped ``meta`` section sets ``model`` or ``provider``.
        """
        if not isinstance(data, dict):
            return data

        deprecated_meta_fields = {
            "meta_model": "Meta-model selection is now controlled by 'proposer_effort' (LOW_CONTEXT, LOW, MEDIUM, HIGH). Remove 'meta_model' from your config.",
            "meta_model_provider": "Meta-model provider is now controlled by 'proposer_effort'. Remove 'meta_model_provider' from your config.",
            "meta_model_inference_url": "Meta-model inference URL is now controlled by 'proposer_effort'. Remove 'meta_model_inference_url' from your config.",
            "meta_model_temperature": "Meta-model temperature is now controlled by 'proposer_effort'. Remove 'meta_model_temperature' from your config.",
            "meta_model_max_tokens": "Meta-model max_tokens is now controlled by 'proposer_effort' and 'proposer_output_tokens'. Remove 'meta_model_max_tokens' from your config.",
        }

        for field, message in deprecated_meta_fields.items():
            if field in data and data[field] is not None:
                raise ValueError(f"Deprecated field '{field}': {message}")

        # Also check in nested meta section
        # Only a dict-shaped `meta` is inspected; an already-built MIPROMetaConfig
        # instance (as passed by `simple()`) is not checked here.
        if "meta" in data and isinstance(data["meta"], dict):
            meta_data = data["meta"]
            if meta_data.get("model") is not None:
                raise ValueError("Deprecated field 'meta.model': Meta-model selection is now controlled by 'proposer_effort'. Remove [prompt_learning.mipro.meta] section.")
            if meta_data.get("provider") is not None:
                raise ValueError("Deprecated field 'meta.provider': Meta-model provider is now controlled by 'proposer_effort'. Remove [prompt_learning.mipro.meta] section.")

        return data

    @field_validator("bootstrap_train_seeds", "online_pool", "test_pool", "reference_pool", mode="before")
    @classmethod
    def _parse_mipro_seed_lists(cls, v: Any) -> list[int] | None:
        """Parse MIPRO seed lists that can be either a list or range dict."""
        return _parse_seeds(v)

    @classmethod
    def simple(
        cls,
        *,
        task_app_url: str,
        task_app_api_key: str,
        env_name: str,
        rollout_budget: int,
        initial_prompt_messages: Sequence[Mapping[str, Any]] | Sequence[Any],
        task_app_id: str | None = None,
        bootstrap_seeds: list[int] | None = None,
        online_seeds: list[int] | None = None,
        test_seeds: list[int] | None = None,
        reference_pool: list[int] | None = None,
        env_config: dict[str, Any] | None = None,
        num_iterations: int | None = None,
        num_evaluations_per_iteration: int | None = None,
        batch_size: int | None = None,
        max_concurrent: int | None = None,
        meta_preset: Literal["fast", "balanced", "high_quality"] = "balanced",
        policy_model: str = "openai/gpt-oss-20b",
        policy_provider: str = "groq",
        policy_temperature: float = 1.0,
        policy_max_completion_tokens: int = 512,
        policy_name: str | None = None,
        meta_model: str | None = None,
        meta_provider: str | None = None,
        meta_inference_url: str | None = None,
    ) -> MIPROConfig:
        """Convenience constructor for single-stage MIPRO tasks.

        Automatically infers reasonable defaults for seeds, iterations, and module layout
        based on the rollout budget. This keeps simple benchmarks (e.g., Iris) readable
        while leaving the full constructor available for complex multi-stage pipelines.

        Fallback rules (all use truthiness, so an empty list or 0 is treated the
        same as None):
            - bootstrap/online/reference seeds, iterations and evaluations per
              iteration are auto-calculated from ``rollout_budget``.
            - ``test_seeds`` falls back to an empty list.
            - ``batch_size`` falls back to the online pool size clamped to [1, 32].
            - ``max_concurrent`` falls back to 10 (not the class default of 20).
            - ``task_app_id`` falls back to ``env_name``.
            - ``meta_model`` / ``meta_provider`` / ``meta_inference_url`` override
              the values chosen by ``meta_preset`` when provided.

        Returns:
            A MIPROConfig with one module ("default") containing one stage
            ("default_stage_0") built from the normalized messages.

        Raises:
            ValueError: If ``rollout_budget`` is not positive or no messages are given.
        """
        if rollout_budget <= 0:
            raise ValueError("rollout_budget must be positive for MIPROConfig.simple()")
        normalized_messages = _normalize_messages(initial_prompt_messages)
        if not normalized_messages:
            raise ValueError("initial_prompt_messages must contain at least one message")

        bootstrap = bootstrap_seeds or _auto_calculate_bootstrap_seeds(rollout_budget)
        online = online_seeds or _auto_calculate_online_seeds(rollout_budget)
        tests = test_seeds or []
        reference = reference_pool or _auto_calculate_reference_pool(rollout_budget)

        iterations = num_iterations or _auto_calculate_iterations(rollout_budget)
        evals_per_iteration = (
            num_evaluations_per_iteration
            or _auto_calculate_evaluations_per_iteration(rollout_budget)
        )
        derived_batch_size = batch_size or max(1, min(len(online), 32))
        derived_max_concurrent = max_concurrent or 10

        baseline_instruction = _extract_baseline_instruction(normalized_messages)
        meta_config = _create_meta_config_from_preset(meta_preset)
        # Explicit meta_* arguments take precedence over the preset's values.
        if meta_model:
            meta_config.model = meta_model
        if meta_provider:
            meta_config.provider = meta_provider
        if meta_inference_url is not None:
            meta_config.inference_url = meta_inference_url

        stage = MIPROStageConfig(
            stage_id="default_stage_0",
            baseline_instruction=baseline_instruction,
            baseline_messages=normalized_messages,
        )
        module = MIPROModuleConfig(
            module_id="default",
            stages=[stage],
        )
        seeds = MIPROSeedConfig(
            bootstrap=bootstrap,
            online=online,
            test=tests,
            reference=reference,
        )
        policy_config = {
            "model": policy_model,
            "provider": policy_provider,
            "temperature": policy_temperature,
            "max_completion_tokens": policy_max_completion_tokens,
        }
        if policy_name:
            policy_config["policy_name"] = policy_name

        return cls(
            task_app_url=task_app_url,
            task_app_api_key=task_app_api_key,
            task_app_id=task_app_id or env_name,
            env_name=env_name,
            env_config=env_config,
            seeds=seeds,
            num_iterations=iterations,
            num_evaluations_per_iteration=evals_per_iteration,
            batch_size=derived_batch_size,
            max_concurrent=derived_max_concurrent,
            policy_config=policy_config,
            meta=meta_config,
            modules=[module],
        )
893
+
894
+
895
def _auto_calculate_bootstrap_seeds(rollout_budget: int) -> list[int]:
    """Derive the default bootstrap seed list from the rollout budget.

    The pool size is one tenth of the budget, clamped to the range [3, 10];
    seeds are numbered consecutively from 0.
    """
    pool_size = min(10, max(3, rollout_budget // 10))
    return [seed for seed in range(pool_size)]
899
+
900
+
901
def _auto_calculate_online_seeds(rollout_budget: int) -> list[int]:
    """Derive the default online-pool seed list from the rollout budget.

    The pool size is one third of the budget, clamped to the range [5, 50];
    seeds are numbered consecutively starting at 10.
    """
    first_seed = 10
    pool_size = min(50, max(5, rollout_budget // 3))
    return [first_seed + offset for offset in range(pool_size)]
905
+
906
+
907
def _auto_calculate_reference_pool(rollout_budget: int) -> list[int]:
    """Auto-calculate reference pool seeds from rollout budget.

    The pool size is one fifth of the budget, clamped to the range [5, 30].

    The reference pool must not overlap the training pools. The auto-generated
    online pool occupies seeds ``10 .. 10 + n - 1`` where ``n`` is one third of
    the budget clamped to [5, 50], so a fixed start of 20 collides with it as
    soon as ``n`` exceeds 10 (budgets of 33 and above). The start is therefore
    pushed past the end of the online pool when needed, and stays at 20 for
    small budgets where there is no collision.

    Args:
        rollout_budget: Total rollout budget used to size the pools.

    Returns:
        Consecutive seeds that do not intersect the auto-generated online pool.
    """
    count = max(5, min(30, max(rollout_budget // 5, 1)))
    # Same sizing rule as the auto-generated online pool (which starts at seed 10).
    online_count = max(5, min(50, max(rollout_budget // 3, 1)))
    start = max(20, 10 + online_count)
    return list(range(start, start + count))
911
+
912
+
913
def _auto_calculate_iterations(rollout_budget: int) -> int:
    """Derive the default number of optimization iterations from the budget.

    Estimates the online pool size (budget / 3, clamped to [5, 50]) and the
    evaluations per iteration (clamped to [3, 10]), then divides the budget by
    their product and clamps the result to [5, 20].
    """
    pool = min(50, max(5, rollout_budget // 3))
    per_iteration = min(10, max(3, rollout_budget // (pool * 2)))
    return min(20, max(5, rollout_budget // (pool * per_iteration)))
919
+
920
+
921
def _auto_calculate_evaluations_per_iteration(rollout_budget: int) -> int:
    """Derive the default number of evaluations per iteration from the budget.

    Estimates the online pool size (budget / 3, clamped to [5, 50]) and the
    iteration count (clamped to [5, 20]), then divides the budget by their
    product and clamps the result to [3, 10].
    """
    pool = min(50, max(5, rollout_budget // 3))
    rounds = min(20, max(5, rollout_budget // (pool * 5)))
    return min(10, max(3, rollout_budget // (pool * rounds)))
927
+
928
+
929
def _coerce_message_mapping(message: Mapping[str, Any] | Any) -> dict[str, Any]:
    """Return a plain dict view of a message given as a mapping or an object.

    Resolution order:
        1. Mapping instances are shallow-copied into a new dict.
        2. Objects exposing ``model_dump`` use its result when it is a dict.
        3. Objects with a ``__dict__`` contribute their non-underscore attributes.
    Anything else, or any failure along the way, yields an empty dict.
    """
    if isinstance(message, Mapping):
        return dict(message)

    if hasattr(message, "model_dump"):
        try:
            dumped = message.model_dump()
        except Exception:  # pragma: no cover - defensive
            dumped = None
        if isinstance(dumped, dict):
            return dumped

    if not hasattr(message, "__dict__"):
        return {}

    try:
        public_attrs: dict[str, Any] = {}
        for name, value in vars(message).items():
            if name.startswith("_"):
                continue
            public_attrs[name] = value
        return public_attrs
    except Exception:  # pragma: no cover - defensive
        return {}
950
+
951
+
952
def _extract_baseline_instruction(messages: Sequence[Mapping[str, str]] | Sequence[Any]) -> str:
    """Pick the baseline instruction text out of a list of message templates.

    The first system message with non-blank text wins; failing that, the first
    user message with non-blank text. Text is read from ``content`` and then
    ``pattern``, and a missing role counts as "user". When nothing matches, a
    generic instruction is returned.
    """
    # System messages take priority over user messages, so scan once per role.
    for wanted_role in ("system", "user"):
        for raw in messages:
            candidate = _coerce_message_mapping(raw)
            if candidate.get("role", "user") != wanted_role:
                continue
            body = candidate.get("content") or candidate.get("pattern") or ""
            body = body.strip()
            if body:
                return body
    return "Complete the task."
967
+
968
+
969
def _normalize_messages(messages: Sequence[Mapping[str, str]] | Sequence[Any]) -> list[dict[str, str]]:
    """Reduce each message to a ``{"role", "content"}`` dict of strings.

    A missing or empty role becomes "user"; content is taken from ``content``,
    then ``pattern``, and defaults to an empty string.
    """
    coerced = (_coerce_message_mapping(raw) for raw in messages)
    return [
        {
            "role": str(entry.get("role", "user") or "user"),
            "content": str(entry.get("content") or entry.get("pattern") or ""),
        }
        for entry in coerced
    ]
978
+
979
+
980
def _create_meta_config_from_preset(preset: str) -> MIPROMetaConfig:
    """Build the MIPROMetaConfig for a named preset.

    Known presets are "fast", "balanced" and "high_quality" (matched
    case-insensitively, ignoring surrounding whitespace). Any other name falls
    back to "balanced". A fresh config object is returned on every call.
    """
    # (model, temperature, max_tokens) per preset; provider and inference_url
    # are the same for all of them.
    preset_table = {
        "fast": ("gpt-4o-mini", 0.7, 512),
        "balanced": ("gpt-4o-mini", 0.8, 1024),
        "high_quality": ("gpt-4o", 0.9, 2048),
    }
    lookup_key = preset.lower().strip()
    model_name, temperature, token_cap = preset_table.get(lookup_key, preset_table["balanced"])
    return MIPROMetaConfig(
        model=model_name,
        provider="openai",
        temperature=temperature,
        max_tokens=token_cap,
        inference_url=None,
    )
1007
+
1008
+
1009
+ # GEPA nested configs (mirroring RL structure)
1010
class GEPARolloutConfig(ExtraModel):
    """GEPA rollout configuration (mirrors RL [rollout] section).

    Groups the rollout budget with the concurrency and minibatch settings.
    Only ``budget`` has no default value.
    """
    budget: int | None = None  # Total rollout budget
    max_concurrent: int = 20  # Maximum concurrent rollouts
    minibatch_size: int = 8  # Minibatch size for evaluation
1015
+
1016
+
1017
class GEPAEvaluationConfig(ExtraModel):
    """GEPA evaluation configuration (mirrors RL [evaluation] section).

    The three seed-list fields (``seeds``, ``validation_seeds``, ``test_pool``)
    are pre-processed by ``_parse_seeds`` before validation.
    """
    seeds: list[int] | None = None  # Evaluation seeds (training set)
    validation_seeds: list[int] | None = None  # Validation seeds (held-out)
    test_pool: list[int] | None = None  # Test pool (final evaluation)
    validation_pool: str | None = None  # Pool name for validation (e.g., "validation")
    validation_top_k: int | None = None  # Top-K prompts to validate

    @field_validator("seeds", "validation_seeds", "test_pool", mode="before")
    @classmethod
    def _parse_seed_lists(cls, v: Any) -> list[int] | None:
        """Parse seed lists that can be either a list or range dict."""
        # Delegates entirely to the module-level _parse_seeds helper.
        return _parse_seeds(v)
1030
+
1031
+
1032
class GEPAMutationConfig(ExtraModel):
    """Mutation settings for GEPA.

    NOTE: The mutation model is chosen via proposer_effort, NOT llm_model.
    The llm_model / llm_provider / llm_inference_url fields remain declared but
    are deprecated; supplying a non-None value for any of them in dict input is
    rejected by the validator below.
    """
    rate: float = 0.3  # Mutation rate
    llm_model: str | None = None  # DEPRECATED: Use proposer_effort instead
    llm_provider: str | None = None  # DEPRECATED: Use proposer_effort instead
    llm_inference_url: str | None = None  # DEPRECATED: Not used
    prompt: str | None = None  # Custom mutation prompt

    @model_validator(mode="before")
    @classmethod
    def _forbid_mutation_llm_config(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Reject deprecated mutation LLM settings before field validation.

        Mutation model selection is now controlled by proposer_effort at the gepa level.
        Non-dict input passes through unchanged.

        Raises:
            ValueError: If a deprecated key is present with a non-None value.
        """
        if isinstance(data, dict):
            removal_hints = {
                "llm_model": "Mutation model selection is now controlled by 'proposer_effort' (LOW_CONTEXT, LOW, MEDIUM, HIGH) at [prompt_learning.gepa] level. Remove 'llm_model' from [prompt_learning.gepa.mutation].",
                "llm_provider": "Mutation provider is now controlled by 'proposer_effort'. Remove 'llm_provider' from [prompt_learning.gepa.mutation].",
                "llm_inference_url": "Mutation inference URL is not used. Remove 'llm_inference_url' from [prompt_learning.gepa.mutation].",
            }
            for field, message in removal_hints.items():
                # A key explicitly set to None is tolerated.
                if data.get(field) is not None:
                    raise ValueError(f"Deprecated field '{field}': {message}")
        return data
1065
+
1066
+
1067
class GEPAPopulationConfig(ExtraModel):
    """GEPA population configuration (evolution parameters).

    Every field has a default, so the section can be omitted or given partially.
    """
    initial_size: int = 20  # Initial population size
    num_generations: int = 10  # Number of generations
    children_per_generation: int = 5  # Children generated per generation
    crossover_rate: float = 0.5  # Crossover rate
    selection_pressure: float = 1.0  # Pareto selection pressure
    patience_generations: int = 3  # Early stopping patience
1075
+
1076
+
1077
class GEPAArchiveConfig(ExtraModel):
    """GEPA archive configuration (Pareto archive settings).

    Every field has a default, so the section can be omitted or given partially.
    """
    size: int = 64  # Archive size
    pareto_set_size: int = 64  # Pareto set size
    pareto_eps: float = 1e-6  # Pareto epsilon
    feedback_fraction: float = 0.5  # Fraction of archive for feedback
1083
+
1084
+
1085
class GEPATokenConfig(ExtraModel):
    """GEPA token and budget configuration.

    ``max_limit`` and ``max_spend_usd`` default to None (unset).
    """
    max_limit: int | None = None  # Maximum tokens allowed in prompt
    counting_model: str = "gpt-4"  # Model for token counting
    enforce_pattern_limit: bool = True  # Enforce token limit on patterns
    max_spend_usd: float | None = None  # Maximum spend in USD
1091
+
1092
+
1093
class GEPAModuleConfig(ExtraModel):
    """Settings for one module/stage of a GEPA pipeline (instruction-only).

    A per-module policy is mandatory. When given as a dict it must carry
    non-empty 'model' and 'provider' entries.
    """
    module_id: str
    max_instruction_slots: int = 3
    allowed_tools: list[str] | None = None
    max_tokens: int | None = None
    policy: PromptLearningPolicyConfig | dict[str, Any] = Field(
        ...,
        description="Required per-module policy configuration. Must include 'model' and 'provider' fields."
    )

    @field_validator("module_id")
    @classmethod
    def _validate_module_id(cls, v: str) -> str:
        """Strip surrounding whitespace and reject an empty module id."""
        trimmed = v.strip()
        if trimmed:
            return trimmed
        raise ValueError("module_id cannot be empty")

    @field_validator("max_instruction_slots")
    @classmethod
    def _validate_slots(cls, v: int) -> int:
        """Require at least one instruction slot."""
        if v >= 1:
            return v
        raise ValueError("max_instruction_slots must be >= 1")

    @field_validator("policy", mode="before")
    @classmethod
    def _validate_policy(cls, v: Any) -> dict[str, Any]:
        """Check the raw policy value before pydantic validates it.

        None is rejected. A dict must contain truthy 'model' and 'provider'
        values. Any other value is passed through for pydantic to validate.
        """
        if v is None:
            raise ValueError("policy is required for each module/stage")
        if not isinstance(v, dict):
            # Non-dict values (e.g. an existing PromptLearningPolicyConfig) are
            # left for pydantic's own field validation.
            return v
        if not v.get("model"):
            raise ValueError("policy must include 'model' field")
        if not v.get("provider"):
            raise ValueError("policy must include 'provider' field")
        return v
1137
+
1138
+
1139
+ class GEPAConfig(ExtraModel):
1140
+ """GEPA-specific configuration with nested subsections.
1141
+
1142
+ GEPA (Genetic Evolution of Prompt Architectures) uses evolutionary algorithms
1143
+ with LLM-guided mutations to optimize prompts through population-based search.
1144
+
1145
+ Attributes:
1146
+ proposer_type: Type of proposer to use for generating mutations.
1147
+ Default: "dspy". Options: "dspy" (DSPy-style proposer) or "spec" (spec-based).
1148
+ proposer_effort: Effort level for proposer model selection. Controls which model
1149
+ is used for generating prompt mutations. Default: "LOW".
1150
+ Options:
1151
+ - "LOW_CONTEXT": Uses gpt-oss-120b (Groq) with minimal context. Fastest/cheapest.
1152
+ Required when proposer_output_tokens="RAPID".
1153
+ - "LOW": Uses smaller/faster models (e.g., gpt-4o-mini). Good balance.
1154
+ - "MEDIUM": Uses medium models (e.g., gpt-4o). Higher quality mutations.
1155
+ - "HIGH": Uses best models (e.g., gpt-5). Highest quality but expensive.
1156
+ proposer_output_tokens: Maximum output tokens allowed for proposer model.
1157
+ Default: "FAST". Controls mutation length and cost.
1158
+ Options:
1159
+ - "RAPID": 3000 tokens max. Fastest/cheapest. Requires proposer_effort="LOW_CONTEXT"
1160
+ and gpt-oss-120b model. Use for short, focused mutations.
1161
+ - "FAST": 10000 tokens max. Good balance. Works with any effort level.
1162
+ - "SLOW": 25000 tokens max. Allows longer mutations. Use for complex prompts.
1163
+ metaprompt: Optional custom metaprompt text to include in mutation prompts.
1164
+ Default: None. If provided, replaces default metaprompt template.
1165
+ """
1166
+ # Top-level fields (for backwards compatibility)
1167
+ env_name: str = "banking77"
1168
+ env_config: dict[str, Any] | None = None
1169
+ rng_seed: int | None = None
1170
+ proposer_type: str = "dspy"
1171
+ proposer_effort: Literal["LOW_CONTEXT", "LOW", "MEDIUM", "HIGH"] = "LOW"
1172
+ proposer_output_tokens: Literal["RAPID", "FAST", "SLOW"] = "FAST"
1173
+ # Custom metaprompt (optional)
1174
+ metaprompt: str | None = None
1175
+
1176
+ # Multi-stage pipeline support
1177
+ modules: list[GEPAModuleConfig] | None = None
1178
+
1179
+ # Nested subsections (preferred, mirrors RL structure)
1180
+ rollout: GEPARolloutConfig | None = None
1181
+ evaluation: GEPAEvaluationConfig | None = None
1182
+ mutation: GEPAMutationConfig | None = None
1183
+ population: GEPAPopulationConfig | None = None
1184
+ archive: GEPAArchiveConfig | None = None
1185
+ token: GEPATokenConfig | None = None
1186
+ verifier: PromptLearningVerifierConfig | dict[str, Any] | None = None
1187
+ proxy_models: ProxyModelsConfig | dict[str, Any] | None = None # Proxy models config (can be at top-level or gepa-specific)
1188
+ adaptive_pool: AdaptivePoolConfig | dict[str, Any] | None = None # Adaptive pooling config
1189
+ adaptive_batch: GEPAAdaptiveBatchConfig | dict[str, Any] | None = None # Adaptive batch config (GEPA only)
1190
+
1191
+ # Backwards compatibility: flat fields (DEPRECATED - DO NOT USE)
1192
+ # These are kept for backwards compatibility with _get_* methods but should not be used directly
1193
+ rollout_budget: int | None = None
1194
+ max_concurrent_rollouts: int | None = None
1195
+ minibatch_size: int | None = None
1196
+ evaluation_seeds: list[int] | None = None
1197
+ validation_seeds: list[int] | None = None
1198
+ test_pool: list[int] | None = None
1199
+ validation_pool: str | None = None
1200
+ validation_top_k: int | None = None
1201
+ mutation_rate: float | None = None
1202
+ mutation_llm_model: str | None = None
1203
+ mutation_llm_provider: str | None = None
1204
+ mutation_llm_inference_url: str | None = None
1205
+ mutation_prompt: str | None = None
1206
+ initial_population_size: int | None = None
1207
+ num_generations: int | None = None
1208
+ children_per_generation: int | None = None
1209
+ crossover_rate: float | None = None
1210
+ selection_pressure: float | None = None
1211
+ patience_generations: int | None = None
1212
+ archive_size: int | None = None
1213
+ pareto_set_size: int | None = None
1214
+ pareto_eps: float | None = None
1215
+ feedback_fraction: float | None = None
1216
+ max_token_limit: int | None = None
1217
+ token_counting_model: str | None = None
1218
+ enforce_pattern_token_limit: bool | None = None
1219
+ max_spend_usd: float | None = None
1220
+
1221
+ @model_validator(mode="before")
1222
+ @classmethod
1223
+ def _check_flat_format_deprecated(cls, data: dict[str, Any]) -> dict[str, Any]:
1224
+ """Forbid deprecated flat GEPA format fields.
1225
+
1226
+ Users must use nested format:
1227
+ - gepa.rollout.budget instead of gepa.rollout_budget
1228
+ - gepa.evaluation.seeds instead of gepa.evaluation_seeds
1229
+ - etc.
1230
+ """
1231
+ if not isinstance(data, dict):
1232
+ return data
1233
+
1234
+ flat_fields_map = {
1235
+ "rollout_budget": "Use [prompt_learning.gepa.rollout] section with 'budget' field instead.",
1236
+ "max_concurrent_rollouts": "Use [prompt_learning.gepa.rollout] section with 'max_concurrent' field instead.",
1237
+ "minibatch_size": "Use [prompt_learning.gepa.rollout] section with 'minibatch_size' field instead.",
1238
+ "evaluation_seeds": "Use [prompt_learning.gepa.evaluation] section with 'seeds' field instead.",
1239
+ "validation_seeds": "Use [prompt_learning.gepa.evaluation] section with 'validation_seeds' field instead.",
1240
+ "test_pool": "Use [prompt_learning.gepa.evaluation] section with 'test_pool' field instead.",
1241
+ "validation_pool": "Use [prompt_learning.gepa.evaluation] section with 'validation_pool' field instead.",
1242
+ "validation_top_k": "Use [prompt_learning.gepa.evaluation] section with 'validation_top_k' field instead.",
1243
+ "mutation_rate": "Use [prompt_learning.gepa.mutation] section with 'rate' field instead.",
1244
+ "mutation_llm_model": "Use [prompt_learning.gepa.mutation] section with 'llm_model' field instead.",
1245
+ "mutation_llm_provider": "Use [prompt_learning.gepa.mutation] section with 'llm_provider' field instead.",
1246
+ "mutation_llm_inference_url": "Use [prompt_learning.gepa.mutation] section with 'llm_inference_url' field instead.",
1247
+ "mutation_prompt": "Use [prompt_learning.gepa.mutation] section with 'prompt' field instead.",
1248
+ "initial_population_size": "Use [prompt_learning.gepa.population] section with 'initial_size' field instead.",
1249
+ "num_generations": "Use [prompt_learning.gepa.population] section with 'num_generations' field instead.",
1250
+ "children_per_generation": "Use [prompt_learning.gepa.population] section with 'children_per_generation' field instead.",
1251
+ "crossover_rate": "Use [prompt_learning.gepa.population] section with 'crossover_rate' field instead.",
1252
+ "selection_pressure": "Use [prompt_learning.gepa.population] section with 'selection_pressure' field instead.",
1253
+ "patience_generations": "Use [prompt_learning.gepa.population] section with 'patience_generations' field instead.",
1254
+ "archive_size": "Use [prompt_learning.gepa.archive] section with 'size' field instead.",
1255
+ "pareto_set_size": "Use [prompt_learning.gepa.archive] section with 'pareto_set_size' field instead.",
1256
+ "pareto_eps": "Use [prompt_learning.gepa.archive] section with 'pareto_eps' field instead.",
1257
+ "feedback_fraction": "Use [prompt_learning.gepa.archive] section with 'feedback_fraction' field instead.",
1258
+ "max_token_limit": "Use [prompt_learning.gepa.token] section with 'max_limit' field instead.",
1259
+ "token_counting_model": "Use [prompt_learning.gepa.token] section with 'counting_model' field instead.",
1260
+ "enforce_pattern_token_limit": "Use [prompt_learning.gepa.token] section with 'enforce_pattern_limit' field instead.",
1261
+ "max_spend_usd": "Use [prompt_learning.gepa.token] section with 'max_spend_usd' field instead.",
1262
+ }
1263
+
1264
+ for field, message in flat_fields_map.items():
1265
+ if field in data and data[field] is not None:
1266
+ raise ValueError(f"Deprecated flat GEPA format field '{field}': {message}")
1267
+
1268
+ return data
1269
+
1270
+ def _get_rollout_budget(self) -> int | None:
1271
+ """Get rollout budget from nested or flat structure."""
1272
+ if self.rollout and self.rollout.budget is not None:
1273
+ return self.rollout.budget
1274
+ return self.rollout_budget
1275
+
1276
+ def _get_max_concurrent_rollouts(self) -> int:
1277
+ """Get max concurrent rollouts from nested or flat structure."""
1278
+ if self.rollout and self.rollout.max_concurrent is not None:
1279
+ return self.rollout.max_concurrent
1280
+ return self.max_concurrent_rollouts or 20
1281
+
1282
+ def _get_minibatch_size(self) -> int:
1283
+ """Get minibatch size from nested or flat structure."""
1284
+ if self.rollout and self.rollout.minibatch_size is not None:
1285
+ return self.rollout.minibatch_size
1286
+ return self.minibatch_size or 8
1287
+
1288
+ def _get_evaluation_seeds(self) -> list[int] | None:
1289
+ """Get evaluation seeds from nested or flat structure."""
1290
+ if self.evaluation and self.evaluation.seeds is not None:
1291
+ return self.evaluation.seeds
1292
+ return self.evaluation_seeds
1293
+
1294
+ def _get_validation_seeds(self) -> list[int] | None:
1295
+ """Get validation seeds from nested or flat structure."""
1296
+ if self.evaluation and self.evaluation.validation_seeds is not None:
1297
+ return self.evaluation.validation_seeds
1298
+ return self.validation_seeds
1299
+
1300
+ def _get_test_pool(self) -> list[int] | None:
1301
+ """Get test pool from nested or flat structure."""
1302
+ if self.evaluation and self.evaluation.test_pool is not None:
1303
+ return self.evaluation.test_pool
1304
+ return self.test_pool
1305
+
1306
+ def _get_mutation_rate(self) -> float:
1307
+ """Get mutation rate from nested or flat structure."""
1308
+ if self.mutation and self.mutation.rate is not None:
1309
+ return self.mutation.rate
1310
+ return self.mutation_rate or 0.3
1311
+
1312
+ def _get_mutation_llm_model(self) -> str | None:
1313
+ """Get mutation LLM model from nested or flat structure."""
1314
+ if self.mutation and self.mutation.llm_model is not None:
1315
+ return self.mutation.llm_model
1316
+ return self.mutation_llm_model
1317
+
1318
+ def _get_mutation_llm_provider(self) -> str:
1319
+ """Get mutation LLM provider from nested or flat structure."""
1320
+ if self.mutation and self.mutation.llm_provider is not None:
1321
+ return self.mutation.llm_provider
1322
+ return self.mutation_llm_provider or "groq"
1323
+
1324
+ def _get_mutation_llm_inference_url(self) -> str | None:
1325
+ """Get mutation LLM inference URL from nested or flat structure."""
1326
+ if self.mutation and self.mutation.llm_inference_url is not None:
1327
+ return self.mutation.llm_inference_url
1328
+ return self.mutation_llm_inference_url
1329
+
1330
+ def _get_mutation_prompt(self) -> str | None:
1331
+ """Get mutation prompt from nested or flat structure."""
1332
+ if self.mutation and self.mutation.prompt is not None:
1333
+ return self.mutation.prompt
1334
+ return self.mutation_prompt
1335
+
1336
+ def _get_initial_population_size(self) -> int:
1337
+ """Get initial population size from nested or flat structure."""
1338
+ if self.population and self.population.initial_size is not None:
1339
+ return self.population.initial_size
1340
+ return self.initial_population_size or 20
1341
+
1342
+ def _get_num_generations(self) -> int:
1343
+ """Get num generations from nested or flat structure."""
1344
+ if self.population and self.population.num_generations is not None:
1345
+ return self.population.num_generations
1346
+ return self.num_generations or 10
1347
+
1348
+ def _get_children_per_generation(self) -> int:
1349
+ """Get children per generation from nested or flat structure."""
1350
+ if self.population and self.population.children_per_generation is not None:
1351
+ return self.population.children_per_generation
1352
+ return self.children_per_generation or 5
1353
+
1354
+ def _get_crossover_rate(self) -> float:
1355
+ """Get crossover rate from nested or flat structure."""
1356
+ if self.population and self.population.crossover_rate is not None:
1357
+ return self.population.crossover_rate
1358
+ return self.crossover_rate or 0.5
1359
+
1360
+ def _get_selection_pressure(self) -> float:
1361
+ """Get selection pressure from nested or flat structure."""
1362
+ if self.population and self.population.selection_pressure is not None:
1363
+ return self.population.selection_pressure
1364
+ return self.selection_pressure or 1.0
1365
+
1366
+ def _get_patience_generations(self) -> int:
1367
+ """Get patience generations from nested or flat structure."""
1368
+ if self.population and self.population.patience_generations is not None:
1369
+ return self.population.patience_generations
1370
+ return self.patience_generations or 3
1371
+
1372
+ def _get_archive_size(self) -> int:
1373
+ """Get archive size from nested or flat structure."""
1374
+ if self.archive and self.archive.size is not None:
1375
+ return self.archive.size
1376
+ return self.archive_size or 64
1377
+
1378
+ def _get_pareto_set_size(self) -> int:
1379
+ """Get pareto set size from nested or flat structure."""
1380
+ if self.archive and self.archive.pareto_set_size is not None:
1381
+ return self.archive.pareto_set_size
1382
+ return self.pareto_set_size or 64
1383
+
1384
+ def _get_pareto_eps(self) -> float:
1385
+ """Get pareto eps from nested or flat structure."""
1386
+ if self.archive and self.archive.pareto_eps is not None:
1387
+ return self.archive.pareto_eps
1388
+ return self.pareto_eps or 1e-6
1389
+
1390
+ def _get_feedback_fraction(self) -> float:
1391
+ """Get feedback fraction from nested or flat structure."""
1392
+ if self.archive and self.archive.feedback_fraction is not None:
1393
+ return self.archive.feedback_fraction
1394
+ return self.feedback_fraction or 0.5
1395
+
1396
+ def _get_max_token_limit(self) -> int | None:
1397
+ """Get max token limit from nested or flat structure."""
1398
+ if self.token and self.token.max_limit is not None:
1399
+ return self.token.max_limit
1400
+ return self.max_token_limit
1401
+
1402
+ def _get_token_counting_model(self) -> str:
1403
+ """Get token counting model from nested or flat structure."""
1404
+ if self.token and self.token.counting_model is not None:
1405
+ return self.token.counting_model
1406
+ return self.token_counting_model or "gpt-4"
1407
+
1408
+ def _get_enforce_pattern_token_limit(self) -> bool:
1409
+ """Get enforce pattern token limit from nested or flat structure."""
1410
+ if self.token and self.token.enforce_pattern_limit is not None:
1411
+ return self.token.enforce_pattern_limit
1412
+ return self.enforce_pattern_token_limit if self.enforce_pattern_token_limit is not None else True
1413
+
1414
+ def _get_max_spend_usd(self) -> float | None:
1415
+ """Get max spend USD from nested or flat structure."""
1416
+ if self.token and self.token.max_spend_usd is not None:
1417
+ return self.token.max_spend_usd
1418
+ return self.max_spend_usd
1419
+
1420
@classmethod
def from_mapping(cls, data: Mapping[str, Any]) -> GEPAConfig:
    """Load GEPA config from dict/TOML, handling both nested and flat structures.

    Keys that name a nested section (``rollout``, ``evaluation``, ``mutation``,
    ``population``, ``archive``, ``token``, ``modules``, ``proxy_models``,
    ``adaptive_pool``, ``adaptive_batch``, ``verifier``) are separated from the
    remaining flat keys. Known sections are validated into their section
    models, then merged back over the flat keys before the final
    ``cls.model_validate`` call. The input mapping itself is not modified.

    Args:
        data: Raw GEPA mapping (e.g. a parsed TOML table).

    Returns:
        The validated config instance.

    Raises:
        ValueError: If the ``adaptive_pool`` or ``adaptive_batch`` section
            cannot be resolved.
    """
    nested_keys = frozenset(
        (
            "rollout",
            "evaluation",
            "mutation",
            "population",
            "archive",
            "token",
            "modules",
            "proxy_models",
            "adaptive_pool",
            "adaptive_batch",
            "verifier",
        )
    )
    nested_data: dict[str, Any] = {}
    flat_data: dict[str, Any] = {}
    for key, value in data.items():
        if key in nested_keys:
            nested_data[key] = value
        else:
            flat_data[key] = value

    # Sections that map one-to-one onto a section model.
    section_models = (
        ("rollout", GEPARolloutConfig),
        ("evaluation", GEPAEvaluationConfig),
        ("mutation", GEPAMutationConfig),
        ("population", GEPAPopulationConfig),
        ("archive", GEPAArchiveConfig),
        ("token", GEPATokenConfig),
    )
    for key, model in section_models:
        if key in nested_data:
            nested_data[key] = model.model_validate(nested_data[key])

    # Only dict entries of a modules list are validated; others pass through.
    modules_data = nested_data.get("modules")
    if isinstance(modules_data, list):
        nested_data["modules"] = [
            GEPAModuleConfig.model_validate(m) if isinstance(m, dict) else m
            for m in modules_data
        ]

    # proxy_models is only converted when given as a dict; if absent it is
    # left unset.
    proxy_models_data = nested_data.get("proxy_models")
    if isinstance(proxy_models_data, dict):
        nested_data["proxy_models"] = ProxyModelsConfig.model_validate(proxy_models_data)

    adaptive_pool_data = nested_data.get("adaptive_pool")
    if isinstance(adaptive_pool_data, dict):
        level = adaptive_pool_data.get("level")
        overrides = {k: v for k, v in adaptive_pool_data.items() if k != "level"}
        # dev_pool_size is the number of evaluation seeds when they are given
        # as a list. "evaluation" has normally been validated into a model
        # above, but a raw dict is still tolerated.
        eval_config = nested_data.get("evaluation")
        if isinstance(eval_config, dict):
            eval_seeds = eval_config.get("seeds")
        else:
            eval_seeds = getattr(eval_config, "seeds", None)
        dev_pool_size = len(eval_seeds) if isinstance(eval_seeds, list) else None
        try:
            nested_data["adaptive_pool"] = resolve_adaptive_pool_config(
                level=level,  # None is passed through to resolve_adaptive_pool_config
                overrides=overrides or None,
                dev_pool_size=dev_pool_size,
            )
        except Exception as exc:
            # Same error wrapping as the adaptive_batch section below.
            raise ValueError(f"Failed to resolve adaptive_pool config: {exc}") from exc

    adaptive_batch_data = nested_data.get("adaptive_batch")
    if isinstance(adaptive_batch_data, dict):
        level = adaptive_batch_data.get("level")
        overrides = {k: v for k, v in adaptive_batch_data.items() if k != "level"}
        try:
            nested_data["adaptive_batch"] = resolve_adaptive_batch_config(
                level=level,
                overrides=overrides or None,
            )
        except Exception as exc:
            # Re-raise with clearer context
            raise ValueError(f"Failed to resolve adaptive_batch config: {exc}") from exc

    # Nested sections win over flat keys of the same name.
    merged_data = {**flat_data, **nested_data}
    return cls.model_validate(merged_data)
1501
+
1502
+
1503
class PromptLearningConfig(ExtraModel):
    """Root configuration for Prompt Learning jobs (GEPA and MIPRO).

    This is the top-level config loaded from a TOML file. Use `PromptLearningConfig.from_path()`
    to load from a file, or `PromptLearningConfig.from_mapping()` to load from a dict.

    Prompt learning optimizes prompts for a given task app and dataset using one of
    two algorithms:
    - **GEPA**: Genetic Evolution of Prompt Architectures - evolutionary optimization
      with crossover, mutation, and selection across generations
    - **MIPRO**: Meta-learning with bootstrap phase and Tree-structured Parzen Estimator
      (TPE) optimization for hyperparameter tuning

    Example:
        ```python
        from synth_ai.sdk.api.train.configs.prompt_learning import PromptLearningConfig

        # Load from file
        config = PromptLearningConfig.from_path("prompt_learning.toml")

        # Or from dict
        config = PromptLearningConfig.from_mapping({
            "algorithm": "gepa",
            "task_app_url": "https://your-tunnel.trycloudflare.com",
            "gepa": {
                "env_name": "banking77",
                "policy": {"model": "gpt-4o-mini", "provider": "openai"},
                "generations": 5,
                "population_size": 4,
            },
        })
        ```

    Attributes:
        algorithm: Optimization algorithm - "gepa" or "mipro".
        task_app_url: URL of your task app (typically a Cloudflare tunnel URL).
        task_app_api_key: API key for authenticating with the task app.
            Defaults to ENVIRONMENT_API_KEY env var.
        task_app_id: Optional identifier for the task app (for logging).
        initial_prompt: Initial prompt pattern to seed optimization.
        policy: Policy (LLM) configuration for rollouts.
        mipro: MIPRO-specific configuration (if algorithm="mipro").
        gepa: GEPA-specific configuration (if algorithm="gepa").
        verifier: Optional verifier configuration for LLM-based reward scoring.
        proxy_models: Proxy models configuration for cost-effective evaluation.
        env_config: Additional environment configuration passed to task app.
        free_tier: Enable free tier mode with cost-effective OSS models.

    Returns:
        After training completes, you receive a result dict:
        ```python
        {
            "status": "succeeded",
            "best_score": 0.92,
            "best_snapshot_id": "snap_abc123",
            "final_prompt": "You are a helpful assistant...",
            "metrics": {
                "generations_completed": 5,
                "total_rollouts": 200,
                "improvement": 0.15,
            },
        }
        ```

    Events:
        During training, you'll receive streaming events:
        - `prompt_learning.created` - Job created
        - `prompt_learning.running` - Training started
        - `prompt_learning.generation.started` - New generation began
        - `prompt_learning.candidate.evaluated` - Candidate prompt evaluated
        - `prompt_learning.generation.completed` - Generation finished with best score
        - `prompt_learning.frontier.updated` - Pareto frontier updated (new best found)
        - `prompt_learning.succeeded` / `prompt_learning.failed` - Terminal states

    See Also:
        - Training reference: /training/gepa, /training/mipro
        - Quickstart: /quickstart/prompt-optimization-gepa
    """

    algorithm: str  # "mipro" or "gepa"
    task_app_url: str
    task_app_api_key: str | None = None
    task_app_id: str | None = None
    initial_prompt: PromptPatternConfig | None = None
    policy: PromptLearningPolicyConfig | None = None
    mipro: MIPROConfig | None = None
    gepa: GEPAConfig | None = None
    verifier: PromptLearningVerifierConfig | dict[str, Any] | None = None
    proxy_models: ProxyModelsConfig | dict[str, Any] | None = None  # Proxy models config (can be at top-level or algorithm-specific)
    env_config: dict[str, Any] | None = None

    # Free tier configuration
    free_tier: bool = Field(
        default=False,
        description=(
            "Enable free tier mode. Uses cost-effective OSS models for policy and proposer. "
            "Requires proposer_effort='LOW' or 'MEDIUM' (not 'HIGH'). "
            "Counts against your org's free tier limits. When limits are exceeded, "
            "remove this flag to run as paid job."
        ),
    )

    @staticmethod
    def _read_proposer_effort(section: Any) -> Any:
        """Read ``proposer_effort`` from a raw dict or an already-built config object."""
        if isinstance(section, dict):
            return section.get("proposer_effort")
        # from_mapping() converts the gepa section into a config object before
        # validation runs, so attribute access is needed as well.
        return getattr(section, "proposer_effort", None)

    @model_validator(mode="before")
    @classmethod
    def _validate_free_tier_config(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Validate that free tier jobs use eligible proposer_effort levels.

        ``proposer_effort`` is read from the ``gepa`` section first, then from
        ``mipro``; a missing value is treated as ``"LOW"``. Eligible values
        (case-insensitive) are ``LOW_CONTEXT``, ``LOW`` and ``MEDIUM``.

        Raises:
            ValueError: If free tier is enabled and the effort is not eligible.
        """
        if not isinstance(data, dict):
            return data

        # Check if free tier is enabled (string values are accepted as written
        # in configs: "true", "1", "yes", "on").
        free_tier = data.get("free_tier", False)
        if isinstance(free_tier, str):
            free_tier = free_tier.lower() in ("true", "1", "yes", "on")
        if not free_tier:
            return data

        # Get proposer_effort from GEPA or MIPRO config
        proposer_effort = cls._read_proposer_effort(data.get("gepa"))
        if proposer_effort is None:
            proposer_effort = cls._read_proposer_effort(data.get("mipro"))

        # Default to "LOW" if not specified (which is free tier eligible)
        if proposer_effort is None:
            proposer_effort = "LOW"

        # Validate proposer_effort is eligible for free tier
        free_tier_efforts = {"LOW_CONTEXT", "LOW", "MEDIUM"}
        effort_upper = str(proposer_effort).upper()
        if effort_upper not in free_tier_efforts:
            raise ValueError(
                f"Free tier requires proposer_effort to be one of: {', '.join(sorted(free_tier_efforts))}. "
                f"Got: '{proposer_effort}'. "
                f"Either change proposer_effort to 'LOW' or 'MEDIUM', or remove 'free_tier = true' from your config."
            )

        return data

    @model_validator(mode="before")
    @classmethod
    def _check_deprecated_fields(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Remove deprecated fields that are no longer used.

        These fields are silently removed to maintain backwards compatibility
        with older configs while the CLI validation module warns about them.
        A filtered copy is returned, so the dict passed in is left untouched.
        """
        if not isinstance(data, dict):
            return data

        # Silently remove deprecated fields (don't raise errors)
        deprecated_fields = {"display", "results_folder", "env_file_path"}
        return {key: value for key, value in data.items() if key not in deprecated_fields}

    def to_dict(self) -> dict[str, Any]:
        """Convert config to dictionary for API payload.

        ``None`` values are dropped, and the result is wrapped in a
        ``{"prompt_learning": ...}`` envelope unless such a key already exists.
        """
        result = self.model_dump(mode="python", exclude_none=True)
        # Ensure prompt_learning section wraps everything
        if "prompt_learning" not in result:
            result = {"prompt_learning": result}
        return result

    @classmethod
    def from_mapping(cls, data: Mapping[str, Any]) -> PromptLearningConfig:
        """Load prompt learning config from dict/TOML mapping.

        Accepts either a mapping with a ``prompt_learning`` section or a flat
        mapping whose top level is the prompt learning config. The input
        mapping and its nested ``prompt_learning`` / ``gepa`` / ``mipro``
        dicts are copied before being changed, so the caller's data is not
        mutated.

        Raises:
            ValueError: If ``mipro.adaptive_pool`` cannot be resolved.
        """
        # Remove deprecated fields at top level (silently for backwards compatibility)
        # The CLI validation module will warn about these
        deprecated_top_level = {"display", "results_folder", "env_file_path"}
        top_level = {key: value for key, value in data.items() if key not in deprecated_top_level}

        # Handle both [prompt_learning] section and flat structure. Copy the
        # section so the assignments below never reach the caller's dict.
        section = top_level.get("prompt_learning")
        pl_data = dict(section) if section else dict(top_level)

        # Handle proxy_models at top-level FIRST (takes precedence over algorithm-specific)
        # Default: None (proxy models disabled unless explicitly configured)
        top_level_proxy_models = None
        if isinstance(pl_data.get("proxy_models"), dict):
            top_level_proxy_models = ProxyModelsConfig.model_validate(pl_data["proxy_models"])
            pl_data["proxy_models"] = top_level_proxy_models

        # Handle gepa config specially to support nested structure
        if isinstance(pl_data.get("gepa"), dict):
            gepa_data = dict(pl_data["gepa"])
            # Top-level proxy_models takes precedence over gepa-specific ones.
            if top_level_proxy_models is not None:
                gepa_data.pop("proxy_models", None)
            pl_data["gepa"] = GEPAConfig.from_mapping(gepa_data)

        # Handle mipro config - check for adaptive_pool
        if isinstance(pl_data.get("mipro"), dict):
            mipro_data = dict(pl_data["mipro"])
            pl_data["mipro"] = mipro_data
            # Top-level proxy_models takes precedence over mipro-specific ones.
            if top_level_proxy_models is not None:
                mipro_data.pop("proxy_models", None)

            # These fields can be at top-level [prompt_learning] or nested
            # [prompt_learning.mipro]; the nested value wins when both exist.
            for key in ("bootstrap_train_seeds", "online_pool", "test_pool", "reference_pool"):
                if key not in mipro_data and key in pl_data:
                    mipro_data[key] = pl_data[key]

            # Handle adaptive_pool in mipro config (only if specified, defaults to None)
            adaptive_pool_data = mipro_data.get("adaptive_pool")
            if isinstance(adaptive_pool_data, dict):
                level = adaptive_pool_data.get("level")
                overrides = {k: v for k, v in adaptive_pool_data.items() if k != "level"}
                # Get dev_pool_size from online_pool, falling back to seeds.online
                online_pool = mipro_data.get("online_pool")
                if not online_pool:
                    seeds = mipro_data.get("seeds")
                    if isinstance(seeds, dict):
                        online_pool = seeds.get("online", [])
                    else:
                        # Tolerate a missing or non-dict seeds value instead of
                        # failing on ``.get``.
                        online_pool = getattr(seeds, "online", [])
                dev_pool_size = len(online_pool) if isinstance(online_pool, list) else None
                try:
                    mipro_data["adaptive_pool"] = resolve_adaptive_pool_config(
                        level=level,  # None is passed through to resolve_adaptive_pool_config
                        overrides=overrides or None,
                        dev_pool_size=dev_pool_size,
                    )
                except Exception as exc:
                    # Re-raise with clearer context
                    raise ValueError(f"Failed to resolve mipro.adaptive_pool config: {exc}") from exc

            # Handle proxy_models in mipro config (only if specified, defaults to None)
            if isinstance(mipro_data.get("proxy_models"), dict):
                mipro_data["proxy_models"] = ProxyModelsConfig.model_validate(mipro_data["proxy_models"])

        if isinstance(pl_data.get("verifier"), dict):
            pl_data["verifier"] = PromptLearningVerifierConfig.model_validate(pl_data["verifier"])

        return cls.model_validate(pl_data)

    @classmethod
    def from_path(cls, path: Path) -> PromptLearningConfig:
        """Load prompt learning config from TOML file."""
        content = load_toml(path)
        return cls.from_mapping(content)
1772
+
1773
+
1774
# Public API of this module; entries keep their original order.
__all__ = [
    # GEPA
    "GEPAConfig", "GEPAModuleConfig", "GEPARolloutConfig",
    "GEPAEvaluationConfig", "GEPAMutationConfig", "GEPAPopulationConfig",
    "GEPAArchiveConfig", "GEPATokenConfig", "GEPAAdaptiveBatchConfig",
    # MIPRO
    "MIPROConfig", "MIPROMetaConfig", "MIPROModuleConfig",
    "MIPROStageConfig", "MIPROSeedConfig",
    # Prompt learning root, prompt patterns and shared configs
    "MessagePatternConfig", "PromptLearningConfig",
    "PromptLearningPolicyConfig", "PromptPatternConfig",
    "PromptLearningVerifierConfig", "ProxyModelsConfig",
    # Adaptive pool / batch
    "AdaptivePoolConfig", "AdaptiveCurriculumLevel", "AdaptiveBatchLevel",
    "resolve_adaptive_pool_config", "resolve_adaptive_batch_config",
]