synth-ai 0.2.8.dev2__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (740)
  1. synth_ai/__init__.py +44 -24
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +103 -48
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/demo/__init__.py +3 -0
  25. synth_ai/cli/commands/demo/core.py +153 -0
  26. synth_ai/cli/commands/eval/__init__.py +10 -0
  27. synth_ai/cli/commands/eval/config.py +338 -0
  28. synth_ai/cli/commands/eval/core.py +256 -0
  29. synth_ai/cli/commands/eval/runner.py +704 -0
  30. synth_ai/cli/commands/eval/validation.py +60 -0
  31. synth_ai/cli/commands/filter/__init__.py +12 -0
  32. synth_ai/cli/commands/filter/core.py +424 -0
  33. synth_ai/cli/commands/filter/errors.py +55 -0
  34. synth_ai/cli/commands/filter/validation.py +77 -0
  35. synth_ai/cli/commands/help/__init__.py +185 -0
  36. synth_ai/cli/commands/help/core.py +72 -0
  37. synth_ai/cli/commands/scan/__init__.py +19 -0
  38. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  39. synth_ai/cli/commands/scan/core.py +344 -0
  40. synth_ai/cli/commands/scan/health_checker.py +242 -0
  41. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  42. synth_ai/cli/commands/scan/models.py +83 -0
  43. synth_ai/cli/commands/smoke/__init__.py +7 -0
  44. synth_ai/cli/commands/smoke/core.py +1428 -0
  45. synth_ai/cli/commands/status/__init__.py +3 -0
  46. synth_ai/cli/commands/status/client.py +91 -0
  47. synth_ai/cli/commands/status/config.py +12 -0
  48. synth_ai/cli/commands/status/errors.py +11 -0
  49. synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
  50. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  51. synth_ai/cli/commands/status/subcommands/files.py +34 -0
  52. synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
  53. synth_ai/cli/commands/status/subcommands/models.py +35 -0
  54. synth_ai/cli/commands/status/subcommands/runs.py +34 -0
  55. synth_ai/cli/commands/status/subcommands/session.py +77 -0
  56. synth_ai/cli/commands/status/subcommands/summary.py +39 -0
  57. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  58. synth_ai/cli/commands/status/utils.py +23 -0
  59. synth_ai/cli/commands/train/__init__.py +53 -0
  60. synth_ai/cli/commands/train/core.py +22 -0
  61. synth_ai/cli/commands/train/errors.py +117 -0
  62. synth_ai/cli/commands/train/judge_schemas.py +201 -0
  63. synth_ai/cli/commands/train/judge_validation.py +305 -0
  64. synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
  65. synth_ai/cli/commands/train/validation.py +392 -0
  66. synth_ai/cli/demo_apps/__init__.py +10 -0
  67. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  68. synth_ai/{demos → cli/demo_apps}/core/cli.py +783 -441
  69. synth_ai/cli/demo_apps/crafter/__init__.py +1 -0
  70. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  71. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  72. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  73. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  74. synth_ai/cli/demo_apps/demo_task_apps/__init__.py +7 -0
  75. synth_ai/{demos → cli/demo_apps}/demo_task_apps/core.py +75 -37
  76. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  79. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  80. synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +1 -2
  81. synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +2 -1
  82. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
  83. synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +3 -6
  84. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
  85. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  86. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  87. synth_ai/cli/demo_apps/math/_common.py +16 -0
  88. synth_ai/cli/demo_apps/math/app.py +38 -0
  89. synth_ai/cli/demo_apps/math/config.toml +75 -0
  90. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  91. synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
  92. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  93. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  94. synth_ai/cli/demo_apps/mipro/task_app.py +922 -0
  95. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  96. synth_ai/cli/demos/__init__.py +12 -0
  97. synth_ai/cli/demos/demo.py +32 -0
  98. synth_ai/cli/demos/rl_demo.py +254 -0
  99. synth_ai/cli/deploy.py +216 -0
  100. synth_ai/cli/infra/__init__.py +14 -0
  101. synth_ai/cli/{balance.py → infra/balance.py} +16 -4
  102. synth_ai/cli/infra/mcp.py +35 -0
  103. synth_ai/cli/infra/modal_app.py +36 -0
  104. synth_ai/cli/infra/setup.py +69 -0
  105. synth_ai/cli/infra/status.py +16 -0
  106. synth_ai/cli/infra/turso.py +77 -0
  107. synth_ai/cli/lib/__init__.py +10 -0
  108. synth_ai/cli/lib/agents.py +76 -0
  109. synth_ai/cli/lib/apps/modal_app.py +101 -0
  110. synth_ai/cli/lib/apps/task_app.py +642 -0
  111. synth_ai/cli/lib/bin.py +39 -0
  112. synth_ai/cli/lib/env.py +375 -0
  113. synth_ai/cli/lib/errors.py +85 -0
  114. synth_ai/cli/lib/modal.py +315 -0
  115. synth_ai/cli/lib/plotting.py +126 -0
  116. synth_ai/cli/lib/prompt_args.py +39 -0
  117. synth_ai/cli/lib/prompts.py +284 -0
  118. synth_ai/cli/lib/sqld.py +122 -0
  119. synth_ai/cli/lib/task_app_discovery.py +884 -0
  120. synth_ai/cli/lib/task_app_env.py +295 -0
  121. synth_ai/cli/lib/train_cfgs.py +300 -0
  122. synth_ai/cli/lib/tunnel_records.py +207 -0
  123. synth_ai/cli/local/__init__.py +14 -0
  124. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  125. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  126. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  127. synth_ai/cli/local/experiment_queue/config.py +128 -0
  128. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  129. synth_ai/cli/local/experiment_queue/database.py +175 -0
  130. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  131. synth_ai/cli/local/experiment_queue/models.py +231 -0
  132. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  133. synth_ai/cli/local/experiment_queue/results.py +373 -0
  134. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  135. synth_ai/cli/local/experiment_queue/service.py +344 -0
  136. synth_ai/cli/local/experiment_queue/status.py +372 -0
  137. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  138. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  139. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  140. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  141. synth_ai/cli/local/session/__init__.py +92 -0
  142. synth_ai/cli/local/session/client.py +383 -0
  143. synth_ai/cli/local/session/constants.py +63 -0
  144. synth_ai/cli/local/session/exceptions.py +105 -0
  145. synth_ai/cli/local/session/manager.py +139 -0
  146. synth_ai/cli/local/session/models.py +89 -0
  147. synth_ai/cli/local/session/query.py +110 -0
  148. synth_ai/cli/root.py +150 -108
  149. synth_ai/cli/task_apps/__init__.py +37 -0
  150. synth_ai/cli/task_apps/commands.py +3145 -0
  151. synth_ai/cli/task_apps/deploy.py +7 -0
  152. synth_ai/cli/task_apps/list.py +26 -0
  153. synth_ai/cli/task_apps/main.py +36 -0
  154. synth_ai/cli/task_apps/modal_serve.py +11 -0
  155. synth_ai/cli/task_apps/serve.py +11 -0
  156. synth_ai/cli/training/__init__.py +8 -0
  157. synth_ai/cli/training/train.py +5 -0
  158. synth_ai/cli/training/train_cfg.py +34 -0
  159. synth_ai/cli/{watch.py → training/watch.py} +13 -18
  160. synth_ai/cli/turso.py +52 -0
  161. synth_ai/cli/utils/__init__.py +8 -0
  162. synth_ai/cli/utils/experiments.py +235 -0
  163. synth_ai/cli/utils/queue.py +504 -0
  164. synth_ai/cli/{recent.py → utils/recent.py} +13 -7
  165. synth_ai/cli/{traces.py → utils/traces.py} +9 -5
  166. synth_ai/contracts/__init__.py +67 -0
  167. synth_ai/core/__init__.py +100 -0
  168. synth_ai/core/_utils/__init__.py +54 -0
  169. synth_ai/core/_utils/base_url.py +10 -0
  170. synth_ai/core/_utils/http.py +10 -0
  171. synth_ai/core/_utils/prompts.py +14 -0
  172. synth_ai/core/_utils/task_app_state.py +12 -0
  173. synth_ai/core/_utils/user_config.py +10 -0
  174. synth_ai/core/apps/common.py +116 -0
  175. synth_ai/core/auth.py +95 -0
  176. synth_ai/core/cfgs.py +240 -0
  177. synth_ai/core/config/__init__.py +16 -0
  178. synth_ai/core/config/base.py +168 -0
  179. synth_ai/core/config/resolver.py +89 -0
  180. synth_ai/core/env.py +231 -0
  181. synth_ai/core/errors.py +126 -0
  182. synth_ai/core/http.py +230 -0
  183. synth_ai/core/integrations/__init__.py +11 -0
  184. synth_ai/core/integrations/cloudflare.py +1710 -0
  185. synth_ai/core/integrations/mcp/__init__.py +6 -0
  186. synth_ai/core/integrations/mcp/__main__.py +8 -0
  187. synth_ai/core/integrations/mcp/claude.py +36 -0
  188. synth_ai/core/integrations/mcp/main.py +254 -0
  189. synth_ai/core/integrations/mcp/setup.py +100 -0
  190. synth_ai/core/integrations/modal.py +277 -0
  191. synth_ai/core/json.py +72 -0
  192. synth_ai/core/log_filter.py +99 -0
  193. synth_ai/core/logging.py +82 -0
  194. synth_ai/core/paths.py +107 -0
  195. synth_ai/core/pricing.py +109 -0
  196. synth_ai/core/process.py +233 -0
  197. synth_ai/core/ssl.py +25 -0
  198. synth_ai/core/storage/__init__.py +71 -0
  199. synth_ai/core/task_app_state.py +318 -0
  200. synth_ai/core/telemetry.py +282 -0
  201. synth_ai/{tracing_v3 → core/tracing_v3}/__init__.py +5 -1
  202. synth_ai/{tracing_v3 → core/tracing_v3}/abstractions.py +21 -4
  203. synth_ai/core/tracing_v3/config.py +229 -0
  204. synth_ai/core/tracing_v3/constants.py +21 -0
  205. synth_ai/{tracing_v3 → core/tracing_v3}/db_config.py +42 -29
  206. synth_ai/{tracing_v3 → core/tracing_v3}/decorators.py +80 -45
  207. synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +15 -9
  208. synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +6 -4
  209. synth_ai/{tracing_v3 → core/tracing_v3}/llm_call_record_helpers.py +161 -61
  210. synth_ai/{tracing_v3 → core/tracing_v3}/migration_helper.py +1 -2
  211. synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +12 -7
  212. synth_ai/core/tracing_v3/serialization.py +130 -0
  213. synth_ai/{tracing_v3 → core/tracing_v3}/session_tracer.py +88 -21
  214. synth_ai/{tracing_v3 → core/tracing_v3}/storage/base.py +99 -12
  215. synth_ai/core/tracing_v3/storage/config.py +109 -0
  216. synth_ai/{tracing_v3 → core/tracing_v3}/storage/factory.py +11 -9
  217. synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +15 -11
  218. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  219. synth_ai/core/tracing_v3/turso/__init__.py +12 -0
  220. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  221. synth_ai/{tracing_v3 → core/tracing_v3}/turso/models.py +7 -3
  222. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  223. synth_ai/{tracing_v3 → core/tracing_v3}/utils.py +5 -4
  224. synth_ai/core/urls.py +18 -0
  225. synth_ai/core/user_config.py +137 -0
  226. synth_ai/core/uvicorn.py +222 -0
  227. synth_ai/data/__init__.py +83 -0
  228. synth_ai/data/enums.py +123 -0
  229. synth_ai/data/rewards.py +152 -0
  230. synth_ai/data/traces.py +35 -0
  231. synth_ai/products/__init__.py +6 -0
  232. synth_ai/products/graph_evolve/__init__.py +46 -0
  233. synth_ai/products/graph_evolve/client.py +226 -0
  234. synth_ai/products/graph_evolve/config.py +591 -0
  235. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  236. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  237. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  238. synth_ai/products/graph_evolve/run.py +222 -0
  239. synth_ai/products/graph_gepa/__init__.py +23 -0
  240. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  241. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  242. synth_ai/sdk/__init__.py +123 -0
  243. synth_ai/sdk/api/__init__.py +1 -0
  244. synth_ai/sdk/api/models/supported.py +514 -0
  245. synth_ai/sdk/api/research_agent/__init__.py +296 -0
  246. synth_ai/sdk/api/train/__init__.py +85 -0
  247. synth_ai/sdk/api/train/builders.py +895 -0
  248. synth_ai/sdk/api/train/cli.py +2199 -0
  249. synth_ai/sdk/api/train/config_finder.py +267 -0
  250. synth_ai/sdk/api/train/configs/__init__.py +65 -0
  251. synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
  252. synth_ai/sdk/api/train/configs/rl.py +187 -0
  253. synth_ai/sdk/api/train/configs/sft.py +99 -0
  254. synth_ai/sdk/api/train/configs/shared.py +81 -0
  255. synth_ai/sdk/api/train/context_learning.py +312 -0
  256. synth_ai/sdk/api/train/env_resolver.py +418 -0
  257. synth_ai/sdk/api/train/graph_validators.py +216 -0
  258. synth_ai/sdk/api/train/graphgen.py +984 -0
  259. synth_ai/sdk/api/train/graphgen_models.py +823 -0
  260. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  261. synth_ai/sdk/api/train/local_api.py +10 -0
  262. synth_ai/sdk/api/train/pollers.py +124 -0
  263. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  264. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  265. synth_ai/sdk/api/train/progress/events.py +326 -0
  266. synth_ai/sdk/api/train/progress/results.py +428 -0
  267. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  268. synth_ai/sdk/api/train/prompt_learning.py +469 -0
  269. synth_ai/sdk/api/train/rl.py +441 -0
  270. synth_ai/sdk/api/train/sft.py +396 -0
  271. synth_ai/sdk/api/train/summary.py +522 -0
  272. synth_ai/sdk/api/train/supported_algos.py +147 -0
  273. synth_ai/sdk/api/train/task_app.py +351 -0
  274. synth_ai/sdk/api/train/utils.py +279 -0
  275. synth_ai/sdk/api/train/validators.py +2424 -0
  276. synth_ai/sdk/graphs/__init__.py +15 -0
  277. synth_ai/sdk/graphs/completions.py +570 -0
  278. synth_ai/{inference → sdk/inference}/__init__.py +0 -1
  279. synth_ai/sdk/inference/client.py +128 -0
  280. synth_ai/sdk/jobs/__init__.py +16 -0
  281. synth_ai/sdk/jobs/client.py +371 -0
  282. synth_ai/sdk/judging/__init__.py +14 -0
  283. synth_ai/sdk/judging/base.py +24 -0
  284. synth_ai/sdk/judging/client.py +40 -0
  285. synth_ai/sdk/judging/schemas.py +222 -0
  286. synth_ai/sdk/judging/types.py +42 -0
  287. synth_ai/sdk/learning/__init__.py +99 -0
  288. synth_ai/sdk/learning/algorithms.py +14 -0
  289. synth_ai/{learning → sdk/learning}/client.py +121 -30
  290. synth_ai/sdk/learning/config.py +5 -0
  291. synth_ai/{learning → sdk/learning}/constants.py +0 -2
  292. synth_ai/sdk/learning/context_learning_client.py +531 -0
  293. synth_ai/sdk/learning/context_learning_types.py +292 -0
  294. synth_ai/sdk/learning/ft_client.py +7 -0
  295. synth_ai/{learning → sdk/learning}/health.py +15 -9
  296. synth_ai/{learning → sdk/learning}/jobs.py +44 -47
  297. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  298. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  299. synth_ai/sdk/learning/prompt_learning_types.py +186 -0
  300. synth_ai/{rl → sdk/learning/rl}/__init__.py +13 -8
  301. synth_ai/{learning/rl_client.py → sdk/learning/rl/client.py} +89 -77
  302. synth_ai/sdk/learning/rl/config.py +31 -0
  303. synth_ai/{rl → sdk/learning/rl}/contracts.py +5 -14
  304. synth_ai/{rl → sdk/learning/rl}/env_keys.py +45 -16
  305. synth_ai/sdk/learning/rl/secrets.py +13 -0
  306. synth_ai/sdk/learning/rl_client.py +5 -0
  307. synth_ai/sdk/learning/sft/__init__.py +29 -0
  308. synth_ai/sdk/learning/sft/client.py +95 -0
  309. synth_ai/sdk/learning/sft/config.py +270 -0
  310. synth_ai/sdk/learning/sft/data.py +698 -0
  311. synth_ai/sdk/learning/sse.py +57 -0
  312. synth_ai/sdk/learning/validators.py +52 -0
  313. synth_ai/sdk/localapi/__init__.py +40 -0
  314. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  315. synth_ai/sdk/localapi/client.py +10 -0
  316. synth_ai/sdk/localapi/contracts.py +10 -0
  317. synth_ai/sdk/localapi/helpers.py +519 -0
  318. synth_ai/sdk/localapi/rollouts.py +87 -0
  319. synth_ai/sdk/localapi/server.py +29 -0
  320. synth_ai/sdk/localapi/template.py +70 -0
  321. synth_ai/sdk/streaming/__init__.py +35 -0
  322. synth_ai/sdk/streaming/config.py +94 -0
  323. synth_ai/sdk/streaming/handlers.py +1997 -0
  324. synth_ai/sdk/streaming/streamer.py +713 -0
  325. synth_ai/sdk/streaming/types.py +112 -0
  326. synth_ai/sdk/task/__init__.py +164 -0
  327. synth_ai/sdk/task/apps/__init__.py +169 -0
  328. synth_ai/sdk/task/auth.py +165 -0
  329. synth_ai/sdk/task/client.py +175 -0
  330. synth_ai/sdk/task/config.py +257 -0
  331. synth_ai/sdk/task/contracts.py +219 -0
  332. synth_ai/sdk/task/datasets.py +108 -0
  333. synth_ai/sdk/task/errors.py +50 -0
  334. synth_ai/sdk/task/health.py +34 -0
  335. synth_ai/sdk/task/in_process.py +1190 -0
  336. synth_ai/sdk/task/in_process_runner.py +314 -0
  337. synth_ai/sdk/task/inference_api.py +299 -0
  338. synth_ai/sdk/task/json.py +111 -0
  339. synth_ai/sdk/task/proxy.py +287 -0
  340. synth_ai/sdk/task/rubrics/__init__.py +55 -0
  341. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  342. synth_ai/sdk/task/rubrics/models.py +57 -0
  343. synth_ai/sdk/task/rubrics/scoring.py +116 -0
  344. synth_ai/sdk/task/rubrics/strict.py +149 -0
  345. synth_ai/sdk/task/rubrics.py +219 -0
  346. synth_ai/sdk/task/server.py +631 -0
  347. synth_ai/sdk/task/trace_correlation_helpers.py +539 -0
  348. synth_ai/sdk/task/tracing_utils.py +95 -0
  349. synth_ai/sdk/task/validators.py +441 -0
  350. synth_ai/sdk/task/vendors.py +59 -0
  351. synth_ai/sdk/training/__init__.py +102 -0
  352. synth_ai/sdk/tunnels/__init__.py +83 -0
  353. synth_ai/sdk/tunnels/cleanup.py +83 -0
  354. synth_ai/sdk/tunnels/ports.py +120 -0
  355. synth_ai/utils/__init__.py +213 -0
  356. synth_ai-0.4.3.dist-info/METADATA +262 -0
  357. synth_ai-0.4.3.dist-info/RECORD +370 -0
  358. {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/entry_points.txt +0 -1
  359. synth_ai/cli/calc.py +0 -69
  360. synth_ai/cli/demo.py +0 -144
  361. synth_ai/cli/legacy_root_backup.py +0 -470
  362. synth_ai/cli/man.py +0 -106
  363. synth_ai/cli/rl_demo.py +0 -202
  364. synth_ai/cli/status.py +0 -133
  365. synth_ai/config/base_url.py +0 -107
  366. synth_ai/core/experiment.py +0 -15
  367. synth_ai/core/system.py +0 -15
  368. synth_ai/demos/core/__init__.py +0 -1
  369. synth_ai/demos/demo_task_apps/__init__.py +0 -1
  370. synth_ai/demos/demo_task_apps/math/config.toml +0 -129
  371. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  372. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -415
  373. synth_ai/environments/__init__.py +0 -31
  374. synth_ai/environments/environment/__init__.py +0 -1
  375. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  376. synth_ai/environments/environment/artifacts/base.py +0 -52
  377. synth_ai/environments/environment/core.py +0 -67
  378. synth_ai/environments/environment/db/__init__.py +0 -1
  379. synth_ai/environments/environment/db/sqlite.py +0 -45
  380. synth_ai/environments/environment/registry.py +0 -233
  381. synth_ai/environments/environment/resources/sqlite.py +0 -45
  382. synth_ai/environments/environment/results.py +0 -1
  383. synth_ai/environments/environment/rewards/__init__.py +0 -1
  384. synth_ai/environments/environment/rewards/core.py +0 -29
  385. synth_ai/environments/environment/shared_engine.py +0 -26
  386. synth_ai/environments/environment/tools/__init__.py +0 -200
  387. synth_ai/environments/examples/__init__.py +0 -1
  388. synth_ai/environments/examples/bandit/__init__.py +0 -33
  389. synth_ai/environments/examples/bandit/engine.py +0 -294
  390. synth_ai/environments/examples/bandit/environment.py +0 -194
  391. synth_ai/environments/examples/bandit/taskset.py +0 -200
  392. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  393. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  394. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  395. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  396. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  397. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  398. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  399. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  400. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  401. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  402. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  403. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  404. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  405. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  406. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  407. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  408. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  409. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  410. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  411. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  412. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  413. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  414. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  415. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  416. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  417. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  418. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  419. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  420. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  421. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  422. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  423. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  424. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  425. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  426. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  427. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  428. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  429. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  430. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  431. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  432. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  433. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  434. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  435. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  436. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  437. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  438. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  439. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  440. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  441. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  442. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  443. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  444. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  445. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  446. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  447. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  448. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  449. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  450. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  451. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  452. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  453. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  454. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  455. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  456. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  457. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  458. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  459. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  460. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  461. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  462. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  463. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  464. synth_ai/environments/examples/crafter_classic/environment.py +0 -404
  465. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  466. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  467. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  468. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  469. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  470. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  471. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  472. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  473. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  474. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  475. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  476. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  477. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  478. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  479. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  480. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  481. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  482. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  483. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  484. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  485. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  486. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  487. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  488. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  489. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  490. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  491. synth_ai/environments/examples/enron/engine.py +0 -295
  492. synth_ai/environments/examples/enron/environment.py +0 -166
  493. synth_ai/environments/examples/enron/taskset.py +0 -112
  494. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  495. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  496. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  497. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  498. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  499. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  500. synth_ai/environments/examples/minigrid/engine.py +0 -589
  501. synth_ai/environments/examples/minigrid/environment.py +0 -274
  502. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  503. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  504. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  505. synth_ai/environments/examples/nethack/__init__.py +0 -7
  506. synth_ai/environments/examples/nethack/achievements.py +0 -337
  507. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  508. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  509. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  510. synth_ai/environments/examples/nethack/engine.py +0 -739
  511. synth_ai/environments/examples/nethack/environment.py +0 -256
  512. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  513. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  514. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  515. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  516. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  517. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  518. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  519. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  520. synth_ai/environments/examples/nethack/taskset.py +0 -323
  521. synth_ai/environments/examples/red/__init__.py +0 -7
  522. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  523. synth_ai/environments/examples/red/config_logging.py +0 -110
  524. synth_ai/environments/examples/red/engine.py +0 -694
  525. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  526. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -28
  527. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  528. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  529. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  530. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  531. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  532. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  533. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  534. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  535. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  536. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  537. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  538. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  539. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  540. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  541. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -140
  542. synth_ai/environments/examples/red/environment.py +0 -238
  543. synth_ai/environments/examples/red/taskset.py +0 -79
  544. synth_ai/environments/examples/red/units/__init__.py +0 -1
  545. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  546. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  547. synth_ai/environments/examples/sokoban/engine.py +0 -678
  548. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  549. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  550. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  551. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  552. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  553. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  554. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  555. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  556. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  557. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  558. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  559. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  560. synth_ai/environments/examples/sokoban/environment.py +0 -229
  561. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  562. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  563. synth_ai/environments/examples/sokoban/taskset.py +0 -428
  564. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  565. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  566. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  567. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  568. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  569. synth_ai/environments/examples/verilog/__init__.py +0 -10
  570. synth_ai/environments/examples/verilog/engine.py +0 -329
  571. synth_ai/environments/examples/verilog/environment.py +0 -350
  572. synth_ai/environments/examples/verilog/taskset.py +0 -420
  573. synth_ai/environments/examples/wordle/__init__.py +0 -29
  574. synth_ai/environments/examples/wordle/engine.py +0 -398
  575. synth_ai/environments/examples/wordle/environment.py +0 -159
  576. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  577. synth_ai/environments/examples/wordle/taskset.py +0 -230
  578. synth_ai/environments/reproducibility/core.py +0 -42
  579. synth_ai/environments/reproducibility/helpers.py +0 -0
  580. synth_ai/environments/reproducibility/tree.py +0 -364
  581. synth_ai/environments/service/app.py +0 -98
  582. synth_ai/environments/service/core_routes.py +0 -1020
  583. synth_ai/environments/service/external_registry.py +0 -56
  584. synth_ai/environments/service/registry.py +0 -9
  585. synth_ai/environments/stateful/__init__.py +0 -1
  586. synth_ai/environments/stateful/core.py +0 -163
  587. synth_ai/environments/stateful/engine.py +0 -21
  588. synth_ai/environments/stateful/state.py +0 -7
  589. synth_ai/environments/tasks/api.py +0 -19
  590. synth_ai/environments/tasks/core.py +0 -80
  591. synth_ai/environments/tasks/filters.py +0 -41
  592. synth_ai/environments/tasks/utils.py +0 -91
  593. synth_ai/environments/v0_observability/history.py +0 -3
  594. synth_ai/environments/v0_observability/log.py +0 -2
  595. synth_ai/evals/base.py +0 -15
  596. synth_ai/experimental/synth_oss.py +0 -446
  597. synth_ai/handshake.py +0 -63
  598. synth_ai/http.py +0 -26
  599. synth_ai/http_client.py +0 -104
  600. synth_ai/inference/client.py +0 -20
  601. synth_ai/install_sqld.sh +0 -40
  602. synth_ai/jobs/client.py +0 -246
  603. synth_ai/learning/__init__.py +0 -24
  604. synth_ai/learning/config.py +0 -43
  605. synth_ai/learning/filtering.py +0 -0
  606. synth_ai/learning/ft_client.py +0 -59
  607. synth_ai/learning/offline/dpo.py +0 -0
  608. synth_ai/learning/offline/providers.py +0 -7
  609. synth_ai/learning/offline/sft.py +0 -0
  610. synth_ai/learning/offline/shared.py +0 -0
  611. synth_ai/learning/online/grpo.py +0 -0
  612. synth_ai/learning/online/irft.py +0 -0
  613. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  614. synth_ai/learning/prompts/gepa.py +0 -0
  615. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  616. synth_ai/learning/prompts/mipro.py +0 -289
  617. synth_ai/learning/prompts/random_search.py +0 -246
  618. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  619. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  620. synth_ai/learning/sse.py +0 -58
  621. synth_ai/learning/validators.py +0 -48
  622. synth_ai/lm/__init__.py +0 -51
  623. synth_ai/lm/caching/constants.py +0 -6
  624. synth_ai/lm/caching/dbs.py +0 -0
  625. synth_ai/lm/caching/ephemeral.py +0 -102
  626. synth_ai/lm/caching/handler.py +0 -137
  627. synth_ai/lm/caching/initialize.py +0 -11
  628. synth_ai/lm/caching/persistent.py +0 -114
  629. synth_ai/lm/config.py +0 -110
  630. synth_ai/lm/constants.py +0 -32
  631. synth_ai/lm/core/__init__.py +0 -8
  632. synth_ai/lm/core/all.py +0 -73
  633. synth_ai/lm/core/exceptions.py +0 -7
  634. synth_ai/lm/core/main.py +0 -319
  635. synth_ai/lm/core/main_v3.py +0 -594
  636. synth_ai/lm/core/synth_models.py +0 -48
  637. synth_ai/lm/core/vendor_clients.py +0 -188
  638. synth_ai/lm/cost/__init__.py +0 -0
  639. synth_ai/lm/cost/monitor.py +0 -1
  640. synth_ai/lm/cost/statefulness.py +0 -1
  641. synth_ai/lm/injection.py +0 -80
  642. synth_ai/lm/overrides.py +0 -206
  643. synth_ai/lm/provider_support/__init__.py +0 -8
  644. synth_ai/lm/provider_support/anthropic.py +0 -972
  645. synth_ai/lm/provider_support/openai.py +0 -1139
  646. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  647. synth_ai/lm/structured_outputs/__init__.py +0 -0
  648. synth_ai/lm/structured_outputs/handler.py +0 -440
  649. synth_ai/lm/structured_outputs/inject.py +0 -297
  650. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  651. synth_ai/lm/tools/__init__.py +0 -3
  652. synth_ai/lm/tools/base.py +0 -172
  653. synth_ai/lm/unified_interface.py +0 -202
  654. synth_ai/lm/vendors/__init__.py +0 -0
  655. synth_ai/lm/vendors/base.py +0 -81
  656. synth_ai/lm/vendors/core/__init__.py +0 -0
  657. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  658. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  659. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  660. synth_ai/lm/vendors/core/openai_api.py +0 -225
  661. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  662. synth_ai/lm/vendors/local/__init__.py +0 -0
  663. synth_ai/lm/vendors/local/ollama.py +0 -0
  664. synth_ai/lm/vendors/openai_standard.py +0 -780
  665. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  666. synth_ai/lm/vendors/retries.py +0 -22
  667. synth_ai/lm/vendors/supported/__init__.py +0 -0
  668. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  669. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  670. synth_ai/lm/vendors/supported/grok.py +0 -75
  671. synth_ai/lm/vendors/supported/groq.py +0 -16
  672. synth_ai/lm/vendors/supported/ollama.py +0 -15
  673. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  674. synth_ai/lm/vendors/supported/together.py +0 -11
  675. synth_ai/lm/vendors/synth_client.py +0 -808
  676. synth_ai/lm/warmup.py +0 -186
  677. synth_ai/rl/secrets.py +0 -19
  678. synth_ai/scripts/verify_rewards.py +0 -100
  679. synth_ai/task/__init__.py +0 -10
  680. synth_ai/task/contracts.py +0 -120
  681. synth_ai/task/health.py +0 -28
  682. synth_ai/task/validators.py +0 -12
  683. synth_ai/tracing/__init__.py +0 -30
  684. synth_ai/tracing_v1/__init__.py +0 -33
  685. synth_ai/tracing_v3/config.py +0 -84
  686. synth_ai/tracing_v3/storage/config.py +0 -62
  687. synth_ai/tracing_v3/turso/__init__.py +0 -25
  688. synth_ai/tracing_v3/turso/daemon.py +0 -144
  689. synth_ai/tracing_v3/turso/manager.py +0 -760
  690. synth_ai/v0/tracing/__init__.py +0 -0
  691. synth_ai/v0/tracing/abstractions.py +0 -224
  692. synth_ai/v0/tracing/base_client.py +0 -91
  693. synth_ai/v0/tracing/client_manager.py +0 -131
  694. synth_ai/v0/tracing/config.py +0 -142
  695. synth_ai/v0/tracing/context.py +0 -146
  696. synth_ai/v0/tracing/decorators.py +0 -682
  697. synth_ai/v0/tracing/events/__init__.py +0 -0
  698. synth_ai/v0/tracing/events/manage.py +0 -147
  699. synth_ai/v0/tracing/events/scope.py +0 -86
  700. synth_ai/v0/tracing/events/store.py +0 -228
  701. synth_ai/v0/tracing/immediate_client.py +0 -151
  702. synth_ai/v0/tracing/local.py +0 -18
  703. synth_ai/v0/tracing/log_client_base.py +0 -73
  704. synth_ai/v0/tracing/retry_queue.py +0 -186
  705. synth_ai/v0/tracing/trackers.py +0 -515
  706. synth_ai/v0/tracing/upload.py +0 -512
  707. synth_ai/v0/tracing/utils.py +0 -9
  708. synth_ai/v0/tracing_v1/__init__.py +0 -16
  709. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  710. synth_ai/v0/tracing_v1/base_client.py +0 -91
  711. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  712. synth_ai/v0/tracing_v1/config.py +0 -142
  713. synth_ai/v0/tracing_v1/context.py +0 -146
  714. synth_ai/v0/tracing_v1/decorators.py +0 -703
  715. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  716. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  717. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  718. synth_ai/v0/tracing_v1/events/store.py +0 -228
  719. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  720. synth_ai/v0/tracing_v1/local.py +0 -18
  721. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  722. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  723. synth_ai/v0/tracing_v1/trackers.py +0 -515
  724. synth_ai/v0/tracing_v1/upload.py +0 -527
  725. synth_ai/v0/tracing_v1/utils.py +0 -9
  726. synth_ai/zyk/__init__.py +0 -30
  727. synth_ai-0.2.8.dev2.dist-info/METADATA +0 -129
  728. synth_ai-0.2.8.dev2.dist-info/RECORD +0 -420
  729. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  730. /synth_ai/{lm/caching → core/apps}/__init__.py +0 -0
  731. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  732. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  733. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  734. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  735. /synth_ai/{compound/cais.py → py.typed} +0 -0
  736. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  737. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  738. {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/WHEEL +0 -0
  739. {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/licenses/LICENSE +0 -0
  740. {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1706 @@
1
+ """Prompt Learning configuration models for MIPRO and GEPA."""
2
+ from __future__ import annotations
3
+
4
+ from collections.abc import Mapping, Sequence
5
+ from enum import Enum
6
+ from pathlib import Path
7
+ from typing import Any, Dict, Literal, Optional
8
+
9
+ from pydantic import Field, field_validator, model_validator
10
+
11
+ from ..utils import load_toml
12
+ from .shared import ExtraModel
13
+
14
+
15
+ class SeedRange(ExtraModel):
16
+ """Compact seed range notation for TOML configs.
17
+
18
+ Allows writing `seeds = {start = 0, end = 50}` instead of `seeds = [0, 1, 2, ..., 49]`.
19
+
20
+ Examples:
21
+ seeds = {start = 0, end = 10} # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
22
+ seeds = {start = 0, end = 100, step = 2} # [0, 2, 4, ..., 98]
23
+ """
24
+ start: int
25
+ end: int
26
+ step: int = 1
27
+
28
+ def to_list(self) -> list[int]:
29
+ """Convert range to list of integers."""
30
+ return list(range(self.start, self.end, self.step))
31
+
32
+
33
+ def _parse_seeds(value: Any) -> list[int] | None:
34
+ """Parse seed values that can be either a list or a range dict.
35
+
36
+ Args:
37
+ value: Either a list of ints or a dict with 'start', 'end', and optional 'step'.
38
+
39
+ Returns:
40
+ List of integers, or None if value is None.
41
+
42
+ Examples:
43
+ _parse_seeds([0, 1, 2, 3]) # [0, 1, 2, 3]
44
+ _parse_seeds({"start": 0, "end": 4}) # [0, 1, 2, 3]
45
+ _parse_seeds({"start": 0, "end": 10, "step": 2}) # [0, 2, 4, 6, 8]
46
+ """
47
+ if value is None:
48
+ return None
49
+ if isinstance(value, dict) and "start" in value and "end" in value:
50
+ seed_range = SeedRange.model_validate(value)
51
+ return seed_range.to_list()
52
+ if isinstance(value, list):
53
+ return list(value)
54
+ raise ValueError(f"Seeds must be a list or a range dict with 'start' and 'end' keys, got {type(value).__name__}")
55
+
56
+
57
+ class InferenceMode(str, Enum):
58
+ synth_hosted = "synth_hosted"
59
+
60
+
61
+ class ProviderName(str, Enum):
62
+ openai = "openai"
63
+ groq = "groq"
64
+ google = "google"
65
+
66
+
67
+ class PromptLearningPolicyConfig(ExtraModel):
68
+ """Policy configuration for prompt learning (model, provider, etc.)."""
69
+ model: str
70
+ provider: ProviderName
71
+ inference_url: str | None = None # Optional - trainer provides it in rollout requests (ignored if present)
72
+ inference_mode: InferenceMode = InferenceMode.synth_hosted
73
+ temperature: float = 0.0
74
+ max_completion_tokens: int = 512
75
+ policy_name: str | None = None
76
+
77
+ @field_validator("inference_url", mode="before")
78
+ @classmethod
79
+ def _strip_inference_url(cls, v: str | None) -> str | None:
80
+ """Strip whitespace from inference_url if provided."""
81
+ if v is None:
82
+ return None
83
+ if isinstance(v, str):
84
+ v = v.strip()
85
+ # Validate that URL starts with http:// or https:// if provided (non-empty)
86
+ if v and not v.startswith(("http://", "https://")):
87
+ raise ValueError("inference_url must start with http:// or https://")
88
+ # Reject empty strings after stripping
89
+ if not v:
90
+ raise ValueError("inference_url must start with http:// or https://")
91
+ return v
92
+
93
+
94
+ class MessagePatternConfig(ExtraModel):
95
+ """Configuration for a single message pattern."""
96
+ role: str
97
+ pattern: str
98
+ order: int = 0
99
+
100
+
101
+ class PromptPatternConfig(ExtraModel):
102
+ """Initial prompt pattern configuration."""
103
+ id: str | None = None
104
+ name: str | None = None
105
+ messages: list[MessagePatternConfig] = []
106
+ wildcards: dict[str, str] = Field(default_factory=dict)
107
+
108
+
109
+ class MIPROMetaConfig(ExtraModel):
110
+ """DEPRECATED: Meta-model config is now controlled by proposer_effort and proposer_output_tokens.
111
+
112
+ This class is kept for backwards compatibility but should not be used.
113
+ Use proposer_effort (LOW_CONTEXT, LOW, MEDIUM, HIGH) and proposer_output_tokens (RAPID, FAST, SLOW) instead.
114
+ """
115
+ model: str | None = None
116
+ provider: str | None = None
117
+ inference_url: str | None = None
118
+ temperature: float | None = None
119
+ max_tokens: int | None = None
120
+
121
+
122
+ class MIPROStageConfig(ExtraModel):
123
+ """Configuration for a single MIPRO stage inside a module.
124
+
125
+ Each stage MUST have its own policy configuration. The policy field is required
126
+ and must include 'model' and 'provider' fields.
127
+ """
128
+ stage_id: str
129
+ baseline_instruction: str
130
+ baseline_messages: list[dict[str, str]] = Field(default_factory=list)
131
+ max_instruction_slots: int | None = None
132
+ max_demo_slots: int | None = None
133
+ policy: PromptLearningPolicyConfig | dict[str, Any] = Field(
134
+ ...,
135
+ description="Required per-stage policy configuration. Must include 'model' and 'provider' fields."
136
+ )
137
+
138
+
139
+ class MIPROModuleConfig(ExtraModel):
140
+ """Configuration for a single module in a MIPRO pipeline."""
141
+ module_id: str
142
+ stages: list[MIPROStageConfig] = Field(default_factory=list)
143
+
144
+
145
+ class MIPROSeedConfig(ExtraModel):
146
+ """Seed pools used across bootstrap, optimization, and evaluation."""
147
+ bootstrap: list[int] = Field(default_factory=list)
148
+ online: list[int] = Field(default_factory=list)
149
+ test: list[int] = Field(default_factory=list)
150
+ reference: list[int] = Field(default_factory=list)
151
+
152
+ @field_validator("bootstrap", "online", "test", "reference", mode="before")
153
+ @classmethod
154
+ def _parse_seed_pools(cls, v: Any) -> list[int]:
155
+ """Parse seed pools that can be either a list or range dict."""
156
+ return _parse_seeds(v) or []
157
+
158
+
159
+ class PromptLearningJudgeConfig(ExtraModel):
160
+ """Verifier configuration shared by GEPA and MIPRO.
161
+
162
+ This configures LLM-based evaluation of agent trajectories during prompt optimization.
163
+ You can use standard rubrics or registered Verifier Graphs.
164
+
165
+ Attributes:
166
+ enabled: Whether to enable verifier-based scoring.
167
+ reward_source: Source of the final reward for optimization.
168
+ - "task_app": Use only environment rewards from task app (default).
169
+ - "judge": Use only verifier quality scores.
170
+ - "fused": Weighted combination of environment and verifier rewards.
171
+ backend_base: Base URL for the verifier service (e.g. "https://api.usesynth.ai").
172
+ backend_api_key_env: Env var containing the Synth API key (default: "SYNTH_API_KEY").
173
+ backend_provider: Provider for the verifier model (e.g. "openai", "groq").
174
+ backend_model: Model used to execute the verifier rubric or graph (e.g. "gpt-4o-mini").
175
+ synth_verifier_id: ID or Name of a registered Verifier Graph or Rubric on the backend.
176
+ Use this to point to a specific, versioned verifier artifact.
177
+ backend_rubric_id: Legacy alias for synth_verifier_id.
178
+ backend_event_enabled: Whether to enable fine-grained event-level scoring.
179
+ backend_outcome_enabled: Whether to enable episode-level outcome scoring.
180
+ weight_env: Weight for environment rewards in "fused" mode (default: 1.0).
181
+ weight_event: Weight for verifier event rewards in "fused" mode (default: 0.0).
182
+ weight_outcome: Weight for verifier outcome rewards in "fused" mode (default: 0.0).
183
+ """
184
+ enabled: bool = False
185
+ reward_source: Literal["task_app", "judge", "fused"] = "task_app"
186
+ backend_base: str = ""
187
+ backend_api_key_env: str = "SYNTH_API_KEY"
188
+ backend_provider: str = ""
189
+ backend_model: str = ""
190
+ synth_verifier_id: str = "" # Preferred field for Registered VerifierGraph or Rubric ID
191
+ backend_rubric_id: str = "" # Legacy alias for synth_verifier_id
192
+ backend_event_enabled: bool = True
193
+ backend_outcome_enabled: bool = True
194
+ backend_options: Dict[str, Any] = Field(default_factory=dict)
195
+ concurrency: int = 8
196
+ timeout: float = 60.0
197
+ weight_env: float = 1.0
198
+ weight_event: float = 0.0
199
+ weight_outcome: float = 0.0
200
+ spec_path: Optional[str] = None
201
+ spec_max_tokens: int = 5000
202
+ spec_context: Optional[str] = None
203
+
204
+ @model_validator(mode="before")
205
+ @classmethod
206
+ def _sync_verifier_ids(cls, data: Any) -> Any:
207
+ """Sync synth_verifier_id and backend_rubric_id."""
208
+ if isinstance(data, dict):
209
+ if not data.get("synth_verifier_id") and data.get("backend_rubric_id"):
210
+ data["synth_verifier_id"] = data["backend_rubric_id"]
211
+ elif not data.get("backend_rubric_id") and data.get("synth_verifier_id"):
212
+ data["backend_rubric_id"] = data["synth_verifier_id"]
213
+ return data
214
+
215
+
216
+ class PromptLearningVerifierConfig(PromptLearningJudgeConfig):
217
+ """Alias for PromptLearningJudgeConfig with verifier terminology."""
218
+
219
+
220
+ class ProxyModelsConfig(ExtraModel):
221
+ """Configuration for proxy usage on policy evaluations.
222
+
223
+ Uses a low-fidelity (LO) model for most evaluations and a high-fidelity (HI) model
224
+ for verification, with dynamic switching based on calibration and correlation.
225
+
226
+ The proxy system starts by evaluating examples with both HI and LO models to build
227
+ a calibration regression. Once calibrated (R² >= r2_thresh), it switches to using
228
+ only the LO model for most evaluations, falling back to HI when reliability drops.
229
+
230
+ Attributes:
231
+ hi_provider: Provider for high-fidelity model (e.g., "openai", "groq", "google").
232
+ This is the expensive model used for ground-truth evaluations.
233
+ hi_model: High-fidelity model name (e.g., "gpt-4o", "gpt-oss-120b").
234
+ Must be a supported model for the provider.
235
+ lo_provider: Provider for low-fidelity proxy model (e.g., "groq", "openai").
236
+ This is the cheaper model used for most evaluations after calibration.
237
+ lo_model: Low-fidelity proxy model name (e.g., "gpt-oss-20b", "gpt-4o-mini").
238
+ Must be a supported model for the provider. Should be cheaper than hi_model.
239
+ n_min_hi: Minimum number of HI evaluations before allowing proxy substitution.
240
+ Default: 5. Ensures sufficient calibration data before proxying.
241
+ r2_thresh: R² correlation threshold (0.0-1.0) required to enable proxying.
242
+ Default: 0.5. Higher values require stronger correlation before proxying.
243
+ r2_stop: R² threshold (0.0-1.0) below which proxying is disabled.
244
+ Default: 0.2. If correlation drops below this, revert to HI-only.
245
+ sigma_max: Maximum residual variance (sigma²) allowed for proxy calibration.
246
+ Default: 1e6. Higher values allow more variance in predictions.
247
+ sigma_stop: Stop proxying if residual variance exceeds this value.
248
+ Default: 1e9. If variance exceeds this, revert to HI-only.
249
+ verify_every: Periodically verify calibration every N LO-only evaluations.
250
+ Default: 0 (no periodic verification). Set to >0 to periodically run BOTH
251
+ to check if calibration is still valid.
252
+ proxy_patience_usd: Stop proxying if cumulative net gain drops below this (USD).
253
+ Default: -100.0. Negative values allow some loss before stopping. Set to 0.0
254
+ to stop immediately if proxy becomes unprofitable.
255
+ """
256
+ hi_provider: str
257
+ hi_model: str
258
+ lo_provider: str
259
+ lo_model: str
260
+ n_min_hi: int = 5
261
+ r2_thresh: float = 0.5
262
+ r2_stop: float = 0.2
263
+ sigma_max: float = 1e6
264
+ sigma_stop: float = 1e9
265
+ verify_every: int = 0
266
+ proxy_patience_usd: float = -100.0
267
+
268
+
269
+ class AdaptiveCurriculumLevel(str, Enum):
270
+ """Preset levels for adaptive pooling curriculum."""
271
+ NONE = "NONE"
272
+ LOW = "LOW"
273
+ MODERATE = "MODERATE"
274
+ HIGH = "HIGH"
275
+
276
+
277
+ class AdaptivePoolConfig(ExtraModel):
278
+ """Configuration for adaptive pooling (dynamically adjusting evaluation pool size).
279
+
280
+ Reduces evaluation costs by focusing on the most informative examples while
281
+ maintaining optimization quality through informativeness-based selection.
282
+
283
+ The adaptive pool starts with a larger pool and gradually reduces to a minimum
284
+ size, selecting examples based on informativeness (variance across prompts).
285
+ Examples are divided into anchors (always evaluated) and exploration pool
286
+ (selected based on informativeness).
287
+
288
+ Attributes:
289
+ level: Preset level (NONE, LOW, MODERATE, HIGH). Default: LOW.
290
+ NONE disables adaptive pooling. Higher levels use smaller pools and
291
+ more aggressive annealing for greater cost savings.
292
+ anchor_size: Number of anchor examples that are always evaluated.
293
+ Default: 30. Anchors provide stable baseline for optimization.
294
+ Must be <= pool_min_size.
295
+ pool_init_size: Initial pool size at start of optimization.
296
+ Default: None (uses all available examples). Set to limit initial pool.
297
+ Must be >= pool_min_size if both are set.
298
+ pool_min_size: Target minimum pool size after annealing completes.
299
+ Default: None (uses anchor_size). Pool anneals linearly from
300
+ pool_init_size to pool_min_size between warmup_iters and anneal_stop_iter.
301
+ Must be >= anchor_size.
302
+ warmup_iters: Number of iterations before starting pool annealing.
303
+ Default: 5. During warmup, pool stays at pool_init_size to gather
304
+ informativeness data.
305
+ anneal_stop_iter: Iteration at which pool reaches pool_min_size.
306
+ Default: 20. Pool size decreases linearly from warmup_iters to this.
307
+ Must be > warmup_iters.
308
+ pool_update_period: Update informativeness scores every N generations.
309
+ Default: 3. More frequent updates (lower value) adapt faster but
310
+ require more computation.
311
+ min_evals_per_example: Minimum evaluations per example before computing
312
+ informativeness. Default: 3. Examples with fewer evals get info=0.0.
313
+ k_info_prompts: Number of top-performing prompts used for informativeness
314
+ computation. Default: 10. Only scores from these prompts are used to
315
+ compute variance-based informativeness.
316
+ info_buffer_factor: Buffer factor (0.0-1.0) for preserving informativeness
317
+ during pool reduction. Default: 0.9. Higher values preserve more
318
+ informativeness but allow less reduction. Lower values allow more
319
+ aggressive reduction but may lose informativeness.
320
+ info_epsilon: Small epsilon value added to prevent division by zero in
321
+ informativeness calculations. Default: 1e-6.
322
+ anchor_selection_method: Method for selecting anchor examples.
323
+ Default: "clustering". Options:
324
+ - "random": Random selection
325
+ - "clustering": Select diverse examples via clustering
326
+ exploration_strategy: Strategy for selecting exploration pool examples.
327
+ Default: "diversity". Options:
328
+ - "random": Random selection
329
+ - "diversity": Select diverse examples based on informativeness
330
+ heatup_reserve_pool: Optional list of seed IDs reserved for heat-up phase.
331
+ Default: None. If provided, these seeds are added back to pool during
332
+ heat-up phases to prevent overfitting to small pool.
333
+ heatup_trigger: When to trigger heat-up phase (adding seeds back to pool).
334
+ Default: "after_min_size". Options:
335
+ - "after_min_size": Trigger after pool reaches min_size
336
+ - "immediate": Trigger immediately
337
+ - "every_N_trials_after_min": Trigger periodically after min_size
338
+ heatup_size: Number of seeds to add during heat-up phase.
339
+ Default: 20. Seeds are selected from heatup_reserve_pool or reserve pool.
340
+ heatup_cooldown_trials: Number of trials to wait before cooling down
341
+ (removing heat-up seeds) after heat-up. Default: 50.
342
+ heatup_schedule: Whether heat-up repeats or happens once.
343
+ Default: "repeat". Options:
344
+ - "once": Heat-up happens once
345
+ - "repeat": Heat-up repeats after cooldown
346
+ """
347
+ level: AdaptiveCurriculumLevel = AdaptiveCurriculumLevel.LOW
348
+ anchor_size: int = 30
349
+ pool_init_size: int | None = None
350
+ pool_min_size: int | None = None
351
+ warmup_iters: int = 5
352
+ anneal_stop_iter: int = 20
353
+ pool_update_period: int = 3
354
+ min_evals_per_example: int = 3
355
+ k_info_prompts: int = 10
356
+ info_buffer_factor: float = 0.9
357
+ info_epsilon: float = 1e-6
358
+ anchor_selection_method: Literal["random", "clustering"] = "clustering"
359
+ exploration_strategy: Literal["random", "diversity"] = "diversity"
360
+ heatup_reserve_pool: list[int] | None = None
361
+ heatup_trigger: Literal["after_min_size", "immediate", "every_N_trials_after_min"] = "after_min_size"
362
+ heatup_size: int = 20
363
+ heatup_cooldown_trials: int = 50
364
+ heatup_schedule: Literal["repeat", "once"] = "repeat"
365
+
366
+ @property
367
+ def enabled(self) -> bool:
368
+ """Whether adaptive pooling is enabled (level != NONE)."""
369
+ return self.level != AdaptiveCurriculumLevel.NONE
370
+
371
+
372
+ class AdaptiveBatchLevel(str, Enum):
373
+ """Preset levels for adaptive batch curriculum (GEPA only)."""
374
+ NONE = "NONE"
375
+ LOW = "LOW"
376
+ MODERATE = "MODERATE"
377
+ HIGH = "HIGH"
378
+
379
+
380
class GEPAAdaptiveBatchConfig(ExtraModel):
    """Adaptive batch evaluation settings (GEPA only).

    Cuts evaluation spend by shrinking reflection minibatches and by
    subsampling the validation set instead of scoring it in full.
    """

    # Preset level; MODERATE by default so the feature is on unless opted out.
    level: AdaptiveBatchLevel = AdaptiveBatchLevel.MODERATE
    # Number of train examples scored per reflection step.
    reflection_minibatch_size: int = 3
    # Minimum local score gain required to accept a proposal.
    min_local_improvement: float = 0.0
    # "full" scores every validation seed; "subsample" scores a subset.
    val_evaluation_mode: Literal["full", "subsample"] = "subsample"
    # Subset size used when val_evaluation_mode == "subsample".
    val_subsample_size: int = 64
    # How candidates are picked for evaluation.
    candidate_selection_strategy: Literal["coverage", "random"] = "coverage"

    @property
    def enabled(self) -> bool:
        """True unless the preset level is NONE."""
        return self.level != AdaptiveBatchLevel.NONE
396
+
397
+
398
# Default presets for adaptive pool (mirrors monorepo structure).
# The heat-up settings are identical for every preset, so they are factored
# out and merged into each level's dict below.
_COMMON_HEATUP_DEFAULTS: dict[str, Any] = {
    "heatup_reserve_pool": None,
    "heatup_trigger": "after_min_size",
    "heatup_size": 20,
    "heatup_cooldown_trials": 50,
    "heatup_schedule": "repeat",
}

_ADAPTIVE_POOL_DEFAULTS: dict[AdaptiveCurriculumLevel, dict[str, Any]] = {
    # NONE: pooling effectively disabled — sentinel iteration counts keep the
    # warmup/anneal/update machinery from ever firing.
    AdaptiveCurriculumLevel.NONE: {
        "anchor_size": 0,
        "pool_init_size": None,
        "pool_min_size": None,
        "warmup_iters": 999_999,
        "anneal_stop_iter": 999_999,
        "pool_update_period": 999_999,
        "min_evals_per_example": 1,
        "k_info_prompts": 0,
        "info_buffer_factor": 1.0,
        "info_epsilon": 1e-6,
        "anchor_selection_method": "random",
        "exploration_strategy": "random",
        **_COMMON_HEATUP_DEFAULTS,
    },
    # LOW: gentle curriculum — largest pools, longest warmup/anneal windows.
    AdaptiveCurriculumLevel.LOW: {
        "anchor_size": 50,
        "pool_init_size": 150,
        "pool_min_size": 100,
        "warmup_iters": 10,
        "anneal_stop_iter": 30,
        "pool_update_period": 2,
        "min_evals_per_example": 5,
        "k_info_prompts": 15,
        "info_buffer_factor": 0.95,
        "info_epsilon": 1e-6,
        "anchor_selection_method": "clustering",
        "exploration_strategy": "diversity",
        **_COMMON_HEATUP_DEFAULTS,
    },
    # MODERATE: mid-sized pools and windows (matches the model field defaults).
    AdaptiveCurriculumLevel.MODERATE: {
        "anchor_size": 30,
        "pool_init_size": 100,
        "pool_min_size": 50,
        "warmup_iters": 5,
        "anneal_stop_iter": 20,
        "pool_update_period": 3,
        "min_evals_per_example": 3,
        "k_info_prompts": 10,
        "info_buffer_factor": 0.9,
        "info_epsilon": 1e-6,
        "anchor_selection_method": "clustering",
        "exploration_strategy": "diversity",
        **_COMMON_HEATUP_DEFAULTS,
    },
    # HIGH: aggressive curriculum — smallest pools, fastest shrink.
    AdaptiveCurriculumLevel.HIGH: {
        "anchor_size": 20,
        "pool_init_size": 60,
        "pool_min_size": 30,
        "warmup_iters": 3,
        "anneal_stop_iter": 10,
        "pool_update_period": 5,
        "min_evals_per_example": 2,
        "k_info_prompts": 5,
        "info_buffer_factor": 0.8,
        "info_epsilon": 1e-6,
        "anchor_selection_method": "clustering",
        "exploration_strategy": "diversity",
        **_COMMON_HEATUP_DEFAULTS,
    },
}
477
+
478
# Default presets for adaptive batch (GEPA only).  Every level uses a zero
# acceptance threshold; only NONE evaluates the full validation set.
_ADAPTIVE_BATCH_DEFAULTS: dict[AdaptiveBatchLevel, dict[str, Any]] = {
    # Disabled: large minibatch, full validation, random candidate choice.
    AdaptiveBatchLevel.NONE: {
        "reflection_minibatch_size": 8,
        "min_local_improvement": 0.0,
        "val_evaluation_mode": "full",
        "val_subsample_size": 64,
        "candidate_selection_strategy": "random",
    },
    # Light savings: modest minibatch, generous validation subsample.
    AdaptiveBatchLevel.LOW: {
        "reflection_minibatch_size": 5,
        "min_local_improvement": 0.0,
        "val_evaluation_mode": "subsample",
        "val_subsample_size": 80,
        "candidate_selection_strategy": "coverage",
    },
    # Balanced: matches the GEPAAdaptiveBatchConfig field defaults.
    AdaptiveBatchLevel.MODERATE: {
        "reflection_minibatch_size": 3,
        "min_local_improvement": 0.0,
        "val_evaluation_mode": "subsample",
        "val_subsample_size": 48 + 16,
        "candidate_selection_strategy": "coverage",
    },
    # Maximum savings: smallest minibatch and validation subsample.
    AdaptiveBatchLevel.HIGH: {
        "reflection_minibatch_size": 2,
        "min_local_improvement": 0.0,
        "val_evaluation_mode": "subsample",
        "val_subsample_size": 48,
        "candidate_selection_strategy": "coverage",
    },
}
509
+
510
+
511
def resolve_adaptive_pool_config(
    *,
    level: AdaptiveCurriculumLevel | str | None = None,
    overrides: dict[str, Any] | None = None,
    dev_pool_size: int | None = None,
) -> AdaptivePoolConfig:
    """Resolve adaptive pool config from level preset and overrides.

    Resolution order: pick the preset for ``level`` (LOW when None), layer
    ``overrides`` on top, fill missing pool sizes from ``dev_pool_size``,
    then coerce each field to its declared type. Invalid literal values
    (e.g. an unknown ``exploration_strategy``) are silently replaced with
    the safe default rather than raising.

    Args:
        level: Preset level (NONE, LOW, MODERATE, HIGH). Defaults to LOW if None.
            Strings are matched case-insensitively against the enum names.
        overrides: Dict of field overrides to apply on top of level defaults.
        dev_pool_size: Optional dev pool size to cap pool_init_size if needed.

    Returns:
        AdaptivePoolConfig with resolved values.

    Raises:
        ValueError: If ``level`` is a string that does not name a preset.
    """
    # Normalize level: accept the enum, a (case-insensitive) name string, or None.
    if level is None:
        level = AdaptiveCurriculumLevel.LOW
    elif isinstance(level, str):
        try:
            level = AdaptiveCurriculumLevel[level.strip().upper()]
        except KeyError:
            valid_levels = ", ".join(level_item.name for level_item in AdaptiveCurriculumLevel)
            raise ValueError(f"Invalid adaptive pool level '{level}'. Must be one of: {valid_levels}") from None

    # Get defaults for level (copy so overrides don't mutate the module preset).
    defaults = _ADAPTIVE_POOL_DEFAULTS[level].copy()

    # Apply overrides (overrides win over preset values).
    if overrides:
        defaults.update(overrides)

    # Handle pool_init_size and pool_min_size with dev_pool_size
    pool_init_size = defaults.get("pool_init_size")
    pool_min_size = defaults.get("pool_min_size")

    # A missing (None) size falls back to the dev pool size, if given.
    if pool_init_size is None:
        pool_init_size = dev_pool_size
    if pool_min_size is None:
        pool_min_size = dev_pool_size

    # Cap pool_init_size if dev_pool_size is provided
    # NOTE(review): pool_min_size is NOT capped here, so after capping it may
    # exceed pool_init_size (e.g. LOW preset with dev_pool_size=80 yields
    # init=80, min=100) — confirm downstream tolerates min > init.
    if dev_pool_size is not None and pool_init_size is not None and pool_init_size > dev_pool_size:
        pool_init_size = dev_pool_size

    # Handle heatup_reserve_pool (can be list, None, or single value)
    heatup_reserve = defaults.get("heatup_reserve_pool")
    if heatup_reserve is not None and not isinstance(heatup_reserve, list | tuple):
        # Convert single value or other types to list
        # (falsy scalars such as 0 collapse to None — i.e. "no reserve pool").
        heatup_reserve = [heatup_reserve] if heatup_reserve else None

    # Create config with proper types.  Literal-typed fields are validated
    # inline: an out-of-range value silently falls back to its default.
    config = AdaptivePoolConfig(
        level=level,
        anchor_size=int(defaults["anchor_size"]),
        pool_init_size=None if pool_init_size is None else int(pool_init_size),
        pool_min_size=None if pool_min_size is None else int(pool_min_size),
        warmup_iters=int(defaults["warmup_iters"]),
        anneal_stop_iter=int(defaults["anneal_stop_iter"]),
        pool_update_period=int(defaults["pool_update_period"]),
        min_evals_per_example=int(defaults["min_evals_per_example"]),
        k_info_prompts=int(defaults["k_info_prompts"]),
        info_buffer_factor=float(defaults["info_buffer_factor"]),
        info_epsilon=float(defaults["info_epsilon"]),
        anchor_selection_method=defaults["anchor_selection_method"] if defaults["anchor_selection_method"] in ("random", "clustering") else "clustering",
        exploration_strategy=defaults["exploration_strategy"] if defaults["exploration_strategy"] in ("random", "diversity") else "diversity",
        heatup_reserve_pool=list(heatup_reserve) if heatup_reserve else None,
        heatup_trigger=defaults.get("heatup_trigger", "after_min_size") if defaults.get("heatup_trigger", "after_min_size") in ("after_min_size", "immediate", "every_N_trials_after_min") else "after_min_size",
        heatup_size=int(defaults.get("heatup_size", 20)),
        heatup_cooldown_trials=int(defaults.get("heatup_cooldown_trials", 50)),
        heatup_schedule=defaults.get("heatup_schedule", "repeat") if defaults.get("heatup_schedule", "repeat") in ("repeat", "once") else "repeat",
    )

    return config
586
+
587
+
588
def resolve_adaptive_batch_config(
    *,
    level: AdaptiveBatchLevel | str | None = None,
    overrides: dict[str, Any] | None = None,
) -> GEPAAdaptiveBatchConfig:
    """Build a GEPAAdaptiveBatchConfig from a preset level plus overrides.

    Args:
        level: Preset level (NONE, LOW, MODERATE, HIGH). Defaults to MODERATE
            when None; strings are matched case-insensitively against names.
        overrides: Field overrides layered on top of the level's defaults.

    Returns:
        GEPAAdaptiveBatchConfig with resolved, type-coerced values.

    Raises:
        ValueError: If ``level`` is a string that names no preset.
    """
    # Accept the enum directly, a name string, or None (-> MODERATE).
    if level is None:
        resolved_level = AdaptiveBatchLevel.MODERATE
    elif isinstance(level, str):
        try:
            resolved_level = AdaptiveBatchLevel[level.strip().upper()]
        except KeyError:
            valid_levels = ", ".join(member.name for member in AdaptiveBatchLevel)
            raise ValueError(f"Invalid adaptive batch level '{level}'. Must be one of: {valid_levels}") from None
    else:
        resolved_level = level

    # Preset values first, overrides win on key collisions.
    settings = {**_ADAPTIVE_BATCH_DEFAULTS[resolved_level], **(overrides or {})}

    # Literal fields fall back to a safe default when given an unknown value.
    eval_mode = settings["val_evaluation_mode"]
    if eval_mode not in ("full", "subsample"):
        eval_mode = "full"
    selection = settings["candidate_selection_strategy"]
    if selection not in ("coverage", "random"):
        selection = "coverage"

    return GEPAAdaptiveBatchConfig(
        level=resolved_level,
        reflection_minibatch_size=int(settings["reflection_minibatch_size"]),
        min_local_improvement=float(settings["min_local_improvement"]),
        val_evaluation_mode=eval_mode,
        val_subsample_size=int(settings["val_subsample_size"]),
        candidate_selection_strategy=selection,
    )
628
+
629
+
630
class MIPROConfig(ExtraModel):
    """MIPRO-specific configuration.

    MIPROv2 uses meta-learning with bootstrap phase, TPE optimization, and mini-batch evaluation
    to efficiently optimize prompts with fewer evaluations than genetic algorithms.

    Attributes:
        proposer_effort: Effort level for proposer model selection. Controls which model
            is used for generating prompt proposals. Default: "LOW".
            Options:
            - "LOW_CONTEXT": Uses gpt-oss-120b (Groq) with minimal context. Fastest/cheapest.
              Required when proposer_output_tokens="RAPID".
            - "LOW": Uses smaller/faster models (e.g., gpt-4o-mini). Good balance.
            - "MEDIUM": Uses medium models (e.g., gpt-4o). Higher quality proposals.
            - "HIGH": Uses best models (e.g., gpt-5). Highest quality but expensive.
        proposer_output_tokens: Maximum output tokens allowed for proposer model.
            Default: "FAST". Controls proposal length and cost.
            Options:
            - "RAPID": 3000 tokens max. Fastest/cheapest. Requires proposer_effort="LOW_CONTEXT"
              and gpt-oss-120b model. Use for short, focused proposals.
            - "FAST": 10000 tokens max. Good balance. Works with any effort level.
            - "SLOW": 25000 tokens max. Allows longer proposals. Use for complex prompts.
        min_bootstrap_demos: Minimum number of qualified bootstrap demonstrations required.
            Default: 0 (no minimum). When > 0, the bootstrap phase fails early if fewer
            than this many demos pass the few_shot_score_threshold.
            NOTE(review): an earlier revision of this docstring also described a
            `strict_bootstrap` field, but no such field is defined on this model —
            confirm against the backend before relying on it.
    """
    # Task app connection (URL/key/id of the task app to optimize against).
    task_app_url: str | None = None
    task_app_api_key: str | None = None
    task_app_id: str | None = None
    # Core optimization loop sizing.
    num_iterations: int = 20
    num_evaluations_per_iteration: int = 5
    batch_size: int = 32
    max_concurrent: int = 20
    # Environment selection and optional per-env config payload.
    env_name: str = "banking77"
    env_config: dict[str, Any] | None = None
    # Minimum score for a bootstrap rollout to qualify as a few-shot demo.
    few_shot_score_threshold: float = 0.8
    results_file: str | None = None
    max_wall_clock_seconds: float | None = None
    max_total_tokens: int | None = None
    # Policy / meta / module / seed sub-configs (dicts accepted for TOML input).
    policy_config: dict[str, Any] | None = None
    meta: MIPROMetaConfig | dict[str, Any] | None = None
    modules: list[MIPROModuleConfig] | list[dict[str, Any]] | None = None
    seeds: MIPROSeedConfig | dict[str, Any] | None = None

    # Proposer configuration
    proposer_effort: Literal["LOW_CONTEXT", "LOW", "MEDIUM", "HIGH"] = "LOW"
    proposer_output_tokens: Literal["RAPID", "FAST", "SLOW"] = "FAST"

    # Token and budget configuration (mirrors GEPA pattern)
    max_token_limit: int | None = None  # Total tokens across all rollouts (policy + proposer)
    max_spend_usd: float | None = None  # Maximum spend in USD
    token_counting_model: str = "gpt-4"  # Model for token estimation (tiktoken)
    enforce_token_limit: bool = True  # Halt optimization if limit exceeded

    # TPE configuration
    tpe: dict[str, Any] | None = None

    # Demo configuration
    demo: dict[str, Any] | None = None

    # Grounding configuration
    grounding: dict[str, Any] | None = None

    # Meta-update configuration
    meta_update: dict[str, Any] | None = None

    # Judge configuration (shared with GEPA)
    judge: PromptLearningJudgeConfig | dict[str, Any] | None = None

    # Proxy models configuration (optional, can also be at top-level)
    proxy_models: ProxyModelsConfig | dict[str, Any] | None = None

    # Adaptive pool configuration (optional)
    adaptive_pool: AdaptivePoolConfig | dict[str, Any] | None = None

    # System spec configuration
    spec_path: str | None = None  # Path to system spec JSON file
    spec_max_tokens: int = 5000  # Max tokens for spec context in meta-prompt
    spec_include_examples: bool = True  # Include examples from spec
    spec_priority_threshold: int | None = None  # Only include rules with priority >= threshold
    # Custom metaprompt (optional)
    metaprompt: str | None = None  # Custom metaprompt text to include in instruction generation prompts

    # Bootstrap seeds (for few-shot examples)
    bootstrap_train_seeds: list[int] | None = None

    # Online pool (for mini-batch evaluation)
    online_pool: list[int] | None = None

    # Test pool (held-out seeds)
    test_pool: list[int] | None = None

    # Reference pool (for dataset context in meta-prompt, must not overlap with train/test)
    reference_pool: list[int] | None = None

    # Strict bootstrap mode: minimum qualified demos required
    # If fewer demos qualify (score >= few_shot_score_threshold), job fails early with clear error
    # Default: 0 (no minimum - current behavior for backwards compatibility)
    min_bootstrap_demos: int = 0

    @model_validator(mode="before")
    @classmethod
    def _forbid_meta_model_config(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Forbid deprecated meta_model configuration fields.

        Meta-model selection is now controlled by proposer_effort and proposer_output_tokens.
        The backend automatically selects the model based on these settings.

        Raises:
            ValueError: If any deprecated ``meta_model*`` key (or ``meta.model`` /
                ``meta.provider``) is present with a non-None value.
        """
        # Non-dict payloads are passed through untouched for Pydantic to handle.
        if not isinstance(data, dict):
            return data

        deprecated_meta_fields = {
            "meta_model": "Meta-model selection is now controlled by 'proposer_effort' (LOW_CONTEXT, LOW, MEDIUM, HIGH). Remove 'meta_model' from your config.",
            "meta_model_provider": "Meta-model provider is now controlled by 'proposer_effort'. Remove 'meta_model_provider' from your config.",
            "meta_model_inference_url": "Meta-model inference URL is now controlled by 'proposer_effort'. Remove 'meta_model_inference_url' from your config.",
            "meta_model_temperature": "Meta-model temperature is now controlled by 'proposer_effort'. Remove 'meta_model_temperature' from your config.",
            "meta_model_max_tokens": "Meta-model max_tokens is now controlled by 'proposer_effort' and 'proposer_output_tokens'. Remove 'meta_model_max_tokens' from your config.",
        }

        for field, message in deprecated_meta_fields.items():
            if field in data and data[field] is not None:
                raise ValueError(f"Deprecated field '{field}': {message}")

        # Also check in nested meta section
        if "meta" in data and isinstance(data["meta"], dict):
            meta_data = data["meta"]
            if meta_data.get("model") is not None:
                raise ValueError("Deprecated field 'meta.model': Meta-model selection is now controlled by 'proposer_effort'. Remove [prompt_learning.mipro.meta] section.")
            if meta_data.get("provider") is not None:
                raise ValueError("Deprecated field 'meta.provider': Meta-model provider is now controlled by 'proposer_effort'. Remove [prompt_learning.mipro.meta] section.")

        return data

    @field_validator("bootstrap_train_seeds", "online_pool", "test_pool", "reference_pool", mode="before")
    @classmethod
    def _parse_mipro_seed_lists(cls, v: Any) -> list[int] | None:
        """Parse MIPRO seed lists that can be either a list or range dict."""
        return _parse_seeds(v)

    @classmethod
    def simple(
        cls,
        *,
        task_app_url: str,
        task_app_api_key: str,
        env_name: str,
        rollout_budget: int,
        initial_prompt_messages: Sequence[Mapping[str, Any]] | Sequence[Any],
        task_app_id: str | None = None,
        bootstrap_seeds: list[int] | None = None,
        online_seeds: list[int] | None = None,
        test_seeds: list[int] | None = None,
        reference_pool: list[int] | None = None,
        env_config: dict[str, Any] | None = None,
        num_iterations: int | None = None,
        num_evaluations_per_iteration: int | None = None,
        batch_size: int | None = None,
        max_concurrent: int | None = None,
        meta_preset: Literal["fast", "balanced", "high_quality"] = "balanced",
        policy_model: str = "openai/gpt-oss-20b",
        policy_provider: str = "groq",
        policy_temperature: float = 1.0,
        policy_max_completion_tokens: int = 512,
        policy_name: str | None = None,
        meta_model: str | None = None,
        meta_provider: str | None = None,
        meta_inference_url: str | None = None,
    ) -> MIPROConfig:
        """Convenience constructor for single-stage MIPRO tasks.

        Automatically infers reasonable defaults for seeds, iterations, and module layout
        based on the rollout budget. This keeps simple benchmarks (e.g., Iris) readable
        while leaving the full constructor available for complex multi-stage pipelines.

        Raises:
            ValueError: If ``rollout_budget`` is not positive, or
                ``initial_prompt_messages`` normalizes to an empty list.
        """
        if rollout_budget <= 0:
            raise ValueError("rollout_budget must be positive for MIPROConfig.simple()")
        normalized_messages = _normalize_messages(initial_prompt_messages)
        if not normalized_messages:
            raise ValueError("initial_prompt_messages must contain at least one message")

        # Explicit seed lists win; otherwise derive them from the budget.
        bootstrap = bootstrap_seeds or _auto_calculate_bootstrap_seeds(rollout_budget)
        online = online_seeds or _auto_calculate_online_seeds(rollout_budget)
        tests = test_seeds or []
        reference = reference_pool or _auto_calculate_reference_pool(rollout_budget)

        iterations = num_iterations or _auto_calculate_iterations(rollout_budget)
        evals_per_iteration = (
            num_evaluations_per_iteration
            or _auto_calculate_evaluations_per_iteration(rollout_budget)
        )
        # Batch size is bounded by the online pool size and hard-capped at 32.
        derived_batch_size = batch_size or max(1, min(len(online), 32))
        derived_max_concurrent = max_concurrent or 10

        baseline_instruction = _extract_baseline_instruction(normalized_messages)
        meta_config = _create_meta_config_from_preset(meta_preset)
        # Per-field overrides on top of the preset meta config.
        if meta_model:
            meta_config.model = meta_model
        if meta_provider:
            meta_config.provider = meta_provider
        if meta_inference_url is not None:
            meta_config.inference_url = meta_inference_url

        # Single default module with one stage built from the input messages.
        stage = MIPROStageConfig(
            stage_id="default_stage_0",
            baseline_instruction=baseline_instruction,
            baseline_messages=normalized_messages,
        )
        module = MIPROModuleConfig(
            module_id="default",
            stages=[stage],
        )
        seeds = MIPROSeedConfig(
            bootstrap=bootstrap,
            online=online,
            test=tests,
            reference=reference,
        )
        policy_config = {
            "model": policy_model,
            "provider": policy_provider,
            "temperature": policy_temperature,
            "max_completion_tokens": policy_max_completion_tokens,
        }
        if policy_name:
            policy_config["policy_name"] = policy_name

        return cls(
            task_app_url=task_app_url,
            task_app_api_key=task_app_api_key,
            task_app_id=task_app_id or env_name,
            env_name=env_name,
            env_config=env_config,
            seeds=seeds,
            num_iterations=iterations,
            num_evaluations_per_iteration=evals_per_iteration,
            batch_size=derived_batch_size,
            max_concurrent=derived_max_concurrent,
            policy_config=policy_config,
            meta=meta_config,
            modules=[module],
        )
875
+
876
+
877
+ def _auto_calculate_bootstrap_seeds(rollout_budget: int) -> list[int]:
878
+ """Auto-calculate bootstrap seeds from rollout budget."""
879
+ count = max(3, min(10, max(rollout_budget // 10, 1)))
880
+ return list(range(count))
881
+
882
+
883
+ def _auto_calculate_online_seeds(rollout_budget: int) -> list[int]:
884
+ """Auto-calculate online pool seeds from rollout budget."""
885
+ count = max(5, min(50, max(rollout_budget // 3, 1)))
886
+ return list(range(10, 10 + count))
887
+
888
+
889
+ def _auto_calculate_reference_pool(rollout_budget: int) -> list[int]:
890
+ """Auto-calculate reference pool seeds from rollout budget."""
891
+ count = max(5, min(30, max(rollout_budget // 5, 1)))
892
+ return list(range(20, 20 + count))
893
+
894
+
895
+ def _auto_calculate_iterations(rollout_budget: int) -> int:
896
+ """Auto-calculate number of optimization iterations."""
897
+ online_pool_size = max(5, min(50, max(rollout_budget // 3, 1)))
898
+ evals_per_iteration = max(3, min(10, max(rollout_budget // max(online_pool_size * 2, 1), 1)))
899
+ iterations = max(5, min(20, max(rollout_budget // max(online_pool_size * evals_per_iteration, 1), 1)))
900
+ return iterations
901
+
902
+
903
+ def _auto_calculate_evaluations_per_iteration(rollout_budget: int) -> int:
904
+ """Auto-calculate number of evaluations per iteration."""
905
+ online_pool_size = max(5, min(50, max(rollout_budget // 3, 1)))
906
+ iterations = max(5, min(20, max(rollout_budget // max(online_pool_size * 5, 1), 1)))
907
+ evals_per_iteration = max(3, min(10, max(rollout_budget // max(online_pool_size * iterations, 1), 1)))
908
+ return evals_per_iteration
909
+
910
+
911
+ def _coerce_message_mapping(message: Mapping[str, Any] | Any) -> dict[str, Any]:
912
+ """Convert message objects or dicts into a mutable dict."""
913
+ if isinstance(message, Mapping):
914
+ return dict(message)
915
+ if hasattr(message, "model_dump"):
916
+ try:
917
+ data = message.model_dump()
918
+ if isinstance(data, dict):
919
+ return data
920
+ except Exception: # pragma: no cover - defensive
921
+ pass
922
+ if hasattr(message, "__dict__"):
923
+ try:
924
+ return {
925
+ key: value
926
+ for key, value in vars(message).items()
927
+ if not key.startswith("_")
928
+ }
929
+ except Exception: # pragma: no cover - defensive
930
+ return {}
931
+ return {}
932
+
933
+
934
def _extract_baseline_instruction(messages: Sequence[Mapping[str, str]] | Sequence[Any]) -> str:
    """Pick the baseline instruction text from message templates.

    Preference order: first non-empty system message, then first non-empty
    user message (messages without a role count as "user"), then a generic
    fallback string.
    """

    def first_text_for_role(wanted_role: str) -> str | None:
        for raw in messages:
            msg = _coerce_message_mapping(raw)
            if msg.get("role", "user") != wanted_role:
                continue
            text = (msg.get("content") or msg.get("pattern") or "").strip()
            if text:
                return text
        return None

    return first_text_for_role("system") or first_text_for_role("user") or "Complete the task."
949
+
950
+
951
def _normalize_messages(messages: Sequence[Mapping[str, str]] | Sequence[Any]) -> list[dict[str, str]]:
    """Normalize heterogeneous message inputs to {"role", "content"} dicts.

    Missing/falsy roles default to "user"; content falls back to the legacy
    "pattern" key, then to the empty string.
    """

    def as_entry(raw: Any) -> dict[str, str]:
        msg = _coerce_message_mapping(raw)
        role = msg.get("role", "user") or "user"
        body = msg.get("content") or msg.get("pattern") or ""
        return {"role": str(role), "content": str(body)}

    return [as_entry(raw) for raw in messages]
960
+
961
+
962
def _create_meta_config_from_preset(preset: str) -> MIPROMetaConfig:
    """Return the MIPRO meta-model config for a named preset.

    Recognized presets: "fast", "balanced", "high_quality" (matched after
    lowercasing/stripping). Anything else falls back to "balanced".
    """
    key = preset.lower().strip()
    if key == "fast":
        return MIPROMetaConfig(
            model="gpt-4o-mini",
            provider="openai",
            temperature=0.7,
            max_tokens=512,
            inference_url=None,
        )
    if key == "high_quality":
        return MIPROMetaConfig(
            model="gpt-4o",
            provider="openai",
            temperature=0.9,
            max_tokens=2048,
            inference_url=None,
        )
    # "balanced" — also the fallback for unrecognized preset names.
    return MIPROMetaConfig(
        model="gpt-4o-mini",
        provider="openai",
        temperature=0.8,
        max_tokens=1024,
        inference_url=None,
    )
989
+
990
+
991
# GEPA nested configs (mirroring RL structure)
class GEPARolloutConfig(ExtraModel):
    """Rollout settings for GEPA, mirroring the RL [rollout] section."""

    # Total rollout budget; None means unbounded here (enforced elsewhere).
    budget: int | None = None
    # Upper bound on simultaneously running rollouts.
    max_concurrent: int = 20
    # Number of rollouts evaluated together per minibatch.
    minibatch_size: int = 8
997
+
998
+
999
class GEPAEvaluationConfig(ExtraModel):
    """Evaluation settings for GEPA, mirroring the RL [evaluation] section."""

    # Training-set evaluation seeds.
    seeds: list[int] | None = None
    # Held-out validation seeds.
    validation_seeds: list[int] | None = None
    # Final-evaluation (test) seeds.
    test_pool: list[int] | None = None
    # Named seed pool used for validation (e.g. "validation").
    validation_pool: str | None = None
    # How many top-ranked prompts get validated.
    validation_top_k: int | None = None

    @field_validator("seeds", "validation_seeds", "test_pool", mode="before")
    @classmethod
    def _parse_seed_lists(cls, v: Any) -> list[int] | None:
        """Accept either an explicit seed list or a range-style dict."""
        return _parse_seeds(v)
1012
+
1013
+
1014
class GEPAMutationConfig(ExtraModel):
    """Mutation settings for GEPA.

    NOTE: Mutation model selection is controlled by proposer_effort, NOT
    llm_model — the llm_* fields below exist only so the validator can
    reject configs that still set them.
    """

    # Probability of mutating a candidate.
    rate: float = 0.3
    llm_model: str | None = None  # DEPRECATED: Use proposer_effort instead
    llm_provider: str | None = None  # DEPRECATED: Use proposer_effort instead
    llm_inference_url: str | None = None  # DEPRECATED: Not used
    # Optional custom mutation prompt text.
    prompt: str | None = None

    @model_validator(mode="before")
    @classmethod
    def _forbid_mutation_llm_config(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Reject deprecated mutation-LLM fields with actionable messages.

        Mutation model selection is now controlled by proposer_effort at the
        gepa level; any non-None deprecated key raises ValueError.
        """
        if not isinstance(data, dict):
            return data

        rejections = {
            "llm_model": "Mutation model selection is now controlled by 'proposer_effort' (LOW_CONTEXT, LOW, MEDIUM, HIGH) at [prompt_learning.gepa] level. Remove 'llm_model' from [prompt_learning.gepa.mutation].",
            "llm_provider": "Mutation provider is now controlled by 'proposer_effort'. Remove 'llm_provider' from [prompt_learning.gepa.mutation].",
            "llm_inference_url": "Mutation inference URL is not used. Remove 'llm_inference_url' from [prompt_learning.gepa.mutation].",
        }

        for key, explanation in rejections.items():
            if data.get(key) is not None:
                raise ValueError(f"Deprecated field '{key}': {explanation}")

        return data
1047
+
1048
+
1049
class GEPAPopulationConfig(ExtraModel):
    """Evolutionary population parameters for GEPA."""

    # Size of the seed population.
    initial_size: int = 20
    # How many generations the evolution runs.
    num_generations: int = 10
    # New children produced each generation.
    children_per_generation: int = 5
    # Probability that a child is produced by crossover.
    crossover_rate: float = 0.5
    # Pareto selection pressure multiplier.
    selection_pressure: float = 1.0
    # Generations without improvement before early stopping.
    patience_generations: int = 3
1057
+
1058
+
1059
class GEPAArchiveConfig(ExtraModel):
    """Pareto-archive settings for GEPA."""

    # Maximum number of archived candidates.
    size: int = 64
    # Maximum size of the maintained Pareto set.
    pareto_set_size: int = 64
    # Tolerance when comparing scores for Pareto dominance.
    pareto_eps: float = 1e-6
    # Fraction of the archive sampled for feedback.
    feedback_fraction: float = 0.5
1065
+
1066
+
1067
class GEPATokenConfig(ExtraModel):
    """Token and spend budget settings for GEPA."""

    # Hard cap on tokens allowed in a prompt (None = no cap).
    max_limit: int | None = None
    # Model name used for token counting.
    counting_model: str = "gpt-4"
    # Whether the per-pattern token limit is enforced.
    enforce_pattern_limit: bool = True
    # Hard cap on total spend, in USD (None = no cap).
    max_spend_usd: float | None = None
1073
+
1074
+
1075
class GEPAModuleConfig(ExtraModel):
    """One GEPA pipeline module/stage (instruction-only).

    Every module MUST carry its own policy configuration: the ``policy``
    field is required and must provide both 'model' and 'provider'.
    """

    # Unique, non-empty identifier for this module.
    module_id: str
    # Number of instruction slots the optimizer may fill (>= 1).
    max_instruction_slots: int = 3
    # Whitelist of tool names this module may invoke (None = unrestricted).
    allowed_tools: list[str] | None = None
    # Optional per-module token cap.
    max_tokens: int | None = None
    policy: PromptLearningPolicyConfig | dict[str, Any] = Field(
        ...,
        description="Required per-module policy configuration. Must include 'model' and 'provider' fields."
    )

    @field_validator("module_id")
    @classmethod
    def _validate_module_id(cls, v: str) -> str:
        """Strip whitespace and reject empty ids."""
        stripped = v.strip()
        if not stripped:
            raise ValueError("module_id cannot be empty")
        return stripped

    @field_validator("max_instruction_slots")
    @classmethod
    def _validate_slots(cls, v: int) -> int:
        """Require at least one instruction slot."""
        if v < 1:
            raise ValueError("max_instruction_slots must be >= 1")
        return v

    @field_validator("policy", mode="before")
    @classmethod
    def _validate_policy(cls, v: Any) -> dict[str, Any]:
        """Ensure a policy is supplied and, if a dict, has model + provider."""
        if v is None:
            raise ValueError("policy is required for each module/stage")
        if not isinstance(v, dict):
            # Already a PromptLearningPolicyConfig: Pydantic validates it.
            return v
        if not v.get("model"):
            raise ValueError("policy must include 'model' field")
        if not v.get("provider"):
            raise ValueError("policy must include 'provider' field")
        return v
1119
+
1120
+
1121
class GEPAConfig(ExtraModel):
    """GEPA-specific configuration with nested subsections.

    GEPA (Genetic Evolution of Prompt Architectures) uses evolutionary algorithms
    with LLM-guided mutations to optimize prompts through population-based search.

    Attributes:
        proposer_type: Type of proposer to use for generating mutations.
            Default: "dspy". Options: "dspy" (DSPy-style proposer) or "spec" (spec-based).
        proposer_effort: Effort level for proposer model selection. Controls which model
            is used for generating prompt mutations. Default: "LOW".
            Options:
            - "LOW_CONTEXT": Uses gpt-oss-120b (Groq) with minimal context. Fastest/cheapest.
              Required when proposer_output_tokens="RAPID".
            - "LOW": Uses smaller/faster models (e.g., gpt-4o-mini). Good balance.
            - "MEDIUM": Uses medium models (e.g., gpt-4o). Higher quality mutations.
            - "HIGH": Uses best models (e.g., gpt-5). Highest quality but expensive.
        proposer_output_tokens: Maximum output tokens allowed for proposer model.
            Default: "FAST". Controls mutation length and cost.
            Options:
            - "RAPID": 3000 tokens max. Fastest/cheapest. Requires proposer_effort="LOW_CONTEXT"
              and gpt-oss-120b model. Use for short, focused mutations.
            - "FAST": 10000 tokens max. Good balance. Works with any effort level.
            - "SLOW": 25000 tokens max. Allows longer mutations. Use for complex prompts.
        metaprompt: Optional custom metaprompt text to include in mutation prompts.
            Default: None. If provided, replaces default metaprompt template.
    """
    # Top-level fields (for backwards compatibility)
    env_name: str = "banking77"
    env_config: dict[str, Any] | None = None
    rng_seed: int | None = None
    proposer_type: str = "dspy"
    proposer_effort: Literal["LOW_CONTEXT", "LOW", "MEDIUM", "HIGH"] = "LOW"
    proposer_output_tokens: Literal["RAPID", "FAST", "SLOW"] = "FAST"
    # Custom metaprompt (optional)
    metaprompt: str | None = None

    # Multi-stage pipeline support
    modules: list[GEPAModuleConfig] | None = None

    # Nested subsections (preferred, mirrors RL structure)
    rollout: GEPARolloutConfig | None = None
    evaluation: GEPAEvaluationConfig | None = None
    mutation: GEPAMutationConfig | None = None
    population: GEPAPopulationConfig | None = None
    archive: GEPAArchiveConfig | None = None
    token: GEPATokenConfig | None = None
    judge: PromptLearningJudgeConfig | dict[str, Any] | None = None
    proxy_models: ProxyModelsConfig | dict[str, Any] | None = None  # Proxy models config (can be at top-level or gepa-specific)
    adaptive_pool: AdaptivePoolConfig | dict[str, Any] | None = None  # Adaptive pooling config
    adaptive_batch: GEPAAdaptiveBatchConfig | dict[str, Any] | None = None  # Adaptive batch config (GEPA only)

    # Backwards compatibility: flat fields (DEPRECATED - DO NOT USE)
    # These are kept for backwards compatibility with _get_* methods but should not be used directly.
    # NOTE: _check_flat_format_deprecated rejects any non-None value for these, so after
    # validation they are always None and the _get_* fallbacks only ever supply defaults.
    rollout_budget: int | None = None
    max_concurrent_rollouts: int | None = None
    minibatch_size: int | None = None
    evaluation_seeds: list[int] | None = None
    validation_seeds: list[int] | None = None
    test_pool: list[int] | None = None
    validation_pool: str | None = None
    validation_top_k: int | None = None
    mutation_rate: float | None = None
    mutation_llm_model: str | None = None
    mutation_llm_provider: str | None = None
    mutation_llm_inference_url: str | None = None
    mutation_prompt: str | None = None
    initial_population_size: int | None = None
    num_generations: int | None = None
    children_per_generation: int | None = None
    crossover_rate: float | None = None
    selection_pressure: float | None = None
    patience_generations: int | None = None
    archive_size: int | None = None
    pareto_set_size: int | None = None
    pareto_eps: float | None = None
    feedback_fraction: float | None = None
    max_token_limit: int | None = None
    token_counting_model: str | None = None
    enforce_pattern_token_limit: bool | None = None
    max_spend_usd: float | None = None

    @model_validator(mode="before")
    @classmethod
    def _check_flat_format_deprecated(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Forbid deprecated flat GEPA format fields.

        Users must use nested format:
        - gepa.rollout.budget instead of gepa.rollout_budget
        - gepa.evaluation.seeds instead of gepa.evaluation_seeds
        - etc.

        Raises:
            ValueError: If any deprecated flat field is present with a non-None value.
        """
        if not isinstance(data, dict):
            # Non-dict input (e.g. an existing model instance); let Pydantic handle it.
            return data

        # Maps each deprecated flat field name to the migration hint shown to the user.
        flat_fields_map = {
            "rollout_budget": "Use [prompt_learning.gepa.rollout] section with 'budget' field instead.",
            "max_concurrent_rollouts": "Use [prompt_learning.gepa.rollout] section with 'max_concurrent' field instead.",
            "minibatch_size": "Use [prompt_learning.gepa.rollout] section with 'minibatch_size' field instead.",
            "evaluation_seeds": "Use [prompt_learning.gepa.evaluation] section with 'seeds' field instead.",
            "validation_seeds": "Use [prompt_learning.gepa.evaluation] section with 'validation_seeds' field instead.",
            "test_pool": "Use [prompt_learning.gepa.evaluation] section with 'test_pool' field instead.",
            "validation_pool": "Use [prompt_learning.gepa.evaluation] section with 'validation_pool' field instead.",
            "validation_top_k": "Use [prompt_learning.gepa.evaluation] section with 'validation_top_k' field instead.",
            "mutation_rate": "Use [prompt_learning.gepa.mutation] section with 'rate' field instead.",
            "mutation_llm_model": "Use [prompt_learning.gepa.mutation] section with 'llm_model' field instead.",
            "mutation_llm_provider": "Use [prompt_learning.gepa.mutation] section with 'llm_provider' field instead.",
            "mutation_llm_inference_url": "Use [prompt_learning.gepa.mutation] section with 'llm_inference_url' field instead.",
            "mutation_prompt": "Use [prompt_learning.gepa.mutation] section with 'prompt' field instead.",
            "initial_population_size": "Use [prompt_learning.gepa.population] section with 'initial_size' field instead.",
            "num_generations": "Use [prompt_learning.gepa.population] section with 'num_generations' field instead.",
            "children_per_generation": "Use [prompt_learning.gepa.population] section with 'children_per_generation' field instead.",
            "crossover_rate": "Use [prompt_learning.gepa.population] section with 'crossover_rate' field instead.",
            "selection_pressure": "Use [prompt_learning.gepa.population] section with 'selection_pressure' field instead.",
            "patience_generations": "Use [prompt_learning.gepa.population] section with 'patience_generations' field instead.",
            "archive_size": "Use [prompt_learning.gepa.archive] section with 'size' field instead.",
            "pareto_set_size": "Use [prompt_learning.gepa.archive] section with 'pareto_set_size' field instead.",
            "pareto_eps": "Use [prompt_learning.gepa.archive] section with 'pareto_eps' field instead.",
            "feedback_fraction": "Use [prompt_learning.gepa.archive] section with 'feedback_fraction' field instead.",
            "max_token_limit": "Use [prompt_learning.gepa.token] section with 'max_limit' field instead.",
            "token_counting_model": "Use [prompt_learning.gepa.token] section with 'counting_model' field instead.",
            "enforce_pattern_token_limit": "Use [prompt_learning.gepa.token] section with 'enforce_pattern_limit' field instead.",
            "max_spend_usd": "Use [prompt_learning.gepa.token] section with 'max_spend_usd' field instead.",
        }

        # A flat field present but explicitly None is tolerated; any real value is rejected.
        for field, message in flat_fields_map.items():
            if field in data and data[field] is not None:
                raise ValueError(f"Deprecated flat GEPA format field '{field}': {message}")

        return data

    # --- Accessors bridging nested (preferred) and flat (deprecated) structures ---
    # Each _get_* prefers the nested subsection value; since the deprecated flat fields
    # are always None after validation, the trailing fallback effectively yields the default.

    def _get_rollout_budget(self) -> int | None:
        """Get rollout budget from nested or flat structure."""
        if self.rollout and self.rollout.budget is not None:
            return self.rollout.budget
        return self.rollout_budget

    def _get_max_concurrent_rollouts(self) -> int:
        """Get max concurrent rollouts from nested or flat structure (default 20)."""
        if self.rollout and self.rollout.max_concurrent is not None:
            return self.rollout.max_concurrent
        return self.max_concurrent_rollouts or 20

    def _get_minibatch_size(self) -> int:
        """Get minibatch size from nested or flat structure (default 8)."""
        if self.rollout and self.rollout.minibatch_size is not None:
            return self.rollout.minibatch_size
        return self.minibatch_size or 8

    def _get_evaluation_seeds(self) -> list[int] | None:
        """Get evaluation seeds from nested or flat structure."""
        if self.evaluation and self.evaluation.seeds is not None:
            return self.evaluation.seeds
        return self.evaluation_seeds

    def _get_validation_seeds(self) -> list[int] | None:
        """Get validation seeds from nested or flat structure."""
        if self.evaluation and self.evaluation.validation_seeds is not None:
            return self.evaluation.validation_seeds
        return self.validation_seeds

    def _get_test_pool(self) -> list[int] | None:
        """Get test pool from nested or flat structure."""
        if self.evaluation and self.evaluation.test_pool is not None:
            return self.evaluation.test_pool
        return self.test_pool

    def _get_mutation_rate(self) -> float:
        """Get mutation rate from nested or flat structure (default 0.3)."""
        if self.mutation and self.mutation.rate is not None:
            return self.mutation.rate
        return self.mutation_rate or 0.3

    def _get_mutation_llm_model(self) -> str | None:
        """Get mutation LLM model from nested or flat structure."""
        if self.mutation and self.mutation.llm_model is not None:
            return self.mutation.llm_model
        return self.mutation_llm_model

    def _get_mutation_llm_provider(self) -> str:
        """Get mutation LLM provider from nested or flat structure (default "groq")."""
        if self.mutation and self.mutation.llm_provider is not None:
            return self.mutation.llm_provider
        return self.mutation_llm_provider or "groq"

    def _get_mutation_llm_inference_url(self) -> str | None:
        """Get mutation LLM inference URL from nested or flat structure."""
        if self.mutation and self.mutation.llm_inference_url is not None:
            return self.mutation.llm_inference_url
        return self.mutation_llm_inference_url

    def _get_mutation_prompt(self) -> str | None:
        """Get mutation prompt from nested or flat structure."""
        if self.mutation and self.mutation.prompt is not None:
            return self.mutation.prompt
        return self.mutation_prompt

    def _get_initial_population_size(self) -> int:
        """Get initial population size from nested or flat structure (default 20)."""
        if self.population and self.population.initial_size is not None:
            return self.population.initial_size
        return self.initial_population_size or 20

    def _get_num_generations(self) -> int:
        """Get num generations from nested or flat structure (default 10)."""
        if self.population and self.population.num_generations is not None:
            return self.population.num_generations
        return self.num_generations or 10

    def _get_children_per_generation(self) -> int:
        """Get children per generation from nested or flat structure (default 5)."""
        if self.population and self.population.children_per_generation is not None:
            return self.population.children_per_generation
        return self.children_per_generation or 5

    def _get_crossover_rate(self) -> float:
        """Get crossover rate from nested or flat structure (default 0.5)."""
        if self.population and self.population.crossover_rate is not None:
            return self.population.crossover_rate
        return self.crossover_rate or 0.5

    def _get_selection_pressure(self) -> float:
        """Get selection pressure from nested or flat structure (default 1.0)."""
        if self.population and self.population.selection_pressure is not None:
            return self.population.selection_pressure
        return self.selection_pressure or 1.0

    def _get_patience_generations(self) -> int:
        """Get patience generations from nested or flat structure (default 3)."""
        if self.population and self.population.patience_generations is not None:
            return self.population.patience_generations
        return self.patience_generations or 3

    def _get_archive_size(self) -> int:
        """Get archive size from nested or flat structure (default 64)."""
        if self.archive and self.archive.size is not None:
            return self.archive.size
        return self.archive_size or 64

    def _get_pareto_set_size(self) -> int:
        """Get pareto set size from nested or flat structure (default 64)."""
        if self.archive and self.archive.pareto_set_size is not None:
            return self.archive.pareto_set_size
        return self.pareto_set_size or 64

    def _get_pareto_eps(self) -> float:
        """Get pareto eps from nested or flat structure (default 1e-6)."""
        if self.archive and self.archive.pareto_eps is not None:
            return self.archive.pareto_eps
        return self.pareto_eps or 1e-6

    def _get_feedback_fraction(self) -> float:
        """Get feedback fraction from nested or flat structure (default 0.5)."""
        if self.archive and self.archive.feedback_fraction is not None:
            return self.archive.feedback_fraction
        return self.feedback_fraction or 0.5

    def _get_max_token_limit(self) -> int | None:
        """Get max token limit from nested or flat structure."""
        if self.token and self.token.max_limit is not None:
            return self.token.max_limit
        return self.max_token_limit

    def _get_token_counting_model(self) -> str:
        """Get token counting model from nested or flat structure (default "gpt-4")."""
        if self.token and self.token.counting_model is not None:
            return self.token.counting_model
        return self.token_counting_model or "gpt-4"

    def _get_enforce_pattern_token_limit(self) -> bool:
        """Get enforce pattern token limit from nested or flat structure (default True)."""
        if self.token and self.token.enforce_pattern_limit is not None:
            return self.token.enforce_pattern_limit
        # Explicit is-not-None check so a flat False would be honored (default True).
        return self.enforce_pattern_token_limit if self.enforce_pattern_token_limit is not None else True

    def _get_max_spend_usd(self) -> float | None:
        """Get max spend USD from nested or flat structure."""
        if self.token and self.token.max_spend_usd is not None:
            return self.token.max_spend_usd
        return self.max_spend_usd

    @classmethod
    def from_mapping(cls, data: Mapping[str, Any]) -> GEPAConfig:
        """Load GEPA config from dict/TOML, handling both nested and flat structures.

        Known subsection keys are pre-validated into their typed configs;
        everything else is passed through to Pydantic as flat data.
        """
        # Check for nested structure first: split known subsection keys from the rest.
        nested_data = {}
        flat_data = {}

        for key, value in data.items():
            if key in ("rollout", "evaluation", "mutation", "population", "archive", "token", "modules", "proxy_models", "adaptive_pool", "adaptive_batch", "judge"):
                nested_data[key] = value
            else:
                flat_data[key] = value

        # If we have nested data, create nested configs
        if nested_data:
            if "rollout" in nested_data:
                nested_data["rollout"] = GEPARolloutConfig.model_validate(nested_data["rollout"])
            if "evaluation" in nested_data:
                nested_data["evaluation"] = GEPAEvaluationConfig.model_validate(nested_data["evaluation"])
            if "mutation" in nested_data:
                nested_data["mutation"] = GEPAMutationConfig.model_validate(nested_data["mutation"])
            if "population" in nested_data:
                nested_data["population"] = GEPAPopulationConfig.model_validate(nested_data["population"])
            if "archive" in nested_data:
                nested_data["archive"] = GEPAArchiveConfig.model_validate(nested_data["archive"])
            if "token" in nested_data:
                nested_data["token"] = GEPATokenConfig.model_validate(nested_data["token"])
            if "modules" in nested_data:
                modules_data = nested_data["modules"]
                if isinstance(modules_data, list):
                    # Entries may already be GEPAModuleConfig instances; only validate dicts.
                    nested_data["modules"] = [
                        GEPAModuleConfig.model_validate(m) if isinstance(m, dict) else m
                        for m in modules_data
                    ]
            # Handle proxy_models in gepa config (only if specified, defaults to None)
            if "proxy_models" in nested_data and isinstance(nested_data["proxy_models"], dict):
                nested_data["proxy_models"] = ProxyModelsConfig.model_validate(nested_data["proxy_models"])
            # If proxy_models not specified, leave as None (defaults to disabled)

            # Handle adaptive_pool in gepa config (only if specified, defaults to None)
            if "adaptive_pool" in nested_data and isinstance(nested_data["adaptive_pool"], dict):
                # Resolve adaptive pool config with level and overrides
                adaptive_pool_data = nested_data["adaptive_pool"]
                level = adaptive_pool_data.get("level")
                # If level not specified, default to LOW (conservative SDK default)
                overrides = {k: v for k, v in adaptive_pool_data.items() if k != "level"}
                # Get dev_pool_size from evaluation.seeds if available
                dev_pool_size = None
                if "evaluation" in nested_data:
                    eval_config = nested_data["evaluation"]
                    # Handle both dict and Pydantic model (GEPAEvaluationConfig)
                    if isinstance(eval_config, dict):
                        eval_seeds = eval_config.get("seeds")
                    else:
                        # Pydantic model - use attribute access
                        eval_seeds = getattr(eval_config, "seeds", None)
                    if isinstance(eval_seeds, list):
                        dev_pool_size = len(eval_seeds)
                nested_data["adaptive_pool"] = resolve_adaptive_pool_config(
                    level=level,  # Will default to LOW if None (via resolve_adaptive_pool_config)
                    overrides=overrides if overrides else None,
                    dev_pool_size=dev_pool_size,
                )
            # If adaptive_pool not specified, leave as None (defaults to disabled)
            if "adaptive_batch" in nested_data and isinstance(nested_data["adaptive_batch"], dict):
                # Resolve adaptive batch config with level and overrides
                adaptive_batch_data = nested_data["adaptive_batch"]
                level = adaptive_batch_data.get("level")
                overrides = {k: v for k, v in adaptive_batch_data.items() if k != "level"}
                try:
                    nested_data["adaptive_batch"] = resolve_adaptive_batch_config(
                        level=level,
                        overrides=overrides if overrides else None,
                    )
                except Exception as exc:
                    # Re-raise with clearer context
                    raise ValueError(f"Failed to resolve adaptive_batch config: {exc}") from exc

        # Merge nested and flat data (nested keys win on collision)
        merged_data = {**flat_data, **nested_data}
        return cls.model_validate(merged_data)
1483
+
1484
+
1485
class PromptLearningConfig(ExtraModel):
    """Top-level prompt learning configuration.

    Wraps either a MIPRO or a GEPA algorithm configuration plus the task-app
    connection details, optional judge/proxy-model settings, and free-tier gating.
    """
    algorithm: str  # "mipro" or "gepa"
    task_app_url: str
    task_app_api_key: str | None = None
    task_app_id: str | None = None
    initial_prompt: PromptPatternConfig | None = None
    policy: PromptLearningPolicyConfig | None = None
    mipro: MIPROConfig | None = None
    gepa: GEPAConfig | None = None
    judge: PromptLearningJudgeConfig | dict[str, Any] | None = None
    proxy_models: ProxyModelsConfig | dict[str, Any] | None = None  # Proxy models config (can be at top-level or algorithm-specific)
    env_config: dict[str, Any] | None = None

    # Free tier configuration
    free_tier: bool = Field(
        default=False,
        description=(
            "Enable free tier mode. Uses cost-effective OSS models for policy and proposer. "
            "Requires proposer_effort='LOW' or 'MEDIUM' (not 'HIGH'). "
            "Counts against your org's free tier limits. When limits are exceeded, "
            "remove this flag to run as paid job."
        ),
    )

    @model_validator(mode="before")
    @classmethod
    def _validate_free_tier_config(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Validate that free tier jobs use eligible proposer_effort levels.

        Raises:
            ValueError: If free_tier is enabled but proposer_effort is not one of
                LOW_CONTEXT/LOW/MEDIUM.
        """
        if not isinstance(data, dict):
            return data

        # Check if free tier is enabled (accept common truthy strings from TOML/env)
        free_tier = data.get("free_tier", False)
        if isinstance(free_tier, str):
            free_tier = free_tier.lower() in ("true", "1", "yes", "on")
        if not free_tier:
            return data

        # Get proposer_effort from GEPA or MIPRO config.
        # NOTE(review): only dict-shaped gepa/mipro sections are inspected here;
        # if those are already model instances, the effort check is skipped — confirm intended.
        proposer_effort = None
        gepa = data.get("gepa", {})
        if isinstance(gepa, dict):
            proposer_effort = gepa.get("proposer_effort")
        if proposer_effort is None:
            mipro = data.get("mipro", {})
            if isinstance(mipro, dict):
                proposer_effort = mipro.get("proposer_effort")

        # Default to "LOW" if not specified (which is free tier eligible)
        if proposer_effort is None:
            proposer_effort = "LOW"

        # Validate proposer_effort is eligible for free tier
        free_tier_efforts = {"LOW_CONTEXT", "LOW", "MEDIUM"}
        effort_upper = proposer_effort.upper() if isinstance(proposer_effort, str) else str(proposer_effort).upper()
        if effort_upper not in free_tier_efforts:
            raise ValueError(
                f"Free tier requires proposer_effort to be one of: {', '.join(sorted(free_tier_efforts))}. "
                f"Got: '{proposer_effort}'. "
                f"Either change proposer_effort to 'LOW' or 'MEDIUM', or remove 'free_tier = true' from your config."
            )

        return data

    @model_validator(mode="before")
    @classmethod
    def _check_deprecated_fields(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Remove deprecated fields that are no longer used.

        These fields are silently removed to maintain backwards compatibility
        with older configs while the CLI validation module warns about them.
        """
        if not isinstance(data, dict):
            return data

        # Silently remove deprecated fields (don't raise errors)
        deprecated_fields = {"display", "results_folder", "env_file_path"}

        for field in deprecated_fields:
            if field in data:
                data.pop(field, None)

        return data

    def to_dict(self) -> dict[str, Any]:
        """Convert config to dictionary for API payload.

        Returns:
            The model data nested under a single "prompt_learning" key,
            with None-valued fields omitted.
        """
        result = self.model_dump(mode="python", exclude_none=True)
        # Ensure prompt_learning section wraps everything
        if "prompt_learning" not in result:
            pl_data = dict(result.items())
            result = {"prompt_learning": pl_data}
        return result

    @classmethod
    def from_mapping(cls, data: Mapping[str, Any]) -> PromptLearningConfig:
        """Load prompt learning config from dict/TOML mapping.

        NOTE(review): only the top-level mapping is copied; nested dicts
        (e.g. data["prompt_learning"], its "gepa"/"mipro" sections) are mutated
        in place below (pop/assignment) — confirm callers don't reuse the input.
        """
        # Remove deprecated fields at top level (silently for backwards compatibility)
        # The CLI validation module will warn about these
        deprecated_top_level = {"display", "results_folder", "env_file_path"}

        # Convert to mutable dict if needed
        if not isinstance(data, dict):
            data = dict(data)
        else:
            data = dict(data)  # Create a copy to avoid modifying the original

        for field in deprecated_top_level:
            if field in data:
                data.pop(field, None)

        # Handle both [prompt_learning] section and flat structure
        pl_data = data.get("prompt_learning", {})
        if not pl_data:
            # If no prompt_learning section, assume top-level is prompt_learning
            pl_data = dict(data)

        # Handle proxy_models at top-level FIRST (takes precedence over algorithm-specific)
        # This ensures top-level proxy_models is available for algorithm configs to check
        # Default: None (proxy models disabled unless explicitly configured)
        top_level_proxy_models = None
        if "proxy_models" in pl_data and isinstance(pl_data["proxy_models"], dict):
            top_level_proxy_models = ProxyModelsConfig.model_validate(pl_data["proxy_models"])
            pl_data["proxy_models"] = top_level_proxy_models
        # If proxy_models not specified, leave as None (defaults to disabled)

        # Handle gepa config specially to support nested structure
        if "gepa" in pl_data and isinstance(pl_data["gepa"], dict):
            gepa_data = pl_data["gepa"]
            # If top-level proxy_models exists, remove gepa-specific proxy_models (top-level takes precedence)
            if top_level_proxy_models is not None and "proxy_models" in gepa_data:
                gepa_data.pop("proxy_models")
            pl_data["gepa"] = GEPAConfig.from_mapping(gepa_data)
            # Ensure gepa config uses top-level proxy_models if available
            if top_level_proxy_models is not None:
                # Note: gepa.proxy_models will be None, but top-level proxy_models will be used by backend
                pass

        # Handle mipro config - check for adaptive_pool
        if "mipro" in pl_data and isinstance(pl_data["mipro"], dict):
            mipro_data = pl_data["mipro"]
            # If top-level proxy_models exists, remove mipro-specific proxy_models (top-level takes precedence)
            if top_level_proxy_models is not None and "proxy_models" in mipro_data:
                mipro_data.pop("proxy_models")

            # Extract bootstrap_train_seeds and online_pool from top-level pl_data if not in mipro_data
            # These fields can be at top-level [prompt_learning] or nested [prompt_learning.mipro]
            if "bootstrap_train_seeds" not in mipro_data and "bootstrap_train_seeds" in pl_data:
                mipro_data["bootstrap_train_seeds"] = pl_data["bootstrap_train_seeds"]
            if "online_pool" not in mipro_data and "online_pool" in pl_data:
                mipro_data["online_pool"] = pl_data["online_pool"]
            if "test_pool" not in mipro_data and "test_pool" in pl_data:
                mipro_data["test_pool"] = pl_data["test_pool"]
            if "reference_pool" not in mipro_data and "reference_pool" in pl_data:
                mipro_data["reference_pool"] = pl_data["reference_pool"]

            # Handle adaptive_pool in mipro config (only if specified, defaults to None)
            if "adaptive_pool" in mipro_data and isinstance(mipro_data["adaptive_pool"], dict):
                adaptive_pool_data = mipro_data["adaptive_pool"]
                level = adaptive_pool_data.get("level")
                # If level not specified, default to LOW (conservative SDK default)
                overrides = {k: v for k, v in adaptive_pool_data.items() if k != "level"}
                # Get dev_pool_size from online_pool if available
                dev_pool_size = None
                online_pool = mipro_data.get("online_pool") or (mipro_data.get("seeds") or {}).get("online", [])
                if isinstance(online_pool, list):
                    dev_pool_size = len(online_pool)
                try:
                    mipro_data["adaptive_pool"] = resolve_adaptive_pool_config(
                        level=level,  # Will default to LOW if None (via resolve_adaptive_pool_config)
                        overrides=overrides if overrides else None,
                        dev_pool_size=dev_pool_size,
                    )
                except Exception as exc:
                    # Re-raise with clearer context
                    raise ValueError(f"Failed to resolve mipro.adaptive_pool config: {exc}") from exc
            # If adaptive_pool not specified, leave as None (defaults to disabled)

            # Handle proxy_models in mipro config (only if specified, defaults to None)
            if "proxy_models" in mipro_data and isinstance(mipro_data["proxy_models"], dict):
                mipro_data["proxy_models"] = ProxyModelsConfig.model_validate(mipro_data["proxy_models"])
            # If proxy_models not specified, leave as None (defaults to disabled)

        if "judge" in pl_data and isinstance(pl_data["judge"], dict):
            pl_data["judge"] = PromptLearningJudgeConfig.model_validate(pl_data["judge"])

        return cls.model_validate(pl_data)

    @classmethod
    def from_path(cls, path: Path) -> PromptLearningConfig:
        """Load prompt learning config from TOML file."""
        content = load_toml(path)
        return cls.from_mapping(content)
1678
+
1679
+
1680
# Public API of this module: config models plus the adaptive-config resolvers.
__all__ = [
    "GEPAConfig",
    "GEPAModuleConfig",
    "GEPARolloutConfig",
    "GEPAEvaluationConfig",
    "GEPAMutationConfig",
    "GEPAPopulationConfig",
    "GEPAArchiveConfig",
    "GEPATokenConfig",
    "GEPAAdaptiveBatchConfig",
    "MIPROConfig",
    "MIPROMetaConfig",
    "MIPROModuleConfig",
    "MIPROStageConfig",
    "MIPROSeedConfig",
    "MessagePatternConfig",
    "PromptLearningConfig",
    "PromptLearningPolicyConfig",
    "PromptPatternConfig",
    "PromptLearningJudgeConfig",
    "ProxyModelsConfig",
    "AdaptivePoolConfig",
    "AdaptiveCurriculumLevel",
    "AdaptiveBatchLevel",
    "resolve_adaptive_pool_config",
    "resolve_adaptive_batch_config",
]