synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai has been flagged by the registry (see the registry page for details).
Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -18,7 +18,7 @@ _STATE_FILE = _STATE_DIR / "train_cli.json"
 @dataclass(slots=True)
 class ConfigCandidate:
     path: Path
-    train_type: str  # "rl", "sft", or "unknown"
+    train_type: str  # "rl", "sft", "prompt_learning", or "unknown"


 def _load_last_config() -> Path | None:
@@ -94,6 +94,17 @@ def _iter_candidate_paths() -> Iterable[Path]:


 def _infer_config_type(data: dict) -> str:
+    # 0) Check for prompt_learning section (highest priority)
+    pl_section = data.get("prompt_learning")
+    if isinstance(pl_section, dict):
+        algorithm = pl_section.get("algorithm", "").lower()
+        if algorithm in {"mipro", "gepa"}:
+            return "prompt_learning"
+    # Also check if top-level has prompt_learning indicators
+    algorithm = data.get("algorithm")
+    if isinstance(algorithm, str) and algorithm.lower() in {"mipro", "gepa"}:
+        return "prompt_learning"
+
     # 1) Strong signals from [algorithm]
     algo = data.get("algorithm")
     if isinstance(algo, dict):
@@ -152,7 +163,7 @@ def discover_configs(explicit: list[str], *, requested_type: str | None) -> list
         cfg_type = _infer_config_type(data)
         if cfg_type == "unknown":
             raise click.ClickException(
-                f"Config {path} is missing algorithm.type/method metadata. Add type = 'rl' or 'sft'."
+                f"Config {path} is missing algorithm.type/method metadata. Add type = 'rl', 'sft', or 'prompt_learning'."
             )
         candidates.append(ConfigCandidate(path=path, train_type=cfg_type))
         seen.add(path)
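
The three hunks above appear to correspond to synth_ai/api/train/config_finder.py (entry 151 in the file list): prompt-learning configs are now detected before the existing RL/SFT checks. A minimal sketch of that detection order, assuming only the logic visible in the diff; the sample dicts are hypothetical, and the real function continues with RL/SFT branches:

# Illustrative sketch of the detection order added above; not the shipped function.
def infer_config_type(data: dict) -> str:
    pl_section = data.get("prompt_learning")
    if isinstance(pl_section, dict):
        if pl_section.get("algorithm", "").lower() in {"mipro", "gepa"}:
            return "prompt_learning"
    algorithm = data.get("algorithm")
    if isinstance(algorithm, str) and algorithm.lower() in {"mipro", "gepa"}:
        return "prompt_learning"
    return "unknown"  # the real implementation goes on to the RL/SFT checks

assert infer_config_type({"prompt_learning": {"algorithm": "gepa"}}) == "prompt_learning"
assert infer_config_type({"algorithm": "mipro"}) == "prompt_learning"
assert infer_config_type({"algorithm": {"type": "rl"}}) == "unknown"  # sketch only; the real code would classify this as RL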
@@ -1,14 +1,24 @@
-"""Typed training config loaders for RL and SFT jobs."""
+"""Typed training config loaders for RL, SFT, and Prompt Learning jobs."""

+from .prompt_learning import (
+    GEPAConfig,
+    MessagePatternConfig,
+    MIPROConfig,
+    PromptLearningConfig,
+    PromptLearningPolicyConfig,
+    PromptPatternConfig,
+)
 from .rl import (
     EvaluationConfig,
     JudgeConfig,
     JudgeOptionsConfig,
     ModelConfig,
+    RewardsConfig,
     RLConfig,
     RLServicesConfig,
     RLTrainingConfig,
     RolloutConfig,
+    RubricConfig,
     WeightSyncConfig,
 )
 from .sft import (
@@ -20,24 +30,35 @@ from .sft import (
     TrainingConfig,
     TrainingValidationConfig,
 )
-from .shared import AlgorithmConfig, ComputeConfig
+from .shared import AlgorithmConfig, ComputeConfig, LoraConfig, PolicyConfig, TopologyConfig

 __all__ = [
     "AlgorithmConfig",
     "ComputeConfig",
     "EvaluationConfig",
+    "GEPAConfig",
     "HyperparametersConfig",
     "HyperparametersParallelism",
     "JobConfig",
     "JudgeConfig",
     "JudgeOptionsConfig",
+    "LoraConfig",
+    "MIPROConfig",
+    "MessagePatternConfig",
     "ModelConfig",
+    "PolicyConfig",
+    "PromptLearningConfig",
+    "PromptLearningPolicyConfig",
+    "PromptPatternConfig",
+    "RewardsConfig",
     "RLConfig",
     "RLServicesConfig",
     "RLTrainingConfig",
     "RolloutConfig",
+    "RubricConfig",
     "SFTConfig",
     "SFTDataConfig",
+    "TopologyConfig",
     "TrainingConfig",
     "TrainingValidationConfig",
     "WeightSyncConfig",
@@ -0,0 +1,442 @@
+"""Prompt Learning configuration models for MIPRO and GEPA."""
+from __future__ import annotations
+
+from collections.abc import Mapping
+from enum import Enum
+from pathlib import Path
+from typing import Any
+
+from pydantic import Field, field_validator
+
+from ..utils import load_toml
+from .shared import ExtraModel
+
+
+class InferenceMode(str, Enum):
+    synth_hosted = "synth_hosted"
+
+
+class ProviderName(str, Enum):
+    openai = "openai"
+    groq = "groq"
+    google = "google"
+
+
+class PromptLearningPolicyConfig(ExtraModel):
+    """Policy configuration for prompt learning (model, provider, etc.)."""
+    model: str
+    provider: ProviderName
+    inference_url: str
+    inference_mode: InferenceMode = InferenceMode.synth_hosted
+    temperature: float = 0.0
+    max_completion_tokens: int = 512
+    policy_name: str | None = None
+
+    @field_validator("inference_url")
+    @classmethod
+    def _normalize_inference_url(cls, v: str) -> str:
+        if not isinstance(v, str):
+            raise ValueError("inference_url must be a string")
+        v = v.strip()
+        if not v.startswith(("http://", "https://")):
+            raise ValueError("inference_url must start with http:// or https://")
+        return v
+
+
+class MessagePatternConfig(ExtraModel):
+    """Configuration for a single message pattern."""
+    role: str
+    pattern: str
+    order: int = 0
+
+
+class PromptPatternConfig(ExtraModel):
+    """Initial prompt pattern configuration."""
+    id: str | None = None
+    name: str | None = None
+    messages: list[MessagePatternConfig] = []
+    wildcards: dict[str, str] = Field(default_factory=dict)
+
+
+class MIPROConfig(ExtraModel):
+    """MIPRO-specific configuration.
+
+    NOTE: MIPRO support is not yet implemented in synth-ai.
+    This configuration class exists for future compatibility.
+    Use GEPA algorithm for prompt optimization.
+    """
+    num_iterations: int = 20
+    num_evaluations_per_iteration: int = 5
+    batch_size: int = 32
+    max_concurrent: int = 20
+    env_name: str = "banking77"
+    env_config: dict[str, Any] | None = None
+    meta_model: str = "gpt-4o-mini"
+    meta_model_provider: str = "openai"
+    meta_model_inference_url: str | None = None
+    few_shot_score_threshold: float = 0.8
+    results_file: str | None = None
+    max_wall_clock_seconds: float | None = None
+    max_total_tokens: int | None = None
+
+    # TPE configuration
+    tpe: dict[str, Any] | None = None
+
+    # Demo configuration
+    demo: dict[str, Any] | None = None
+
+    # Grounding configuration
+    grounding: dict[str, Any] | None = None
+
+    # Meta-update configuration
+    meta_update: dict[str, Any] | None = None
+
+    # Bootstrap seeds (for few-shot examples)
+    bootstrap_train_seeds: list[int] | None = None
+
+    # Online pool (for mini-batch evaluation)
+    online_pool: list[int] | None = None
+
+    # Test pool (held-out seeds)
+    test_pool: list[int] | None = None
+
+
+# GEPA nested configs (mirroring RL structure)
+class GEPARolloutConfig(ExtraModel):
+    """GEPA rollout configuration (mirrors RL [rollout] section)."""
+    budget: int | None = None  # Total rollout budget
+    max_concurrent: int = 20  # Maximum concurrent rollouts
+    minibatch_size: int = 8  # Minibatch size for evaluation
+
+
+class GEPAEvaluationConfig(ExtraModel):
+    """GEPA evaluation configuration (mirrors RL [evaluation] section)."""
+    seeds: list[int] | None = None  # Evaluation seeds (training set)
+    validation_seeds: list[int] | None = None  # Validation seeds (held-out)
+    test_pool: list[int] | None = None  # Test pool (final evaluation)
+    validation_pool: str | None = None  # Pool name for validation (e.g., "validation")
+    validation_top_k: int | None = None  # Top-K prompts to validate
+
+
+class GEPAMutationConfig(ExtraModel):
+    """GEPA mutation configuration (LLM-guided mutation settings)."""
+    rate: float = 0.3  # Mutation rate
+    llm_model: str | None = None  # Model for generating mutations
+    llm_provider: str = "groq"  # Provider for mutation LLM
+    llm_inference_url: str | None = None  # Custom inference URL
+    prompt: str | None = None  # Custom mutation prompt
+
+
+class GEPAPopulationConfig(ExtraModel):
+    """GEPA population configuration (evolution parameters)."""
+    initial_size: int = 20  # Initial population size
+    num_generations: int = 10  # Number of generations
+    children_per_generation: int = 5  # Children generated per generation
+    crossover_rate: float = 0.5  # Crossover rate
+    selection_pressure: float = 1.0  # Pareto selection pressure
+    patience_generations: int = 3  # Early stopping patience
+
+
+class GEPAArchiveConfig(ExtraModel):
+    """GEPA archive configuration (Pareto archive settings)."""
+    size: int = 64  # Archive size
+    pareto_set_size: int = 64  # Pareto set size
+    pareto_eps: float = 1e-6  # Pareto epsilon
+    feedback_fraction: float = 0.5  # Fraction of archive for feedback
+
+
+class GEPATokenConfig(ExtraModel):
+    """GEPA token and budget configuration."""
+    max_limit: int | None = None  # Maximum tokens allowed in prompt
+    counting_model: str = "gpt-4"  # Model for token counting
+    enforce_pattern_limit: bool = True  # Enforce token limit on patterns
+    max_spend_usd: float | None = None  # Maximum spend in USD
+
+
+class GEPAConfig(ExtraModel):
+    """GEPA-specific configuration with nested subsections."""
+    # Top-level fields (for backwards compatibility)
+    env_name: str = "banking77"
+    env_config: dict[str, Any] | None = None
+    rng_seed: int | None = None
+    proposer_type: str = "dspy"  # "dspy" or "synth"
+
+    # Nested subsections (preferred, mirrors RL structure)
+    rollout: GEPARolloutConfig | None = None
+    evaluation: GEPAEvaluationConfig | None = None
+    mutation: GEPAMutationConfig | None = None
+    population: GEPAPopulationConfig | None = None
+    archive: GEPAArchiveConfig | None = None
+    token: GEPATokenConfig | None = None
+
+    # Backwards compatibility: flat fields (deprecated, prefer nested)
+    # These will be flattened from nested configs if provided
+    rollout_budget: int | None = None
+    max_concurrent_rollouts: int | None = None
+    minibatch_size: int | None = None
+    evaluation_seeds: list[int] | None = None
+    validation_seeds: list[int] | None = None
+    test_pool: list[int] | None = None
+    validation_pool: str | None = None
+    validation_top_k: int | None = None
+    mutation_rate: float | None = None
+    mutation_llm_model: str | None = None
+    mutation_llm_provider: str | None = None
+    mutation_llm_inference_url: str | None = None
+    mutation_prompt: str | None = None
+    initial_population_size: int | None = None
+    num_generations: int | None = None
+    children_per_generation: int | None = None
+    crossover_rate: float | None = None
+    selection_pressure: float | None = None
+    patience_generations: int | None = None
+    archive_size: int | None = None
+    pareto_set_size: int | None = None
+    pareto_eps: float | None = None
+    feedback_fraction: float | None = None
+    max_token_limit: int | None = None
+    token_counting_model: str | None = None
+    enforce_pattern_token_limit: bool | None = None
+    max_spend_usd: float | None = None
+
+    def _get_rollout_budget(self) -> int | None:
+        """Get rollout budget from nested or flat structure."""
+        if self.rollout and self.rollout.budget is not None:
+            return self.rollout.budget
+        return self.rollout_budget
+
+    def _get_max_concurrent_rollouts(self) -> int:
+        """Get max concurrent rollouts from nested or flat structure."""
+        if self.rollout and self.rollout.max_concurrent is not None:
+            return self.rollout.max_concurrent
+        return self.max_concurrent_rollouts or 20
+
+    def _get_minibatch_size(self) -> int:
+        """Get minibatch size from nested or flat structure."""
+        if self.rollout and self.rollout.minibatch_size is not None:
+            return self.rollout.minibatch_size
+        return self.minibatch_size or 8
+
+    def _get_evaluation_seeds(self) -> list[int] | None:
+        """Get evaluation seeds from nested or flat structure."""
+        if self.evaluation and self.evaluation.seeds is not None:
+            return self.evaluation.seeds
+        return self.evaluation_seeds
+
+    def _get_validation_seeds(self) -> list[int] | None:
+        """Get validation seeds from nested or flat structure."""
+        if self.evaluation and self.evaluation.validation_seeds is not None:
+            return self.evaluation.validation_seeds
+        return self.validation_seeds
+
+    def _get_test_pool(self) -> list[int] | None:
+        """Get test pool from nested or flat structure."""
+        if self.evaluation and self.evaluation.test_pool is not None:
+            return self.evaluation.test_pool
+        return self.test_pool
+
+    def _get_mutation_rate(self) -> float:
+        """Get mutation rate from nested or flat structure."""
+        if self.mutation and self.mutation.rate is not None:
+            return self.mutation.rate
+        return self.mutation_rate or 0.3
+
+    def _get_mutation_llm_model(self) -> str | None:
+        """Get mutation LLM model from nested or flat structure."""
+        if self.mutation and self.mutation.llm_model is not None:
+            return self.mutation.llm_model
+        return self.mutation_llm_model
+
+    def _get_mutation_llm_provider(self) -> str:
+        """Get mutation LLM provider from nested or flat structure."""
+        if self.mutation and self.mutation.llm_provider is not None:
+            return self.mutation.llm_provider
+        return self.mutation_llm_provider or "groq"
+
+    def _get_mutation_llm_inference_url(self) -> str | None:
+        """Get mutation LLM inference URL from nested or flat structure."""
+        if self.mutation and self.mutation.llm_inference_url is not None:
+            return self.mutation.llm_inference_url
+        return self.mutation_llm_inference_url
+
+    def _get_mutation_prompt(self) -> str | None:
+        """Get mutation prompt from nested or flat structure."""
+        if self.mutation and self.mutation.prompt is not None:
+            return self.mutation.prompt
+        return self.mutation_prompt
+
+    def _get_initial_population_size(self) -> int:
+        """Get initial population size from nested or flat structure."""
+        if self.population and self.population.initial_size is not None:
+            return self.population.initial_size
+        return self.initial_population_size or 20
+
+    def _get_num_generations(self) -> int:
+        """Get num generations from nested or flat structure."""
+        if self.population and self.population.num_generations is not None:
+            return self.population.num_generations
+        return self.num_generations or 10
+
+    def _get_children_per_generation(self) -> int:
+        """Get children per generation from nested or flat structure."""
+        if self.population and self.population.children_per_generation is not None:
+            return self.population.children_per_generation
+        return self.children_per_generation or 5
+
+    def _get_crossover_rate(self) -> float:
+        """Get crossover rate from nested or flat structure."""
+        if self.population and self.population.crossover_rate is not None:
+            return self.population.crossover_rate
+        return self.crossover_rate or 0.5
+
+    def _get_selection_pressure(self) -> float:
+        """Get selection pressure from nested or flat structure."""
+        if self.population and self.population.selection_pressure is not None:
+            return self.population.selection_pressure
+        return self.selection_pressure or 1.0
+
+    def _get_patience_generations(self) -> int:
+        """Get patience generations from nested or flat structure."""
+        if self.population and self.population.patience_generations is not None:
+            return self.population.patience_generations
+        return self.patience_generations or 3
+
+    def _get_archive_size(self) -> int:
+        """Get archive size from nested or flat structure."""
+        if self.archive and self.archive.size is not None:
+            return self.archive.size
+        return self.archive_size or 64
+
+    def _get_pareto_set_size(self) -> int:
+        """Get pareto set size from nested or flat structure."""
+        if self.archive and self.archive.pareto_set_size is not None:
+            return self.archive.pareto_set_size
+        return self.pareto_set_size or 64
+
+    def _get_pareto_eps(self) -> float:
+        """Get pareto eps from nested or flat structure."""
+        if self.archive and self.archive.pareto_eps is not None:
+            return self.archive.pareto_eps
+        return self.pareto_eps or 1e-6
+
+    def _get_feedback_fraction(self) -> float:
+        """Get feedback fraction from nested or flat structure."""
+        if self.archive and self.archive.feedback_fraction is not None:
+            return self.archive.feedback_fraction
+        return self.feedback_fraction or 0.5
+
+    def _get_max_token_limit(self) -> int | None:
+        """Get max token limit from nested or flat structure."""
+        if self.token and self.token.max_limit is not None:
+            return self.token.max_limit
+        return self.max_token_limit
+
+    def _get_token_counting_model(self) -> str:
+        """Get token counting model from nested or flat structure."""
+        if self.token and self.token.counting_model is not None:
+            return self.token.counting_model
+        return self.token_counting_model or "gpt-4"
+
+    def _get_enforce_pattern_token_limit(self) -> bool:
+        """Get enforce pattern token limit from nested or flat structure."""
+        if self.token and self.token.enforce_pattern_limit is not None:
+            return self.token.enforce_pattern_limit
+        return self.enforce_pattern_token_limit if self.enforce_pattern_token_limit is not None else True
+
+    def _get_max_spend_usd(self) -> float | None:
+        """Get max spend USD from nested or flat structure."""
+        if self.token and self.token.max_spend_usd is not None:
+            return self.token.max_spend_usd
+        return self.max_spend_usd
+
+    @classmethod
+    def from_mapping(cls, data: Mapping[str, Any]) -> GEPAConfig:
+        """Load GEPA config from dict/TOML, handling both nested and flat structures."""
+        # Check for nested structure first
+        nested_data = {}
+        flat_data = {}
+
+        for key, value in data.items():
+            if key in ("rollout", "evaluation", "mutation", "population", "archive", "token"):
+                nested_data[key] = value
+            else:
+                flat_data[key] = value
+
+        # If we have nested data, create nested configs
+        if nested_data:
+            if "rollout" in nested_data:
+                nested_data["rollout"] = GEPARolloutConfig.model_validate(nested_data["rollout"])
+            if "evaluation" in nested_data:
+                nested_data["evaluation"] = GEPAEvaluationConfig.model_validate(nested_data["evaluation"])
+            if "mutation" in nested_data:
+                nested_data["mutation"] = GEPAMutationConfig.model_validate(nested_data["mutation"])
+            if "population" in nested_data:
+                nested_data["population"] = GEPAPopulationConfig.model_validate(nested_data["population"])
+            if "archive" in nested_data:
+                nested_data["archive"] = GEPAArchiveConfig.model_validate(nested_data["archive"])
+            if "token" in nested_data:
+                nested_data["token"] = GEPATokenConfig.model_validate(nested_data["token"])
+
+        # Merge nested and flat data
+        merged_data = {**flat_data, **nested_data}
+        return cls.model_validate(merged_data)
+
+
+class PromptLearningConfig(ExtraModel):
+    """Top-level prompt learning configuration."""
+    algorithm: str  # "mipro" or "gepa"
+    task_app_url: str
+    task_app_api_key: str | None = None
+    task_app_id: str | None = None
+    initial_prompt: PromptPatternConfig | None = None
+    policy: PromptLearningPolicyConfig | None = None
+    mipro: MIPROConfig | None = None
+    gepa: GEPAConfig | None = None
+    env_config: dict[str, Any] | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert config to dictionary for API payload."""
+        result = self.model_dump(mode="python", exclude_none=True)
+        # Ensure prompt_learning section wraps everything
+        if "prompt_learning" not in result:
+            pl_data = dict(result.items())
+            result = {"prompt_learning": pl_data}
+        return result
+
+    @classmethod
+    def from_mapping(cls, data: Mapping[str, Any]) -> PromptLearningConfig:
+        """Load prompt learning config from dict/TOML mapping."""
+        # Handle both [prompt_learning] section and flat structure
+        pl_data = data.get("prompt_learning", {})
+        if not pl_data:
+            # If no prompt_learning section, assume top-level is prompt_learning
+            pl_data = dict(data)
+
+        # Handle gepa config specially to support nested structure
+        if "gepa" in pl_data and isinstance(pl_data["gepa"], dict):
+            gepa_data = pl_data["gepa"]
+            pl_data["gepa"] = GEPAConfig.from_mapping(gepa_data)
+
+        return cls.model_validate(pl_data)
+
+    @classmethod
+    def from_path(cls, path: Path) -> PromptLearningConfig:
+        """Load prompt learning config from TOML file."""
+        content = load_toml(path)
+        return cls.from_mapping(content)
+
+
+__all__ = [
+    "GEPAConfig",
+    "GEPARolloutConfig",
+    "GEPAEvaluationConfig",
+    "GEPAMutationConfig",
+    "GEPAPopulationConfig",
+    "GEPAArchiveConfig",
+    "GEPATokenConfig",
+    "MIPROConfig",
+    "MessagePatternConfig",
+    "PromptLearningConfig",
+    "PromptLearningPolicyConfig",
+    "PromptPatternConfig",
+]
@@ -7,7 +7,7 @@ from typing import Any
 from pydantic import model_validator

 from ..utils import load_toml
-from .shared import AlgorithmConfig, ComputeConfig, ExtraModel
+from .shared import AlgorithmConfig, ComputeConfig, ExtraModel, LoraConfig, PolicyConfig


 class RLServicesConfig(ExtraModel):
@@ -48,6 +48,16 @@ class WeightSyncConfig(ExtraModel):
     verify_every_k: int | None = None


+class RewardsConfig(ExtraModel):
+    """Rewards configuration for RL training."""
+    step_rewards_enabled: bool | None = None
+    step_rewards_mode: str | None = None
+    step_rewards_indicator_lambda: float | None = None
+    step_rewards_beta: float | None = None
+    step_rewards_strategy: str | None = None
+    event_rewards_kind: str | None = None
+
+
 class RLTrainingConfig(ExtraModel):
     num_epochs: int
     iterations_per_epoch: int
@@ -59,13 +69,17 @@ class RLTrainingConfig(ExtraModel):
     learning_rate: float
     log_interval: int | None = None
     weight_sync_interval: int | None = None
+    # DEPRECATED: flat reward fields (use rewards.* instead)
     step_rewards_enabled: bool | None = None
     step_rewards_mode: str | None = None
     step_rewards_indicator_lambda: float | None = None
     step_rewards_beta: float | None = None
     step_rewards_strategy: str | None = None
     event_rewards_kind: str | None = None
+    # NEW: nested configs
     weight_sync: WeightSyncConfig | None = None
+    lora: LoraConfig | None = None
+    rewards: RewardsConfig | None = None


 class EvaluationConfig(ExtraModel):
@@ -86,34 +100,73 @@ class JudgeOptionsConfig(ExtraModel):
     max_concurrency: int | None = None


+class RubricConfig(ExtraModel):
+    """Rubric configuration for reward blending."""
+    enabled: bool = False
+    reward_blend: dict[str, float] | None = None  # env, event, outcome weights
+
+
 class JudgeConfig(ExtraModel):
     type: str | None = None
     timeout_s: int | None = None
+    enabled: bool | None = None  # Master switch for judge/rubric
+    reward_blend: dict[str, float] | None = None  # NEW: nested reward blending (replaces rubric.weights)
+    rubric: RubricConfig | None = None  # DEPRECATED: use flat fields instead
     options: JudgeOptionsConfig | None = None


+class SmokeConfig(ExtraModel):
+    """Configuration for local smoke testing (CLI only, ignored by trainer)."""
+    # Test parameters
+    task_url: str | None = None
+    env_name: str | None = None
+    policy_name: str | None = None
+    max_steps: int | None = None
+    policy: str | None = None  # mock, gpt-5-nano, openai, groq
+    model: str | None = None
+    mock_backend: str | None = None  # synthetic or openai
+    mock_port: int | None = None
+    return_trace: bool | None = None
+    use_mock: bool | None = None
+
+    # Task app auto-start configuration
+    task_app_name: str | None = None  # Task app to serve (e.g., "grpo-crafter")
+    task_app_port: int | None = None  # Port for task app (default: 8765)
+    task_app_env_file: str | None = None  # Path to .env file for task app
+    task_app_force: bool | None = None  # Use --force flag when serving
+
+    # sqld auto-start configuration
+    sqld_auto_start: bool | None = None  # Auto-start sqld server
+    sqld_db_path: str | None = None  # Database path (default: ./traces/local.db)
+    sqld_hrana_port: int | None = None  # Hrana WebSocket port (default: 8080)
+    sqld_http_port: int | None = None  # HTTP API port (default: 8081)
+
+
 class RLConfig(ExtraModel):
     algorithm: AlgorithmConfig
     services: RLServicesConfig
     compute: ComputeConfig | None = None
-    topology: dict[str, Any] | None = None
+    topology: dict[str, Any] | None = None  # DEPRECATED: use compute.topology instead
     vllm: dict[str, Any] | None = None
-    reference: dict[str, Any] | None = None
-    model: ModelConfig
-    lora: dict[str, Any] | None = None
+    reference: dict[str, Any] | None = None  # DEPRECATED: use compute.topology.reference_placement instead
+    model: ModelConfig | None = None  # DEPRECATED: use policy instead
+    policy: PolicyConfig | None = None  # NEW: unified policy (preferred)
+    lora: dict[str, Any] | None = None  # DEPRECATED: use training.lora instead
     rollout: RolloutConfig | None = None
     evaluation: EvaluationConfig | None = None
     training: RLTrainingConfig | None = None
-    rubric: dict[str, Any] | None = None
+    rubric: dict[str, Any] | None = None  # DEPRECATED: use judge.reward_blend and judge.enabled instead
     judge: JudgeConfig | None = None
     tags: dict[str, Any] | None = None
+    smoke: SmokeConfig | None = None  # CLI-only: local smoke testing config (ignored by trainer)

     def to_dict(self) -> dict[str, Any]:
         return self.model_dump(mode="python", exclude_none=True)

     @classmethod
     def from_mapping(cls, data: Mapping[str, Any]) -> RLConfig:
-        return cls.model_validate(dict(data))
+        """Load RL config from dict/TOML mapping."""
+        return cls.model_validate(data)

     @classmethod
     def from_path(cls, path: Path) -> RLConfig:
@@ -130,5 +183,6 @@ __all__ = [
     "RLServicesConfig",
     "RLTrainingConfig",
     "RolloutConfig",
+    "SmokeConfig",
     "WeightSyncConfig",
 ]
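
These last hunks appear to belong to synth_ai/api/train/configs/rl.py (entry 154) and follow the same migration pattern: the flat step_rewards_* fields, [rubric], topology, reference, model, and lora remain readable but are commented DEPRECATED, while [training.rewards], [training.lora], judge.enabled / judge.reward_blend, policy, and the CLI-only [smoke] section are the preferred spellings. A rough sketch that builds just the nested models added above, assuming the module path from the file list; the field names come from the diff and the values are invented:

# Sketch: constructs the new nested models from the diff; values are illustrative only.
from synth_ai.api.train.configs.rl import JudgeConfig, RewardsConfig, SmokeConfig

rewards = RewardsConfig(step_rewards_enabled=True, step_rewards_beta=0.1)
judge = JudgeConfig(enabled=True, reward_blend={"env": 0.2, "event": 0.3, "outcome": 0.5})
smoke = SmokeConfig(task_app_name="grpo-crafter", task_app_port=8765, use_mock=True)

# These map onto [training.rewards], [judge], and [smoke] tables in a TOML config;
# per the comments above, the trainer ignores [smoke], which is meant for local
# CLI-side smoke testing.
print(judge.model_dump(exclude_none=True))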