synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai has been flagged by the registry (see the registry page for details).
Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -18,7 +18,7 @@ _STATE_FILE = _STATE_DIR / "train_cli.json"
 @dataclass(slots=True)
 class ConfigCandidate:
     path: Path
-    train_type: str  # "rl", "sft", or "unknown"
+    train_type: str  # "rl", "sft", "prompt_learning", or "unknown"


 def _load_last_config() -> Path | None:
@@ -94,6 +94,17 @@ def _iter_candidate_paths() -> Iterable[Path]:


 def _infer_config_type(data: dict) -> str:
+    # 0) Check for prompt_learning section (highest priority)
+    pl_section = data.get("prompt_learning")
+    if isinstance(pl_section, dict):
+        algorithm = pl_section.get("algorithm", "").lower()
+        if algorithm in {"mipro", "gepa"}:
+            return "prompt_learning"
+    # Also check if top-level has prompt_learning indicators
+    algorithm = data.get("algorithm")
+    if isinstance(algorithm, str) and algorithm.lower() in {"mipro", "gepa"}:
+        return "prompt_learning"
+
     # 1) Strong signals from [algorithm]
     algo = data.get("algorithm")
     if isinstance(algo, dict):
@@ -152,7 +163,7 @@ def discover_configs(explicit: list[str], *, requested_type: str | None) -> list
         cfg_type = _infer_config_type(data)
         if cfg_type == "unknown":
             raise click.ClickException(
-                f"Config {path} is missing algorithm.type/method metadata. Add type = 'rl' or 'sft'."
+                f"Config {path} is missing algorithm.type/method metadata. Add type = 'rl', 'sft', or 'prompt_learning'."
             )
         candidates.append(ConfigCandidate(path=path, train_type=cfg_type))
         seen.add(path)
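
The three hunks above appear to correspond to synth_ai/api/train/config_finder.py (entry 151 in the file list): prompt-learning configs are now detected before the existing RL/SFT checks. A minimal sketch of that detection order, assuming only the logic visible in the diff; the sample dicts are hypothetical, and the real function continues with RL/SFT branches:

# Illustrative sketch of the detection order added above; not the shipped function.
def infer_config_type(data: dict) -> str:
    pl_section = data.get("prompt_learning")
    if isinstance(pl_section, dict):
        if pl_section.get("algorithm", "").lower() in {"mipro", "gepa"}:
            return "prompt_learning"
    algorithm = data.get("algorithm")
    if isinstance(algorithm, str) and algorithm.lower() in {"mipro", "gepa"}:
        return "prompt_learning"
    return "unknown"  # the real implementation goes on to the RL/SFT checks

assert infer_config_type({"prompt_learning": {"algorithm": "gepa"}}) == "prompt_learning"
assert infer_config_type({"algorithm": "mipro"}) == "prompt_learning"
assert infer_config_type({"algorithm": {"type": "rl"}}) == "unknown"  # sketch only; the real code would classify this as RL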
@@ -1,14 +1,24 @@
-"""Typed training config loaders for RL and SFT jobs."""
+"""Typed training config loaders for RL, SFT, and Prompt Learning jobs."""

+from .prompt_learning import (
+    GEPAConfig,
+    MessagePatternConfig,
+    MIPROConfig,
+    PromptLearningConfig,
+    PromptLearningPolicyConfig,
+    PromptPatternConfig,
+)
 from .rl import (
     EvaluationConfig,
     JudgeConfig,
     JudgeOptionsConfig,
     ModelConfig,
+    RewardsConfig,
     RLConfig,
     RLServicesConfig,
     RLTrainingConfig,
     RolloutConfig,
+    RubricConfig,
     WeightSyncConfig,
 )
 from .sft import (
@@ -20,24 +30,35 @@ from .sft import (
     TrainingConfig,
     TrainingValidationConfig,
 )
-from .shared import AlgorithmConfig, ComputeConfig
+from .shared import AlgorithmConfig, ComputeConfig, LoraConfig, PolicyConfig, TopologyConfig

 __all__ = [
     "AlgorithmConfig",
     "ComputeConfig",
     "EvaluationConfig",
+    "GEPAConfig",
     "HyperparametersConfig",
     "HyperparametersParallelism",
     "JobConfig",
     "JudgeConfig",
     "JudgeOptionsConfig",
+    "LoraConfig",
+    "MIPROConfig",
+    "MessagePatternConfig",
     "ModelConfig",
+    "PolicyConfig",
+    "PromptLearningConfig",
+    "PromptLearningPolicyConfig",
+    "PromptPatternConfig",
+    "RewardsConfig",
     "RLConfig",
     "RLServicesConfig",
     "RLTrainingConfig",
     "RolloutConfig",
+    "RubricConfig",
     "SFTConfig",
     "SFTDataConfig",
+    "TopologyConfig",
     "TrainingConfig",
     "TrainingValidationConfig",
     "WeightSyncConfig",
@@ -0,0 +1,442 @@
+"""Prompt Learning configuration models for MIPRO and GEPA."""
+from __future__ import annotations
+
+from collections.abc import Mapping
+from enum import Enum
+from pathlib import Path
+from typing import Any
+
+from pydantic import Field, field_validator
+
+from ..utils import load_toml
+from .shared import ExtraModel
+
+
+class InferenceMode(str, Enum):
+    synth_hosted = "synth_hosted"
+
+
+class ProviderName(str, Enum):
+    openai = "openai"
+    groq = "groq"
+    google = "google"
+
+
+class PromptLearningPolicyConfig(ExtraModel):
+    """Policy configuration for prompt learning (model, provider, etc.)."""
+    model: str
+    provider: ProviderName
+    inference_url: str
+    inference_mode: InferenceMode = InferenceMode.synth_hosted
+    temperature: float = 0.0
+    max_completion_tokens: int = 512
+    policy_name: str | None = None
+
+    @field_validator("inference_url")
+    @classmethod
+    def _normalize_inference_url(cls, v: str) -> str:
+        if not isinstance(v, str):
+            raise ValueError("inference_url must be a string")
+        v = v.strip()
+        if not v.startswith(("http://", "https://")):
+            raise ValueError("inference_url must start with http:// or https://")
+        return v
+
+
+class MessagePatternConfig(ExtraModel):
+    """Configuration for a single message pattern."""
+    role: str
+    pattern: str
+    order: int = 0
+
+
+class PromptPatternConfig(ExtraModel):
+    """Initial prompt pattern configuration."""
+    id: str | None = None
+    name: str | None = None
+    messages: list[MessagePatternConfig] = []
+    wildcards: dict[str, str] = Field(default_factory=dict)
+
+
+class MIPROConfig(ExtraModel):
+    """MIPRO-specific configuration.
+
+    NOTE: MIPRO support is not yet implemented in synth-ai.
+    This configuration class exists for future compatibility.
+    Use GEPA algorithm for prompt optimization.
+    """
+    num_iterations: int = 20
+    num_evaluations_per_iteration: int = 5
+    batch_size: int = 32
+    max_concurrent: int = 20
+    env_name: str = "banking77"
+    env_config: dict[str, Any] | None = None
+    meta_model: str = "gpt-4o-mini"
+    meta_model_provider: str = "openai"
+    meta_model_inference_url: str | None = None
+    few_shot_score_threshold: float = 0.8
+    results_file: str | None = None
+    max_wall_clock_seconds: float | None = None
+    max_total_tokens: int | None = None
+
+    # TPE configuration
+    tpe: dict[str, Any] | None = None
+
+    # Demo configuration
+    demo: dict[str, Any] | None = None
+
+    # Grounding configuration
+    grounding: dict[str, Any] | None = None
+
+    # Meta-update configuration
+    meta_update: dict[str, Any] | None = None
+
+    # Bootstrap seeds (for few-shot examples)
+    bootstrap_train_seeds: list[int] | None = None
+
+    # Online pool (for mini-batch evaluation)
+    online_pool: list[int] | None = None
+
+    # Test pool (held-out seeds)
+    test_pool: list[int] | None = None
+
+
+# GEPA nested configs (mirroring RL structure)
+class GEPARolloutConfig(ExtraModel):
+    """GEPA rollout configuration (mirrors RL [rollout] section)."""
+    budget: int | None = None  # Total rollout budget
+    max_concurrent: int = 20  # Maximum concurrent rollouts
+    minibatch_size: int = 8  # Minibatch size for evaluation
+
+
+class GEPAEvaluationConfig(ExtraModel):
+    """GEPA evaluation configuration (mirrors RL [evaluation] section)."""
+    seeds: list[int] | None = None  # Evaluation seeds (training set)
+    validation_seeds: list[int] | None = None  # Validation seeds (held-out)
+    test_pool: list[int] | None = None  # Test pool (final evaluation)
+    validation_pool: str | None = None  # Pool name for validation (e.g., "validation")
+    validation_top_k: int | None = None  # Top-K prompts to validate
+
+
+class GEPAMutationConfig(ExtraModel):
+    """GEPA mutation configuration (LLM-guided mutation settings)."""
+    rate: float = 0.3  # Mutation rate
+    llm_model: str | None = None  # Model for generating mutations
+    llm_provider: str = "groq"  # Provider for mutation LLM
+    llm_inference_url: str | None = None  # Custom inference URL
+    prompt: str | None = None  # Custom mutation prompt
+
+
+class GEPAPopulationConfig(ExtraModel):
+    """GEPA population configuration (evolution parameters)."""
+    initial_size: int = 20  # Initial population size
+    num_generations: int = 10  # Number of generations
+    children_per_generation: int = 5  # Children generated per generation
+    crossover_rate: float = 0.5  # Crossover rate
+    selection_pressure: float = 1.0  # Pareto selection pressure
+    patience_generations: int = 3  # Early stopping patience
+
+
+class GEPAArchiveConfig(ExtraModel):
+    """GEPA archive configuration (Pareto archive settings)."""
+    size: int = 64  # Archive size
+    pareto_set_size: int = 64  # Pareto set size
+    pareto_eps: float = 1e-6  # Pareto epsilon
+    feedback_fraction: float = 0.5  # Fraction of archive for feedback
+
+
+class GEPATokenConfig(ExtraModel):
+    """GEPA token and budget configuration."""
+    max_limit: int | None = None  # Maximum tokens allowed in prompt
+    counting_model: str = "gpt-4"  # Model for token counting
+    enforce_pattern_limit: bool = True  # Enforce token limit on patterns
+    max_spend_usd: float | None = None  # Maximum spend in USD
+
+
+class GEPAConfig(ExtraModel):
+    """GEPA-specific configuration with nested subsections."""
+    # Top-level fields (for backwards compatibility)
+    env_name: str = "banking77"
+    env_config: dict[str, Any] | None = None
+    rng_seed: int | None = None
+    proposer_type: str = "dspy"  # "dspy" or "synth"
+
+    # Nested subsections (preferred, mirrors RL structure)
+    rollout: GEPARolloutConfig | None = None
+    evaluation: GEPAEvaluationConfig | None = None
+    mutation: GEPAMutationConfig | None = None
+    population: GEPAPopulationConfig | None = None
+    archive: GEPAArchiveConfig | None = None
+    token: GEPATokenConfig | None = None
+
+    # Backwards compatibility: flat fields (deprecated, prefer nested)
+    # These will be flattened from nested configs if provided
+    rollout_budget: int | None = None
+    max_concurrent_rollouts: int | None = None
+    minibatch_size: int | None = None
+    evaluation_seeds: list[int] | None = None
+    validation_seeds: list[int] | None = None
+    test_pool: list[int] | None = None
+    validation_pool: str | None = None
+    validation_top_k: int | None = None
+    mutation_rate: float | None = None
+    mutation_llm_model: str | None = None
+    mutation_llm_provider: str | None = None
+    mutation_llm_inference_url: str | None = None
+    mutation_prompt: str | None = None
+    initial_population_size: int | None = None
+    num_generations: int | None = None
+    children_per_generation: int | None = None
+    crossover_rate: float | None = None
+    selection_pressure: float | None = None
+    patience_generations: int | None = None
+    archive_size: int | None = None
+    pareto_set_size: int | None = None
+    pareto_eps: float | None = None
+    feedback_fraction: float | None = None
+    max_token_limit: int | None = None
+    token_counting_model: str | None = None
+    enforce_pattern_token_limit: bool | None = None
+    max_spend_usd: float | None = None
+
+    def _get_rollout_budget(self) -> int | None:
+        """Get rollout budget from nested or flat structure."""
+        if self.rollout and self.rollout.budget is not None:
+            return self.rollout.budget
+        return self.rollout_budget
+
+    def _get_max_concurrent_rollouts(self) -> int:
+        """Get max concurrent rollouts from nested or flat structure."""
+        if self.rollout and self.rollout.max_concurrent is not None:
+            return self.rollout.max_concurrent
+        return self.max_concurrent_rollouts or 20
+
+    def _get_minibatch_size(self) -> int:
+        """Get minibatch size from nested or flat structure."""
+        if self.rollout and self.rollout.minibatch_size is not None:
+            return self.rollout.minibatch_size
+        return self.minibatch_size or 8
+
+    def _get_evaluation_seeds(self) -> list[int] | None:
+        """Get evaluation seeds from nested or flat structure."""
+        if self.evaluation and self.evaluation.seeds is not None:
+            return self.evaluation.seeds
+        return self.evaluation_seeds
+
+    def _get_validation_seeds(self) -> list[int] | None:
+        """Get validation seeds from nested or flat structure."""
+        if self.evaluation and self.evaluation.validation_seeds is not None:
+            return self.evaluation.validation_seeds
+        return self.validation_seeds
+
+    def _get_test_pool(self) -> list[int] | None:
+        """Get test pool from nested or flat structure."""
+        if self.evaluation and self.evaluation.test_pool is not None:
+            return self.evaluation.test_pool
+        return self.test_pool
+
+    def _get_mutation_rate(self) -> float:
+        """Get mutation rate from nested or flat structure."""
+        if self.mutation and self.mutation.rate is not None:
+            return self.mutation.rate
+        return self.mutation_rate or 0.3
+
+    def _get_mutation_llm_model(self) -> str | None:
+        """Get mutation LLM model from nested or flat structure."""
+        if self.mutation and self.mutation.llm_model is not None:
+            return self.mutation.llm_model
+        return self.mutation_llm_model
+
+    def _get_mutation_llm_provider(self) -> str:
+        """Get mutation LLM provider from nested or flat structure."""
+        if self.mutation and self.mutation.llm_provider is not None:
+            return self.mutation.llm_provider
+        return self.mutation_llm_provider or "groq"
+
+    def _get_mutation_llm_inference_url(self) -> str | None:
+        """Get mutation LLM inference URL from nested or flat structure."""
+        if self.mutation and self.mutation.llm_inference_url is not None:
+            return self.mutation.llm_inference_url
+        return self.mutation_llm_inference_url
+
+    def _get_mutation_prompt(self) -> str | None:
+        """Get mutation prompt from nested or flat structure."""
+        if self.mutation and self.mutation.prompt is not None:
+            return self.mutation.prompt
+        return self.mutation_prompt
+
+    def _get_initial_population_size(self) -> int:
+        """Get initial population size from nested or flat structure."""
+        if self.population and self.population.initial_size is not None:
+            return self.population.initial_size
+        return self.initial_population_size or 20
+
+    def _get_num_generations(self) -> int:
+        """Get num generations from nested or flat structure."""
+        if self.population and self.population.num_generations is not None:
+            return self.population.num_generations
+        return self.num_generations or 10
+
+    def _get_children_per_generation(self) -> int:
+        """Get children per generation from nested or flat structure."""
+        if self.population and self.population.children_per_generation is not None:
+            return self.population.children_per_generation
+        return self.children_per_generation or 5
+
+    def _get_crossover_rate(self) -> float:
+        """Get crossover rate from nested or flat structure."""
+        if self.population and self.population.crossover_rate is not None:
+            return self.population.crossover_rate
+        return self.crossover_rate or 0.5
+
+    def _get_selection_pressure(self) -> float:
+        """Get selection pressure from nested or flat structure."""
+        if self.population and self.population.selection_pressure is not None:
+            return self.population.selection_pressure
+        return self.selection_pressure or 1.0
+
+    def _get_patience_generations(self) -> int:
+        """Get patience generations from nested or flat structure."""
+        if self.population and self.population.patience_generations is not None:
+            return self.population.patience_generations
+        return self.patience_generations or 3
+
+    def _get_archive_size(self) -> int:
+        """Get archive size from nested or flat structure."""
+        if self.archive and self.archive.size is not None:
+            return self.archive.size
+        return self.archive_size or 64
+
+    def _get_pareto_set_size(self) -> int:
+        """Get pareto set size from nested or flat structure."""
+        if self.archive and self.archive.pareto_set_size is not None:
+            return self.archive.pareto_set_size
+        return self.pareto_set_size or 64
+
+    def _get_pareto_eps(self) -> float:
+        """Get pareto eps from nested or flat structure."""
+        if self.archive and self.archive.pareto_eps is not None:
+            return self.archive.pareto_eps
+        return self.pareto_eps or 1e-6
+
+    def _get_feedback_fraction(self) -> float:
+        """Get feedback fraction from nested or flat structure."""
+        if self.archive and self.archive.feedback_fraction is not None:
+            return self.archive.feedback_fraction
+        return self.feedback_fraction or 0.5
+
+    def _get_max_token_limit(self) -> int | None:
+        """Get max token limit from nested or flat structure."""
+        if self.token and self.token.max_limit is not None:
+            return self.token.max_limit
+        return self.max_token_limit
+
+    def _get_token_counting_model(self) -> str:
+        """Get token counting model from nested or flat structure."""
+        if self.token and self.token.counting_model is not None:
+            return self.token.counting_model
+        return self.token_counting_model or "gpt-4"
+
+    def _get_enforce_pattern_token_limit(self) -> bool:
+        """Get enforce pattern token limit from nested or flat structure."""
+        if self.token and self.token.enforce_pattern_limit is not None:
+            return self.token.enforce_pattern_limit
+        return self.enforce_pattern_token_limit if self.enforce_pattern_token_limit is not None else True
+
+    def _get_max_spend_usd(self) -> float | None:
+        """Get max spend USD from nested or flat structure."""
+        if self.token and self.token.max_spend_usd is not None:
+            return self.token.max_spend_usd
+        return self.max_spend_usd
+
+    @classmethod
+    def from_mapping(cls, data: Mapping[str, Any]) -> GEPAConfig:
+        """Load GEPA config from dict/TOML, handling both nested and flat structures."""
+        # Check for nested structure first
+        nested_data = {}
+        flat_data = {}
+
+        for key, value in data.items():
+            if key in ("rollout", "evaluation", "mutation", "population", "archive", "token"):
+                nested_data[key] = value
+            else:
+                flat_data[key] = value
+
+        # If we have nested data, create nested configs
+        if nested_data:
+            if "rollout" in nested_data:
+                nested_data["rollout"] = GEPARolloutConfig.model_validate(nested_data["rollout"])
+            if "evaluation" in nested_data:
+                nested_data["evaluation"] = GEPAEvaluationConfig.model_validate(nested_data["evaluation"])
+            if "mutation" in nested_data:
+                nested_data["mutation"] = GEPAMutationConfig.model_validate(nested_data["mutation"])
+            if "population" in nested_data:
+                nested_data["population"] = GEPAPopulationConfig.model_validate(nested_data["population"])
+            if "archive" in nested_data:
+                nested_data["archive"] = GEPAArchiveConfig.model_validate(nested_data["archive"])
+            if "token" in nested_data:
+                nested_data["token"] = GEPATokenConfig.model_validate(nested_data["token"])
+
+        # Merge nested and flat data
+        merged_data = {**flat_data, **nested_data}
+        return cls.model_validate(merged_data)
+
+
+class PromptLearningConfig(ExtraModel):
+    """Top-level prompt learning configuration."""
+    algorithm: str  # "mipro" or "gepa"
+    task_app_url: str
+    task_app_api_key: str | None = None
+    task_app_id: str | None = None
+    initial_prompt: PromptPatternConfig | None = None
+    policy: PromptLearningPolicyConfig | None = None
+    mipro: MIPROConfig | None = None
+    gepa: GEPAConfig | None = None
+    env_config: dict[str, Any] | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert config to dictionary for API payload."""
+        result = self.model_dump(mode="python", exclude_none=True)
+        # Ensure prompt_learning section wraps everything
+        if "prompt_learning" not in result:
+            pl_data = dict(result.items())
+            result = {"prompt_learning": pl_data}
+        return result
+
+    @classmethod
+    def from_mapping(cls, data: Mapping[str, Any]) -> PromptLearningConfig:
+        """Load prompt learning config from dict/TOML mapping."""
+        # Handle both [prompt_learning] section and flat structure
+        pl_data = data.get("prompt_learning", {})
+        if not pl_data:
+            # If no prompt_learning section, assume top-level is prompt_learning
+            pl_data = dict(data)
+
+        # Handle gepa config specially to support nested structure
+        if "gepa" in pl_data and isinstance(pl_data["gepa"], dict):
+            gepa_data = pl_data["gepa"]
+            pl_data["gepa"] = GEPAConfig.from_mapping(gepa_data)
+
+        return cls.model_validate(pl_data)
+
+    @classmethod
+    def from_path(cls, path: Path) -> PromptLearningConfig:
+        """Load prompt learning config from TOML file."""
+        content = load_toml(path)
+        return cls.from_mapping(content)
+
+
+__all__ = [
+    "GEPAConfig",
+    "GEPARolloutConfig",
+    "GEPAEvaluationConfig",
+    "GEPAMutationConfig",
+    "GEPAPopulationConfig",
+    "GEPAArchiveConfig",
+    "GEPATokenConfig",
+    "MIPROConfig",
+    "MessagePatternConfig",
+    "PromptLearningConfig",
+    "PromptLearningPolicyConfig",
+    "PromptPatternConfig",
+]
@@ -7,7 +7,7 @@ from typing import Any
 from pydantic import model_validator

 from ..utils import load_toml
-from .shared import AlgorithmConfig, ComputeConfig, ExtraModel
+from .shared import AlgorithmConfig, ComputeConfig, ExtraModel, LoraConfig, PolicyConfig


 class RLServicesConfig(ExtraModel):
@@ -48,6 +48,16 @@ class WeightSyncConfig(ExtraModel):
     verify_every_k: int | None = None


+class RewardsConfig(ExtraModel):
+    """Rewards configuration for RL training."""
+    step_rewards_enabled: bool | None = None
+    step_rewards_mode: str | None = None
+    step_rewards_indicator_lambda: float | None = None
+    step_rewards_beta: float | None = None
+    step_rewards_strategy: str | None = None
+    event_rewards_kind: str | None = None
+
+
 class RLTrainingConfig(ExtraModel):
     num_epochs: int
     iterations_per_epoch: int
@@ -59,13 +69,17 @@ class RLTrainingConfig(ExtraModel):
     learning_rate: float
     log_interval: int | None = None
     weight_sync_interval: int | None = None
+    # DEPRECATED: flat reward fields (use rewards.* instead)
     step_rewards_enabled: bool | None = None
     step_rewards_mode: str | None = None
     step_rewards_indicator_lambda: float | None = None
     step_rewards_beta: float | None = None
     step_rewards_strategy: str | None = None
     event_rewards_kind: str | None = None
+    # NEW: nested configs
     weight_sync: WeightSyncConfig | None = None
+    lora: LoraConfig | None = None
+    rewards: RewardsConfig | None = None


 class EvaluationConfig(ExtraModel):
@@ -86,34 +100,73 @@ class JudgeOptionsConfig(ExtraModel):
     max_concurrency: int | None = None


+class RubricConfig(ExtraModel):
+    """Rubric configuration for reward blending."""
+    enabled: bool = False
+    reward_blend: dict[str, float] | None = None  # env, event, outcome weights
+
+
 class JudgeConfig(ExtraModel):
     type: str | None = None
     timeout_s: int | None = None
+    enabled: bool | None = None  # Master switch for judge/rubric
+    reward_blend: dict[str, float] | None = None  # NEW: nested reward blending (replaces rubric.weights)
+    rubric: RubricConfig | None = None  # DEPRECATED: use flat fields instead
     options: JudgeOptionsConfig | None = None


+class SmokeConfig(ExtraModel):
+    """Configuration for local smoke testing (CLI only, ignored by trainer)."""
+    # Test parameters
+    task_url: str | None = None
+    env_name: str | None = None
+    policy_name: str | None = None
+    max_steps: int | None = None
+    policy: str | None = None  # mock, gpt-5-nano, openai, groq
+    model: str | None = None
+    mock_backend: str | None = None  # synthetic or openai
+    mock_port: int | None = None
+    return_trace: bool | None = None
+    use_mock: bool | None = None
+
+    # Task app auto-start configuration
+    task_app_name: str | None = None  # Task app to serve (e.g., "grpo-crafter")
+    task_app_port: int | None = None  # Port for task app (default: 8765)
+    task_app_env_file: str | None = None  # Path to .env file for task app
+    task_app_force: bool | None = None  # Use --force flag when serving
+
+    # sqld auto-start configuration
+    sqld_auto_start: bool | None = None  # Auto-start sqld server
+    sqld_db_path: str | None = None  # Database path (default: ./traces/local.db)
+    sqld_hrana_port: int | None = None  # Hrana WebSocket port (default: 8080)
+    sqld_http_port: int | None = None  # HTTP API port (default: 8081)
+
+
 class RLConfig(ExtraModel):
     algorithm: AlgorithmConfig
     services: RLServicesConfig
     compute: ComputeConfig | None = None
-    topology: dict[str, Any] | None = None
+    topology: dict[str, Any] | None = None  # DEPRECATED: use compute.topology instead
     vllm: dict[str, Any] | None = None
-    reference: dict[str, Any] | None = None
-    model: ModelConfig
-    lora: dict[str, Any] | None = None
+    reference: dict[str, Any] | None = None  # DEPRECATED: use compute.topology.reference_placement instead
+    model: ModelConfig | None = None  # DEPRECATED: use policy instead
+    policy: PolicyConfig | None = None  # NEW: unified policy (preferred)
+    lora: dict[str, Any] | None = None  # DEPRECATED: use training.lora instead
     rollout: RolloutConfig | None = None
     evaluation: EvaluationConfig | None = None
     training: RLTrainingConfig | None = None
-    rubric: dict[str, Any] | None = None
+    rubric: dict[str, Any] | None = None  # DEPRECATED: use judge.reward_blend and judge.enabled instead
     judge: JudgeConfig | None = None
     tags: dict[str, Any] | None = None
+    smoke: SmokeConfig | None = None  # CLI-only: local smoke testing config (ignored by trainer)

     def to_dict(self) -> dict[str, Any]:
         return self.model_dump(mode="python", exclude_none=True)

     @classmethod
     def from_mapping(cls, data: Mapping[str, Any]) -> RLConfig:
-        return cls.model_validate(dict(data))
+        """Load RL config from dict/TOML mapping."""
+        return cls.model_validate(data)

     @classmethod
     def from_path(cls, path: Path) -> RLConfig:
@@ -130,5 +183,6 @@ __all__ = [
     "RLServicesConfig",
     "RLTrainingConfig",
     "RolloutConfig",
+    "SmokeConfig",
     "WeightSyncConfig",
 ]
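
These last hunks appear to belong to synth_ai/api/train/configs/rl.py (entry 154) and follow the same migration pattern: the flat step_rewards_* fields, [rubric], topology, reference, model, and lora remain readable but are commented DEPRECATED, while [training.rewards], [training.lora], judge.enabled / judge.reward_blend, policy, and the CLI-only [smoke] section are the preferred spellings. A rough sketch that builds just the nested models added above, assuming the module path from the file list; the field names come from the diff and the values are invented:

# Sketch: constructs the new nested models from the diff; values are illustrative only.
from synth_ai.api.train.configs.rl import JudgeConfig, RewardsConfig, SmokeConfig

rewards = RewardsConfig(step_rewards_enabled=True, step_rewards_beta=0.1)
judge = JudgeConfig(enabled=True, reward_blend={"env": 0.2, "event": 0.3, "outcome": 0.5})
smoke = SmokeConfig(task_app_name="grpo-crafter", task_app_port=8765, use_mock=True)

# These map onto [training.rewards], [judge], and [smoke] tables in a TOML config;
# per the comments above, the trainer ignores [smoke], which is meant for local
# CLI-side smoke testing.
print(judge.model_dump(exclude_none=True))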