PyPI - synth-ai - Versions diffs - 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl - Mend

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (299) hide show

examples/analyze_semantic_words.sh +2 -2
examples/baseline/banking77_baseline.py +204 -0
examples/baseline/crafter_baseline.py +407 -0
examples/baseline/pokemon_red_baseline.py +326 -0
examples/baseline/simple_baseline.py +56 -0
examples/baseline/warming_up_to_rl_baseline.py +239 -0
examples/blog_posts/gepa/README.md +355 -0
examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
examples/blog_posts/gepa/gepa_baseline.py +204 -0
examples/blog_posts/gepa/query_prompts_example.py +97 -0
examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
examples/blog_posts/gepa/task_apps.py +105 -0
examples/blog_posts/gepa/test_gepa_local.sh +67 -0
examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
examples/blog_posts/pokemon_vl/README.md +98 -0
examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
examples/blog_posts/pokemon_vl/extract_images.py +239 -0
examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
examples/blog_posts/warming_up_to_rl/README.md +158 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
examples/multi_step/configs/verilog_rl_lora.toml +80 -123
examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
examples/qwen_coder/configs/coder_lora_small.toml +1 -3
examples/qwen_vl/README.md +10 -12
examples/qwen_vl/SETUP_COMPLETE.md +7 -8
examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
examples/qwen_vl/collect_data_via_cli.md +76 -84
examples/qwen_vl/collect_vision_traces.py +4 -4
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
examples/qwen_vl/run_vision_comparison.sh +6 -7
examples/rl/README.md +5 -5
examples/rl/configs/rl_from_base_qwen.toml +26 -1
examples/rl/configs/rl_from_base_qwen17.toml +6 -2
examples/rl/task_app/README.md +1 -2
examples/rl/task_app/math_single_step.py +2 -2
examples/run_crafter_demo.sh +2 -2
examples/sft/README.md +1 -1
examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
examples/swe/task_app/README.md +32 -2
examples/swe/task_app/grpo_swe_mini.py +4 -0
examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
examples/swe/task_app/hosted/inference/openai_client.py +4 -38
examples/swe/task_app/hosted/policy_routes.py +17 -0
examples/swe/task_app/hosted/rollout.py +4 -2
examples/swe/task_app/morph_backend.py +178 -0
examples/task_apps/banking77/__init__.py +6 -0
examples/task_apps/banking77/banking77_task_app.py +841 -0
examples/task_apps/banking77/deploy_wrapper.py +46 -0
examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
examples/task_apps/crafter/task_app/README.md +1 -1
examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
examples/task_apps/gepa_benchmarks/__init__.py +7 -0
examples/task_apps/gepa_benchmarks/common.py +260 -0
examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
examples/task_apps/math/README.md +1 -2
examples/task_apps/pokemon_red/README.md +3 -4
examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
examples/task_apps/pokemon_red/task_app.py +288 -39
examples/task_apps/sokoban/README.md +2 -3
examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
examples/warming_up_to_rl/task_app/README.md +1 -1
examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
synth_ai/api/train/builders.py +99 -4
synth_ai/api/train/cli.py +516 -26
synth_ai/api/train/config_finder.py +13 -2
synth_ai/api/train/configs/__init__.py +23 -2
synth_ai/api/train/configs/prompt_learning.py +442 -0
synth_ai/api/train/configs/rl.py +61 -7
synth_ai/api/train/configs/sft.py +6 -2
synth_ai/api/train/configs/shared.py +59 -2
synth_ai/api/train/task_app.py +1 -1
synth_ai/api/train/validators.py +277 -0
synth_ai/auth/credentials.py +119 -0
synth_ai/baseline/__init__.py +25 -0
synth_ai/baseline/config.py +209 -0
synth_ai/baseline/discovery.py +214 -0
synth_ai/baseline/execution.py +146 -0
synth_ai/cli/__init__.py +94 -18
synth_ai/cli/__main__.py +0 -0
synth_ai/cli/claude.py +70 -0
synth_ai/cli/codex.py +84 -0
synth_ai/cli/commands/__init__.py +18 -0
synth_ai/cli/commands/baseline/__init__.py +12 -0
synth_ai/cli/commands/baseline/core.py +637 -0
synth_ai/cli/commands/baseline/list.py +93 -0
synth_ai/cli/commands/demo/__init__.py +6 -0
synth_ai/cli/commands/demo/core.py +163 -0
synth_ai/cli/commands/eval/__init__.py +19 -0
synth_ai/cli/commands/eval/core.py +1112 -0
synth_ai/cli/commands/eval/errors.py +81 -0
synth_ai/cli/commands/eval/validation.py +133 -0
synth_ai/cli/commands/filter/__init__.py +12 -0
synth_ai/cli/commands/filter/core.py +424 -0
synth_ai/cli/commands/filter/errors.py +55 -0
synth_ai/cli/commands/filter/validation.py +77 -0
synth_ai/cli/commands/help/__init__.py +177 -0
synth_ai/cli/commands/help/core.py +72 -0
synth_ai/cli/commands/smoke/__init__.py +7 -0
synth_ai/cli/commands/smoke/core.py +1436 -0
synth_ai/cli/commands/status/__init__.py +64 -0
synth_ai/cli/commands/status/client.py +192 -0
synth_ai/cli/commands/status/config.py +92 -0
synth_ai/cli/commands/status/errors.py +20 -0
synth_ai/cli/commands/status/formatters.py +164 -0
synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
synth_ai/cli/commands/status/subcommands/files.py +79 -0
synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
synth_ai/cli/commands/status/subcommands/models.py +79 -0
synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
synth_ai/cli/commands/status/subcommands/runs.py +81 -0
synth_ai/cli/commands/status/subcommands/summary.py +47 -0
synth_ai/cli/commands/status/subcommands/usage.py +203 -0
synth_ai/cli/commands/status/utils.py +114 -0
synth_ai/cli/commands/train/__init__.py +53 -0
synth_ai/cli/commands/train/core.py +21 -0
synth_ai/cli/commands/train/errors.py +117 -0
synth_ai/cli/commands/train/judge_schemas.py +200 -0
synth_ai/cli/commands/train/judge_validation.py +305 -0
synth_ai/cli/commands/train/validation.py +386 -0
synth_ai/cli/demo.py +30 -158
synth_ai/cli/deploy/__init__.py +43 -0
synth_ai/cli/deploy.py +162 -0
synth_ai/cli/eval/__init__.py +36 -0
synth_ai/cli/eval/core.py +5 -0
synth_ai/cli/eval/errors.py +31 -0
synth_ai/cli/eval/validation.py +5 -0
synth_ai/cli/filter/__init__.py +28 -0
synth_ai/cli/filter/core.py +5 -0
synth_ai/cli/filter/errors.py +23 -0
synth_ai/cli/filter/validation.py +5 -0
synth_ai/cli/legacy_root_backup.py +14 -8
synth_ai/cli/modal_serve/__init__.py +12 -0
synth_ai/cli/modal_serve/core.py +14 -0
synth_ai/cli/modal_serve/errors.py +8 -0
synth_ai/cli/modal_serve/validation.py +11 -0
synth_ai/cli/opencode.py +107 -0
synth_ai/cli/root.py +9 -5
synth_ai/cli/serve/__init__.py +12 -0
synth_ai/cli/serve/core.py +14 -0
synth_ai/cli/serve/errors.py +8 -0
synth_ai/cli/serve/validation.py +11 -0
synth_ai/cli/setup.py +20 -265
synth_ai/cli/status.py +7 -126
synth_ai/cli/task_app_deploy.py +1 -10
synth_ai/cli/task_app_modal_serve.py +4 -9
synth_ai/cli/task_app_serve.py +4 -11
synth_ai/cli/task_apps.py +51 -1480
synth_ai/cli/train/__init__.py +12 -0
synth_ai/cli/train/core.py +21 -0
synth_ai/cli/train/errors.py +8 -0
synth_ai/cli/train/validation.py +24 -0
synth_ai/cli/train.py +1 -14
synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
synth_ai/environments/examples/red/engine.py +33 -12
synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
synth_ai/environments/examples/red/environment.py +26 -0
synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
synth_ai/http.py +12 -0
synth_ai/judge_schemas.py +10 -10
synth_ai/learning/__init__.py +10 -0
synth_ai/learning/prompt_learning_client.py +276 -0
synth_ai/learning/prompt_learning_types.py +184 -0
synth_ai/learning/rl/client.py +3 -1
synth_ai/pricing/__init__.py +2 -0
synth_ai/pricing/model_pricing.py +57 -0
synth_ai/streaming/__init__.py +29 -0
synth_ai/streaming/config.py +94 -0
synth_ai/streaming/handlers.py +518 -0
synth_ai/streaming/streamer.py +320 -0
synth_ai/streaming/types.py +95 -0
synth_ai/task/apps/__init__.py +1 -0
synth_ai/task/config.py +2 -0
synth_ai/task/tracing_utils.py +25 -25
synth_ai/task/validators.py +45 -9
synth_ai/task_app_cfgs.py +21 -0
synth_ai/tracing_v3/config.py +162 -19
synth_ai/tracing_v3/constants.py +1 -1
synth_ai/tracing_v3/db_config.py +24 -38
synth_ai/tracing_v3/migration_helper.py +1 -2
synth_ai/tracing_v3/storage/config.py +47 -13
synth_ai/tracing_v3/storage/factory.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +113 -11
synth_ai/tracing_v3/turso/native_manager.py +92 -16
synth_ai/types.py +8 -0
synth_ai/urls.py +11 -0
synth_ai/utils/__init__.py +30 -1
synth_ai/utils/agents.py +74 -0
synth_ai/utils/bin.py +39 -0
synth_ai/utils/cli.py +149 -5
synth_ai/utils/env.py +40 -33
synth_ai/utils/http.py +4 -1
synth_ai/utils/json.py +72 -0
synth_ai/utils/modal.py +285 -3
synth_ai/utils/paths.py +48 -0
synth_ai/utils/uvicorn.py +113 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
synth_ai/cli/tui.py +0 -62
synth_ai/tui/__init__.py +0 -5
synth_ai/tui/__main__.py +0 -13
synth_ai/tui/cli/__init__.py +0 -1
synth_ai/tui/cli/query_experiments.py +0 -164
synth_ai/tui/cli/query_experiments_v3.py +0 -164
synth_ai/tui/dashboard.py +0 -911
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0

examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py CHANGED Viewed

@@ -466,11 +466,20 @@ async def step_policy(
             if tracing_context is not None:
                 try:
+                    print(
+                        f"[TRACE_DEBUG] record_policy_prompts sys={len(system_prompt_records)} user={len(user_prompt_records)}",
+                        flush=True,
+                    )
                     await tracing_context.record_policy_prompts(
                         system_prompt_records, user_prompt_records
                     )
                 except Exception as exc:
                     logger.debug(f"TRACING_PROMPTS_FAIL: {exc}")
+            else:
+                print(
+                    f"[TRACE_DEBUG] Missing tracing context on policy step; policy_id={request.policy_id}",
+                    flush=True,
+                )
             # Create inference client (choose API key by target provider)
             # Require inference_url to be set explicitly by the rollout policy config.
@@ -492,7 +501,11 @@ async def step_policy(
                 if isinstance(target_url, str):
                     low_url = target_url.lower()
                     # Proxy endpoints should not receive a bearer; the server-side proxy holds the vendor key
-                    if "/proxy/groq" in low_url or "/proxy/openai" in low_url:
+                    if (
+                        "/proxy/groq" in low_url
+                        or "/proxy/openai" in low_url
+                        or "/proxy/v1" in low_url
+                    ):
                         api_key_override = None
                     elif "openai.com" in low_url:
                         api_key_override = _os.getenv("OPENAI_API_KEY") or getattr(
@@ -692,9 +705,10 @@ async def step_policy(
                 "sokoban-react",
                 "crafter-react",
             ) and getattr(policy, "use_tools", True):
-                req_tools = meta["inference_request"]["tools"]
-                req_tool_choice = meta["inference_request"]["tool_choice"]
-                req_stop_after = meta["inference_request"]["stop_after_tool_calls"]
+                inf_req = meta.get("inference_request", {})
+                req_tools = inf_req.get("tools")
+                req_tool_choice = inf_req.get("tool_choice")
+                req_stop_after = inf_req.get("stop_after_tool_calls")
                 logger.info(
                     f"TOOLCALL_CONFIG: policy={policy_name} tools_present={bool(req_tools)} tool_choice={req_tool_choice} stop_after={req_stop_after}"
                 )
@@ -703,6 +717,8 @@ async def step_policy(
                         status_code=500,
                         detail=f"TOOLCALL_ASSERTION_FAIL: Missing tools or tool_choice!=required for policy {policy_name}",
                     )
+                if req_stop_after is None:
+                    inf_req["stop_after_tool_calls"] = 1
             # Call inference service with retries for Flash cold-start (503)
             import time as _t
@@ -951,6 +967,23 @@ async def step_policy(
                 except Exception as exc:
                     logger.debug(f"TRACING_LLM_FAIL: {exc}")
+        if not tool_calls:
+            preview = ""
+            try:
+                preview = str(meta.get("raw_response") or "")[:400]
+            except Exception:
+                preview = "<unavailable>"
+            logger.error(
+                {
+                    "rollout.policy_step": True,
+                    "policy_id": request.policy_id,
+                    "error": "no_tool_calls",
+                    "inference_url": meta.get("inference_url"),
+                    "raw_preview": preview,
+                }
+            )
+            raise RuntimeError("Policy step produced no tool calls; inference response unusable.")
         return PolicyStepResponse(
             tool_calls=tool_calls,
             meta=meta,

examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py CHANGED Viewed

@@ -223,6 +223,7 @@ class RolloutTracingContext:
         ).lower()
         self.return_trace = bool(getattr(request.record, "return_trace", False))
         self.sft_output_dir = getattr(fastapi_request.app.state, "sft_output_dir", None)
+        print(f"[TRACE_DEBUG] RolloutTracingContext init: trace_format={self.trace_format} return_trace={self.return_trace}", flush=True)
         self.session_trace = None
         self.metadata_updates: dict[str, Any] = {}
         self.policy_name = request.policy.policy_name or ""
@@ -244,19 +245,24 @@ class RolloutTracingContext:
     async def start_session(self) -> None:
         if not self.enabled or self.tracer is None:
+            print("[TRACE_DEBUG] start_session skipped: tracer disabled", flush=True)
             return
         try:
             await self.tracer.initialize()
+            print("[TRACE_DEBUG] tracer initialized", flush=True)
         except Exception as exc:
             logger.debug("TRACING_INIT_FAIL: %s", exc)
+            # Hard fail: tracing requested but cannot initialize
+            raise
         try:
             await self.tracer.start_session(
                 session_id=self.run_id, metadata=dict(self.metadata_base)
             )
+            print(f"[TRACE_DEBUG] start_session succeeded for run_id={self.run_id}", flush=True)
         except Exception as exc:
             logger.warning("TRACING_START_FAIL: %s", exc)
-            self.enabled = False
-            self.tracer = None
+            # Hard fail: tracing requested but cannot start session
+            raise
     async def start_decision(self, turn_number: int) -> None:
         self.current_turn = turn_number
@@ -317,6 +323,9 @@ class RolloutTracingContext:
                 )
             except Exception as exc:
                 logger.debug("TRACING_USER_MSG_FAIL: %s", exc)
+        if self.tracer and self.tracer._current_trace:
+            msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
+            print(f"[TRACE_DEBUG] After record_policy_prompts: {msg_count} messages", flush=True)
     def _content_to_text(self, content: Any) -> str:
         if isinstance(content, str):
@@ -395,6 +404,11 @@ class RolloutTracingContext:
                     message_type="policy_tool_call",
                     metadata=self._message_metadata(),
                 )
+                if self.tracer._current_trace:
+                    print(
+                        f"[TRACE_DEBUG] After tool invocation: messages={len(self.tracer._current_trace.markov_blanket_message_history)}",
+                        flush=True,
+                    )
             except Exception as exc:
                 logger.debug("TRACING_TOOL_MSG_FAIL: %s", exc)
@@ -664,12 +678,24 @@ class RolloutTracingContext:
             except Exception as exc:
                 logger.debug("TRACING_OUTCOME_FAIL: %s", exc)
             try:
+                if self.tracer._current_trace:
+                    msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
+                    print(f"[TRACE_DEBUG] Before end_session: {msg_count} messages in trace", flush=True)
                 self.session_trace = await self.tracer.end_session()
                 if self.session_trace is not None:
                     self.session_trace.metadata.update(self.metadata_updates)
+                    print(
+                        f"[TRACE_DEBUG] Session ended successfully, session_id={self.session_trace.session_id}",
+                        flush=True,
+                    )
+                    print(
+                        f"[TRACE_DEBUG] session_trace.metadata keys: {list(self.session_trace.metadata.keys())}",
+                        flush=True,
+                    )
             except Exception as exc:
                 logger.debug("TRACING_END_SESSION_FAIL: %s", exc)
                 self.session_trace = None
+                print(f"[TRACE_DEBUG] end_session failed for run_id={self.run_id}: {exc}", flush=True)
             with contextlib.suppress(Exception):
                 await self.tracer.close()
@@ -700,9 +726,13 @@ class RolloutTracingContext:
     def build_trace_payload(self, session_trace: Any) -> dict[str, Any] | None:
         if not self.return_trace or session_trace is None:
             return None
-        if self.trace_format == "full":
+        if self.trace_format in ("full", "structured"):
             payload = session_trace.to_dict()
             payload.setdefault("metadata", {}).update(self.metadata_updates)
+            print(
+                f"[TRACE_DEBUG] build_trace_payload returning structured trace with messages={len(payload.get('markov_blanket_message_history') or [])}",
+                flush=True,
+            )
             return payload
         metadata = dict(session_trace.metadata)
         metadata.update(self.metadata_updates)

examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """Utility functions for the task service."""
 from typing import Any
+from urllib.parse import urlparse, urlunparse
 import numpy as np
@@ -60,3 +61,69 @@ def sanitize_observation(observation: dict[str, Any]) -> dict[str, Any]:
             sanitized[key] = convert_numpy_to_python(value)
     return sanitized
+_CHAT_COMPLETIONS_SUFFIX = "/v1/chat/completions"
+def force_normalize_chat_completions_url(raw_url: Any) -> Any:
+    """
+    Convert ANY malformed inference URL into the correct chat-completions form.
+    Ensures path ends with /v1/chat/completions and that query has no '/' segments.
+    """
+    if not isinstance(raw_url, str):
+        return raw_url
+    url = raw_url.strip()
+    if not url:
+        return raw_url
+    parsed = urlparse(url)
+    path = (parsed.path or "").rstrip("/")
+    query = parsed.query or ""
+    # If query contains a path, extract and repair
+    if query and "/" in query:
+        before_slash, after_slash = query.split("/", 1)
+        cut_positions = [i for i in [after_slash.find("&"), after_slash.find("?")] if i >= 0]
+        cut = min(cut_positions) if cut_positions else len(after_slash)
+        path_from_query = "/" + after_slash[:cut]
+        extra_query = after_slash[cut + 1 :] if cut < len(after_slash) else ""
+        merged_query = before_slash if before_slash else ""
+        if extra_query:
+            merged_query = f"{merged_query}&{extra_query}" if merged_query else extra_query
+        final_path = (
+            path_from_query
+            if path_from_query.startswith(_CHAT_COMPLETIONS_SUFFIX)
+            else f"{path_from_query.rstrip('/')}{_CHAT_COMPLETIONS_SUFFIX}"
+        )
+        parsed = parsed._replace(path=final_path, query=merged_query)
+        url = urlunparse(parsed)
+        parsed = urlparse(url)
+        path = parsed.path or ""
+        query = parsed.query or ""
+    # Ensure path suffix
+    if not path.endswith(_CHAT_COMPLETIONS_SUFFIX):
+        new_path = f"{path}{_CHAT_COMPLETIONS_SUFFIX}" if path else _CHAT_COMPLETIONS_SUFFIX
+        parsed = parsed._replace(path=new_path)
+        url = urlunparse(parsed)
+        parsed = urlparse(url)
+        path = parsed.path or ""
+        query = parsed.query or ""
+    # Last-resort: strip any '/' from query
+    if query and "/" in query:
+        safe_query = query.split("/")[0]
+        parsed = parsed._replace(query=safe_query)
+        url = urlunparse(parsed)
+    return url
+def ensure_chat_completions_url(raw_url: Any, mode: Any = None) -> Any:
+    """
+    Mode-aware normalizer (RL/EVAL) that returns a valid chat completions URL and
+    preserves existing query parameters.
+    """
+    # For now reuse force normalizer in both modes to guarantee correctness
+    return force_normalize_chat_completions_url(raw_url)

examples/workflows/math_rl/configs/rl_from_base_qwen.toml CHANGED Viewed

@@ -1,8 +1,15 @@
+[algorithm]
+type = "online"
+method = "policy_gradient"
+variety = "gspo"
 [services]
 task_url = "https://your-math-task.modal.run"
 [model]
 base = "Qwen/Qwen3-4B"
+trainer_mode = "full"
+label = "math-single-step-qwen3-4b"
 [policy]
 model = "Qwen/Qwen3-4B"
@@ -18,6 +25,8 @@ evaluation_split = "validation"
 evaluation_episodes = 256
 [training]
+num_epochs = 1
+iterations_per_epoch = 20
 max_turns = 1
 ops = ["agent", "env"]
 batch_size = 128
@@ -31,5 +40,23 @@ learning_rate = 5e-6
 gpu_type = "A10G"
 gpu_count = 4
+[topology]
+type = "single_node_split"
+gpus_for_vllm = 2
+gpus_for_training = 2
+gpus_for_ref = 0
+tensor_parallel = 1
+[rollout]
+env_name = "math"
+policy_name = "math-single-step"
+max_turns = 1
+episodes_per_batch = 256
+[evaluation]
+instances = 256
+every_n_iters = 10
+seeds = [0, 1, 2, 3, 4]
 [tags]
 experiment = "math_single_step"

examples/workflows/math_rl/configs/rl_from_base_qwen17.toml CHANGED Viewed

@@ -8,6 +8,8 @@ task_url = "http://localhost:8101"
 [model]
 base = "Qwen/Qwen3-1.7B"
+trainer_mode = "full"
+label = "math-single-step-qwen3-1.7b"
 [policy]
 model = "Qwen/Qwen3-1.7B"
@@ -23,6 +25,8 @@ evaluation_split = "validation"
 evaluation_episodes = 50
 [training]
+num_epochs = 1
+iterations_per_epoch = 20
 max_turns = 1
 ops = ["agent", "env"]
 batch_size = 2
@@ -61,9 +65,11 @@ health_max_wait_s = 180
 health_interval_ms = 300
 [rollout]
+env_name = "math"
 policy_name = "math-single-step"
 max_turns = 1
 episodes_per_batch = 32  # group_size * batch_size
+task_app_origin_rewards_only = true
 [evaluation]
 instances = 32

synth_ai/api/train/builders.py CHANGED Viewed

@@ -33,7 +33,7 @@ try:
 except Exception as exc:  # pragma: no cover - critical dependency
     raise RuntimeError("Unable to load SFT payload helpers") from exc
-from .configs import RLConfig, SFTConfig
+from .configs import PromptLearningConfig, RLConfig, SFTConfig
 from .supported_algos import (
     AlgorithmValidationError,
     ensure_model_supported_for_algorithm,
@@ -56,6 +56,12 @@ class SFTBuildResult:
     validation_file: Path | None
+@dataclass(slots=True)
+class PromptLearningBuildResult:
+    payload: dict[str, Any]
+    task_url: str
 def _format_validation_error(path: Path, exc: ValidationError) -> str:
     lines: list[str] = []
     for error in exc.errors():
@@ -74,12 +80,23 @@ def build_rl_payload(
     idempotency: str | None,
     allow_experimental: bool | None = None,
 ) -> RLBuildResult:
+    # Load and validate config with SDK-level checks
+    from synth_ai.api.train.utils import load_toml
+    from synth_ai.cli.commands.train.validation import validate_rl_config
     try:
-        rl_cfg = RLConfig.from_path(config_path)
+        raw_config = load_toml(config_path)
+        validated_config = validate_rl_config(raw_config)  # Adds defaults & validates
+        rl_cfg = RLConfig.from_mapping(validated_config)
     except ValidationError as exc:
         raise click.ClickException(_format_validation_error(config_path, exc)) from exc
     data = rl_cfg.to_dict()
+    # Remove smoke section - it's CLI-only and should not be sent to the trainer
+    if "smoke" in data:
+        del data["smoke"]
     # Ensure required [reference] section for backend validators
     try:
         ref_cfg = data.get("reference") if isinstance(data, dict) else None
@@ -110,8 +127,8 @@ def build_rl_payload(
             "Task app URL required (provide --task-url or set services.task_url in TOML)"
         )
-    model_source = (model_cfg.source or "").strip()
-    model_base = (model_cfg.base or "").strip()
+    model_source = (model_cfg.source or "").strip() if model_cfg else ""
+    model_base = (model_cfg.base or "").strip() if model_cfg else ""
     override_model = (overrides.get("model") or "").strip()
     if override_model:
         model_source = override_model
@@ -343,9 +360,87 @@ def build_sft_payload(
     return SFTBuildResult(payload=payload, train_file=dataset_path, validation_file=validation_file)
+def build_prompt_learning_payload(
+    *,
+    config_path: Path,
+    task_url: str | None,
+    overrides: dict[str, Any],
+    allow_experimental: bool | None = None,
+) -> PromptLearningBuildResult:
+    """Build payload for prompt learning job (MIPRO or GEPA)."""
+    import os
+    from pydantic import ValidationError
+    from .configs.prompt_learning import load_toml
+    # SDK-SIDE VALIDATION: Catch errors BEFORE sending to backend
+    from .validators import validate_prompt_learning_config
+    raw_config = load_toml(config_path)
+    validate_prompt_learning_config(raw_config, config_path)
+    try:
+        pl_cfg = PromptLearningConfig.from_path(config_path)
+    except ValidationError as exc:
+        raise click.ClickException(_format_validation_error(config_path, exc)) from exc
+    # Source of truth: TOML only (ignore shell/env and CLI overrides)
+    final_task_url = (pl_cfg.task_app_url or "").strip()
+    if not final_task_url:
+        raise click.ClickException(
+            "Task app URL required (provide --task-url or set prompt_learning.task_app_url in TOML)"
+        )
+    # Get task_app_api_key from config or environment
+    task_app_api_key = (
+        pl_cfg.task_app_api_key
+        or os.environ.get("ENVIRONMENT_API_KEY", "")
+    ).strip()
+    if not task_app_api_key:
+        raise click.ClickException(
+            "Task app API key required (set prompt_learning.task_app_api_key in TOML or ENVIRONMENT_API_KEY env var)"
+        )
+    # Build config dict for backend
+    config_dict = pl_cfg.to_dict()
+    # Ensure task_app_url and task_app_api_key are set
+    pl_section = config_dict.get("prompt_learning", {})
+    if isinstance(pl_section, dict):
+        pl_section["task_app_url"] = final_task_url
+        pl_section["task_app_api_key"] = task_app_api_key
+    else:
+        config_dict["prompt_learning"] = {
+            "task_app_url": final_task_url,
+            "task_app_api_key": task_app_api_key,
+        }
+    # Build payload matching backend API format
+    payload: dict[str, Any] = {
+        "algorithm": pl_cfg.algorithm,
+        "config_body": config_dict,
+        "overrides": overrides.get("overrides", {}),
+        "metadata": overrides.get("metadata", {}),
+        "auto_start": overrides.get("auto_start", True),
+    }
+    backend = overrides.get("backend")
+    if backend:
+        metadata_default: dict[str, Any] = {}
+        metadata = cast(dict[str, Any], payload.setdefault("metadata", metadata_default))
+        metadata["backend_base_url"] = ensure_api_base(str(backend))
+    return PromptLearningBuildResult(payload=payload, task_url=final_task_url)
 __all__ = [
+    "PromptLearningBuildResult",
     "RLBuildResult",
     "SFTBuildResult",
+    "build_prompt_learning_payload",
     "build_rl_payload",
     "build_sft_payload",
 ]

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl