synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/analyze_semantic_words.sh +2 -2
- examples/baseline/banking77_baseline.py +204 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +6 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -38
- examples/swe/task_app/hosted/policy_routes.py +17 -0
- examples/swe/task_app/hosted/rollout.py +4 -2
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +841 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +288 -39
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
- synth_ai/api/train/builders.py +99 -4
- synth_ai/api/train/cli.py +516 -26
- synth_ai/api/train/config_finder.py +13 -2
- synth_ai/api/train/configs/__init__.py +23 -2
- synth_ai/api/train/configs/prompt_learning.py +442 -0
- synth_ai/api/train/configs/rl.py +61 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/task_app.py +1 -1
- synth_ai/api/train/validators.py +277 -0
- synth_ai/auth/credentials.py +119 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cli/__init__.py +94 -18
- synth_ai/cli/__main__.py +0 -0
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +84 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1436 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +30 -158
- synth_ai/cli/deploy/__init__.py +43 -0
- synth_ai/cli/deploy.py +162 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +14 -8
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +107 -0
- synth_ai/cli/root.py +9 -5
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +51 -1480
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -10
- synth_ai/learning/__init__.py +10 -0
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +184 -0
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/pricing/__init__.py +2 -0
- synth_ai/pricing/model_pricing.py +57 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +518 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +2 -0
- synth_ai/task/tracing_utils.py +25 -25
- synth_ai/task/validators.py +45 -9
- synth_ai/task_app_cfgs.py +21 -0
- synth_ai/tracing_v3/config.py +162 -19
- synth_ai/tracing_v3/constants.py +1 -1
- synth_ai/tracing_v3/db_config.py +24 -38
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/storage/config.py +47 -13
- synth_ai/tracing_v3/storage/factory.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +113 -11
- synth_ai/tracing_v3/turso/native_manager.py +92 -16
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +30 -1
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/cli.py +149 -5
- synth_ai/utils/env.py +40 -33
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/modal.py +285 -3
- synth_ai/utils/paths.py +48 -0
- synth_ai/utils/uvicorn.py +113 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
|
@@ -466,11 +466,20 @@ async def step_policy(
|
|
|
466
466
|
|
|
467
467
|
if tracing_context is not None:
|
|
468
468
|
try:
|
|
469
|
+
print(
|
|
470
|
+
f"[TRACE_DEBUG] record_policy_prompts sys={len(system_prompt_records)} user={len(user_prompt_records)}",
|
|
471
|
+
flush=True,
|
|
472
|
+
)
|
|
469
473
|
await tracing_context.record_policy_prompts(
|
|
470
474
|
system_prompt_records, user_prompt_records
|
|
471
475
|
)
|
|
472
476
|
except Exception as exc:
|
|
473
477
|
logger.debug(f"TRACING_PROMPTS_FAIL: {exc}")
|
|
478
|
+
else:
|
|
479
|
+
print(
|
|
480
|
+
f"[TRACE_DEBUG] Missing tracing context on policy step; policy_id={request.policy_id}",
|
|
481
|
+
flush=True,
|
|
482
|
+
)
|
|
474
483
|
|
|
475
484
|
# Create inference client (choose API key by target provider)
|
|
476
485
|
# Require inference_url to be set explicitly by the rollout policy config.
|
|
@@ -492,7 +501,11 @@ async def step_policy(
|
|
|
492
501
|
if isinstance(target_url, str):
|
|
493
502
|
low_url = target_url.lower()
|
|
494
503
|
# Proxy endpoints should not receive a bearer; the server-side proxy holds the vendor key
|
|
495
|
-
if
|
|
504
|
+
if (
|
|
505
|
+
"/proxy/groq" in low_url
|
|
506
|
+
or "/proxy/openai" in low_url
|
|
507
|
+
or "/proxy/v1" in low_url
|
|
508
|
+
):
|
|
496
509
|
api_key_override = None
|
|
497
510
|
elif "openai.com" in low_url:
|
|
498
511
|
api_key_override = _os.getenv("OPENAI_API_KEY") or getattr(
|
|
@@ -692,9 +705,10 @@ async def step_policy(
|
|
|
692
705
|
"sokoban-react",
|
|
693
706
|
"crafter-react",
|
|
694
707
|
) and getattr(policy, "use_tools", True):
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
708
|
+
inf_req = meta.get("inference_request", {})
|
|
709
|
+
req_tools = inf_req.get("tools")
|
|
710
|
+
req_tool_choice = inf_req.get("tool_choice")
|
|
711
|
+
req_stop_after = inf_req.get("stop_after_tool_calls")
|
|
698
712
|
logger.info(
|
|
699
713
|
f"TOOLCALL_CONFIG: policy={policy_name} tools_present={bool(req_tools)} tool_choice={req_tool_choice} stop_after={req_stop_after}"
|
|
700
714
|
)
|
|
@@ -703,6 +717,8 @@ async def step_policy(
|
|
|
703
717
|
status_code=500,
|
|
704
718
|
detail=f"TOOLCALL_ASSERTION_FAIL: Missing tools or tool_choice!=required for policy {policy_name}",
|
|
705
719
|
)
|
|
720
|
+
if req_stop_after is None:
|
|
721
|
+
inf_req["stop_after_tool_calls"] = 1
|
|
706
722
|
|
|
707
723
|
# Call inference service with retries for Flash cold-start (503)
|
|
708
724
|
import time as _t
|
|
@@ -951,6 +967,23 @@ async def step_policy(
|
|
|
951
967
|
except Exception as exc:
|
|
952
968
|
logger.debug(f"TRACING_LLM_FAIL: {exc}")
|
|
953
969
|
|
|
970
|
+
if not tool_calls:
|
|
971
|
+
preview = ""
|
|
972
|
+
try:
|
|
973
|
+
preview = str(meta.get("raw_response") or "")[:400]
|
|
974
|
+
except Exception:
|
|
975
|
+
preview = "<unavailable>"
|
|
976
|
+
logger.error(
|
|
977
|
+
{
|
|
978
|
+
"rollout.policy_step": True,
|
|
979
|
+
"policy_id": request.policy_id,
|
|
980
|
+
"error": "no_tool_calls",
|
|
981
|
+
"inference_url": meta.get("inference_url"),
|
|
982
|
+
"raw_preview": preview,
|
|
983
|
+
}
|
|
984
|
+
)
|
|
985
|
+
raise RuntimeError("Policy step produced no tool calls; inference response unusable.")
|
|
986
|
+
|
|
954
987
|
return PolicyStepResponse(
|
|
955
988
|
tool_calls=tool_calls,
|
|
956
989
|
meta=meta,
|
|
@@ -223,6 +223,7 @@ class RolloutTracingContext:
|
|
|
223
223
|
).lower()
|
|
224
224
|
self.return_trace = bool(getattr(request.record, "return_trace", False))
|
|
225
225
|
self.sft_output_dir = getattr(fastapi_request.app.state, "sft_output_dir", None)
|
|
226
|
+
print(f"[TRACE_DEBUG] RolloutTracingContext init: trace_format={self.trace_format} return_trace={self.return_trace}", flush=True)
|
|
226
227
|
self.session_trace = None
|
|
227
228
|
self.metadata_updates: dict[str, Any] = {}
|
|
228
229
|
self.policy_name = request.policy.policy_name or ""
|
|
@@ -244,19 +245,24 @@ class RolloutTracingContext:
|
|
|
244
245
|
|
|
245
246
|
async def start_session(self) -> None:
|
|
246
247
|
if not self.enabled or self.tracer is None:
|
|
248
|
+
print("[TRACE_DEBUG] start_session skipped: tracer disabled", flush=True)
|
|
247
249
|
return
|
|
248
250
|
try:
|
|
249
251
|
await self.tracer.initialize()
|
|
252
|
+
print("[TRACE_DEBUG] tracer initialized", flush=True)
|
|
250
253
|
except Exception as exc:
|
|
251
254
|
logger.debug("TRACING_INIT_FAIL: %s", exc)
|
|
255
|
+
# Hard fail: tracing requested but cannot initialize
|
|
256
|
+
raise
|
|
252
257
|
try:
|
|
253
258
|
await self.tracer.start_session(
|
|
254
259
|
session_id=self.run_id, metadata=dict(self.metadata_base)
|
|
255
260
|
)
|
|
261
|
+
print(f"[TRACE_DEBUG] start_session succeeded for run_id={self.run_id}", flush=True)
|
|
256
262
|
except Exception as exc:
|
|
257
263
|
logger.warning("TRACING_START_FAIL: %s", exc)
|
|
258
|
-
|
|
259
|
-
|
|
264
|
+
# Hard fail: tracing requested but cannot start session
|
|
265
|
+
raise
|
|
260
266
|
|
|
261
267
|
async def start_decision(self, turn_number: int) -> None:
|
|
262
268
|
self.current_turn = turn_number
|
|
@@ -317,6 +323,9 @@ class RolloutTracingContext:
|
|
|
317
323
|
)
|
|
318
324
|
except Exception as exc:
|
|
319
325
|
logger.debug("TRACING_USER_MSG_FAIL: %s", exc)
|
|
326
|
+
if self.tracer and self.tracer._current_trace:
|
|
327
|
+
msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
|
|
328
|
+
print(f"[TRACE_DEBUG] After record_policy_prompts: {msg_count} messages", flush=True)
|
|
320
329
|
|
|
321
330
|
def _content_to_text(self, content: Any) -> str:
|
|
322
331
|
if isinstance(content, str):
|
|
@@ -395,6 +404,11 @@ class RolloutTracingContext:
|
|
|
395
404
|
message_type="policy_tool_call",
|
|
396
405
|
metadata=self._message_metadata(),
|
|
397
406
|
)
|
|
407
|
+
if self.tracer._current_trace:
|
|
408
|
+
print(
|
|
409
|
+
f"[TRACE_DEBUG] After tool invocation: messages={len(self.tracer._current_trace.markov_blanket_message_history)}",
|
|
410
|
+
flush=True,
|
|
411
|
+
)
|
|
398
412
|
except Exception as exc:
|
|
399
413
|
logger.debug("TRACING_TOOL_MSG_FAIL: %s", exc)
|
|
400
414
|
|
|
@@ -664,12 +678,24 @@ class RolloutTracingContext:
|
|
|
664
678
|
except Exception as exc:
|
|
665
679
|
logger.debug("TRACING_OUTCOME_FAIL: %s", exc)
|
|
666
680
|
try:
|
|
681
|
+
if self.tracer._current_trace:
|
|
682
|
+
msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
|
|
683
|
+
print(f"[TRACE_DEBUG] Before end_session: {msg_count} messages in trace", flush=True)
|
|
667
684
|
self.session_trace = await self.tracer.end_session()
|
|
668
685
|
if self.session_trace is not None:
|
|
669
686
|
self.session_trace.metadata.update(self.metadata_updates)
|
|
687
|
+
print(
|
|
688
|
+
f"[TRACE_DEBUG] Session ended successfully, session_id={self.session_trace.session_id}",
|
|
689
|
+
flush=True,
|
|
690
|
+
)
|
|
691
|
+
print(
|
|
692
|
+
f"[TRACE_DEBUG] session_trace.metadata keys: {list(self.session_trace.metadata.keys())}",
|
|
693
|
+
flush=True,
|
|
694
|
+
)
|
|
670
695
|
except Exception as exc:
|
|
671
696
|
logger.debug("TRACING_END_SESSION_FAIL: %s", exc)
|
|
672
697
|
self.session_trace = None
|
|
698
|
+
print(f"[TRACE_DEBUG] end_session failed for run_id={self.run_id}: {exc}", flush=True)
|
|
673
699
|
with contextlib.suppress(Exception):
|
|
674
700
|
await self.tracer.close()
|
|
675
701
|
|
|
@@ -700,9 +726,13 @@ class RolloutTracingContext:
|
|
|
700
726
|
def build_trace_payload(self, session_trace: Any) -> dict[str, Any] | None:
|
|
701
727
|
if not self.return_trace or session_trace is None:
|
|
702
728
|
return None
|
|
703
|
-
if self.trace_format
|
|
729
|
+
if self.trace_format in ("full", "structured"):
|
|
704
730
|
payload = session_trace.to_dict()
|
|
705
731
|
payload.setdefault("metadata", {}).update(self.metadata_updates)
|
|
732
|
+
print(
|
|
733
|
+
f"[TRACE_DEBUG] build_trace_payload returning structured trace with messages={len(payload.get('markov_blanket_message_history') or [])}",
|
|
734
|
+
flush=True,
|
|
735
|
+
)
|
|
706
736
|
return payload
|
|
707
737
|
metadata = dict(session_trace.metadata)
|
|
708
738
|
metadata.update(self.metadata_updates)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Utility functions for the task service."""
|
|
2
2
|
|
|
3
3
|
from typing import Any
|
|
4
|
+
from urllib.parse import urlparse, urlunparse
|
|
4
5
|
|
|
5
6
|
import numpy as np
|
|
6
7
|
|
|
@@ -60,3 +61,69 @@ def sanitize_observation(observation: dict[str, Any]) -> dict[str, Any]:
|
|
|
60
61
|
sanitized[key] = convert_numpy_to_python(value)
|
|
61
62
|
|
|
62
63
|
return sanitized
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
_CHAT_COMPLETIONS_SUFFIX = "/v1/chat/completions"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def force_normalize_chat_completions_url(raw_url: Any) -> Any:
|
|
70
|
+
"""
|
|
71
|
+
Convert ANY malformed inference URL into the correct chat-completions form.
|
|
72
|
+
Ensures path ends with /v1/chat/completions and that query has no '/' segments.
|
|
73
|
+
"""
|
|
74
|
+
if not isinstance(raw_url, str):
|
|
75
|
+
return raw_url
|
|
76
|
+
url = raw_url.strip()
|
|
77
|
+
if not url:
|
|
78
|
+
return raw_url
|
|
79
|
+
|
|
80
|
+
parsed = urlparse(url)
|
|
81
|
+
path = (parsed.path or "").rstrip("/")
|
|
82
|
+
query = parsed.query or ""
|
|
83
|
+
|
|
84
|
+
# If query contains a path, extract and repair
|
|
85
|
+
if query and "/" in query:
|
|
86
|
+
before_slash, after_slash = query.split("/", 1)
|
|
87
|
+
cut_positions = [i for i in [after_slash.find("&"), after_slash.find("?")] if i >= 0]
|
|
88
|
+
cut = min(cut_positions) if cut_positions else len(after_slash)
|
|
89
|
+
path_from_query = "/" + after_slash[:cut]
|
|
90
|
+
extra_query = after_slash[cut + 1 :] if cut < len(after_slash) else ""
|
|
91
|
+
merged_query = before_slash if before_slash else ""
|
|
92
|
+
if extra_query:
|
|
93
|
+
merged_query = f"{merged_query}&{extra_query}" if merged_query else extra_query
|
|
94
|
+
final_path = (
|
|
95
|
+
path_from_query
|
|
96
|
+
if path_from_query.startswith(_CHAT_COMPLETIONS_SUFFIX)
|
|
97
|
+
else f"{path_from_query.rstrip('/')}{_CHAT_COMPLETIONS_SUFFIX}"
|
|
98
|
+
)
|
|
99
|
+
parsed = parsed._replace(path=final_path, query=merged_query)
|
|
100
|
+
url = urlunparse(parsed)
|
|
101
|
+
parsed = urlparse(url)
|
|
102
|
+
path = parsed.path or ""
|
|
103
|
+
query = parsed.query or ""
|
|
104
|
+
|
|
105
|
+
# Ensure path suffix
|
|
106
|
+
if not path.endswith(_CHAT_COMPLETIONS_SUFFIX):
|
|
107
|
+
new_path = f"{path}{_CHAT_COMPLETIONS_SUFFIX}" if path else _CHAT_COMPLETIONS_SUFFIX
|
|
108
|
+
parsed = parsed._replace(path=new_path)
|
|
109
|
+
url = urlunparse(parsed)
|
|
110
|
+
parsed = urlparse(url)
|
|
111
|
+
path = parsed.path or ""
|
|
112
|
+
query = parsed.query or ""
|
|
113
|
+
|
|
114
|
+
# Last-resort: strip any '/' from query
|
|
115
|
+
if query and "/" in query:
|
|
116
|
+
safe_query = query.split("/")[0]
|
|
117
|
+
parsed = parsed._replace(query=safe_query)
|
|
118
|
+
url = urlunparse(parsed)
|
|
119
|
+
|
|
120
|
+
return url
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def ensure_chat_completions_url(raw_url: Any, mode: Any = None) -> Any:
|
|
124
|
+
"""
|
|
125
|
+
Mode-aware normalizer (RL/EVAL) that returns a valid chat completions URL and
|
|
126
|
+
preserves existing query parameters.
|
|
127
|
+
"""
|
|
128
|
+
# For now reuse force normalizer in both modes to guarantee correctness
|
|
129
|
+
return force_normalize_chat_completions_url(raw_url)
|
|
@@ -1,8 +1,15 @@
|
|
|
1
|
+
[algorithm]
|
|
2
|
+
type = "online"
|
|
3
|
+
method = "policy_gradient"
|
|
4
|
+
variety = "gspo"
|
|
5
|
+
|
|
1
6
|
[services]
|
|
2
7
|
task_url = "https://your-math-task.modal.run"
|
|
3
8
|
|
|
4
9
|
[model]
|
|
5
10
|
base = "Qwen/Qwen3-4B"
|
|
11
|
+
trainer_mode = "full"
|
|
12
|
+
label = "math-single-step-qwen3-4b"
|
|
6
13
|
|
|
7
14
|
[policy]
|
|
8
15
|
model = "Qwen/Qwen3-4B"
|
|
@@ -18,6 +25,8 @@ evaluation_split = "validation"
|
|
|
18
25
|
evaluation_episodes = 256
|
|
19
26
|
|
|
20
27
|
[training]
|
|
28
|
+
num_epochs = 1
|
|
29
|
+
iterations_per_epoch = 20
|
|
21
30
|
max_turns = 1
|
|
22
31
|
ops = ["agent", "env"]
|
|
23
32
|
batch_size = 128
|
|
@@ -31,5 +40,23 @@ learning_rate = 5e-6
|
|
|
31
40
|
gpu_type = "A10G"
|
|
32
41
|
gpu_count = 4
|
|
33
42
|
|
|
43
|
+
[topology]
|
|
44
|
+
type = "single_node_split"
|
|
45
|
+
gpus_for_vllm = 2
|
|
46
|
+
gpus_for_training = 2
|
|
47
|
+
gpus_for_ref = 0
|
|
48
|
+
tensor_parallel = 1
|
|
49
|
+
|
|
50
|
+
[rollout]
|
|
51
|
+
env_name = "math"
|
|
52
|
+
policy_name = "math-single-step"
|
|
53
|
+
max_turns = 1
|
|
54
|
+
episodes_per_batch = 256
|
|
55
|
+
|
|
56
|
+
[evaluation]
|
|
57
|
+
instances = 256
|
|
58
|
+
every_n_iters = 10
|
|
59
|
+
seeds = [0, 1, 2, 3, 4]
|
|
60
|
+
|
|
34
61
|
[tags]
|
|
35
62
|
experiment = "math_single_step"
|
|
@@ -8,6 +8,8 @@ task_url = "http://localhost:8101"
|
|
|
8
8
|
|
|
9
9
|
[model]
|
|
10
10
|
base = "Qwen/Qwen3-1.7B"
|
|
11
|
+
trainer_mode = "full"
|
|
12
|
+
label = "math-single-step-qwen3-1.7b"
|
|
11
13
|
|
|
12
14
|
[policy]
|
|
13
15
|
model = "Qwen/Qwen3-1.7B"
|
|
@@ -23,6 +25,8 @@ evaluation_split = "validation"
|
|
|
23
25
|
evaluation_episodes = 50
|
|
24
26
|
|
|
25
27
|
[training]
|
|
28
|
+
num_epochs = 1
|
|
29
|
+
iterations_per_epoch = 20
|
|
26
30
|
max_turns = 1
|
|
27
31
|
ops = ["agent", "env"]
|
|
28
32
|
batch_size = 2
|
|
@@ -61,9 +65,11 @@ health_max_wait_s = 180
|
|
|
61
65
|
health_interval_ms = 300
|
|
62
66
|
|
|
63
67
|
[rollout]
|
|
68
|
+
env_name = "math"
|
|
64
69
|
policy_name = "math-single-step"
|
|
65
70
|
max_turns = 1
|
|
66
71
|
episodes_per_batch = 32 # group_size * batch_size
|
|
72
|
+
task_app_origin_rewards_only = true
|
|
67
73
|
|
|
68
74
|
[evaluation]
|
|
69
75
|
instances = 32
|
synth_ai/api/train/builders.py
CHANGED
|
@@ -33,7 +33,7 @@ try:
|
|
|
33
33
|
except Exception as exc: # pragma: no cover - critical dependency
|
|
34
34
|
raise RuntimeError("Unable to load SFT payload helpers") from exc
|
|
35
35
|
|
|
36
|
-
from .configs import RLConfig, SFTConfig
|
|
36
|
+
from .configs import PromptLearningConfig, RLConfig, SFTConfig
|
|
37
37
|
from .supported_algos import (
|
|
38
38
|
AlgorithmValidationError,
|
|
39
39
|
ensure_model_supported_for_algorithm,
|
|
@@ -56,6 +56,12 @@ class SFTBuildResult:
|
|
|
56
56
|
validation_file: Path | None
|
|
57
57
|
|
|
58
58
|
|
|
59
|
+
@dataclass(slots=True)
|
|
60
|
+
class PromptLearningBuildResult:
|
|
61
|
+
payload: dict[str, Any]
|
|
62
|
+
task_url: str
|
|
63
|
+
|
|
64
|
+
|
|
59
65
|
def _format_validation_error(path: Path, exc: ValidationError) -> str:
|
|
60
66
|
lines: list[str] = []
|
|
61
67
|
for error in exc.errors():
|
|
@@ -74,12 +80,23 @@ def build_rl_payload(
|
|
|
74
80
|
idempotency: str | None,
|
|
75
81
|
allow_experimental: bool | None = None,
|
|
76
82
|
) -> RLBuildResult:
|
|
83
|
+
# Load and validate config with SDK-level checks
|
|
84
|
+
from synth_ai.api.train.utils import load_toml
|
|
85
|
+
from synth_ai.cli.commands.train.validation import validate_rl_config
|
|
86
|
+
|
|
77
87
|
try:
|
|
78
|
-
|
|
88
|
+
raw_config = load_toml(config_path)
|
|
89
|
+
validated_config = validate_rl_config(raw_config) # Adds defaults & validates
|
|
90
|
+
rl_cfg = RLConfig.from_mapping(validated_config)
|
|
79
91
|
except ValidationError as exc:
|
|
80
92
|
raise click.ClickException(_format_validation_error(config_path, exc)) from exc
|
|
81
93
|
|
|
82
94
|
data = rl_cfg.to_dict()
|
|
95
|
+
|
|
96
|
+
# Remove smoke section - it's CLI-only and should not be sent to the trainer
|
|
97
|
+
if "smoke" in data:
|
|
98
|
+
del data["smoke"]
|
|
99
|
+
|
|
83
100
|
# Ensure required [reference] section for backend validators
|
|
84
101
|
try:
|
|
85
102
|
ref_cfg = data.get("reference") if isinstance(data, dict) else None
|
|
@@ -110,8 +127,8 @@ def build_rl_payload(
|
|
|
110
127
|
"Task app URL required (provide --task-url or set services.task_url in TOML)"
|
|
111
128
|
)
|
|
112
129
|
|
|
113
|
-
model_source = (model_cfg.source or "").strip()
|
|
114
|
-
model_base = (model_cfg.base or "").strip()
|
|
130
|
+
model_source = (model_cfg.source or "").strip() if model_cfg else ""
|
|
131
|
+
model_base = (model_cfg.base or "").strip() if model_cfg else ""
|
|
115
132
|
override_model = (overrides.get("model") or "").strip()
|
|
116
133
|
if override_model:
|
|
117
134
|
model_source = override_model
|
|
@@ -343,9 +360,87 @@ def build_sft_payload(
|
|
|
343
360
|
return SFTBuildResult(payload=payload, train_file=dataset_path, validation_file=validation_file)
|
|
344
361
|
|
|
345
362
|
|
|
363
|
+
def build_prompt_learning_payload(
    *,
    config_path: Path,
    task_url: str | None,
    overrides: dict[str, Any],
    allow_experimental: bool | None = None,
) -> PromptLearningBuildResult:
    """Build the request payload for a prompt learning job (MIPRO or GEPA).

    The TOML file at ``config_path`` is the single source of truth for the
    task app URL. ``task_url`` and ``allow_experimental`` are accepted for
    signature parity with the other builders but are not consulted here.

    Args:
        config_path: Path to the prompt learning TOML config.
        task_url: Ignored; the URL is read from ``prompt_learning.task_app_url``.
        overrides: Optional keys read from this mapping: ``"overrides"``,
            ``"metadata"``, ``"auto_start"`` (defaults to ``True``) and
            ``"backend"`` (recorded as ``metadata["backend_base_url"]``).
        allow_experimental: Currently unused.

    Returns:
        A ``PromptLearningBuildResult`` holding the payload and the task URL.

    Raises:
        click.ClickException: If the config fails pydantic validation, or if
            the task app URL or the task app API key cannot be resolved.
            ``validate_prompt_learning_config`` may raise its own errors
            (not visible from this function).
    """
    import os

    from pydantic import ValidationError

    from .configs.prompt_learning import load_toml

    # SDK-SIDE VALIDATION: Catch errors BEFORE sending to backend
    from .validators import validate_prompt_learning_config

    raw_config = load_toml(config_path)
    validate_prompt_learning_config(raw_config, config_path)

    try:
        pl_cfg = PromptLearningConfig.from_path(config_path)
    except ValidationError as exc:
        raise click.ClickException(_format_validation_error(config_path, exc)) from exc

    # Source of truth: TOML only (ignore shell/env and CLI overrides)
    final_task_url = (pl_cfg.task_app_url or "").strip()
    if not final_task_url:
        # Do not advertise --task-url here: it is deliberately ignored above.
        raise click.ClickException(
            "Task app URL required (set prompt_learning.task_app_url in TOML)"
        )

    # Strip the config value BEFORE falling back, so a whitespace-only key in
    # the TOML does not mask a valid ENVIRONMENT_API_KEY.
    task_app_api_key = (pl_cfg.task_app_api_key or "").strip() or os.environ.get(
        "ENVIRONMENT_API_KEY", ""
    ).strip()
    if not task_app_api_key:
        raise click.ClickException(
            "Task app API key required (set prompt_learning.task_app_api_key in TOML or ENVIRONMENT_API_KEY env var)"
        )

    # Build config dict for backend
    config_dict = pl_cfg.to_dict()

    # Ensure task_app_url and task_app_api_key are set. Look the section up
    # without a default: a default ``{}`` would be a detached dict whose
    # updates never reach ``config_dict`` when the section is missing.
    pl_section = config_dict.get("prompt_learning")
    if not isinstance(pl_section, dict):
        pl_section = {}
        config_dict["prompt_learning"] = pl_section
    pl_section["task_app_url"] = final_task_url
    pl_section["task_app_api_key"] = task_app_api_key

    # Copy the caller's metadata so adding backend_base_url below does not
    # mutate ``overrides``; ``or {}`` also tolerates an explicit ``None``.
    metadata: dict[str, Any] = dict(overrides.get("metadata") or {})

    backend = overrides.get("backend")
    if backend:
        metadata["backend_base_url"] = ensure_api_base(str(backend))

    # Build payload matching backend API format
    payload: dict[str, Any] = {
        "algorithm": pl_cfg.algorithm,
        "config_body": config_dict,
        "overrides": overrides.get("overrides", {}),
        "metadata": metadata,
        "auto_start": overrides.get("auto_start", True),
    }

    return PromptLearningBuildResult(payload=payload, task_url=final_task_url)
|
|
437
|
+
|
|
438
|
+
|
|
346
439
|
__all__ = [
|
|
440
|
+
"PromptLearningBuildResult",
|
|
347
441
|
"RLBuildResult",
|
|
348
442
|
"SFTBuildResult",
|
|
443
|
+
"build_prompt_learning_payload",
|
|
349
444
|
"build_rl_payload",
|
|
350
445
|
"build_sft_payload",
|
|
351
446
|
]
|