PyPI - synth-ai - Versions diffs - 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl - Mend

synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (354) hide show

examples/README.md +1 -0
examples/analyze_semantic_words.sh +2 -2
examples/blog_posts/pokemon_vl/README.md +98 -0
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
examples/blog_posts/warming_up_to_rl/README.md +158 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
examples/multi_step/SFT_README.md +147 -0
examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
examples/multi_step/configs/verilog_rl_lora.toml +80 -123
examples/multi_step/convert_traces_to_sft.py +84 -0
examples/multi_step/run_sft_qwen30b.sh +45 -0
examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
examples/qwen_coder/configs/coder_lora_small.toml +1 -2
examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
examples/qwen_vl/QUICKSTART.md +327 -0
examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
examples/qwen_vl/README.md +152 -0
examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
examples/qwen_vl/RL_VISION_TESTING.md +333 -0
examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
examples/qwen_vl/SETUP_COMPLETE.md +274 -0
examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
examples/qwen_vl/__init__.py +2 -0
examples/qwen_vl/collect_data_via_cli.md +415 -0
examples/qwen_vl/collect_vision_traces.py +368 -0
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
examples/qwen_vl/configs/filter_vision_test.toml +8 -0
examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
examples/qwen_vl/run_vision_comparison.sh +61 -0
examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
examples/qwen_vl/test_image_validation.py +201 -0
examples/qwen_vl/test_sft_vision_data.py +110 -0
examples/rl/README.md +6 -6
examples/rl/configs/eval_base_qwen.toml +17 -0
examples/rl/configs/eval_rl_qwen.toml +13 -0
examples/rl/configs/rl_from_base_qwen.toml +62 -0
examples/rl/configs/rl_from_base_qwen17.toml +79 -0
examples/rl/configs/rl_from_ft_qwen.toml +37 -0
examples/rl/run_eval.py +436 -0
examples/rl/run_rl_and_save.py +111 -0
examples/rl/task_app/README.md +21 -0
examples/rl/task_app/math_single_step.py +990 -0
examples/rl/task_app/math_task_app.py +111 -0
examples/run_crafter_demo.sh +2 -2
examples/sft/README.md +6 -6
examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
examples/sft/evaluate.py +2 -4
examples/sft/export_dataset.py +7 -4
examples/swe/task_app/README.md +33 -3
examples/swe/task_app/grpo_swe_mini.py +4 -1
examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
examples/swe/task_app/hosted/inference/openai_client.py +4 -4
examples/swe/task_app/hosted/policy_routes.py +0 -2
examples/swe/task_app/hosted/rollout.py +0 -8
examples/swe/task_app/morph_backend.py +178 -0
examples/task_apps/crafter/task_app/README.md +1 -1
examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
examples/task_apps/enron/__init__.py +1 -0
examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
examples/task_apps/math/README.md +1 -2
examples/task_apps/pokemon_red/README.md +3 -4
examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
examples/task_apps/pokemon_red/task_app.py +36 -5
examples/task_apps/sokoban/README.md +2 -3
examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
examples/vlm/README.md +3 -3
examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
examples/vlm/crafter_openai_vlm_agent.py +3 -5
examples/vlm/filter_image_rows.py +1 -1
examples/vlm/run_crafter_vlm_benchmark.py +2 -2
examples/warming_up_to_rl/_utils.py +92 -0
examples/warming_up_to_rl/analyze_trace_db.py +1 -1
examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
examples/warming_up_to_rl/export_trace_sft.py +174 -60
examples/warming_up_to_rl/readme.md +63 -132
examples/warming_up_to_rl/run_fft_and_save.py +1 -1
examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
examples/warming_up_to_rl/run_rl_and_save.py +1 -1
examples/warming_up_to_rl/task_app/README.md +42 -0
examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
synth_ai/__init__.py +44 -30
synth_ai/_utils/__init__.py +47 -0
synth_ai/_utils/base_url.py +10 -0
synth_ai/_utils/http.py +10 -0
synth_ai/_utils/prompts.py +10 -0
synth_ai/_utils/task_app_state.py +12 -0
synth_ai/_utils/user_config.py +10 -0
synth_ai/api/models/supported.py +144 -7
synth_ai/api/train/__init__.py +13 -1
synth_ai/api/train/builders.py +9 -3
synth_ai/api/train/cli.py +155 -17
synth_ai/api/train/config_finder.py +18 -11
synth_ai/api/train/configs/__init__.py +8 -1
synth_ai/api/train/configs/rl.py +32 -7
synth_ai/api/train/configs/sft.py +6 -2
synth_ai/api/train/configs/shared.py +59 -2
synth_ai/api/train/env_resolver.py +13 -10
synth_ai/auth/credentials.py +119 -0
synth_ai/cli/__init__.py +61 -69
synth_ai/cli/_modal_wrapper.py +7 -5
synth_ai/cli/_typer_patch.py +0 -2
synth_ai/cli/_validate_task_app.py +22 -4
synth_ai/cli/commands/__init__.py +17 -0
synth_ai/cli/commands/demo/__init__.py +6 -0
synth_ai/cli/commands/demo/core.py +163 -0
synth_ai/cli/commands/deploy/__init__.py +23 -0
synth_ai/cli/commands/deploy/core.py +614 -0
synth_ai/cli/commands/deploy/errors.py +72 -0
synth_ai/cli/commands/deploy/validation.py +11 -0
synth_ai/cli/commands/eval/__init__.py +19 -0
synth_ai/cli/commands/eval/core.py +1109 -0
synth_ai/cli/commands/eval/errors.py +81 -0
synth_ai/cli/commands/eval/validation.py +133 -0
synth_ai/cli/commands/filter/__init__.py +12 -0
synth_ai/cli/commands/filter/core.py +388 -0
synth_ai/cli/commands/filter/errors.py +55 -0
synth_ai/cli/commands/filter/validation.py +77 -0
synth_ai/cli/commands/help/__init__.py +177 -0
synth_ai/cli/commands/help/core.py +73 -0
synth_ai/cli/commands/status/__init__.py +64 -0
synth_ai/cli/commands/status/client.py +192 -0
synth_ai/cli/commands/status/config.py +92 -0
synth_ai/cli/commands/status/errors.py +20 -0
synth_ai/cli/commands/status/formatters.py +164 -0
synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
synth_ai/cli/commands/status/subcommands/files.py +79 -0
synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
synth_ai/cli/commands/status/subcommands/models.py +79 -0
synth_ai/cli/commands/status/subcommands/runs.py +81 -0
synth_ai/cli/commands/status/subcommands/summary.py +47 -0
synth_ai/cli/commands/status/utils.py +114 -0
synth_ai/cli/commands/train/__init__.py +53 -0
synth_ai/cli/commands/train/core.py +21 -0
synth_ai/cli/commands/train/errors.py +117 -0
synth_ai/cli/commands/train/judge_schemas.py +199 -0
synth_ai/cli/commands/train/judge_validation.py +304 -0
synth_ai/cli/commands/train/validation.py +443 -0
synth_ai/cli/demo.py +2 -162
synth_ai/cli/deploy/__init__.py +28 -0
synth_ai/cli/deploy/core.py +5 -0
synth_ai/cli/deploy/errors.py +23 -0
synth_ai/cli/deploy/validation.py +5 -0
synth_ai/cli/eval/__init__.py +36 -0
synth_ai/cli/eval/core.py +5 -0
synth_ai/cli/eval/errors.py +31 -0
synth_ai/cli/eval/validation.py +5 -0
synth_ai/cli/filter/__init__.py +28 -0
synth_ai/cli/filter/core.py +5 -0
synth_ai/cli/filter/errors.py +23 -0
synth_ai/cli/filter/validation.py +5 -0
synth_ai/cli/legacy_root_backup.py +3 -1
synth_ai/cli/lib/__init__.py +10 -0
synth_ai/cli/lib/task_app_discovery.py +7 -0
synth_ai/cli/lib/task_app_env.py +518 -0
synth_ai/cli/modal_serve/__init__.py +12 -0
synth_ai/cli/modal_serve/core.py +14 -0
synth_ai/cli/modal_serve/errors.py +8 -0
synth_ai/cli/modal_serve/validation.py +11 -0
synth_ai/cli/recent.py +2 -1
synth_ai/cli/serve/__init__.py +12 -0
synth_ai/cli/serve/core.py +14 -0
synth_ai/cli/serve/errors.py +8 -0
synth_ai/cli/serve/validation.py +11 -0
synth_ai/cli/setup.py +21 -0
synth_ai/cli/status.py +7 -126
synth_ai/cli/task_app_deploy.py +7 -0
synth_ai/cli/task_app_list.py +25 -0
synth_ai/cli/task_app_modal_serve.py +11 -0
synth_ai/cli/task_app_serve.py +11 -0
synth_ai/cli/task_apps.py +110 -1499
synth_ai/cli/traces.py +1 -1
synth_ai/cli/train/__init__.py +12 -0
synth_ai/cli/train/core.py +21 -0
synth_ai/cli/train/errors.py +8 -0
synth_ai/cli/train/validation.py +24 -0
synth_ai/cli/train.py +5 -0
synth_ai/cli/turso.py +1 -1
synth_ai/cli/watch.py +1 -1
synth_ai/demos/__init__.py +10 -0
synth_ai/demos/core/__init__.py +28 -1
synth_ai/demos/crafter/__init__.py +1 -0
synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
synth_ai/demos/demo_registry.py +176 -0
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/demos/math/__init__.py +1 -0
synth_ai/demos/math/_common.py +16 -0
synth_ai/demos/math/app.py +38 -0
synth_ai/demos/math/config.toml +76 -0
synth_ai/demos/math/deploy_modal.py +54 -0
synth_ai/demos/math/modal_task_app.py +702 -0
synth_ai/demos/math/task_app_entry.py +51 -0
synth_ai/environments/environment/core.py +7 -1
synth_ai/environments/examples/bandit/engine.py +0 -1
synth_ai/environments/examples/bandit/environment.py +0 -1
synth_ai/environments/examples/red/engine.py +33 -12
synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
synth_ai/environments/examples/red/environment.py +26 -0
synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
synth_ai/environments/examples/wordle/environment.py +0 -1
synth_ai/evals/base.py +16 -5
synth_ai/evals/client.py +1 -1
synth_ai/http.py +8 -22
synth_ai/inference/client.py +1 -1
synth_ai/judge_schemas.py +4 -5
synth_ai/learning/client.py +1 -1
synth_ai/learning/health.py +1 -1
synth_ai/learning/jobs.py +1 -1
synth_ai/learning/rl/client.py +4 -2
synth_ai/learning/rl/env_keys.py +1 -1
synth_ai/learning/rl/secrets.py +1 -1
synth_ai/learning/sft/client.py +1 -1
synth_ai/learning/sft/data.py +407 -4
synth_ai/learning/validators.py +4 -1
synth_ai/streaming/__init__.py +29 -0
synth_ai/streaming/config.py +94 -0
synth_ai/streaming/handlers.py +469 -0
synth_ai/streaming/streamer.py +301 -0
synth_ai/streaming/types.py +95 -0
synth_ai/task/apps/__init__.py +4 -2
synth_ai/task/config.py +6 -4
synth_ai/task/rubrics/__init__.py +1 -2
synth_ai/task/rubrics/loaders.py +14 -10
synth_ai/task/rubrics.py +219 -0
synth_ai/task/trace_correlation_helpers.py +24 -11
synth_ai/task/tracing_utils.py +14 -3
synth_ai/task/validators.py +0 -1
synth_ai/tracing_v3/abstractions.py +3 -3
synth_ai/tracing_v3/config.py +15 -13
synth_ai/tracing_v3/constants.py +21 -0
synth_ai/tracing_v3/db_config.py +3 -1
synth_ai/tracing_v3/decorators.py +10 -7
synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
synth_ai/tracing_v3/migration_helper.py +1 -2
synth_ai/tracing_v3/session_tracer.py +7 -7
synth_ai/tracing_v3/storage/base.py +29 -29
synth_ai/tracing_v3/storage/config.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +8 -9
synth_ai/tracing_v3/turso/native_manager.py +80 -72
synth_ai/tracing_v3/utils.py +2 -2
synth_ai/utils/__init__.py +101 -0
synth_ai/utils/base_url.py +94 -0
synth_ai/utils/cli.py +131 -0
synth_ai/utils/env.py +294 -0
synth_ai/utils/http.py +172 -0
synth_ai/utils/modal.py +308 -0
synth_ai/utils/process.py +212 -0
synth_ai/utils/prompts.py +39 -0
synth_ai/utils/sqld.py +122 -0
synth_ai/utils/task_app_discovery.py +882 -0
synth_ai/utils/task_app_env.py +186 -0
synth_ai/utils/task_app_state.py +318 -0
synth_ai/utils/user_config.py +137 -0
synth_ai/v0/config/__init__.py +1 -5
synth_ai/v0/config/base_url.py +1 -7
synth_ai/v0/tracing/config.py +1 -1
synth_ai/v0/tracing/decorators.py +1 -1
synth_ai/v0/tracing/upload.py +1 -1
synth_ai/v0/tracing_v1/config.py +1 -1
synth_ai/v0/tracing_v1/decorators.py +1 -1
synth_ai/v0/tracing_v1/upload.py +1 -1
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
synth_ai/cli/man.py +0 -106
synth_ai/cli/tui.py +0 -57
synth_ai/compound/cais.py +0 -0
synth_ai/core/experiment.py +0 -13
synth_ai/core/system.py +0 -15
synth_ai/demo_registry.py +0 -295
synth_ai/handshake.py +0 -109
synth_ai/tui/__init__.py +0 -5
synth_ai/tui/__main__.py +0 -13
synth_ai/tui/cli/__init__.py +0 -1
synth_ai/tui/cli/query_experiments.py +0 -164
synth_ai/tui/cli/query_experiments_v3.py +0 -164
synth_ai/tui/dashboard.py +0 -906
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0

examples/warming_up_to_rl/_utils.py ADDED Viewed

@@ -0,0 +1,92 @@
+from __future__ import annotations
+from collections.abc import Iterable, Sequence
+from synth_ai.task import (
+    RolloutEnvSpec,
+    RolloutPolicySpec,
+    RolloutRecordConfig,
+    RolloutRequest,
+    RolloutSafetyConfig,
+)
+DEFAULT_POLICY_NAME = "crafter-react"
+DEFAULT_ENV_NAME = "crafter"
+def parse_ops(spec: str | None) -> list[str] | None:
+    """Parse a comma-separated operations string into a list."""
+    if spec is None:
+        return None
+    ops = [op.strip() for op in spec.split(",") if op.strip()]
+    if not ops:
+        raise ValueError("Ops must contain at least one entry")
+    return ops
+def ops_from_pairs(max_llm_calls: int, *, cap: int | None = None) -> list[str]:
+    """Return alternating agent/env ops for the requested number of LLM calls."""
+    pairs = max(1, int(max_llm_calls or 0))
+    if cap is not None:
+        pairs = min(pairs, cap)
+    ops: list[str] = []
+    for _ in range(pairs):
+        ops.extend(["agent", "env"])
+    return ops
+def build_rollout_request(
+    *,
+    seed: int,
+    run_id: str,
+    model: str,
+    inference_url: str,
+    ops: Sequence[str] | Iterable[str],
+    inference_api_key: str | None = None,
+    extra_headers: dict[str, str] | None = None,
+    trace_format: str = "compact",
+    return_trace: bool = False,
+    policy_name: str = DEFAULT_POLICY_NAME,
+    env_name: str = DEFAULT_ENV_NAME,
+    max_policy_tokens: int | None = None,
+    record_trajectories: bool = True,
+) -> RolloutRequest:
+    """Construct a RolloutRequest shared across local rollout utilities."""
+    policy_config: dict[str, object] = {
+        "model": model,
+        "inference_url": inference_url,
+    }
+    if inference_api_key is not None:
+        policy_config["api_key"] = inference_api_key
+    if extra_headers:
+        policy_config["extra_headers"] = extra_headers
+    if max_policy_tokens is not None:
+        policy_config["max_completion_tokens"] = max_policy_tokens
+        policy_config["max_tokens"] = max_policy_tokens
+    record_cfg = RolloutRecordConfig(
+        trajectories=record_trajectories,
+        trace_format=trace_format,
+        return_trace=return_trace,
+    )
+    return RolloutRequest(
+        run_id=run_id,
+        env=RolloutEnvSpec(env_name=env_name, seed=seed, config={}),
+        policy=RolloutPolicySpec(policy_name=policy_name, config=policy_config),
+        ops=list(ops),
+        record=record_cfg,
+        on_done="reset",
+        safety=RolloutSafetyConfig(),
+    )
+__all__ = [
+    "DEFAULT_POLICY_NAME",
+    "DEFAULT_ENV_NAME",
+    "build_rollout_request",
+    "ops_from_pairs",
+    "parse_ops",
+]

examples/warming_up_to_rl/analyze_trace_db.py CHANGED Viewed

@@ -383,7 +383,7 @@ def main() -> None:
     parser.add_argument(
         "--db",
         type=Path,
-        default=Path("traces/v3/synth_ai.db"),
+        default=Path("traces/task_app_traces.db"),
         help="Path to the tracing_v3 SQLite database",
     )
     args = parser.parse_args()

examples/warming_up_to_rl/configs/crafter_fft.toml CHANGED Viewed

@@ -1,6 +1,11 @@
 # Crafter Full Finetune (FFT) example on H100
 # Adjust paths and hyperparameters to your environment before running.
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "fft"
 [job]
 model = "Qwen/Qwen3-4B"               # base model to finetune
 # Path to your SFT JSONL dataset

examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml CHANGED Viewed

@@ -1,5 +1,7 @@
 # Eval config for finetuned Qwen/Qwen3-4B (FFT) via task app rollout
+type = "sft"
 # Required
 task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
 # Replace with your finished job id if different

examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml CHANGED Viewed

@@ -1,6 +1,8 @@
 # Eval config for Groq Qwen3-32B
 # Fields mirror run_eval.py expectations
+type = "rl"
 # Required
 task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
 model = "qwen/qwen3-32b"

examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml CHANGED Viewed

@@ -1,5 +1,7 @@
 # Eval config for Synth Modal inference Qwen/Qwen3-4B via task app rollout
+type = "rl"
 # Required
 task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
 model = "Qwen/Qwen3-4B"
@@ -20,4 +22,3 @@ concurrency = 10
 #   fetch the vLLM base from the task app /info to use as inference_url.
 # - Ensure the task app mounts the openai-api-key secret if your vLLM gateway
 #   requires a bearer token (OPENAI_API_KEY). Otherwise it will call unauthenticated.

examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml CHANGED Viewed

@@ -5,7 +5,6 @@ type = "online"
 method = "policy_gradient"
 variety = "gspo"
 [services]
 task_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
@@ -39,6 +38,7 @@ health_interval_ms = 300
 [model]
 # Base model start
 base = "Qwen/Qwen3-4B"
+trainer_mode = "full"
 label = "crafter-rl-from-base"
 [rollout]
@@ -61,6 +61,7 @@ seeds = [
 [training]
 num_epochs = 1
 iterations_per_epoch = 10
+max_turns = 10
 batch_size = 16
 group_size = 4
 gradient_accumulation_steps = 1

examples/warming_up_to_rl/configs/rl_from_ft.toml CHANGED Viewed

@@ -1,5 +1,7 @@
 # RL training starting from a finetuned model id (TOML-only model selection)
+type = "rl"
 [services]
 # Task app base URL used by the RL job for rollouts
 # task_url = "https://YOUR-TASK-APP.modal.run"

examples/warming_up_to_rl/export_trace_sft.py CHANGED Viewed

@@ -5,6 +5,7 @@ from __future__ import annotations
 import argparse
 import json
+import os
 import sqlite3
 import sys
 from collections import Counter, defaultdict
@@ -12,6 +13,13 @@ from collections.abc import Iterable
 from pathlib import Path
 from typing import Any
+from synth_ai._utils.prompts import ensure_required_args
+from synth_ai.tracing_v3.constants import (
+    TRACE_DB_BASENAME,
+    TRACE_DB_DIR,
+    canonical_trace_db_name,
+)
 Row = sqlite3.Row
@@ -489,55 +497,81 @@ def _validate_dataset(records: list[dict[str, Any]]) -> None:
 def _find_trace_database() -> Path | None:
-    """Automatically discover the trace database in common locations."""
+    """Automatically discover the most recent trace database in common locations."""
-    # Check for demo directory from state
-    try:
-        state_path = Path.home() / ".synth-ai" / "demo.json"
-        if state_path.exists():
-            import json
-            with state_path.open() as f:
-                data = json.load(f)
-                demo_dir = data.get("DEMO_DIR")
-                if demo_dir:
-                    candidate = Path(demo_dir) / "traces" / "v3" / "synth_ai.db"
-                    if candidate.exists():
-                        return candidate
-    except Exception:
-        pass
+    candidates: list[Path] = []
-    # Search upward from current directory
+    # Walk up parent directories from CWD
     cwd = Path.cwd()
     for parent in [cwd] + list(cwd.parents):
-        candidate = parent / "traces" / "v3" / "synth_ai.db"
-        if candidate.exists():
-            return candidate
-    # Check standard locations
-    standard_locations = [
-        Path("traces/v3/synth_ai.db"),
-        Path("../traces/v3/synth_ai.db"),
-        Path.home() / "synth-ai" / "traces" / "v3" / "synth_ai.db",
-    ]
-    for location in standard_locations:
+        candidates.append(parent / "traces" / "v3")
+    # Standard fallback locations
+    candidates.extend(
+        [
+            TRACE_DB_DIR,
+            Path("../traces"),
+            Path.home() / "synth-ai" / "traces" / "v3",
+        ]
+    )
+    found: list[Path] = []
+    for directory in candidates:
         try:
-            if location.exists():
-                return location.resolve()
+            if not directory.exists():
+                continue
+            for pattern in (
+                f"{TRACE_DB_BASENAME}_*.db",
+                canonical_trace_db_name(),
+            ):
+                for candidate in directory.glob(pattern):
+                    found.append(candidate.resolve())
         except Exception:
             continue
-    return None
+    if not found:
+        return None
+    found.sort(key=lambda p: p.stat().st_mtime, reverse=True)
+    return found[0]
+def _discover_local_trace_dbs(root: Path) -> list[Path]:
+    """Return trace DBs under *root* (recursively), newest first."""
+    candidates: set[Path] = set()
+    ignore_dirs = {".git", ".venv", "__pycache__", "node_modules", "dist", "build"}
+    target_exact = canonical_trace_db_name()
+    for dirpath, dirnames, filenames in os.walk(root):
+        dirnames[:] = [d for d in dirnames if d not in ignore_dirs]
+        for filename in filenames:
+            if filename == target_exact or (
+                filename.startswith(f"{TRACE_DB_BASENAME}_") and filename.endswith(".db")
+            ):
+                path = Path(dirpath) / filename
+                try:
+                    candidates.add(path.resolve())
+                except Exception:
+                    continue
+    return sorted(candidates, key=lambda p: p.stat().st_mtime, reverse=True)
 def main() -> None:
     parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument("--db", type=Path, default=None, help="Path to tracing_v3 SQLite DB")
     parser.add_argument(
-        "--output",
+        "--in",
+        dest="input_path",
+        type=Path,
+        default=None,
+        help="Path to tracing_v3 SQLite DB",
+    )
+    parser.add_argument(
+        "--out",
+        dest="output_path",
         type=Path,
-        required=False,
+        default=None,
         help="Destination JSONL path for the exported dataset",
     )
     parser.add_argument(
@@ -593,25 +627,109 @@ def main() -> None:
     )
     args = parser.parse_args()
-    # Auto-discover database if not specified
-    db_path = args.db
-    if db_path is None:
-        db_path = _find_trace_database()
-        if db_path:
-            print(f"Found trace database: {db_path}")
-        else:
-            print("\nTrace database configuration:")
-            db_input = input("Trace database path [traces/v3/synth_ai.db]: ").strip()
-            db_path = Path(db_input) if db_input else Path("traces/v3/synth_ai.db")
+    default_output_path = (Path.cwd() / "ft_data" / "crafter_sft.jsonl").resolve()
+    initial_path: Path | None = None
+    if args.input_path is not None:
+        initial_path = Path(args.input_path).expanduser().resolve()
+    else:
+        discovered = _find_trace_database()
+        if discovered is not None:
+            initial_path = discovered.expanduser().resolve()
+            args.input_path = initial_path
+    if args.output_path is None:
+        args.output_path = default_output_path
+    local_candidates = _discover_local_trace_dbs(Path.cwd())
+    if local_candidates:
+        print("\nDiscovered trace databases:")
+        for idx, path in enumerate(local_candidates, start=1):
+            marker = " <- most recent" if idx == 1 else ""
+            print(f"  {idx}) {path}{marker}")
+        print("  m) Enter path manually")
+        print("  0) Abort")
+        default_index = 1
+        if initial_path:
+            for idx, candidate in enumerate(local_candidates, start=1):
+                if candidate == initial_path:
+                    default_index = idx
+                    break
+        while True:
+            prompt = f"Select database [{default_index}]: "
+            choice = input(prompt).strip().lower()
+            if not choice:
+                args.input_path = local_candidates[default_index - 1]
+                break
+            if choice == "0":
+                raise SystemExit("Aborted by user.")
+            if choice in {"m", "manual"}:
+                manual = input("Enter trace database path: ").strip()
+                if manual:
+                    args.input_path = Path(manual)
+                    break
+                print("Path required; try again.")
+                continue
+            try:
+                idx = int(choice)
+            except ValueError:
+                print("Invalid selection; enter a number, 'm', or 0 to abort.")
+                continue
+            if 1 <= idx <= len(local_candidates):
+                args.input_path = local_candidates[idx - 1]
+                break
+            print(f"Select between 1 and {len(local_candidates)}, 'm', or 0.")
+    elif initial_path is not None:
+        args.input_path = initial_path
+    # If output wasn't overridden, derive it from the chosen DB name
+    if args.output_path == default_output_path and args.input_path:
+        db_name = Path(args.input_path).name  # e.g., task_app_traces_2025-10-23_13-23-02.db
+        timestamp = db_name[:-3] if db_name.endswith(".db") else db_name
+        if timestamp.startswith("task_app_traces_"):
+            timestamp = timestamp[len("task_app_traces_") :]
+        derived_name = f"sft_dataset_{timestamp}.jsonl"
+        args.output_path = (Path.cwd() / "ft_data" / derived_name).resolve()
+    input_default = (
+        Path(args.input_path).expanduser().resolve()
+        if args.input_path is not None
+        else (TRACE_DB_DIR / canonical_trace_db_name()).expanduser().resolve()
+    )
+    output_default = Path(args.output_path).expanduser().resolve() if args.output_path else default_output_path
+    args = ensure_required_args(
+        args,
+        {
+            "input_path": "Trace database path",
+            "output_path": "Output JSONL path",
+        },
+        coerce={
+            "input_path": lambda raw: Path(raw).expanduser().resolve(),
+            "output_path": lambda raw: Path(raw).expanduser().resolve(),
+        },
+        defaults={
+            "input_path": input_default,
+            "output_path": output_default,
+        },
+    )
+    db_path = Path(args.input_path).expanduser().resolve()
+    print(f"Trace database: {db_path}")
     if not db_path.exists():
-        print(f"Database not found: {db_path}", file=sys.stderr)
-        raise SystemExit(1)
+        discovered = _find_trace_database()
+        if discovered and discovered.exists():
+            discovered = discovered.resolve()
+            print(f"Discovered trace database: {discovered}")
+            db_path = discovered
+        else:
+            print(f"Database not found: {db_path}", file=sys.stderr)
+            raise SystemExit(1)
-    output_path = args.output
-    if not output_path:
-        output_path = Path("ft_data/crafter_traces.jsonl")
-        print(f"Output will be written to: {output_path.resolve()}")
+    output_path = Path(args.output_path).expanduser().resolve()
+    print(f"Output dataset: {output_path}")
     min_unique = args.min_unique
     if min_unique is None:
@@ -619,15 +737,11 @@ def main() -> None:
         print(f"Minimum unique achievements filter: {min_unique} (all traces)")
     # Override args with prompted values
-    args.db = db_path
-    args.output = output_path
+    args.input_path = db_path
+    args.output_path = output_path
     args.min_unique = min_unique
-    if not args.db.exists():
-        print(f"Database not found: {args.db}", file=sys.stderr)
-        raise SystemExit(1)
-    conn = connect(args.db)
+    conn = connect(args.input_path)
     try:
         (
             achievements_map,
@@ -708,11 +822,11 @@ def main() -> None:
             raise SystemExit(1)
         _validate_dataset(dataset)
-        write_jsonl(args.output, dataset)
+        write_jsonl(args.output_path, dataset)
         session_ids = {item.get("metadata", {}).get("session_id") for item in dataset}
         session_ids.discard(None)
         print(
-            f"Wrote {len(dataset)} examples from {len(session_ids)} session(s) -> {args.output.resolve()}",
+            f"Wrote {len(dataset)} examples from {len(session_ids)} session(s) -> {args.output_path.resolve()}",
             file=sys.stderr,
         )
     finally:

synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl