synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -2
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +6 -6
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +79 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +6 -6
- examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +33 -3
- examples/swe/task_app/grpo_swe_mini.py +4 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +155 -17
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +61 -69
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +21 -0
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +110 -1499
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/http.py +8 -22
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +4 -5
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +4 -2
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +294 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/tui.py +0 -57
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -906
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable, Sequence
|
|
4
|
+
|
|
5
|
+
from synth_ai.task import (
|
|
6
|
+
RolloutEnvSpec,
|
|
7
|
+
RolloutPolicySpec,
|
|
8
|
+
RolloutRecordConfig,
|
|
9
|
+
RolloutRequest,
|
|
10
|
+
RolloutSafetyConfig,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
# Policy and environment names used by build_rollout_request when the caller
# does not supply their own.
DEFAULT_POLICY_NAME = "crafter-react"
DEFAULT_ENV_NAME = "crafter"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def parse_ops(spec: str | None) -> list[str] | None:
    """Parse a comma-separated operations string into a list.

    Args:
        spec: Comma-separated op names (e.g. ``"agent, env"``), or ``None``.

    Returns:
        ``None`` when *spec* is ``None``; otherwise the op names with
        surrounding whitespace stripped and empty entries dropped.

    Raises:
        ValueError: If *spec* contains no non-empty entries.
    """
    if spec is None:
        return None
    # Strip each piece once, then drop the blanks left by stray commas.
    ops = [op for op in (piece.strip() for piece in spec.split(",")) if op]
    if not ops:
        raise ValueError("Ops must contain at least one entry")
    return ops
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def ops_from_pairs(max_llm_calls: int, *, cap: int | None = None) -> list[str]:
    """Return alternating agent/env ops for the requested number of LLM calls.

    Args:
        max_llm_calls: Requested number of agent/env pairs. Falsy values
            (``0``, ``None``) and values below 1 are raised to 1.
        cap: Optional upper bound on the number of pairs. It is applied after
            the floor of 1, so a cap of zero or less yields an empty list.

    Returns:
        ``["agent", "env"]`` repeated once per pair.
    """
    pairs = max(1, int(max_llm_calls or 0))
    if cap is not None:
        pairs = min(pairs, cap)
    # List repetition with a non-positive count gives [], matching range().
    return ["agent", "env"] * pairs
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def build_rollout_request(
    *,
    seed: int,
    run_id: str,
    model: str,
    inference_url: str,
    ops: Sequence[str] | Iterable[str],
    inference_api_key: str | None = None,
    extra_headers: dict[str, str] | None = None,
    trace_format: str = "compact",
    return_trace: bool = False,
    policy_name: str = DEFAULT_POLICY_NAME,
    env_name: str = DEFAULT_ENV_NAME,
    max_policy_tokens: int | None = None,
    record_trajectories: bool = True,
) -> RolloutRequest:
    """Construct a RolloutRequest shared across local rollout utilities.

    Args:
        seed: Seed passed to the environment spec.
        run_id: Identifier stored on the request.
        model: Model name placed in the policy config under ``"model"``.
        inference_url: Placed in the policy config under ``"inference_url"``.
        ops: Operations to run; materialised into a list.
        inference_api_key: When not ``None``, stored as ``"api_key"``.
        extra_headers: When non-empty, a copy is stored as ``"extra_headers"``.
        trace_format: Forwarded to the record config.
        return_trace: Forwarded to the record config.
        policy_name: Name for the policy spec.
        env_name: Name for the environment spec.
        max_policy_tokens: When not ``None``, stored under both
            ``"max_completion_tokens"`` and ``"max_tokens"``.
        record_trajectories: Forwarded to the record config as ``trajectories``.

    Returns:
        A ``RolloutRequest`` with an empty environment config,
        ``on_done="reset"`` and a default ``RolloutSafetyConfig``.
    """
    policy_config: dict[str, object] = {
        "model": model,
        "inference_url": inference_url,
    }
    if inference_api_key is not None:
        policy_config["api_key"] = inference_api_key
    if extra_headers:
        # Copy so later mutation of the caller's dict cannot leak into the request.
        policy_config["extra_headers"] = dict(extra_headers)
    if max_policy_tokens is not None:
        # The same limit is written under both key spellings.
        policy_config["max_completion_tokens"] = max_policy_tokens
        policy_config["max_tokens"] = max_policy_tokens

    record_cfg = RolloutRecordConfig(
        trajectories=record_trajectories,
        trace_format=trace_format,
        return_trace=return_trace,
    )
    return RolloutRequest(
        run_id=run_id,
        env=RolloutEnvSpec(env_name=env_name, seed=seed, config={}),
        policy=RolloutPolicySpec(policy_name=policy_name, config=policy_config),
        ops=list(ops),
        record=record_cfg,
        on_done="reset",
        safety=RolloutSafetyConfig(),
    )
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Public names exported by this module.
__all__ = [
    "DEFAULT_POLICY_NAME",
    "DEFAULT_ENV_NAME",
    "build_rollout_request",
    "ops_from_pairs",
    "parse_ops",
]
|
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
# Crafter Full Finetune (FFT) example on H100
|
|
2
2
|
# Adjust paths and hyperparameters to your environment before running.
|
|
3
3
|
|
|
4
|
+
[algorithm]
|
|
5
|
+
type = "offline"
|
|
6
|
+
method = "sft"
|
|
7
|
+
variety = "fft"
|
|
8
|
+
|
|
4
9
|
[job]
|
|
5
10
|
model = "Qwen/Qwen3-4B" # base model to finetune
|
|
6
11
|
# Path to your SFT JSONL dataset
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# Eval config for Synth Modal inference Qwen/Qwen3-4B via task app rollout
|
|
2
2
|
|
|
3
|
+
type = "rl"
|
|
4
|
+
|
|
3
5
|
# Required
|
|
4
6
|
task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
|
|
5
7
|
model = "Qwen/Qwen3-4B"
|
|
@@ -20,4 +22,3 @@ concurrency = 10
|
|
|
20
22
|
# fetch the vLLM base from the task app /info to use as inference_url.
|
|
21
23
|
# - Ensure the task app mounts the openai-api-key secret if your vLLM gateway
|
|
22
24
|
# requires a bearer token (OPENAI_API_KEY). Otherwise it will call unauthenticated.
|
|
23
|
-
|
|
@@ -5,7 +5,6 @@ type = "online"
|
|
|
5
5
|
method = "policy_gradient"
|
|
6
6
|
variety = "gspo"
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
[services]
|
|
10
9
|
task_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
|
|
11
10
|
|
|
@@ -39,6 +38,7 @@ health_interval_ms = 300
|
|
|
39
38
|
[model]
|
|
40
39
|
# Base model start
|
|
41
40
|
base = "Qwen/Qwen3-4B"
|
|
41
|
+
trainer_mode = "full"
|
|
42
42
|
label = "crafter-rl-from-base"
|
|
43
43
|
|
|
44
44
|
[rollout]
|
|
@@ -61,6 +61,7 @@ seeds = [
|
|
|
61
61
|
[training]
|
|
62
62
|
num_epochs = 1
|
|
63
63
|
iterations_per_epoch = 10
|
|
64
|
+
max_turns = 10
|
|
64
65
|
batch_size = 16
|
|
65
66
|
group_size = 4
|
|
66
67
|
gradient_accumulation_steps = 1
|
|
@@ -5,6 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
|
|
6
6
|
import argparse
|
|
7
7
|
import json
|
|
8
|
+
import os
|
|
8
9
|
import sqlite3
|
|
9
10
|
import sys
|
|
10
11
|
from collections import Counter, defaultdict
|
|
@@ -12,6 +13,13 @@ from collections.abc import Iterable
|
|
|
12
13
|
from pathlib import Path
|
|
13
14
|
from typing import Any
|
|
14
15
|
|
|
16
|
+
from synth_ai._utils.prompts import ensure_required_args
|
|
17
|
+
from synth_ai.tracing_v3.constants import (
|
|
18
|
+
TRACE_DB_BASENAME,
|
|
19
|
+
TRACE_DB_DIR,
|
|
20
|
+
canonical_trace_db_name,
|
|
21
|
+
)
|
|
22
|
+
|
|
15
23
|
Row = sqlite3.Row
|
|
16
24
|
|
|
17
25
|
|
|
@@ -489,55 +497,81 @@ def _validate_dataset(records: list[dict[str, Any]]) -> None:
|
|
|
489
497
|
|
|
490
498
|
|
|
491
499
|
def _find_trace_database() -> Path | None:
    """Automatically discover the most recent trace database in common locations.

    Looks in ``traces/v3`` under the current working directory and each of its
    ancestors, then in ``TRACE_DB_DIR``, ``../traces`` and
    ``~/synth-ai/traces/v3``. In each existing directory, files matching
    ``{TRACE_DB_BASENAME}_*.db`` or the canonical trace DB name are considered.

    Returns:
        The resolved path of the match with the newest modification time, or
        ``None`` when nothing is found.
    """
    cwd = Path.cwd()
    # Walk up parent directories from CWD
    candidates: list[Path] = [parent / "traces" / "v3" for parent in (cwd, *cwd.parents)]

    # Standard fallback locations
    candidates.extend(
        [
            TRACE_DB_DIR,
            Path("../traces"),
            Path.home() / "synth-ai" / "traces" / "v3",
        ]
    )

    patterns = (f"{TRACE_DB_BASENAME}_*.db", canonical_trace_db_name())
    newest: Path | None = None
    newest_mtime = float("-inf")
    for directory in candidates:
        try:
            if not directory.exists():
                continue
            matches = [match for pattern in patterns for match in directory.glob(pattern)]
        except Exception:
            # Best-effort discovery: an unreadable directory is skipped.
            continue

        for match in matches:
            try:
                resolved = match.resolve()
                # Stat under the guard: a dangling symlink or a file removed
                # after globbing must not abort the whole search.
                mtime = resolved.stat().st_mtime
            except (OSError, RuntimeError):
                continue
            # Strict comparison keeps the first-found file on mtime ties.
            if mtime > newest_mtime:
                newest, newest_mtime = resolved, mtime

    return newest
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
def _discover_local_trace_dbs(root: Path) -> list[Path]:
    """Return trace DBs under *root* (recursively), newest first.

    A file qualifies when its name equals the canonical trace DB name or
    matches ``{TRACE_DB_BASENAME}_*.db``. The directories ``.git``, ``.venv``,
    ``__pycache__``, ``node_modules``, ``dist`` and ``build`` are not entered.

    Args:
        root: Directory to search.

    Returns:
        Resolved, de-duplicated paths sorted by modification time, newest
        first. Files that cannot be resolved or stat'ed are omitted.
    """
    ignore_dirs = {".git", ".venv", "__pycache__", "node_modules", "dist", "build"}
    target_exact = canonical_trace_db_name()
    prefix = f"{TRACE_DB_BASENAME}_"

    # Resolved path -> mtime. Each file is stat'ed once, under a guard, so a
    # dangling symlink or a file deleted mid-walk cannot crash the final sort.
    mtimes: dict[Path, float] = {}
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk does not descend into ignored directories.
        dirnames[:] = [d for d in dirnames if d not in ignore_dirs]
        for filename in filenames:
            is_timestamped = filename.startswith(prefix) and filename.endswith(".db")
            if filename != target_exact and not is_timestamped:
                continue
            try:
                resolved = (Path(dirpath) / filename).resolve()
                mtimes[resolved] = resolved.stat().st_mtime
            except (OSError, RuntimeError):
                continue

    return sorted(mtimes, key=mtimes.__getitem__, reverse=True)
|
|
532
559
|
|
|
533
560
|
|
|
534
561
|
def main() -> None:
|
|
535
562
|
parser = argparse.ArgumentParser(description=__doc__)
|
|
536
|
-
parser.add_argument("--db", type=Path, default=None, help="Path to tracing_v3 SQLite DB")
|
|
537
563
|
parser.add_argument(
|
|
538
|
-
"--
|
|
564
|
+
"--in",
|
|
565
|
+
dest="input_path",
|
|
566
|
+
type=Path,
|
|
567
|
+
default=None,
|
|
568
|
+
help="Path to tracing_v3 SQLite DB",
|
|
569
|
+
)
|
|
570
|
+
parser.add_argument(
|
|
571
|
+
"--out",
|
|
572
|
+
dest="output_path",
|
|
539
573
|
type=Path,
|
|
540
|
-
|
|
574
|
+
default=None,
|
|
541
575
|
help="Destination JSONL path for the exported dataset",
|
|
542
576
|
)
|
|
543
577
|
parser.add_argument(
|
|
@@ -593,25 +627,109 @@ def main() -> None:
|
|
|
593
627
|
)
|
|
594
628
|
args = parser.parse_args()
|
|
595
629
|
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
630
|
+
default_output_path = (Path.cwd() / "ft_data" / "crafter_sft.jsonl").resolve()
|
|
631
|
+
|
|
632
|
+
initial_path: Path | None = None
|
|
633
|
+
if args.input_path is not None:
|
|
634
|
+
initial_path = Path(args.input_path).expanduser().resolve()
|
|
635
|
+
else:
|
|
636
|
+
discovered = _find_trace_database()
|
|
637
|
+
if discovered is not None:
|
|
638
|
+
initial_path = discovered.expanduser().resolve()
|
|
639
|
+
args.input_path = initial_path
|
|
640
|
+
|
|
641
|
+
if args.output_path is None:
|
|
642
|
+
args.output_path = default_output_path
|
|
643
|
+
|
|
644
|
+
local_candidates = _discover_local_trace_dbs(Path.cwd())
|
|
645
|
+
if local_candidates:
|
|
646
|
+
print("\nDiscovered trace databases:")
|
|
647
|
+
for idx, path in enumerate(local_candidates, start=1):
|
|
648
|
+
marker = " <- most recent" if idx == 1 else ""
|
|
649
|
+
print(f" {idx}) {path}{marker}")
|
|
650
|
+
print(" m) Enter path manually")
|
|
651
|
+
print(" 0) Abort")
|
|
652
|
+
|
|
653
|
+
default_index = 1
|
|
654
|
+
if initial_path:
|
|
655
|
+
for idx, candidate in enumerate(local_candidates, start=1):
|
|
656
|
+
if candidate == initial_path:
|
|
657
|
+
default_index = idx
|
|
658
|
+
break
|
|
606
659
|
|
|
660
|
+
while True:
|
|
661
|
+
prompt = f"Select database [{default_index}]: "
|
|
662
|
+
choice = input(prompt).strip().lower()
|
|
663
|
+
if not choice:
|
|
664
|
+
args.input_path = local_candidates[default_index - 1]
|
|
665
|
+
break
|
|
666
|
+
if choice == "0":
|
|
667
|
+
raise SystemExit("Aborted by user.")
|
|
668
|
+
if choice in {"m", "manual"}:
|
|
669
|
+
manual = input("Enter trace database path: ").strip()
|
|
670
|
+
if manual:
|
|
671
|
+
args.input_path = Path(manual)
|
|
672
|
+
break
|
|
673
|
+
print("Path required; try again.")
|
|
674
|
+
continue
|
|
675
|
+
try:
|
|
676
|
+
idx = int(choice)
|
|
677
|
+
except ValueError:
|
|
678
|
+
print("Invalid selection; enter a number, 'm', or 0 to abort.")
|
|
679
|
+
continue
|
|
680
|
+
if 1 <= idx <= len(local_candidates):
|
|
681
|
+
args.input_path = local_candidates[idx - 1]
|
|
682
|
+
break
|
|
683
|
+
print(f"Select between 1 and {len(local_candidates)}, 'm', or 0.")
|
|
684
|
+
elif initial_path is not None:
|
|
685
|
+
args.input_path = initial_path
|
|
686
|
+
|
|
687
|
+
# If output wasn't overridden, derive it from the chosen DB name
|
|
688
|
+
if args.output_path == default_output_path and args.input_path:
|
|
689
|
+
db_name = Path(args.input_path).name # e.g., task_app_traces_2025-10-23_13-23-02.db
|
|
690
|
+
timestamp = db_name[:-3] if db_name.endswith(".db") else db_name
|
|
691
|
+
if timestamp.startswith("task_app_traces_"):
|
|
692
|
+
timestamp = timestamp[len("task_app_traces_") :]
|
|
693
|
+
derived_name = f"sft_dataset_{timestamp}.jsonl"
|
|
694
|
+
args.output_path = (Path.cwd() / "ft_data" / derived_name).resolve()
|
|
695
|
+
|
|
696
|
+
input_default = (
|
|
697
|
+
Path(args.input_path).expanduser().resolve()
|
|
698
|
+
if args.input_path is not None
|
|
699
|
+
else (TRACE_DB_DIR / canonical_trace_db_name()).expanduser().resolve()
|
|
700
|
+
)
|
|
701
|
+
output_default = Path(args.output_path).expanduser().resolve() if args.output_path else default_output_path
|
|
702
|
+
|
|
703
|
+
args = ensure_required_args(
|
|
704
|
+
args,
|
|
705
|
+
{
|
|
706
|
+
"input_path": "Trace database path",
|
|
707
|
+
"output_path": "Output JSONL path",
|
|
708
|
+
},
|
|
709
|
+
coerce={
|
|
710
|
+
"input_path": lambda raw: Path(raw).expanduser().resolve(),
|
|
711
|
+
"output_path": lambda raw: Path(raw).expanduser().resolve(),
|
|
712
|
+
},
|
|
713
|
+
defaults={
|
|
714
|
+
"input_path": input_default,
|
|
715
|
+
"output_path": output_default,
|
|
716
|
+
},
|
|
717
|
+
)
|
|
718
|
+
|
|
719
|
+
db_path = Path(args.input_path).expanduser().resolve()
|
|
720
|
+
print(f"Trace database: {db_path}")
|
|
607
721
|
if not db_path.exists():
|
|
608
|
-
|
|
609
|
-
|
|
722
|
+
discovered = _find_trace_database()
|
|
723
|
+
if discovered and discovered.exists():
|
|
724
|
+
discovered = discovered.resolve()
|
|
725
|
+
print(f"Discovered trace database: {discovered}")
|
|
726
|
+
db_path = discovered
|
|
727
|
+
else:
|
|
728
|
+
print(f"Database not found: {db_path}", file=sys.stderr)
|
|
729
|
+
raise SystemExit(1)
|
|
610
730
|
|
|
611
|
-
output_path = args.
|
|
612
|
-
|
|
613
|
-
output_path = Path("ft_data/crafter_traces.jsonl")
|
|
614
|
-
print(f"Output will be written to: {output_path.resolve()}")
|
|
731
|
+
output_path = Path(args.output_path).expanduser().resolve()
|
|
732
|
+
print(f"Output dataset: {output_path}")
|
|
615
733
|
|
|
616
734
|
min_unique = args.min_unique
|
|
617
735
|
if min_unique is None:
|
|
@@ -619,15 +737,11 @@ def main() -> None:
|
|
|
619
737
|
print(f"Minimum unique achievements filter: {min_unique} (all traces)")
|
|
620
738
|
|
|
621
739
|
# Override args with prompted values
|
|
622
|
-
args.
|
|
623
|
-
args.
|
|
740
|
+
args.input_path = db_path
|
|
741
|
+
args.output_path = output_path
|
|
624
742
|
args.min_unique = min_unique
|
|
625
743
|
|
|
626
|
-
|
|
627
|
-
print(f"Database not found: {args.db}", file=sys.stderr)
|
|
628
|
-
raise SystemExit(1)
|
|
629
|
-
|
|
630
|
-
conn = connect(args.db)
|
|
744
|
+
conn = connect(args.input_path)
|
|
631
745
|
try:
|
|
632
746
|
(
|
|
633
747
|
achievements_map,
|
|
@@ -708,11 +822,11 @@ def main() -> None:
|
|
|
708
822
|
raise SystemExit(1)
|
|
709
823
|
|
|
710
824
|
_validate_dataset(dataset)
|
|
711
|
-
write_jsonl(args.
|
|
825
|
+
write_jsonl(args.output_path, dataset)
|
|
712
826
|
session_ids = {item.get("metadata", {}).get("session_id") for item in dataset}
|
|
713
827
|
session_ids.discard(None)
|
|
714
828
|
print(
|
|
715
|
-
f"Wrote {len(dataset)} examples from {len(session_ids)} session(s) -> {args.
|
|
829
|
+
f"Wrote {len(dataset)} examples from {len(session_ids)} session(s) -> {args.output_path.resolve()}",
|
|
716
830
|
file=sys.stderr,
|
|
717
831
|
)
|
|
718
832
|
finally:
|