synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -2
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +6 -6
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +79 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +6 -6
- examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +33 -3
- examples/swe/task_app/grpo_swe_mini.py +4 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +155 -17
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +61 -69
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +21 -0
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +110 -1499
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/http.py +8 -22
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +4 -5
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +4 -2
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +294 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/tui.py +0 -57
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -906
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
synth_ai/cli/traces.py
CHANGED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .core import register, train_command
|
|
4
|
+
from .errors import TrainCliError
|
|
5
|
+
from .validation import validate_train_environment
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"register",
|
|
9
|
+
"train_command",
|
|
10
|
+
"TrainCliError",
|
|
11
|
+
"validate_train_environment",
|
|
12
|
+
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
from synth_ai.api.train.cli import (
|
|
5
|
+
register as _register_with_cli,
|
|
6
|
+
)
|
|
7
|
+
from synth_ai.api.train.cli import (
|
|
8
|
+
train_command as _train_command,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = ["register", "train_command"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def register(cli: click.Group) -> None:
|
|
15
|
+
"""Attach the train command to the root CLI."""
|
|
16
|
+
_register_with_cli(cli)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def train_command(*args, **kwargs):
|
|
20
|
+
"""Entrypoint used by the train CLI command."""
|
|
21
|
+
return _train_command(*args, **kwargs)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, Tuple
|
|
6
|
+
|
|
7
|
+
from synth_ai.api.train.env_resolver import KeySpec, resolve_env
|
|
8
|
+
|
|
9
|
+
__all__ = ["validate_train_environment"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def validate_train_environment(
|
|
13
|
+
*,
|
|
14
|
+
config_path: Path | None,
|
|
15
|
+
explicit_env_paths: Iterable[str],
|
|
16
|
+
required_keys: list[KeySpec],
|
|
17
|
+
) -> Tuple[Path, Dict[str, str]]:
|
|
18
|
+
"""Validate and resolve environment secrets used by the train command."""
|
|
19
|
+
resolved_path, resolved_keys = resolve_env(
|
|
20
|
+
config_path=config_path,
|
|
21
|
+
explicit_env_paths=explicit_env_paths,
|
|
22
|
+
required_keys=required_keys,
|
|
23
|
+
)
|
|
24
|
+
return resolved_path, resolved_keys
|
synth_ai/cli/train.py
ADDED
synth_ai/cli/turso.py
CHANGED
synth_ai/cli/watch.py
CHANGED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Namespace for demo task apps (math, crafter, etc.)."""
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
|
|
5
|
+
# Ensure registry entries are loaded for CLI discovery.
|
|
6
|
+
with contextlib.suppress(Exception): # pragma: no cover - optional on downstream installs
|
|
7
|
+
from synth_ai.demos.math import task_app_entry # noqa: F401
|
|
8
|
+
|
|
9
|
+
with contextlib.suppress(Exception): # pragma: no cover - optional on downstream installs
|
|
10
|
+
from synth_ai.demos.crafter import grpo_crafter_task_app # noqa: F401
|
synth_ai/demos/core/__init__.py
CHANGED
|
@@ -1 +1,28 @@
|
|
|
1
|
-
|
|
1
|
+
"""
|
|
2
|
+
Compatibility layer exposing the legacy demo helpers.
|
|
3
|
+
|
|
4
|
+
Historically these utilities lived in ``synth_ai.demos.core`` as a module.
|
|
5
|
+
Upstream refactors moved the implementation under
|
|
6
|
+
``synth_ai.demos.demo_task_apps.core``. Several call sites (including the new
|
|
7
|
+
vision tests) still import the older path, so we re-export everything here.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from synth_ai.demos.demo_task_apps import core as _demo_core
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
name
|
|
16
|
+
for name in dir(_demo_core)
|
|
17
|
+
if not name.startswith("_")
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
globals().update({name: getattr(_demo_core, name) for name in __all__})
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def __getattr__(name: str):
|
|
24
|
+
if name in __all__:
|
|
25
|
+
value = getattr(_demo_core, name)
|
|
26
|
+
globals()[name] = value
|
|
27
|
+
return value
|
|
28
|
+
raise AttributeError(name)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Crafter demo task app
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# FFT job config for Qwen/Qwen3-4B on Crafter SFT dataset
|
|
2
|
+
|
|
3
|
+
type = "sft"
|
|
4
|
+
|
|
5
|
+
[algorithm]
|
|
6
|
+
type = "offline"
|
|
7
|
+
method = "supervised_finetune"
|
|
8
|
+
variety = "fft"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
[job]
|
|
12
|
+
model = "Qwen/Qwen3-4B"
|
|
13
|
+
data = "ft_data/crafter_sft.jsonl"
|
|
14
|
+
|
|
15
|
+
[compute]
|
|
16
|
+
# Adjust as needed for your quota
|
|
17
|
+
gpu_type = "H100"
|
|
18
|
+
gpu_count = 1
|
|
19
|
+
nodes = 1
|
|
20
|
+
|
|
21
|
+
[data]
|
|
22
|
+
# Optional topology metadata (left empty for now)
|
|
23
|
+
topology = {}
|
|
24
|
+
|
|
25
|
+
# Optional local validation dataset path (JSONL). If set, the client will upload
|
|
26
|
+
# this file and wire up validation so the frontend can display val.loss.
|
|
27
|
+
# validation_path = "../ft_data/crafter_validation.jsonl"
|
|
28
|
+
|
|
29
|
+
[training]
|
|
30
|
+
mode = "sft_offline"
|
|
31
|
+
use_qlora = false
|
|
32
|
+
|
|
33
|
+
# Validation settings to emit val.loss on the frontend
|
|
34
|
+
[training.validation]
|
|
35
|
+
enabled = true
|
|
36
|
+
evaluation_strategy = "steps"
|
|
37
|
+
eval_steps = 20
|
|
38
|
+
save_best_model_at_end = true
|
|
39
|
+
metric_for_best_model = "val.loss"
|
|
40
|
+
greater_is_better = false
|
|
41
|
+
|
|
42
|
+
[hyperparameters]
|
|
43
|
+
# Minimal safe defaults; backend can override
|
|
44
|
+
n_epochs = 1
|
|
45
|
+
batch_size = 1
|
|
46
|
+
gradient_accumulation_steps = 64
|
|
47
|
+
sequence_length = 4096
|
|
48
|
+
learning_rate = 5e-6
|
|
49
|
+
warmup_ratio = 0.03
|
|
50
|
+
train_kind = "fft"
|
|
51
|
+
|
|
52
|
+
# Optional parallelism block example
|
|
53
|
+
# [hyperparameters.parallelism]
|
|
54
|
+
# tensor_parallel_size = 1
|
|
55
|
+
# pipeline_parallel_size = 1
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Compatibility wrapper for the GRPO Crafter task app.
|
|
2
|
+
|
|
3
|
+
This module now delegates to the TaskAppConfig defined in the local example at
|
|
4
|
+
`examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
|
|
5
|
+
(running the file directly or targeting `fastapi_app` from external tooling).
|
|
6
|
+
Prefer using `uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import importlib.util
|
|
13
|
+
from contextlib import suppress
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from fastapi.exceptions import RequestValidationError
|
|
17
|
+
from fastapi.responses import JSONResponse
|
|
18
|
+
from starlette.requests import Request
|
|
19
|
+
from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, registry
|
|
20
|
+
from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
|
|
21
|
+
from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _load_build_config():
|
|
25
|
+
import synth_ai
|
|
26
|
+
|
|
27
|
+
synth_ai_path = Path(synth_ai.__file__).resolve().parent.parent
|
|
28
|
+
module_path = synth_ai_path / "examples" / "warming_up_to_rl" / "task_app" / "grpo_crafter.py"
|
|
29
|
+
|
|
30
|
+
if not module_path.exists():
|
|
31
|
+
raise ImportError(
|
|
32
|
+
f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
spec = importlib.util.spec_from_file_location(
|
|
36
|
+
"warming_up_to_rl.task_app.grpo_crafter", module_path
|
|
37
|
+
)
|
|
38
|
+
if spec is None or spec.loader is None:
|
|
39
|
+
raise ImportError(f"Could not load task app module at {module_path}")
|
|
40
|
+
module = importlib.util.module_from_spec(spec)
|
|
41
|
+
import sys
|
|
42
|
+
|
|
43
|
+
sys.modules.setdefault(spec.name, module)
|
|
44
|
+
|
|
45
|
+
from synth_ai.task import apps as task_apps
|
|
46
|
+
|
|
47
|
+
original_register = task_apps.registry.register
|
|
48
|
+
|
|
49
|
+
def _safe_register(entry):
|
|
50
|
+
with suppress(ValueError):
|
|
51
|
+
original_register(entry)
|
|
52
|
+
|
|
53
|
+
task_apps.registry.register = _safe_register
|
|
54
|
+
try:
|
|
55
|
+
spec.loader.exec_module(module)
|
|
56
|
+
finally:
|
|
57
|
+
task_apps.registry.register = original_register
|
|
58
|
+
return module.build_config
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
build_config = _load_build_config()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
APP_ID = "grpo-crafter"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _build_base_config() -> TaskAppConfig:
|
|
68
|
+
return build_config()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
try:
|
|
72
|
+
_REGISTERED_ENTRY = registry.get(APP_ID)
|
|
73
|
+
except Exception: # pragma: no cover - registry unavailable in some contexts
|
|
74
|
+
MODAL_DEPLOYMENT: ModalDeploymentConfig | None = None
|
|
75
|
+
else:
|
|
76
|
+
MODAL_DEPLOYMENT = _REGISTERED_ENTRY.modal
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def build_task_app_config() -> TaskAppConfig:
|
|
80
|
+
base = _build_base_config()
|
|
81
|
+
return base.clone()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def fastapi_app():
|
|
85
|
+
app = create_task_app(build_task_app_config())
|
|
86
|
+
|
|
87
|
+
filtered_routes = []
|
|
88
|
+
for route in app.router.routes:
|
|
89
|
+
path = getattr(route, "path", None)
|
|
90
|
+
methods = getattr(route, "methods", set()) or set()
|
|
91
|
+
if path in {"/health", "/health/rollout"} and "GET" in methods:
|
|
92
|
+
continue
|
|
93
|
+
filtered_routes.append(route)
|
|
94
|
+
app.router.routes = filtered_routes
|
|
95
|
+
|
|
96
|
+
def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
|
|
97
|
+
if not env_key:
|
|
98
|
+
return None
|
|
99
|
+
prefix = env_key[: max(1, len(env_key) // 2)]
|
|
100
|
+
print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
|
|
101
|
+
return prefix
|
|
102
|
+
|
|
103
|
+
@app.get("/health")
|
|
104
|
+
async def health(request: Request):
|
|
105
|
+
env_key = normalize_environment_api_key()
|
|
106
|
+
if not env_key:
|
|
107
|
+
return JSONResponse(
|
|
108
|
+
status_code=503,
|
|
109
|
+
content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
|
|
110
|
+
)
|
|
111
|
+
if not is_api_key_header_authorized(request):
|
|
112
|
+
prefix = _log_env_key_prefix("health", env_key)
|
|
113
|
+
content = {"status": "healthy", "authorized": False}
|
|
114
|
+
if prefix:
|
|
115
|
+
content["expected_api_key_prefix"] = prefix
|
|
116
|
+
return JSONResponse(status_code=200, content=content)
|
|
117
|
+
return {"status": "healthy", "authorized": True}
|
|
118
|
+
|
|
119
|
+
@app.get("/health/rollout")
|
|
120
|
+
async def health_rollout(request: Request):
|
|
121
|
+
env_key = normalize_environment_api_key()
|
|
122
|
+
if not env_key:
|
|
123
|
+
return JSONResponse(
|
|
124
|
+
status_code=503,
|
|
125
|
+
content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
|
|
126
|
+
)
|
|
127
|
+
if not is_api_key_header_authorized(request):
|
|
128
|
+
prefix = _log_env_key_prefix("health/rollout", env_key)
|
|
129
|
+
content = {"status": "healthy", "authorized": False}
|
|
130
|
+
if prefix:
|
|
131
|
+
content["expected_api_key_prefix"] = prefix
|
|
132
|
+
return JSONResponse(status_code=200, content=content)
|
|
133
|
+
return {"ok": True, "authorized": True}
|
|
134
|
+
|
|
135
|
+
@app.exception_handler(RequestValidationError)
|
|
136
|
+
async def _on_validation_error(request: Request, exc: RequestValidationError):
|
|
137
|
+
try:
|
|
138
|
+
hdr = request.headers
|
|
139
|
+
snapshot = {
|
|
140
|
+
"path": str(request.url.path),
|
|
141
|
+
"have_x_api_key": bool(hdr.get("x-api-key")),
|
|
142
|
+
"have_x_api_keys": bool(hdr.get("x-api-keys")),
|
|
143
|
+
"have_authorization": bool(hdr.get("authorization")),
|
|
144
|
+
"errors": exc.errors()[:5],
|
|
145
|
+
}
|
|
146
|
+
print("[422] validation", snapshot, flush=True)
|
|
147
|
+
except Exception:
|
|
148
|
+
pass
|
|
149
|
+
return JSONResponse(
|
|
150
|
+
status_code=422,
|
|
151
|
+
content={"status": "invalid", "detail": exc.errors()[:5]},
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
return app
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def register_demo_entry() -> None:
|
|
158
|
+
description = "Crafter demo task app"
|
|
159
|
+
entry = TaskAppEntry(
|
|
160
|
+
app_id="crafter-demo",
|
|
161
|
+
description=description,
|
|
162
|
+
config_factory=build_task_app_config,
|
|
163
|
+
aliases=("crafter",),
|
|
164
|
+
modal=MODAL_DEPLOYMENT,
|
|
165
|
+
)
|
|
166
|
+
with suppress(ValueError):
|
|
167
|
+
registry.register(entry)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
register_demo_entry()
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
if __name__ == "__main__":
|
|
174
|
+
parser = argparse.ArgumentParser(description="Run the Crafter task app locally")
|
|
175
|
+
parser.add_argument("--host", default="0.0.0.0")
|
|
176
|
+
parser.add_argument("--port", type=int, default=8001)
|
|
177
|
+
parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
|
|
178
|
+
args = parser.parse_args()
|
|
179
|
+
|
|
180
|
+
run_task_app(
|
|
181
|
+
build_task_app_config,
|
|
182
|
+
host=args.host,
|
|
183
|
+
port=args.port,
|
|
184
|
+
reload=args.reload,
|
|
185
|
+
)
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# RL training starting from base Qwen/Qwen3-4B (TOML-only model selection)
|
|
2
|
+
|
|
3
|
+
type = "rl"
|
|
4
|
+
|
|
5
|
+
[algorithm]
|
|
6
|
+
type = "online"
|
|
7
|
+
method = "policy_gradient"
|
|
8
|
+
variety = "gspo"
|
|
9
|
+
|
|
10
|
+
[services]
|
|
11
|
+
task_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
|
|
12
|
+
|
|
13
|
+
[compute]
|
|
14
|
+
# Cluster shape for RL pipeline
|
|
15
|
+
gpu_type = "H100"
|
|
16
|
+
gpu_count = 8
|
|
17
|
+
|
|
18
|
+
[topology]
|
|
19
|
+
# Split GPUs across vLLM, training, and reference
|
|
20
|
+
# Must sum to compute.gpu_count
|
|
21
|
+
type = "single_node_split"
|
|
22
|
+
gpus_for_vllm = 4
|
|
23
|
+
gpus_for_training = 3
|
|
24
|
+
gpus_for_ref = 1
|
|
25
|
+
tensor_parallel = 4
|
|
26
|
+
|
|
27
|
+
[vllm]
|
|
28
|
+
# Serving tensor parallel size
|
|
29
|
+
tensor_parallel_size = 4
|
|
30
|
+
max_model_len = 8192
|
|
31
|
+
|
|
32
|
+
[reference]
|
|
33
|
+
# Required by trainer/runtime; ensures dedicated/scoped scoring server config exists
|
|
34
|
+
placement = "dedicated"
|
|
35
|
+
port = 8002
|
|
36
|
+
tp = 1
|
|
37
|
+
health_max_wait_s = 180
|
|
38
|
+
health_interval_ms = 300
|
|
39
|
+
|
|
40
|
+
[model]
|
|
41
|
+
# Base model start
|
|
42
|
+
base = "Qwen/Qwen3-4B"
|
|
43
|
+
label = "crafter-rl-from-base"
|
|
44
|
+
|
|
45
|
+
[rollout]
|
|
46
|
+
max_turns = 10
|
|
47
|
+
episodes_per_batch = 64
|
|
48
|
+
policy_name = "crafter"
|
|
49
|
+
|
|
50
|
+
[evaluation]
|
|
51
|
+
# Run baseline evaluation over the 10 seeds listed below every 10 training iterations
|
|
52
|
+
instances = 10
|
|
53
|
+
every_n_iters = 10
|
|
54
|
+
seeds = [
|
|
55
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
[training]
|
|
59
|
+
log_interval = 1
|
|
60
|
+
weight_sync_interval = 1
|
|
61
|
+
# Additional RL hyperparameters can go here
|
|
62
|
+
|
|
63
|
+
# Stepwise rewards (Crafter decision-level)
|
|
64
|
+
step_rewards_enabled = true
|
|
65
|
+
step_rewards_mode = "decision_stepwise" # "off" | "decision_stepwise" | "env_sparse"
|
|
66
|
+
step_rewards_beta = 0.0
|
|
67
|
+
step_rewards_indicator_lambda = 1.0
|
|
68
|
+
# Optional selector for decision scalar: "unique" | "absolute" (default unique)
|
|
69
|
+
event_rewards_kind = "unique"
|
|
70
|
+
|
|
71
|
+
[training.weight_sync]
|
|
72
|
+
enable = true
|
|
73
|
+
targets = ["policy"]
|
|
74
|
+
weight_sync_interval = 1
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable, Iterator
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
REPO_ROOT = Path(__file__).resolve().parents[2]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(slots=True)
|
|
11
|
+
class CopySpec:
|
|
12
|
+
source: str
|
|
13
|
+
destination: str
|
|
14
|
+
make_executable: bool = False
|
|
15
|
+
|
|
16
|
+
def absolute_source(self) -> Path:
|
|
17
|
+
return (REPO_ROOT / self.source).resolve()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(slots=True)
|
|
21
|
+
class DemoTemplate:
|
|
22
|
+
template_id: str
|
|
23
|
+
name: str
|
|
24
|
+
description: str
|
|
25
|
+
copy_specs: tuple[CopySpec, ...]
|
|
26
|
+
default_subdir: str | None = None
|
|
27
|
+
env_lines: tuple[str, ...] = ()
|
|
28
|
+
config_source: str | None = None
|
|
29
|
+
config_destination: str = "demo_config.toml"
|
|
30
|
+
requires_modal: bool = False
|
|
31
|
+
post_copy: Callable[[Path], None] | None = None
|
|
32
|
+
default_secret_name: str | None = None
|
|
33
|
+
|
|
34
|
+
def iter_copy_specs(self) -> Iterator[CopySpec]:
|
|
35
|
+
yield from self.copy_specs
|
|
36
|
+
|
|
37
|
+
def config_source_path(self) -> Path | None:
|
|
38
|
+
if not self.config_source:
|
|
39
|
+
return None
|
|
40
|
+
return (REPO_ROOT / self.config_source).resolve()
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _postprocess_math_modal(root: Path) -> None:
    """Switch a copied math task app to its demo names and fix permissions.

    Two independent steps are performed; each is skipped when its file is
    missing:

    * ``root/task_app.py``: the Modal app name and the default secret name
      are replaced by their ``-demo`` variants. The file is rewritten only
      when a replacement actually changed the text.
    * ``root/deploy_task_app.sh``: the executable bits for user, group and
      others are added to the existing mode.

    Args:
        root: Directory that holds the copied template files.
    """
    import stat

    replacements = (
        ('App("hendrycks-math-task-app")', 'App("hendrycks-math-task-app-demo")'),
        (
            'DEFAULT_TASK_APP_SECRET_NAME = "hendrycks-math-task-app-secret"',
            'DEFAULT_TASK_APP_SECRET_NAME = "hendrycks-math-task-app-demo-secret"',
        ),
    )

    task_path = (root / "task_app.py").resolve()
    if task_path.exists():
        original = task_path.read_text(encoding="utf-8")
        updated = original
        for old, new in replacements:
            updated = updated.replace(old, new)
        if updated != original:
            task_path.write_text(updated, encoding="utf-8")

    # Not gated on task_app.py: a missing task file must not prevent the
    # deploy script from becoming executable.
    deploy_script = root / "deploy_task_app.sh"
    if deploy_script.exists():
        mode = deploy_script.stat().st_mode
        deploy_script.chmod(mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Registry of the built-in demo templates, keyed by template id.
DEMO_TEMPLATES: dict[str, DemoTemplate] = {
    # Math demo; flagged as requiring Modal.
    "math-modal": DemoTemplate(
        template_id="math-modal",
        name="Math Single-Step (Modal deployment)",
        description="Packaged modal task app matching examples/rl math environment.",
        default_subdir="math_demo",
        requires_modal=True,
        default_secret_name="hendrycks-math-task-app-demo-secret",
        config_source="synth_ai/demos/math/config.toml",
        post_copy=lambda root: _postprocess_math_modal(root),
        copy_specs=(
            CopySpec(source="synth_ai/demos/math/modal_task_app.py", destination="task_app.py"),
            CopySpec(source="synth_ai/demos/math/README.md", destination="README.md"),
            CopySpec(
                source="synth_ai/demos/math/deploy_task_app.sh",
                destination="deploy_task_app.sh",
                make_executable=True,
            ),
            CopySpec(
                source="synth_ai/demos/math/config.toml",
                destination="configs/rl_from_base_qwen17.toml",
            ),
        ),
        env_lines=(
            "# Required for task app auth to environment service",
            "ENVIRONMENT_API_KEY=",
            "",
            "# Optional: for CLI job submission and proxying OpenAI models",
            "SYNTH_API_KEY=",
            "OPENAI_API_KEY=",
            "",
            "# Optional: set to 'prod' to use production names",
            "ENVIRONMENT=",
        ),
    ),
    # Crafter demo; combines packaged demo files with example scripts.
    "crafter-local": DemoTemplate(
        template_id="crafter-local",
        name="Crafter GRPO (local FastAPI)",
        description="Lightweight wrapper around examples/warming_up_to_rl/task_app/grpo_crafter for local experimentation.",
        default_subdir="crafter_demo",
        default_secret_name="grpo-crafter-demo-secret",
        copy_specs=(
            CopySpec(
                source="synth_ai/demos/crafter/grpo_crafter_task_app.py",
                destination="task_app.py",
            ),
            CopySpec(source="synth_ai/demos/crafter/README.md", destination="README.md"),
            CopySpec(
                source="synth_ai/demos/crafter/configs/rl_from_base_qwen4b.toml",
                destination="configs/rl_from_base_qwen4b.toml",
            ),
            CopySpec(
                source="synth_ai/demos/crafter/configs/crafter_fft_4b.toml",
                destination="configs/crafter_fft_4b.toml",
            ),
            CopySpec(
                source="examples/warming_up_to_rl/task_app/grpo_crafter.py",
                destination="grpo_crafter.py",
            ),
            CopySpec(
                source="examples/warming_up_to_rl/task_app/synth_envs_hosted",
                destination="synth_envs_hosted",
            ),
            CopySpec(
                source="examples/warming_up_to_rl/run_local_rollout.py",
                destination="run_local_rollout.py",
            ),
            CopySpec(
                source="examples/warming_up_to_rl/run_local_rollout_traced.py",
                destination="run_local_rollout_traced.py",
            ),
            CopySpec(source="examples/warming_up_to_rl/shared.py", destination="shared.py"),
            CopySpec(
                source="examples/warming_up_to_rl/export_trace_sft.py",
                destination="export_trace_sft.py",
            ),
            CopySpec(
                source="examples/warming_up_to_rl/run_fft_and_save.py",
                destination="run_fft_and_save.py",
            ),
            CopySpec(
                source="examples/warming_up_to_rl/run_local_rollout_modal.py",
                destination="run_local_rollout_modal.py",
            ),
        ),
        env_lines=(
            "ENVIRONMENT_API_KEY=",
            "SYNTH_API_KEY=",
            "",
            "# Optional: URL for existing Crafter task app",
            "TASK_APP_BASE_URL=",
        ),
    ),
}
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def list_demo_templates() -> tuple[DemoTemplate, ...]:
    """Return all registered demo templates as a tuple, in registry order."""
    return (*DEMO_TEMPLATES.values(),)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def get_demo_template(template_id: str) -> DemoTemplate | None:
    """Look up a demo template by id; return ``None`` for an unknown id."""
    try:
        return DEMO_TEMPLATES[template_id]
    except KeyError:
        return None
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
This module now delegates to the TaskAppConfig defined in the local example at
|
|
4
4
|
`examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
|
|
5
5
|
(running the file directly or targeting `fastapi_app` from external tooling).
|
|
6
|
-
Prefer using `uvx synth-ai
|
|
6
|
+
Prefer using `uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
from __future__ import annotations
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package namespace for Math demo task app
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Minimal helpers for the math task app.

This module provides a local fallback for install_problem_bank_into_shared so
the modal task app can import it without requiring an external math_rl package.
"""

# The docstring must be the first statement to populate ``__doc__``; the
# future import is still valid directly after it.
from __future__ import annotations


def install_problem_bank_into_shared() -> None:
    """No-op placeholder for installing the Hendrycks MATH problem bank.

    In production deployments, this can download or unpack the problem bank
    into a shared directory. For the demo scaffold, it is a no-op.

    Returns:
        Always ``None``.
    """
    return None
|